rltk3 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/AUTHORS +1 -0
- data/LICENSE +27 -0
- data/README.md +852 -0
- data/Rakefile +197 -0
- data/lib/rltk/ast.rb +573 -0
- data/lib/rltk/cfg.rb +683 -0
- data/lib/rltk/cg/basic_block.rb +157 -0
- data/lib/rltk/cg/bindings.rb +151 -0
- data/lib/rltk/cg/builder.rb +1127 -0
- data/lib/rltk/cg/context.rb +48 -0
- data/lib/rltk/cg/contractor.rb +51 -0
- data/lib/rltk/cg/execution_engine.rb +194 -0
- data/lib/rltk/cg/function.rb +237 -0
- data/lib/rltk/cg/generated_bindings.rb +8118 -0
- data/lib/rltk/cg/generic_value.rb +95 -0
- data/lib/rltk/cg/instruction.rb +519 -0
- data/lib/rltk/cg/llvm.rb +150 -0
- data/lib/rltk/cg/memory_buffer.rb +75 -0
- data/lib/rltk/cg/module.rb +451 -0
- data/lib/rltk/cg/pass_manager.rb +252 -0
- data/lib/rltk/cg/support.rb +29 -0
- data/lib/rltk/cg/target.rb +230 -0
- data/lib/rltk/cg/triple.rb +58 -0
- data/lib/rltk/cg/type.rb +554 -0
- data/lib/rltk/cg/value.rb +1272 -0
- data/lib/rltk/cg.rb +32 -0
- data/lib/rltk/lexer.rb +372 -0
- data/lib/rltk/lexers/calculator.rb +44 -0
- data/lib/rltk/lexers/ebnf.rb +38 -0
- data/lib/rltk/parser.rb +1702 -0
- data/lib/rltk/parsers/infix_calc.rb +43 -0
- data/lib/rltk/parsers/postfix_calc.rb +34 -0
- data/lib/rltk/parsers/prefix_calc.rb +34 -0
- data/lib/rltk/token.rb +90 -0
- data/lib/rltk/version.rb +11 -0
- data/lib/rltk.rb +16 -0
- data/test/cg/tc_basic_block.rb +83 -0
- data/test/cg/tc_control_flow.rb +191 -0
- data/test/cg/tc_function.rb +54 -0
- data/test/cg/tc_generic_value.rb +33 -0
- data/test/cg/tc_instruction.rb +256 -0
- data/test/cg/tc_llvm.rb +25 -0
- data/test/cg/tc_math.rb +88 -0
- data/test/cg/tc_module.rb +89 -0
- data/test/cg/tc_transforms.rb +68 -0
- data/test/cg/tc_type.rb +69 -0
- data/test/cg/tc_value.rb +151 -0
- data/test/cg/ts_cg.rb +23 -0
- data/test/tc_ast.rb +332 -0
- data/test/tc_cfg.rb +164 -0
- data/test/tc_lexer.rb +216 -0
- data/test/tc_parser.rb +711 -0
- data/test/tc_token.rb +34 -0
- data/test/ts_rltk.rb +47 -0
- metadata +317 -0
data/lib/rltk/parser.rb
ADDED
@@ -0,0 +1,1702 @@
|
|
1
|
+
# Author: Chris Wailes <chris.wailes@gmail.com>
|
2
|
+
# Project: Ruby Language Toolkit
|
3
|
+
# Date: 2011/01/19
|
4
|
+
# Description: This file contains the base class for parsers that use RLTK.
|
5
|
+
|
6
|
+
############
|
7
|
+
# Requires #
|
8
|
+
############
|
9
|
+
|
10
|
+
# Ruby Language Toolkit
|
11
|
+
require 'rltk/cfg'
|
12
|
+
|
13
|
+
#######################
|
14
|
+
# Classes and Modules #
|
15
|
+
#######################
|
16
|
+
|
17
|
+
# The RLTK root module
|
18
|
+
module RLTK
|
19
|
+
# A BadToken error indicates that a token was observed in the input stream
|
20
|
+
# that wasn't used in the grammar's definition.
|
21
|
+
class BadToken < StandardError
  # Remember the offending token so it can be reported by #to_s.
  #
  # @param [Object] token Token that is not part of the grammar's definition.
  def initialize(token)
    @token = token
  end

  # @return [String] String representation of the error.
  def to_s
    offender = @token.inspect
    "Unexpected token: #{offender}. Token not present in grammar definition."
  end
end
|
31
|
+
|
32
|
+
# A NotInLanguage error is raised whenever there is no valid parse tree
|
33
|
+
# for a given token stream. In other words, the input string is not in the
|
34
|
+
# defined language.
|
35
|
+
class NotInLanguage < StandardError

  class << self
    # @return [Integer, :all] Context length used when none is passed to
    #   the constructor. Falls back to 100 until a value has been assigned.
    def default_context_length
      @default_context_length || 100
    end

    # @param [Integer, :all] v New default context length.
    def default_context_length=(v)
      @default_context_length = v
    end
  end

  # @return [Array<Token>] List of tokens that have been successfully parsed
  attr_reader :seen

  # @return [Token] Token that caused the parser to stop
  attr_reader :current

  # @return [Array<Token>] List of tokens that have yet to be seen
  attr_reader :remaining

  # @param [Array<Token>]  seen            Tokens that have been successfully parsed
  # @param [Token]         current         Token that caused the parser to stop
  # @param [Array<Token>]  remaining       Tokens that have yet to be seen
  # @param [Integer, :all] context_length  How many seen/remaining tokens #to_s reports
  def initialize(seen, current, remaining, context_length = self.class.default_context_length)
    @seen = seen
    @current = current
    @remaining = remaining
    @context_length = context_length
  end

  # Builds the error message, limiting the seen/remaining token lists to
  # at most the configured context length (or showing everything for :all).
  #
  # @return [String] String representation of the error.
  def to_s
    if @context_length == :all
      seen = @seen
      remaining = @remaining
    else
      # Array#last/#first cope with lists shorter than the requested
      # length. The previous slice (@seen[-n..-1]) evaluated to nil in
      # that case, which silently dropped the "Seen" tokens, and
      # @remaining[0..n] reported n + 1 tokens instead of n.
      count = [@context_length, 0].max
      seen = @seen.last(count)
      remaining = @remaining.first(count)
    end

    "String not in language. Token info:\n\tSeen: #{seen}\n\tCurrent: #{@current}\n\tRemaining: #{remaining}"
  end
end
|
73
|
+
|
74
|
+
# An error of this type is raised when the parser encountered an error that
|
75
|
+
# was handled by an error production.
|
76
|
+
class HandledError < StandardError

  # The errors as reported by the parser.
  #
  # @return [Array<Object>]
  attr_reader :errors

  # The result that would have been returned by the call to *parse*.
  #
  # @return [Object]
  attr_reader :result

  # Instantiate a new HandledError object with *errors*.
  #
  # @param [Array<Object>]  errors  Errors added to the parsing environment by calls to {Parser::Environment#error}.
  # @param [Object]         result  Object resulting from parsing Tokens before the error occurred.
  def initialize(errors, result)
    @errors, @result = errors, result
  end
end
|
95
|
+
|
96
|
+
# Used for exceptions that occur during parser construction.
|
97
|
+
# Derives directly from Exception, so a bare `rescue` (which only handles
# StandardError) will not intercept it.
class ParserConstructionException < Exception
end
|
98
|
+
|
99
|
+
# Used for runtime exceptions that are the parser's fault. These should
|
100
|
+
# never be observed in the wild.
|
101
|
+
# Derives directly from Exception, so a bare `rescue` (which only handles
# StandardError) will not intercept it.
class InternalParserException < Exception
end
|
102
|
+
|
103
|
+
# Used to indicate that a parser is empty or hasn't been finalized.
|
104
|
+
class UselessParserException < Exception
  # Sets the (fixed) error message for this exception.
  def initialize
    super('Parser has not been finalized.')
  end
end
|
110
|
+
|
111
|
+
# The Parser class may be sub-classed to produce new parsers. These
|
112
|
+
# parsers have a lot of features, and are described in the main
|
113
|
+
# documentation.
|
114
|
+
class Parser
|
115
|
+
# @return [Environment] Environment used by the instantiated parser.
|
116
|
+
attr_reader :env
|
117
|
+
|
118
|
+
#################
|
119
|
+
# Class Methods #
|
120
|
+
#################
|
121
|
+
|
122
|
+
class << self
|
123
|
+
# The overridden new prevents un-finalized parsers from being
|
124
|
+
# instantiated.
|
125
|
+
def new(*args)
  # @symbols stays nil until finalize has run, so a nil value means the
  # parser cannot be used yet.
  raise UselessParserException unless @symbols

  super(*args)
end
|
132
|
+
|
133
|
+
# Installs instance class variables into a class.
|
134
|
+
#
|
135
|
+
# @return [void]
|
136
|
+
def install_icvars
  # Bookkeeping for the production/precedence currently being defined.
  @curr_lhs = nil
  @curr_prec = nil

  # Conflicts are recorded per state ID; unknown IDs start with an empty list.
  @conflicts = Hash.new { |table, state_id| table[state_id] = [] }
  @grammar = CFG.new
  @grammar_prime = nil

  @lh_sides = {}
  @procs = []
  @states = []

  # Remains nil until the parser has been finalized.
  @symbols = nil

  # Variables for dealing with precedence.
  @prec_counts = { :left => 0, :right => 0, :non => 0 }
  @production_precs = []
  @token_precs = {}
  @token_hooks = Hash.new { |table, token| table[token] = [] }

  # Set the default argument handling policy. Valid values
  # are :array and :splat.
  @default_arg_type = :splat

  # Register an action (plus the length of the right-hand side) for every
  # production reported through the grammar's callback.
  @grammar.callback do |type, which, production, sels = []|
    action =
      case type
      when :optional
        case which
        when :empty then ProdProc.new { || nil }
        else ProdProc.new { |value| value }
        end

      when :elp
        case which
        when :empty then ProdProc.new { || [] }
        else ProdProc.new { |list| list }
        end

      when :nelp
        case which
        when :single
          ProdProc.new { |element| [element] }

        when :multiple
          # The first symbol is the list built so far; whatever follows is
          # appended to it (as one value, or as an array of several).
          ProdProc.new(:splat, sels) do |*syms|
            tail = syms[1..-1]
            syms.first << (tail.length == 1 ? tail.first : tail)
          end

        else
          ProdProc.new { |*elements| elements.length == 1 ? elements.first : elements }
        end
      end

    @procs[production.id] = [action, production.rhs.length]

    @production_precs[production.id] = production.last_terminal
  end
end
|
196
|
+
|
197
|
+
# Called when the Parser class is sub-classed; it installs
|
198
|
+
# necessary instance class variables.
|
199
|
+
#
|
200
|
+
# @return [void]
|
201
|
+
def inherited(klass)
  # Each subclass gets its own freshly initialised set of class-level
  # instance variables.
  klass.install_icvars
end
|
204
|
+
|
205
|
+
# If *state* (or its equivalent) is not in the state list it is
|
206
|
+
# added and its ID is returned. If there is already a state
|
207
|
+
# with the same items as *state* in the state list its ID is
|
208
|
+
# returned and *state* is discarded.
|
209
|
+
#
|
210
|
+
# @param [State] state State to add to the parser.
|
211
|
+
#
|
212
|
+
# @return [Integer] The ID of the state.
|
213
|
+
def add_state(state)
  # An equal state is already registered: reuse its position as the ID.
  existing = @states.index(state)
  return existing if existing

  # Otherwise the new state's ID is the slot it is about to occupy.
  new_id = @states.length
  state.id = new_id
  @states.push(state)

  new_id
end
|
224
|
+
|
225
|
+
# Build a hash with the default options for Parser.finalize
|
226
|
+
# and then update it with the values from *opts*.
|
227
|
+
#
|
228
|
+
# @param [Hash{Symbol => Object}] opts Hash containing options for finalize.
|
229
|
+
#
|
230
|
+
# @return [Hash{Symbol => Object}]
|
231
|
+
def build_finalize_opts(opts)
  # Start from the defaults and layer the caller's values on top. The
  # caller's hash itself is left untouched: previously :explain was
  # overwritten in place, which raised on frozen hashes and left an IO
  # object (possibly closed by then) in a hash the caller might reuse.
  full = {
    explain:    false,
    lookahead:  true,
    precedence: true,
    use:        false
  }.update(opts)

  # Convert the :explain option into an IO object (or false).
  full[:explain] = get_io(full[:explain])

  full
end
|
241
|
+
private :build_finalize_opts
|
242
|
+
|
243
|
+
# Build a hash with the default options for Parser.parse and
|
244
|
+
# then update it with the values from *opts*.
|
245
|
+
#
|
246
|
+
# @param [Hash{Symbol => Object}] opts Hash containing options for parse.
|
247
|
+
#
|
248
|
+
# @return [Hash{Symbol => Object}]
|
249
|
+
def build_parse_opts(opts)
  # Start from the defaults and layer the caller's values on top. The
  # caller's hash itself is left untouched: previously :parse_tree and
  # :verbose were overwritten in place, which raised on frozen hashes and
  # left IO objects in a hash the caller might pass to a later parse.
  full = {
    accept:     :first,
    env:        self::Environment.new,
    parse_tree: false,
    verbose:    false
  }.update(opts)

  # Convert the output options into IO objects (or false).
  full[:parse_tree] = get_io(full[:parse_tree])
  full[:verbose]    = get_io(full[:verbose])

  full
end
|
260
|
+
private :build_parse_opts
|
261
|
+
|
262
|
+
# This method is used to (surprise) check the sanity of the
|
263
|
+
# constructed parser. It checks to make sure all non-terminals
|
264
|
+
# used in the grammar definition appear on the left-hand side of
|
265
|
+
# one or more productions, and that none of the parser's states
|
266
|
+
# have invalid actions. If a problem is encountered a
|
267
|
+
# ParserConstructionException is raised.
|
268
|
+
#
|
269
|
+
# @return [void]
|
270
|
+
def check_sanity
  # Every non-terminal has to be the left-hand side of some production.
  @grammar.nonterms.each do |sym|
    next if @lh_sides.values.include?(sym)

    raise ParserConstructionException, "Non-terminal #{sym} does not appear on the left-hand side of any production."
  end

  # Check the actions in each state.
  each_state do |state|
    state.actions.each do |sym, actions|
      unless CFG::is_terminal?(sym)
        # Non-terminals: at most one action, and it has to be a GoTo.
        if actions.length > 1
          raise ParserConstructionException, "State #{state.id} has multiple GoTo actions for non-terminal #{sym}."

        elsif actions.length == 1 and not actions.first.is_a?(GoTo)
          raise ParserConstructionException, "State #{state.id} has non-GoTo action for non-terminal #{sym}."
        end

        next
      end

      # Terminals: Accept is only valid on :EOS; everything else has to be
      # one of the known action types.
      actions.each do |action|
        if action.is_a?(Accept)
          if sym != :EOS
            raise ParserConstructionException, "Accept action found for terminal #{sym} in state #{state.id}."
          end

        elsif [GoTo, Reduce, Shift].none? { |kind| action.is_a?(kind) }
          raise ParserConstructionException,
            "Object of type #{action.class} found in actions for terminal #{sym} in state #{state.id}."
        end
      end

      # Record (rather than reject) any conflict on this terminal.
      conflict = state.conflict_on?(sym)
      self.inform_conflict(state.id, conflict, sym) if conflict
    end
  end
end
|
313
|
+
|
314
|
+
# This method checks to see if the parser would be in parse state
|
315
|
+
# *dest* after starting in state *start* and reading *symbols*.
|
316
|
+
#
|
317
|
+
# @param [State] start State in which the parser starts (its #id is used to look up the state).
|
318
|
+
# @param [State] dest State the parser is expected to end up in (compared by #id).
|
319
|
+
# @param [Array<Symbol>] symbols Grammar symbols.
|
320
|
+
#
|
321
|
+
# @return [Boolean] If the destination symbol is reachable from the start symbol after reading *symbols*.
|
322
|
+
def check_reachability(start, dest, symbols)
  # NOTE(review): start and dest are treated as state objects here (their
  # #id is read), not as plain symbols.
  cursor = start

  symbols.each do |sym|
    candidates = @states[cursor.id].on?(sym)
    candidates = candidates.select { |action| action.is_a?(Shift) } if CFG::is_terminal?(sym)

    # Nothing to follow on this symbol: the path is broken.
    return false if candidates.empty?

    # There can only be one Shift action for terminals and one GoTo
    # action for non-terminals, so the first action is the only one.
    cursor = @states[candidates.first.id]
  end

  cursor.id == dest.id
end
|
344
|
+
|
345
|
+
# Declares a new clause inside of a production. The right-hand
|
346
|
+
# side is specified by *expression* and the precedence of this
|
347
|
+
# production can be changed by setting the *precedence* argument
|
348
|
+
# to some terminal symbol.
|
349
|
+
#
|
350
|
+
# @param [String, Symbol] expression Right-hand side of a production.
|
351
|
+
# @param [Symbol] precedence Symbol representing the precedence of this production.
|
352
|
+
# @param [:array, :splat] arg_type Method to use when passing arguments to the action.
|
353
|
+
# @param [Proc] action Action to be taken when the production is reduced.
|
354
|
+
#
|
355
|
+
# @return [void]
|
356
|
+
def clause(expression, precedence = nil, arg_type = @default_arg_type, &action)
  # An explicit precedence wins; otherwise use the one currently in effect.
  precedence = @curr_prec unless precedence

  production, selections = @grammar.clause(expression)

  # With :splat the action receives one argument per selected symbol (or
  # per right-hand-side symbol when nothing is selected), so its arity
  # has to match exactly.
  wanted = selections.empty? ? production.rhs.length : selections.length
  if arg_type == :splat and action.arity != wanted
    raise ParserConstructionException,
      "Incorrect number of action parameters. Expected #{wanted} but got #{action.arity}. " \
      'Action arity must match the number of terminals and non-terminals in the clause.'
  end

  # Add the action to our proc list.
  wrapped = ProdProc.new(arg_type, selections, &action)
  @procs[production.id] = [wrapped, production.rhs.length]

  # If no precedence is specified use the precedence of the
  # last terminal in the production.
  @production_precs[production.id] = precedence || production.last_terminal
end
|
379
|
+
alias :c :clause
|
380
|
+
|
381
|
+
# Removes resources that were needed to generate the parser but
|
382
|
+
# aren't needed when actually parsing input.
|
383
|
+
#
|
384
|
+
# @return [void]
|
385
|
+
def clean
  # We've told the developer about conflicts by now.
  @conflicts = nil

  # Drop the grammar and the grammar'.
  @grammar = nil
  @grammar_prime = nil

  # Drop precedence and bookkeeping information. The bookkeeping
  # variables are named @curr_lhs / @curr_prec where they are installed
  # and read; this method used to nil out @cur_lhs / @cur_prec instead,
  # which left the real ones untouched.
  @curr_lhs = nil
  @curr_prec = nil

  @prec_counts = nil
  @production_precs = nil
  @token_precs = nil

  # Drop the items from each of the states.
  each_state { |state| state.clean }
end
|
404
|
+
|
405
|
+
# Set the default argument type for the actions associated with
|
406
|
+
# clauses. All actions defined after this call will be passed
|
407
|
+
# arguments in the way specified here, unless overridden in the
|
408
|
+
# call to {Parser.clause}.
|
409
|
+
#
|
410
|
+
# @param [:array, :splat] type The default argument type.
|
411
|
+
#
|
412
|
+
# @return [void]
|
413
|
+
def default_arg_type(type)
  # Only the two supported policies are accepted; anything else is ignored.
  @default_arg_type = type if [:array, :splat].include?(type)
end
|
416
|
+
alias :dat :default_arg_type
|
417
|
+
|
418
|
+
# Adds productions and actions for parsing empty lists.
|
419
|
+
#
|
420
|
+
# @see CFG#empty_list_production
|
421
|
+
def build_list_production(symbol, list_elements, separator = '')
  # Pure delegation: the grammar object builds the list productions.
  @grammar.build_list_production(symbol, list_elements, separator)
end
|
424
|
+
alias :list :build_list_production
|
425
|
+
|
426
|
+
# This function will print a description of the parser to the
|
427
|
+
# provided IO object.
|
428
|
+
#
|
429
|
+
# @param [IO] io Input/Output object used for printing the parser's explanation.
|
430
|
+
#
|
431
|
+
# @return [void]
|
432
|
+
def explain(io)
  # A description needs both the grammar and at least one generated state.
  unless @grammar and not @states.empty?
    raise ParserConstructionException, 'Parser.explain called outside of finalize.'
  end

  io.puts('###############')
  io.puts('# Productions #')
  io.puts('###############')
  io.puts

  id_width = @grammar.productions(:id).length.to_s.length

  # Print the productions.
  @grammar.productions.each do |_sym, group|
    widest = group.map { |prod| prod.to_s.length }.max || 0

    group.each do |production|
      text = production.to_s
      line = "\tProduction #{sprintf("%#{id_width}d", production.id)}: #{text}"

      # Pad so the precedence columns of one group line up.
      prec = @production_precs[production.id]
      if prec
        line += ' ' * (widest - text.length)
        line += " : (#{sprintf("%-5s", prec.first)}, #{prec.last})"
      end

      io.print(line)
      io.puts
    end

    io.puts
  end

  io.puts('##########')
  io.puts('# Tokens #')
  io.puts('##########')
  io.puts

  term_width = @grammar.terms.map { |term| term.length }.max || 0

  @grammar.terms.sort_by { |term| term.to_s }.each do |term|
    line = "\t#{term}"

    prec = @token_precs[term]
    if prec
      line += ' ' * (term_width - term.length)
      line += " : (#{sprintf("%-5s", prec.first)}, #{prec.last})"
    end

    io.print(line)
    io.puts
  end

  io.puts

  io.puts('#####################')
  io.puts('# Table Information #')
  io.puts('#####################')
  io.puts

  io.puts("\tStart symbol: #{@grammar.start_symbol}'")
  io.puts

  io.puts("\tTotal number of states: #{@states.length}")
  io.puts

  total_conflicts = @conflicts.values.inject(0) { |sum, list| sum + list.length }
  io.puts("\tTotal conflicts: #{total_conflicts}")
  io.puts

  @conflicts.each do |state_id, list|
    io.puts("\tState #{state_id} has #{list.length} conflict(s)")
  end

  io.puts unless @conflicts.empty?

  # Print the parse table.
  io.puts('###############')
  io.puts('# Parse Table #')
  io.puts('###############')
  io.puts

  each_state do |state|
    io.puts("State #{state.id}:")
    io.puts

    io.puts("\t# ITEMS #")
    lhs_width = state.items.map { |item| item.lhs.to_s.length }.max || 0
    state.each { |item| io.puts("\t#{item.to_s(lhs_width)}") }

    io.puts
    io.puts("\t# ACTIONS #")

    state.actions.keys.sort_by { |key| key.to_s }.each do |sym|
      state.actions[sym].each { |action| io.puts("\tOn #{sym} #{action}") }
    end

    io.puts
    io.puts("\t# CONFLICTS #")

    found = @conflicts[state.id]
    if found.empty?
      io.puts("\tNone\n\n")
    else
      found.each do |type, sym|
        label = (type == :SR ? 'Shift/Reduce' : 'Reduce/Reduce')
        io.puts("\t#{label} conflict on #{sym}")
      end

      io.puts
    end
  end

  # Close any IO objects that aren't $stdout.
  io.close if io.is_a?(IO) and io != $stdout
end
|
554
|
+
|
555
|
+
# This method will finalize the parser causing the construction
|
556
|
+
# of states and their actions, and the resolution of conflicts
|
557
|
+
# using lookahead and precedence information.
|
558
|
+
#
|
559
|
+
# No calls to {Parser.production} may appear after the call to
|
560
|
+
# Parser.finalize.
|
561
|
+
#
|
562
|
+
# @param [Hash] opts Options describing how to finalize the parser.
|
563
|
+
#
|
564
|
+
# @option opts [Boolean,String,IO] :explain To explain the parser or not.
|
565
|
+
# @option opts [Boolean] :lookahead To use lookahead info for conflict resolution.
|
566
|
+
# @option opts [Boolean] :precedence To use precedence info for conflict resolution.
|
567
|
+
# @option opts [String,IO] :use A file name or object that is used to load/save the parser.
|
568
|
+
#
|
569
|
+
# @return [void]
|
570
|
+
def finalize(opts = {})
  if @grammar.productions.empty?
    raise ParserConstructionException,
      "Parser has no productions. Cowardly refusing to construct an empty parser."
  end

  # Get the full options hash.
  opts = build_finalize_opts(opts)

  # Get the name of the file in which the parser is defined.
  #
  # FIXME: See why this is failing for the simple ListParser example.
  def_file = caller()[2].split(':')[0] if opts[:use]

  # Check to make sure we can load the necessary information
  # from the specified object: it has to be newer than the file that
  # defines the parser.
  if opts[:use] and (
    (opts[:use].is_a?(String) and File.exist?(opts[:use]) and File.mtime(opts[:use]) > File.mtime(def_file)) or
    (opts[:use].is_a?(File) and opts[:use].mtime > File.mtime(def_file))
  )

    file = self.get_io(opts[:use], 'r')

    begin
      # Un-marshal our saved data structures.
      #
      # NOTE(review): Marshal.load must only ever be given trusted
      # data; the :use file should be a cache written by this method.
      file.flock(File::LOCK_SH)
      @lh_sides, @states, @symbols = Marshal.load(file)
    ensure
      # Release the lock and close the file (if we opened it) even
      # when un-marshalling fails.
      file.flock(File::LOCK_UN)
      file.close if opts[:use].is_a?(String)
    end

    # Remove any un-needed data and return.
    return self.clean
  end

  # Grab all of the symbols that comprise the grammar
  # (besides the start symbol).
  @symbols = @grammar.symbols << :ERROR

  # Add our starting state to the state list.
  @start_symbol = (@grammar.start_symbol.to_s + '\'').to_sym
  start_production, _ = @grammar.production(@start_symbol, @grammar.start_symbol).first
  start_state = State.new(@symbols, [start_production.to_item])

  start_state.close(@grammar.productions)

  self.add_state(start_state)

  # Translate the precedence of productions from tokens to
  # (associativity, precedence) pairs.
  @production_precs.map! { |prec| @token_precs[prec] }

  # Build the rest of the transition table. States added while
  # iterating are visited as well.
  each_state do |state|
    # Transition states.
    tstates = Hash.new { |h, k| h[k] = State.new(@symbols) }

    # Bin each item in this set into reachable transition
    # states.
    state.each do |item|
      if (next_symbol = item.next_symbol)
        tstates[next_symbol] << item.copy
      end
    end

    # For each transition state:
    #  1) Get transition symbol
    #  2) Advance dot
    #  3) Close it
    #  4) Get state id and add transition
    tstates.each do |symbol, tstate|
      tstate.each { |item| item.advance }

      tstate.close(@grammar.productions)

      id = self.add_state(tstate)

      # Add Goto and Shift actions.
      state.on(symbol, CFG::is_nonterminal?(symbol) ? GoTo.new(id) : Shift.new(id))
    end

    # Find the Accept and Reduce actions for this state.
    state.each do |item|
      if item.at_end?
        if item.lhs == @start_symbol
          state.on(:EOS, Accept.new)
        else
          state.add_reduction(@grammar.productions(:id)[item.id])
        end
      end
    end
  end

  # Build the production.id -> production.lhs map.
  @grammar.productions(:id).each { |id, production| @lh_sides[id] = production.lhs }

  # Prune the parsing table for unnecessary reduce actions.
  self.prune(opts[:lookahead], opts[:precedence])

  # Check the parser for inconsistencies.
  self.check_sanity

  # Print the table if requested.
  self.explain(opts[:explain]) if opts[:explain]

  # Remove any data that is no longer needed.
  self.clean

  # Store the parser's final data structures if requested.
  if opts[:use]
    io = self.get_io(opts[:use])

    begin
      io.flock(File::LOCK_EX) if io.is_a?(File)
      Marshal.dump([@lh_sides, @states, @symbols], io)
    ensure
      # Release the lock and close the IO object (if we opened it)
      # even when marshalling fails.
      io.flock(File::LOCK_UN) if io.is_a?(File)
      io.close if opts[:use].is_a?(String)
    end
  end

  nil
end
|
691
|
+
|
692
|
+
# Converts an object into an IO object as appropriate.
|
693
|
+
#
|
694
|
+
# @param [Object] o Object to be converted into an IO object.
|
695
|
+
# @param [String] mode String representing the mode to open the IO object in.
|
696
|
+
#
|
697
|
+
# @return [IO, false] The IO object or false if a conversion wasn't possible.
|
698
|
+
def get_io(o, mode = 'w')
  case o
  when TrueClass then $stdout               # plain `true` means standard output
  when String    then File.open(o, mode)    # a string is treated as a file name
  when IO        then o                     # already an IO object: use it as is
  else false
  end
end
|
709
|
+
|
710
|
+
# Iterate over the parser's states.
|
711
|
+
#
|
712
|
+
# @yieldparam [State] state One of the parser automaton's state objects
|
713
|
+
#
|
714
|
+
# @return [void]
|
715
|
+
def each_state
  # Walk the list by index and re-check its length on every pass, so
  # states appended while iterating are visited too.
  position = 0
  until position >= @states.length
    yield @states[position]
    position += 1
  end
end
|
722
|
+
|
723
|
+
# @return [CFG] The grammar that can be parsed by this Parser.
|
724
|
+
def grammar
  # Callers get a (shallow) clone rather than the parser's own grammar object.
  @grammar.clone
end
|
727
|
+
|
728
|
+
# This method generates and memoizes the G' grammar used to
|
729
|
+
# calculate the LALR(1) lookahead sets. Information about this
|
730
|
+
# grammar and its use can be found in the following paper:
|
731
|
+
#
|
732
|
+
# Simple Computation of LALR(1) Lookahead Sets
|
733
|
+
# Manuel E. Bermudez and George Logothetis
|
734
|
+
# Information Processing Letters 31 - 1989
|
735
|
+
#
|
736
|
+
# @return [CFG]
|
737
|
+
def grammar_prime
  # Memoized: build the grammar only on the first call.
  return @grammar_prime if @grammar_prime

  @grammar_prime = CFG.new

  each_state do |state|
    state.each do |item|
      lhs = "#{state.id}_#{item.next_symbol}".to_sym

      # Only non-terminals that have not been handled for this state yet.
      next unless CFG::is_nonterminal?(item.next_symbol) and not @grammar_prime.productions.keys.include?(lhs)

      @grammar.productions[item.next_symbol].each do |production|
        # Follow the production's symbols through the automaton, tagging
        # each symbol with the ID of the state it is read in.
        cursor = state
        pieces = production.rhs.map do |symbol|
          piece = "#{cursor.id}_#{symbol} "
          cursor = @states[cursor.on?(symbol).first.id]
          piece
        end

        @grammar_prime.production(lhs, pieces.join)
      end
    end
  end

  @grammar_prime
end
|
766
|
+
|
767
|
+
# Inform the parser core that a conflict has been detected.
|
768
|
+
#
|
769
|
+
# @param [Integer] state_id ID of the state where the conflict was encountered.
|
770
|
+
# @param [:RR, :SR] type Reduce/Reduce or Shift/Reduce conflict.
|
771
|
+
# @param [Symbol] sym Symbol that caused the conflict.
|
772
|
+
#
|
773
|
+
# @return [void]
|
774
|
+
def inform_conflict(state_id, type, sym)
  # Conflicts are collected per state as [type, symbol] pairs.
  @conflicts[state_id].push([type, sym])
end
|
777
|
+
|
778
|
+
# This method is used to specify that the symbols in *symbols*
|
779
|
+
# are left-associative. Subsequent calls to this method will
|
780
|
+
# give their arguments higher precedence.
|
781
|
+
#
|
782
|
+
# @param [Array<Symbol>] symbols Symbols that are left associative.
|
783
|
+
#
|
784
|
+
# @return [void]
|
785
|
+
def left(*symbols)
  # Each call claims the next left-associative precedence level.
  @prec_counts[:left] += 1
  level = @prec_counts[:left]

  symbols.map(&:to_sym).each do |sym|
    @token_precs[sym] = [:left, level]
  end
end
|
792
|
+
|
793
|
+
# This method is used to specify that the symbols in *symbols*
|
794
|
+
# are non-associative.
|
795
|
+
#
|
796
|
+
# @param [Array<Symbol>] symbols Symbols that are non-associative.
|
797
|
+
#
|
798
|
+
# @return [void]
|
799
|
+
def nonassoc(*symbols)
  # Each call claims the next non-associative precedence level.
  @prec_counts[:non] += 1
  level = @prec_counts[:non]

  symbols.map(&:to_sym).each do |sym|
    @token_precs[sym] = [:non, level]
  end
end
|
806
|
+
|
807
|
+
# Adds productions and actions for parsing nonempty lists.
|
808
|
+
#
|
809
|
+
# @see CFG#nonempty_list_production
|
810
|
+
def build_nonempty_list_production(symbol, list_elements, separator = '')
  # Pure delegation: the grammar object builds the list productions.
  @grammar.build_nonempty_list_production(symbol, list_elements, separator)
end
|
813
|
+
alias :nonempty_list :build_nonempty_list_production
|
814
|
+
|
815
|
+
# This function is where actual parsing takes place. The
|
816
|
+
# _tokens_ argument must be an array of Token objects, the last
|
817
|
+
# of which has type EOS. By default this method will return the
|
818
|
+
# value computed by the first successful parse tree found.
|
819
|
+
#
|
820
|
+
# Additional information about the parsing options can be found in
|
821
|
+
# the main documentation.
|
822
|
+
#
|
823
|
+
# @param [Array<Token>] tokens Tokens to be parsed.
|
824
|
+
# @param [Hash] opts Options to use when parsing input.
|
825
|
+
#
|
826
|
+
# @option opts [:first, :all] :accept Either :first or :all.
|
827
|
+
# @option opts [Object] :env The environment in which to evaluate the production action.
|
828
|
+
# @option opts [Boolean,String,IO] :parse_tree To print parse trees in the DOT language or not.
|
829
|
+
# @option opts [Boolean,String,IO] :verbose To be verbose or not.
|
830
|
+
#
|
831
|
+
# @return [Object, Array<Object>] Result or results of parsing the given tokens.
|
832
|
+
def parse(tokens, opts = {})
  # Get the full options hash.
  opts = build_parse_opts(opts)
  v    = opts[:verbose]

  if v
    v.puts("Input tokens:")
    v.puts(tokens.map { |t| t.type }.inspect)
    v.puts
  end

  # Stack IDs to keep track of them during parsing.
  stack_id = 0

  # Error mode indicators.  `reduction_guard` keeps reductions performed
  # on the token that triggered error mode from leaving error mode again.
  error_mode      = false
  reduction_guard = false

  # Our various lists of stacks:
  #   accepted   - stacks that reached an Accept action (only with :accept => :all)
  #   moving_on  - stacks that are done with the current token
  #   processing - stacks that still need to act on the current token
  accepted   = []
  moving_on  = []
  processing = [ParseStack.new(stack_id += 1)]

  # Iterate over the tokens.  We don't proceed to the next token until
  # every stack is done with the current one.
  tokens.each_with_index do |token, index|
    # Check to make sure this token was seen in the grammar definition.
    raise BadToken.new(token) if not @symbols.include?(token.type)

    v.puts("Current token: #{token.type}#{if token.value then "(#{token.value})" end}") if v

    # Iterate over the stacks until each one is done.
    while (stack = processing.shift)
      # Execute any token hooks in this stack's environment.
      @token_hooks[token.type].each { |hook| opts[:env].instance_exec(&hook) }

      # Get the available actions for this stack.
      actions = @states[stack.state].on?(token.type)

      if actions.empty?
        # If we are already in error mode and there are no actions we
        # skip this token.
        if error_mode
          v.puts("Discarding token: #{token.type}#{if token.value then "(#{token.value})" end}") if v

          # Add the current token to the array that corresponds to
          # the output value for the ERROR token.
          stack.output_stack.last << token

          moving_on << stack
          next
        end

        # We would be dropping the last stack so we are going to go
        # into error mode.
        if accepted.empty? and moving_on.empty? and processing.empty?

          if v
            v.puts
            v.puts('Current stack:')
            v.puts("\tID: #{stack.id}")
            v.puts("\tState stack:\t#{stack.state_stack.inspect}")
            v.puts("\tOutput Stack:\t#{stack.output_stack.inspect}")
            v.puts
          end

          # Try and find a valid error state by unwinding the stack.
          while stack.state
            if (actions = @states[stack.state].on?(:ERROR)).empty?
              # This state doesn't have an error production.
              # Moving on.
              stack.pop
            else
              # Enter the found error state.
              stack.push(actions.first.id, [token], :ERROR, token.position)

              break
            end
          end

          if stack.state
            # We found a valid error state.
            error_mode = reduction_guard = true
            opts[:env].he = true
            moving_on << stack

            if v
              v.puts('Invalid input encountered.  Entering error handling mode.')
              v.puts("Discarding token: #{token.type}#{if token.value then "(#{token.value})" end}")
            end
          else
            # No valid error states could be found.  Time to print a
            # message and leave.
            v.puts("No more actions for stack #{stack.id}.  Dropping stack.") if v
          end
        else
          v.puts("No more actions for stack #{stack.id}.  Dropping stack.") if v
        end

        next
      end

      # Make (stack, action) pairs, duplicating the stack as necessary.
      # The original stack takes the last action; every other action
      # gets its own branch.
      pairs = [[stack, actions.pop]] + actions.map { |action| [stack.branch(stack_id += 1), action] }

      pairs.each do |new_stack, action|
        if v
          v.puts
          v.puts('Current stack:')
          v.puts("\tID: #{new_stack.id}")
          v.puts("\tState stack:\t#{new_stack.state_stack.inspect}")
          v.puts("\tOutput Stack:\t#{new_stack.output_stack.inspect}")
          v.puts
          v.puts("Action taken: #{action.to_s}")
        end

        if action.is_a?(Accept)
          if opts[:accept] == :all
            accepted << new_stack
          else
            v.puts('Accepting input.') if v
            opts[:parse_tree].puts(new_stack.tree) if opts[:parse_tree]

            # NOTE(review): unlike the other exits of this method the
            # verbose stream is not closed here; confirm against
            # build_parse_opts whether that is intentional.
            if opts[:env].he
              raise HandledError.new(opts[:env].errors, new_stack.result)
            else
              return new_stack.result
            end
          end

        elsif action.is_a?(Reduce)
          # Get the production associated with this reduction.
          production_proc, pop_size = @procs[action.id]

          if not production_proc
            raise InternalParserException, "No production #{action.id} found."
          end

          args, positions = new_stack.pop(pop_size)
          opts[:env].set_positions(positions)

          if not production_proc.selections.empty?
            args = args.values_at(*production_proc.selections)
          end

          result =
            if production_proc.arg_type == :array
              opts[:env].instance_exec(args, &production_proc)
            else
              opts[:env].instance_exec(*args, &production_proc)
            end

          if (goto = @states[new_stack.state].on?(@lh_sides[action.id]).first)

            v.puts("Going to state #{goto.id}.\n") if v

            pos0 = nil

            if args.empty?
              # Empty productions need to be handled specially: they
              # get a zero-length position just past the last symbol.
              pos0 = new_stack.position

              pos0.stream_offset += pos0.length + 1
              pos0.line_offset   += pos0.length + 1

              pos0.length = 0
            else
              # Span from the first to the last symbol of the production.
              pos0 = opts[:env].pos( 0)
              pos1 = opts[:env].pos(-1)

              pos0.length = (pos1.stream_offset + pos1.length) - pos0.stream_offset
            end

            new_stack.push(goto.id, result, @lh_sides[action.id], pos0)
          else
            # Report the state of the stack that was actually reduced;
            # for branched stacks this differs from `stack`.
            raise InternalParserException, "No GoTo action found in state #{new_stack.state} " +
              "after reducing by production #{action.id}"
          end

          # This stack is NOT ready for the next token.
          processing << new_stack

          # Exit error mode if necessary.
          error_mode = false if error_mode and not reduction_guard

        elsif action.is_a?(Shift)
          new_stack.push(action.id, token.value, token.type, token.position)

          # This stack is ready for the next token.
          moving_on << new_stack

          # Exit error mode.
          error_mode = false
        end
      end
    end

    v.puts("\n\n") if v

    processing = moving_on
    moving_on  = []

    # If we don't have any active stacks at this point the string isn't
    # in the language.
    if opts[:accept] == :first and processing.length == 0
      v.close if v and v != $stdout
      raise NotInLanguage.new(tokens[0...index], tokens[index], tokens[index.next..-1])
    end

    reduction_guard = false
  end

  # If we have reached this point we are accepting all parse trees.
  if v
    v.puts("Accepting input with #{accepted.length} derivation(s).")

    v.close if v != $stdout
  end

  accepted.each do |stack|
    opts[:parse_tree].puts(stack.tree)
  end if opts[:parse_tree]

  results = accepted.map { |stack| stack.result }

  if opts[:env].he
    raise HandledError.new(opts[:env].errors, results)
  else
    return results
  end
end
|
1073
|
+
|
1074
|
+
# Adds a new production to the parser with a left-hand value of
|
1075
|
+
# *symbol*. If *expression* is specified it is taken as the
|
1076
|
+
# right-hand side of the production and *action* is associated
|
1077
|
+
# with the production. If *expression* is nil then *action* is
|
1078
|
+
# evaluated and expected to make one or more calls to
|
1079
|
+
# Parser.clause. A precedence can be associated with this
|
1080
|
+
# production by setting *precedence* to a terminal symbol.
|
1081
|
+
#
|
1082
|
+
# @param [Symbol] symbol Left-hand side of the production.
|
1083
|
+
# @param [String, Symbol, nil] expression Right-hand side of the production.
|
1084
|
+
# @param [Symbol, nil] precedence Symbol representing the precedence of this production.
|
1085
|
+
# @param [:array, :splat] arg_type Method to use when passing arguments to the action.
|
1086
|
+
# @param [Proc] action Action associated with this production.
|
1087
|
+
#
|
1088
|
+
# @return [void]
|
1089
|
+
def production(symbol, expression = nil, precedence = nil, arg_type = @default_arg_type, &action)
  # Check the symbol: it must be a String or Symbol naming a non-terminal.
  unless (symbol.is_a?(Symbol) or symbol.is_a?(String)) and CFG::is_nonterminal?(symbol)
    raise ParserConstructionException, 'Production symbols must be Strings or Symbols and be in all lowercase.'
  end

  @grammar.curr_lhs = symbol.to_sym
  @curr_prec        = precedence

  # Temporarily override the default argument type for any clauses
  # defined by this production.
  saved_arg_type = nil
  if arg_type != @default_arg_type
    saved_arg_type    = @default_arg_type
    @default_arg_type = arg_type
  end

  begin
    if expression
      self.clause(expression, precedence, &action)
    else
      # With no expression the action is expected to call clause itself.
      self.instance_exec(&action)
    end
  ensure
    # Always restore the per-production state, even when the clause or
    # action raises, so a failed definition cannot leak its settings
    # into later productions.
    @default_arg_type = saved_arg_type if not saved_arg_type.nil?

    @grammar.curr_lhs = nil
    @curr_prec        = nil
  end

  nil
end
alias :p :production
|
1117
|
+
|
1118
|
+
# This method uses lookahead sets and precedence information to
|
1119
|
+
# resolve conflicts and remove unnecessary reduce actions.
|
1120
|
+
#
|
1121
|
+
# @param [Boolean] do_lookahead Prune based on lookahead sets or not.
|
1122
|
+
# @param [Boolean] do_precedence Prune based on precedence or not.
|
1123
|
+
#
|
1124
|
+
# @return [void]
|
1125
|
+
def prune(do_lookahead, do_precedence)
  terms = @grammar.terms

  # If both options are false there is no pruning to do.
  return if not (do_lookahead or do_precedence)

  each_state do |state0|

    #####################
    # Lookahead Pruning #
    #####################

    if do_lookahead
      # Find all of the reductions in this state.
      reductions = state0.actions.values.flatten.uniq.select { |a| a.is_a?(Reduce) }

      reductions.each do |reduction|
        production = @grammar.productions(:id)[reduction.id]

        lookahead = []

        # Build the lookahead set from every state that can reach
        # state0 by following the production's right-hand side.
        each_state do |state1|
          if self.check_reachability(state1, state0, production.rhs)
            lookahead |= self.grammar_prime.follow_set("#{state1.id}_#{production.lhs}".to_sym)
          end
        end

        # Translate the G' follow symbols ("<state>_<symbol>") into G
        # lookahead symbols.
        lookahead = lookahead.map { |sym| sym.to_s.split('_', 2).last.to_sym }.uniq

        # Here we remove the unnecessary reductions.  If there are
        # error productions we need to scale back the amount of
        # pruning done.
        pruning_candidates = terms - lookahead

        if terms.include?(:ERROR)
          pruning_candidates.each do |sym|
            state0.actions[sym].delete(reduction) if state0.conflict_on?(sym)
          end
        else
          pruning_candidates.each { |sym| state0.actions[sym].delete(reduction) }
        end
      end
    end

    ########################################
    # Precedence and Associativity Pruning #
    ########################################

    if do_precedence
      state0.actions.each do |symbol, actions|

        # We are only interested in pruning actions for terminal
        # symbols.
        next unless CFG::is_terminal?(symbol)

        # Skip to the next one if there is no possibility of a
        # Shift/Reduce or Reduce/Reduce conflict.
        next unless actions and actions.length > 1

        # Precedence can only settle a conflict when every reduction has
        # a precedence AND a Shift is involved.  The previous form,
        # `resolve_ok = <all ranked> and <has shift>`, assigned only the
        # first operand because `and` binds looser than `=`, so the
        # Shift requirement was silently ignored.
        reductions_ranked = actions.all? { |a| !a.is_a?(Reduce) || @production_precs[a.id] }
        shift_present     = actions.any? { |a| a.is_a?(Shift) }
        resolve_ok        = reductions_ranked && shift_present

        if @token_precs[symbol] and resolve_ok
          max_prec        = 0
          selected_action = nil

          # Grab the associativity and precedence for the input token.
          tassoc, tprec = @token_precs[symbol]

          actions.each do |a|
            assoc, prec = a.is_a?(Shift) ? [tassoc, tprec] : @production_precs[a.id]

            # If two actions have the same precedence we will only
            # replace the previous production if:
            #  * The token is left associative and the current action is a Reduce
            #  * The token is right associative and the current action is a Shift
            if prec > max_prec or (prec == max_prec and tassoc == (a.is_a?(Shift) ? :right : :left))
              max_prec        = prec
              selected_action = a

            # NOTE(review): `nonassoc` stores the tag :non, not
            # :nonassoc, so this branch looks unreachable for token
            # precedences.  Left unchanged because making it fire would
            # reject grammars that currently build -- confirm intent.
            elsif prec == max_prec and assoc == :nonassoc
              raise ParserConstructionException, 'Non-associative token found during conflict resolution.'

            end
          end

          state0.actions[symbol] = [selected_action]
        end
      end
    end
  end
end
|
1227
|
+
|
1228
|
+
# This method is used to specify that the symbols in _symbols_
|
1229
|
+
# are right associative. Subsequent calls to this method will
|
1230
|
+
# give their arguments higher precedence.
|
1231
|
+
#
|
1232
|
+
# @param [Array<Symbol>] symbols Symbols that are right-associative.
|
1233
|
+
#
|
1234
|
+
# @return [void]
|
1235
|
+
def right(*symbols)
  # Every call claims the next right-associative precedence level.
  level = (@prec_counts[:right] += 1)

  symbols.map(&:to_sym).each do |name|
    @token_precs[name] = [:right, level]
  end
end
|
1242
|
+
|
1243
|
+
# Changes the starting symbol of the parser.
|
1244
|
+
#
|
1245
|
+
# @param [Symbol] symbol The starting symbol of the grammar.
|
1246
|
+
#
|
1247
|
+
# @return [void]
|
1248
|
+
def start(symbol)
  # The grammar object owns the start symbol; simply forward to it.
  @grammar.start(symbol)
end
|
1251
|
+
|
1252
|
+
# Add a hook that is executed whenever *sym* is seen.
|
1253
|
+
#
|
1254
|
+
# The *sym* must be a terminal symbol.
|
1255
|
+
#
|
1256
|
+
# @param [Symbol] sym Symbol to hook into
|
1257
|
+
# @param [Proc] proc Code to execute when the block is seen
|
1258
|
+
#
|
1259
|
+
# @return [void]
|
1260
|
+
def token_hook(sym, &proc)
  # Hooks may only be attached to terminal symbols.  The message
  # previously claimed the opposite ("non-terminal") of what is checked.
  unless CFG::is_terminal?(sym)
    raise 'Method token_hook expects `sym` to be terminal.'
  end

  @token_hooks[sym] << proc
end
|
1267
|
+
end
|
1268
|
+
|
1269
|
+
####################
|
1270
|
+
# Instance Methods #
|
1271
|
+
####################
|
1272
|
+
|
1273
|
+
# Instantiates a new parser and creates an environment to be
|
1274
|
+
# used for subsequent calls.
|
1275
|
+
def initialize
  # Resolve Environment through the concrete class of this instance.
  environment_class = self.class::Environment
  @env = environment_class.new
end
|
1278
|
+
|
1279
|
+
# Parses the given token stream using the encapsulated environment.
|
1280
|
+
#
|
1281
|
+
# @see .parse
|
1282
|
+
def parse(tokens, opts = {})
  # Start from this instance's environment; caller options win on conflict.
  merged = {:env => @env}
  merged.update(opts)

  self.class.parse(tokens, merged)
end
|
1285
|
+
|
1286
|
+
################################
|
1287
|
+
|
1288
|
+
# All actions passed to Parser.production and Parser.clause are
|
1289
|
+
# evaluated inside an instance of the Environment class or its
|
1290
|
+
# subclass (which must have the same name).
|
1291
|
+
class Environment
  # Flag telling whether an error was encountered and handled.
  #
  # @return [Boolean]
  attr_accessor :he

  # Every object recorded through the *error* method.
  #
  # @return [Array<Object>]
  attr_reader :errors

  # Build a new Environment in its freshly-reset state.
  def initialize
    reset
  end

  # Record an object in the list of errors.
  #
  # @param [Object] o Object describing the error.
  #
  # @return [void]
  def error(o)
    @errors.push(o)
  end

  # Look up the StreamPosition of the symbol at index n (zero based).
  #
  # @param [Integer] n Index for symbol position.
  #
  # @return [StreamPosition] Position of symbol at index n.
  def pos(n)
    @positions[n]
  end

  # Re-initialize the state that must not survive between parse calls:
  # the error list and the handled-error flag.
  #
  # @return [void]
  def reset
    @errors = []
    @he     = false
  end

  # Store the *positions* array used by {#pos}.
  #
  # @param [Array<StreamPosition>] positions
  #
  # @return [Array<StreamPosition>] The same array of positions.
  def set_positions(positions)
    @positions = positions
  end
end
|
1342
|
+
|
1343
|
+
# The ParseStack class is used by a Parser to keep track of state
|
1344
|
+
# during parsing.
|
1345
|
+
class ParseStack
  # @return [Integer] ID of this parse stack.
  attr_reader :id

  # @return [Array<Object>] Array of objects produced by {Reduce} actions.
  attr_reader :output_stack

  # @return [Array<Integer>] Array of states used when performing {Reduce} actions.
  attr_reader :state_stack

  # Instantiate a new ParseStack object.
  #
  # @param [Integer]                id           ID for this parse stack.  Used by GLR algorithm.
  # @param [Array<Object>]          ostack       Output stack.  Holds results of {Reduce} and {Shift} actions.
  # @param [Array<Integer>]         sstack       State stack.  Holds states that have been shifted due to {Shift} actions.
  # @param [Array<Integer>]         nstack       Node stack.  Holds dot language IDs for nodes in the parse tree.
  # @param [Array<Array<Integer>>]  connections  Integer pairs representing edges in the parse tree.
  # @param [Array<Symbol>]          labels       Labels for nodes in the parse tree.
  # @param [Array<StreamPosition>]  positions    Position data for symbols that have been shifted.
  def initialize(id, ostack = [], sstack = [0], nstack = [], connections = [], labels = [], positions = [])
    @id = id

    @node_stack   = nstack
    @output_stack = ostack
    @state_stack  = sstack

    @connections = connections
    @labels      = labels
    @positions   = positions

    # Nodes removed by the most recent pop; they become the children of
    # the next non-terminal that is pushed.
    @cbuffer = []
  end

  # Branch this stack, effectively creating a new copy of its internal
  # state.
  #
  # @param [Integer] new_id ID for the new ParseStack.
  #
  # @return [ParseStack]
  def branch(new_id)
    # We have to do a deeper copy of the output stack to avoid
    # interactions between the Proc objects for the different parsing
    # paths.
    new_output_stack = @output_stack.map do |o|
      # Prefer a deep copy when the object offers one.  (This used to
      # test the Integer literal 0 instead of the element, so `copy` was
      # never called.)
      if o.respond_to?(:copy)
        o.copy
      else
        # The begin/rescue block is needed because some classes respond
        # to `clone` but always raise an error.
        begin
          o.clone
        rescue
          o
        end
      end
    end

    ParseStack.new(new_id, new_output_stack, @state_stack.clone,
      @node_stack.clone, @connections.clone, @labels.clone, @positions.clone)
  end

  # @return [StreamPosition] Position data for the last symbol on the stack.
  def position
    if @positions.empty?
      StreamPosition.new
    else
      # Hand out a copy so callers can adjust it without touching the stack.
      @positions.last.clone
    end
  end

  # Push new state and other information onto the stack.
  #
  # @param [Integer]         state     ID of the shifted state.
  # @param [Object]          o         Value of Token that caused the shift.
  # @param [Symbol]          node0     Label for node in parse tree.
  # @param [StreamPosition]  position  Position token that got shifted.
  #
  # @return [void]
  def push(state, o, node0, position)
    @state_stack  << state
    @output_stack << o
    @node_stack   << @labels.length
    @labels       << if CFG::is_terminal?(node0) and o then node0.to_s + "(#{o})" else node0 end
    @positions    << position

    # A non-terminal becomes the parent of every node removed by the
    # preceding pop.
    if CFG::is_nonterminal?(node0)
      parent = @labels.length - 1

      @cbuffer.each do |node1|
        @connections << [parent, node1]
      end
    end
  end

  # Pop some number of objects off of the inside stacks.
  #
  # @param [Integer] n Number of object to pop off the stack.
  #
  # @return [Array(Object, StreamPosition)] Values popped from the output and positions stacks.
  def pop(n = 1)
    @state_stack.pop(n)

    # Pop the node stack so that the proper edges can be added when the
    # production's left-hand side non-terminal is pushed onto the stack.
    @cbuffer = @node_stack.pop(n)

    [@output_stack.pop(n), @positions.pop(n)]
  end

  # Fetch the result stored in this ParseStack.  If there is more than
  # one object left on the output stack there is an error.
  #
  # @raise [InternalParserException] If the output stack does not hold exactly one element.
  #
  # @return [Object] The end result of this parse stack.
  def result
    if @output_stack.length == 1
      return @output_stack.last
    else
      raise InternalParserException, "The parsing stack should have 1 element on the output stack, not #{@output_stack.length}."
    end
  end

  # @return [Integer] Current state of this ParseStack.
  def state
    @state_stack.last
  end

  # @return [String] Representation of the parse tree in the DOT language.
  def tree
    tree = String.new("digraph tree#{@id} {\n")

    # One node per label; terminals are drawn as boxes.
    @labels.each_with_index do |label, i|
      tree << "\tnode#{i} [label=\"#{label}\""
      tree << " shape=box" if CFG::is_terminal?(label)
      tree << "];\n"
    end

    tree << "\n"

    @connections.each do |from, to|
      tree << "\tnode#{from} -> node#{to};\n"
    end

    tree << "}"
  end
end
|
1490
|
+
|
1491
|
+
# The State class is used to represent sets of items and actions to be
|
1492
|
+
# used during parsing.
|
1493
|
+
class State
  # @return [Integer] State's ID.
  attr_accessor :id

  # @return [Array<CFG::Item>] Item objects that comprise this state
  attr_reader :items

  # @return [Hash{Symbol => Array<Action>}] Maps lookahead symbols to actions
  attr_reader :actions

  # Instantiate a new State object.
  #
  # @param [Array<Symbol>]     tokens  Tokens that represent this state
  # @param [Array<CFG::Item>]  items   Items that make up this state
  def initialize(tokens, items = [])
    @id      = nil
    @items   = items
    @actions = {}

    # Every known symbol starts out with its own empty action list.
    tokens.each { |token| @actions[token] = [] }
  end

  # Compare one State to another.  Two States are equal if they have the
  # same items or, if the items have been cleaned, if the States have
  # the same ID.
  #
  # @param [State] other Another State to compare to
  #
  # @return [Boolean]
  def ==(other)
    return self.items == other.items if self.items and other.items

    self.id == other.id
  end

  # Add a Reduce action to the state.
  #
  # @param [Production] production Production used to perform the reduction
  #
  # @return [void]
  def add_reduction(production)
    reduction = Reduce.new(production)

    # Reduce actions are not allowed for the ERROR terminal.
    @actions.each do |sym, list|
      list << reduction if CFG::is_terminal?(sym) and sym != :ERROR
    end
  end

  # Add an item to this state, ignoring non-items and duplicates.
  #
  # @param [CFG::Item] item Item to add to this state.
  def append(item)
    return unless item.is_a?(CFG::Item)

    @items << item unless @items.include?(item)
  end
  alias :<< :append

  # Clean this State by removing the list of {CFG::Item} objects.
  #
  # @return [void]
  def clean
    @items = nil
  end

  # Close this state using *productions*.
  #
  # @param [Array<CFG::Production>] productions Productions used to close this state.
  #
  # @return [void]
  def close(productions)
    # Items appended here are visited too, because #each re-reads the
    # item count on every step.
    self.each do |item|
      upcoming = item.next_symbol
      next unless upcoming and CFG::is_nonterminal?(upcoming)

      productions[upcoming].each { |prod| self << prod.to_item }
    end
  end

  # Checks to see if there is a conflict in this state, given an input
  # of *sym*.  Returns :SR if a shift/reduce conflict is detected and
  # :RR if a reduce/reduce conflict is detected.  If no conflict is
  # detected nil is returned.
  #
  # @param [Symbol] sym Symbol to check for conflicts on.
  #
  # @return [:SR, :RR, nil]
  def conflict_on?(sym)
    candidates = @actions[sym]

    reductions = candidates.count { |action| action.is_a?(Reduce) }
    shifts     = candidates.count { |action| !action.is_a?(Reduce) && action.is_a?(Shift) }

    return :SR if shifts == 1 and reductions > 0
    return :RR if reductions > 1

    nil
  end

  # Iterate over the state's items.
  #
  # @return [void]
  def each
    index = 0

    # Index-based on purpose: the item list may grow while iterating.
    until index >= @items.count
      yield @items.at(index)
      index += 1
    end
  end

  # Specify an Action to perform when the input token is *symbol*.
  #
  # @param [Symbol] symbol Symbol to add action for.
  # @param [Action] action Action for symbol.
  #
  # @raise [ParserConstructionException] If *symbol* was not given to the constructor.
  #
  # @return [void]
  def on(symbol, action)
    unless @actions.key?(symbol)
      raise ParserConstructionException, "Attempting to set action for token (#{symbol}) not seen in grammar definition."
    end

    @actions[symbol] << action
  end

  # Returns the actions that should be taken when the input token is
  # *symbol*.  The returned array is a copy of the stored list.
  #
  # @param [Symbol] symbol Symbol we want the actions for.
  #
  # @return [Array<Action>] Actions that should be taken.
  def on?(symbol)
    @actions[symbol].clone
  end
end
|
1629
|
+
|
1630
|
+
# A subclass of Proc that indicates how it should be passed arguments
|
1631
|
+
# by the parser.
|
1632
|
+
class ProdProc < Proc
  # @return [:array, :splat] Method that should be used to pass arguments to this proc.
  attr_reader :arg_type

  # @return [Array<Integer>] Mask for selection of tokens to pass to action.  Empty mask means pass all.
  attr_reader :selections

  # @param [:array, :splat]   arg_type    How the parser should pass arguments to this proc.
  # @param [Array<Integer>]   selections  Indices of the values to pass; empty means all.
  def initialize(arg_type = :splat, selections = [])
    # No arguments are forwarded to Proc; the block is passed along implicitly.
    super()

    @arg_type, @selections = arg_type, selections
  end
end
|
1645
|
+
|
1646
|
+
# The Action class is used to indicate what action the parser should
|
1647
|
+
# take given a current state and input token.
|
1648
|
+
class Action
  # @return [Integer] ID of this action.
  attr_reader :id

  # @param [Integer] id ID of this action.
  def initialize(id = nil)
    @id = id
  end
end
|
1657
|
+
|
1658
|
+
# The Accept class indicates to the parser that it should accept the
|
1659
|
+
# current parse tree.
|
1660
|
+
class Accept < Action
  # @return [String] String representation of this action.
  def to_s
    'Accept'
  end
end
|
1666
|
+
|
1667
|
+
# The GoTo class indicates to the parser that it should goto the state
|
1668
|
+
# specified by GoTo.id.
|
1669
|
+
class GoTo < Action
  # @return [String] String representation of this action.
  def to_s
    "GoTo #{id}"
  end
end
|
1675
|
+
|
1676
|
+
# The Reduce class indicates to the parser that it should reduce the
|
1677
|
+
# input stack by the rule specified by Reduce.id.
|
1678
|
+
class Reduce < Action

  # @param [Production] production Production to reduce by
  def initialize(production)
    # The action shares its ID with the production it reduces by.
    super(production.id)

    @production = production
  end

  # @return [String] String representation of this action.
  def to_s
    "Reduce by Production #{id} : #{@production}"
  end
end
|
1692
|
+
|
1693
|
+
# The Shift class indicates to the parser that it should shift the
|
1694
|
+
# current input token.
|
1695
|
+
class Shift < Action
  # @return [String] String representation of this action.
  def to_s
    "Shift to State #{id}"
  end
end
|
1701
|
+
end
|
1702
|
+
end
|