rltk 2.2.1 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +12 -12
- data/README.md +458 -285
- data/Rakefile +99 -92
- data/lib/rltk/ast.rb +221 -126
- data/lib/rltk/cfg.rb +218 -239
- data/lib/rltk/cg/basic_block.rb +1 -1
- data/lib/rltk/cg/bindings.rb +9 -26
- data/lib/rltk/cg/builder.rb +40 -8
- data/lib/rltk/cg/context.rb +1 -1
- data/lib/rltk/cg/contractor.rb +51 -0
- data/lib/rltk/cg/execution_engine.rb +45 -8
- data/lib/rltk/cg/function.rb +12 -2
- data/lib/rltk/cg/generated_bindings.rb +2541 -575
- data/lib/rltk/cg/generic_value.rb +2 -2
- data/lib/rltk/cg/instruction.rb +104 -83
- data/lib/rltk/cg/llvm.rb +44 -3
- data/lib/rltk/cg/memory_buffer.rb +22 -5
- data/lib/rltk/cg/module.rb +85 -36
- data/lib/rltk/cg/old_generated_bindings.rb +6152 -0
- data/lib/rltk/cg/pass_manager.rb +87 -43
- data/lib/rltk/cg/support.rb +2 -4
- data/lib/rltk/cg/target.rb +158 -28
- data/lib/rltk/cg/triple.rb +8 -8
- data/lib/rltk/cg/type.rb +69 -25
- data/lib/rltk/cg/value.rb +107 -66
- data/lib/rltk/cg.rb +16 -17
- data/lib/rltk/lexer.rb +21 -11
- data/lib/rltk/lexers/calculator.rb +1 -1
- data/lib/rltk/lexers/ebnf.rb +8 -7
- data/lib/rltk/parser.rb +300 -247
- data/lib/rltk/parsers/infix_calc.rb +1 -1
- data/lib/rltk/parsers/postfix_calc.rb +2 -2
- data/lib/rltk/parsers/prefix_calc.rb +2 -2
- data/lib/rltk/token.rb +1 -2
- data/lib/rltk/version.rb +3 -3
- data/lib/rltk.rb +6 -6
- data/test/cg/tc_basic_block.rb +83 -0
- data/test/cg/tc_control_flow.rb +191 -0
- data/test/cg/tc_function.rb +54 -0
- data/test/cg/tc_generic_value.rb +33 -0
- data/test/cg/tc_instruction.rb +256 -0
- data/test/cg/tc_llvm.rb +25 -0
- data/test/cg/tc_math.rb +88 -0
- data/test/cg/tc_module.rb +89 -0
- data/test/cg/tc_transforms.rb +68 -0
- data/test/cg/tc_type.rb +69 -0
- data/test/cg/tc_value.rb +151 -0
- data/test/cg/ts_cg.rb +23 -0
- data/test/tc_ast.rb +105 -8
- data/test/tc_cfg.rb +63 -48
- data/test/tc_lexer.rb +84 -96
- data/test/tc_parser.rb +224 -52
- data/test/tc_token.rb +6 -6
- data/test/ts_rltk.rb +12 -15
- metadata +149 -75
- data/lib/rltk/cg/generated_extended_bindings.rb +0 -287
- data/lib/rltk/util/abstract_class.rb +0 -25
- data/lib/rltk/util/monkeys.rb +0 -129
data/lib/rltk/parser.rb
CHANGED
@@ -14,8 +14,8 @@ require 'rltk/cfg'
|
|
14
14
|
# Classes and Modules #
|
15
15
|
#######################
|
16
16
|
|
17
|
-
|
18
|
-
|
17
|
+
# The RLTK root module
|
18
|
+
module RLTK
|
19
19
|
# A BadToken error indicates that a token was observed in the input stream
|
20
20
|
# that wasn't used in the grammar's definition.
|
21
21
|
class BadToken < StandardError
|
@@ -29,9 +29,28 @@ module RLTK # :nodoc:
|
|
29
29
|
# for a given token stream. In other words, the input string is not in the
|
30
30
|
# defined language.
|
31
31
|
class NotInLanguage < StandardError
|
32
|
+
|
33
|
+
# @return [Array<Token>] List of tokens that have been successfully parsed
|
34
|
+
attr_reader :seen
|
35
|
+
|
36
|
+
# @return [Token] Token that caused the parser to stop
|
37
|
+
attr_reader :current
|
38
|
+
|
39
|
+
# @return [Array<Token>] List of tokens that have yet to be seen
|
40
|
+
attr_reader :remaining
|
41
|
+
|
42
|
+
# @param [Array<Token>] seen Tokens that have been successfully parsed
|
43
|
+
# @param [Token] current Token that caused the parser to stop
|
44
|
+
# @param [Array<Token>] remaining Tokens that have yet to be seen
|
45
|
+
def initialize(seen, current, remaining)
|
46
|
+
@seen = seen
|
47
|
+
@current = current
|
48
|
+
@remaining = remaining
|
49
|
+
end
|
50
|
+
|
32
51
|
# @return [String] String representation of the error.
|
33
52
|
def to_s
|
34
|
-
|
53
|
+
"String not in language. Token info:\n\tSeen: #{@seen}\n\tCurrent: #{@current}\n\tRemaining: #{@remaining}"
|
35
54
|
end
|
36
55
|
end
|
37
56
|
|
@@ -80,57 +99,53 @@ module RLTK # :nodoc:
|
|
80
99
|
#
|
81
100
|
# @return [void]
|
82
101
|
def install_icvars
|
83
|
-
@curr_lhs
|
84
|
-
@curr_prec
|
102
|
+
@curr_lhs = nil
|
103
|
+
@curr_prec = nil
|
85
104
|
|
86
|
-
@conflicts
|
87
|
-
@grammar
|
105
|
+
@conflicts = Hash.new {|h, k| h[k] = Array.new}
|
106
|
+
@grammar = CFG.new
|
88
107
|
|
89
|
-
@lh_sides
|
90
|
-
@procs
|
91
|
-
@states
|
108
|
+
@lh_sides = Hash.new
|
109
|
+
@procs = Array.new
|
110
|
+
@states = Array.new
|
92
111
|
|
93
112
|
# Variables for dealing with precedence.
|
94
|
-
@prec_counts
|
95
|
-
@production_precs
|
96
|
-
@token_precs
|
113
|
+
@prec_counts = {:left => 0, :right => 0, :non => 0}
|
114
|
+
@production_precs = Array.new
|
115
|
+
@token_precs = Hash.new
|
97
116
|
|
98
|
-
# Set the default argument handling policy.
|
99
|
-
|
117
|
+
# Set the default argument handling policy. Valid values
|
118
|
+
# are :array and :splat.
|
119
|
+
@default_arg_type = :splat
|
100
120
|
|
101
|
-
@grammar.callback do |
|
102
|
-
@procs[p.id] =
|
103
|
-
[
|
121
|
+
@grammar.callback do |type, which, p, sels = []|
|
122
|
+
@procs[p.id] = [
|
104
123
|
case type
|
105
|
-
when
|
106
|
-
case
|
107
|
-
when :
|
108
|
-
else
|
109
|
-
end
|
110
|
-
|
111
|
-
when :+
|
112
|
-
case num
|
113
|
-
when :first then Proc.new { |o| [o] }
|
114
|
-
else Proc.new { |os, o| os << o }
|
115
|
-
end
|
116
|
-
|
117
|
-
when :'?'
|
118
|
-
case num
|
119
|
-
when :first then Proc.new { || nil }
|
120
|
-
else Proc.new { |o| o }
|
124
|
+
when :optional
|
125
|
+
case which
|
126
|
+
when :empty then ProdProc.new { || nil }
|
127
|
+
else ProdProc.new { |o| o }
|
121
128
|
end
|
122
129
|
|
123
130
|
when :elp
|
124
|
-
case
|
125
|
-
when :
|
126
|
-
else
|
131
|
+
case which
|
132
|
+
when :empty then ProdProc.new { || [] }
|
133
|
+
else ProdProc.new { |prime| prime }
|
127
134
|
end
|
128
135
|
|
129
136
|
when :nelp
|
130
|
-
case
|
131
|
-
when :
|
132
|
-
|
133
|
-
|
137
|
+
case which
|
138
|
+
when :single
|
139
|
+
ProdProc.new { |el| [el] }
|
140
|
+
|
141
|
+
when :multiple
|
142
|
+
ProdProc.new(:splat, sels) do |*syms|
|
143
|
+
el = syms[1..-1]
|
144
|
+
syms.first << (el.length == 1 ? el.first : el)
|
145
|
+
end
|
146
|
+
|
147
|
+
else
|
148
|
+
ProdProc.new { |*el| el.length == 1 ? el.first : el }
|
134
149
|
end
|
135
150
|
end,
|
136
151
|
p.rhs.length
|
@@ -168,58 +183,6 @@ module RLTK # :nodoc:
|
|
168
183
|
end
|
169
184
|
end
|
170
185
|
|
171
|
-
# Calling this method will cause the parser to pass right-hand
|
172
|
-
# side values as arrays instead of splats. This method must be
|
173
|
-
# called before ANY calls to Parser.production.
|
174
|
-
#
|
175
|
-
# @return [void]
|
176
|
-
def array_args
|
177
|
-
if @grammar.productions.length == 0
|
178
|
-
@args = :array
|
179
|
-
|
180
|
-
@grammar.callback do |p, type, num|
|
181
|
-
@procs[p.id] =
|
182
|
-
[
|
183
|
-
case type
|
184
|
-
when :*
|
185
|
-
case num
|
186
|
-
when :first then Proc.new { |v| [] }
|
187
|
-
else Proc.new { |v| v[0] << v[1] }
|
188
|
-
end
|
189
|
-
|
190
|
-
when :+
|
191
|
-
case num
|
192
|
-
when :first then Proc.new { |v| [v[0]] }
|
193
|
-
else Proc.new { |v| v[0] << v[1] }
|
194
|
-
end
|
195
|
-
|
196
|
-
when :'?'
|
197
|
-
case num
|
198
|
-
when :first then Proc.new { |v| nil }
|
199
|
-
else Proc.new { |v| v[0] }
|
200
|
-
end
|
201
|
-
|
202
|
-
when :elp
|
203
|
-
case num
|
204
|
-
when :first then Proc.new { |v| [] }
|
205
|
-
else Proc.new { |v| v[0] }
|
206
|
-
end
|
207
|
-
|
208
|
-
when :nelp
|
209
|
-
case num
|
210
|
-
when :first then Proc.new { |v| v }
|
211
|
-
when :second then Proc.new { |v| v[0] + [v[2]] }
|
212
|
-
else Proc.new { |v| if v.length == 1 then v.first else v end }
|
213
|
-
end
|
214
|
-
end,
|
215
|
-
p.rhs.length
|
216
|
-
]
|
217
|
-
|
218
|
-
@production_precs[p.id] = p.last_terminal
|
219
|
-
end
|
220
|
-
end
|
221
|
-
end
|
222
|
-
|
223
186
|
# Build a hash with the default options for Parser.finalize
|
224
187
|
# and then update it with the values from *opts*.
|
225
188
|
#
|
@@ -230,10 +193,10 @@ module RLTK # :nodoc:
|
|
230
193
|
opts[:explain] = self.get_io(opts[:explain])
|
231
194
|
|
232
195
|
{
|
233
|
-
:
|
234
|
-
:
|
235
|
-
:
|
236
|
-
:
|
196
|
+
explain: false,
|
197
|
+
lookahead: true,
|
198
|
+
precedence: true,
|
199
|
+
use: false
|
237
200
|
}.update(opts)
|
238
201
|
end
|
239
202
|
private :build_finalize_opts
|
@@ -245,14 +208,14 @@ module RLTK # :nodoc:
|
|
245
208
|
#
|
246
209
|
# @return [Hash{Symbol => Object}]
|
247
210
|
def build_parse_opts(opts)
|
248
|
-
opts[:parse_tree]
|
249
|
-
opts[:verbose]
|
211
|
+
opts[:parse_tree] = self.get_io(opts[:parse_tree])
|
212
|
+
opts[:verbose] = self.get_io(opts[:verbose])
|
250
213
|
|
251
214
|
{
|
252
|
-
|
253
|
-
:
|
254
|
-
:
|
255
|
-
:
|
215
|
+
accept: :first,
|
216
|
+
env: self::Environment.new,
|
217
|
+
parse_tree: false,
|
218
|
+
verbose: false
|
256
219
|
}.update(opts)
|
257
220
|
end
|
258
221
|
private :build_parse_opts
|
@@ -275,7 +238,7 @@ module RLTK # :nodoc:
|
|
275
238
|
end
|
276
239
|
|
277
240
|
# Check the actions in each state.
|
278
|
-
|
241
|
+
each_state do |state|
|
279
242
|
state.actions.each do |sym, actions|
|
280
243
|
if CFG::is_terminal?(sym)
|
281
244
|
# Here we check actions for terminals.
|
@@ -312,14 +275,14 @@ module RLTK # :nodoc:
|
|
312
275
|
# This method checks to see if the parser would be in parse state
|
313
276
|
# *dest* after starting in state *start* and reading *symbols*.
|
314
277
|
#
|
315
|
-
# @param [Symbol]
|
316
|
-
# @param [Symbol]
|
317
|
-
# @param [Array<Symbol>]
|
278
|
+
# @param [Symbol] start Symbol representing a CFG production.
|
279
|
+
# @param [Symbol] dest Symbol representing a CFG production.
|
280
|
+
# @param [Array<Symbol>] symbols Grammar symbols.
|
318
281
|
#
|
319
282
|
# @return [Boolean] If the destination symbol is reachable from the start symbol after reading *symbols*.
|
320
283
|
def check_reachability(start, dest, symbols)
|
321
|
-
path_exists
|
322
|
-
cur_state
|
284
|
+
path_exists = true
|
285
|
+
cur_state = start
|
323
286
|
|
324
287
|
symbols.each do |sym|
|
325
288
|
|
@@ -345,27 +308,30 @@ module RLTK # :nodoc:
|
|
345
308
|
# production can be changed by setting the *precedence* argument
|
346
309
|
# to some terminal symbol.
|
347
310
|
#
|
348
|
-
# @param [String]
|
349
|
-
# @param [Symbol]
|
350
|
-
# @param [
|
311
|
+
# @param [String, Symbol] expression Right-hand side of a production.
|
312
|
+
# @param [Symbol] precedence Symbol representing the precedence of this production.
|
313
|
+
# @param [:array, :splat] arg_type Method to use when passing arguments to the action.
|
314
|
+
# @param [Proc] action Action to be taken when the production is reduced.
|
351
315
|
#
|
352
316
|
# @return [void]
|
353
|
-
def clause(expression, precedence = nil, &action)
|
317
|
+
def clause(expression, precedence = nil, arg_type = @default_arg_type, &action)
|
354
318
|
# Use the curr_prec only if it isn't overridden for this
|
355
319
|
# clause.
|
356
320
|
precedence ||= @curr_prec
|
357
321
|
|
358
|
-
production = @grammar.clause(expression)
|
322
|
+
production, selections = @grammar.clause(expression)
|
359
323
|
|
360
324
|
# Check to make sure the action's arity matches the number
|
361
325
|
# of symbols on the right-hand side.
|
362
|
-
|
363
|
-
|
364
|
-
|
326
|
+
expected_arity = (selections.empty? ? production.rhs.length : selections.length)
|
327
|
+
if arg_type == :splat and action.arity != expected_arity
|
328
|
+
raise ParserConstructionException,
|
329
|
+
"Incorrect number of action parameters. Expected #{expected_arity} but got #{action.arity}." +
|
330
|
+
' Action arity must match the number of terminals and non-terminals in the clause.'
|
365
331
|
end
|
366
332
|
|
367
333
|
# Add the action to our proc list.
|
368
|
-
@procs[production.id] = [action, production.rhs.length]
|
334
|
+
@procs[production.id] = [ProdProc.new(arg_type, selections, &action), production.rhs.length]
|
369
335
|
|
370
336
|
# If no precedence is specified use the precedence of the
|
371
337
|
# last terminal in the production.
|
@@ -382,28 +348,41 @@ module RLTK # :nodoc:
|
|
382
348
|
@conflicts = nil
|
383
349
|
|
384
350
|
# Drop the grammar and the grammar'.
|
385
|
-
@grammar
|
386
|
-
@grammar_prime
|
351
|
+
@grammar = nil
|
352
|
+
@grammar_prime = nil
|
387
353
|
|
388
354
|
# Drop precedence and bookkeeping information.
|
389
|
-
@cur_lhs
|
390
|
-
@cur_prec
|
355
|
+
@cur_lhs = nil
|
356
|
+
@cur_prec = nil
|
391
357
|
|
392
|
-
@prec_counts
|
393
|
-
@production_precs
|
394
|
-
@token_precs
|
358
|
+
@prec_counts = nil
|
359
|
+
@production_precs = nil
|
360
|
+
@token_precs = nil
|
395
361
|
|
396
362
|
# Drop the items from each of the states.
|
397
|
-
|
363
|
+
each_state { |state| state.clean }
|
364
|
+
end
|
365
|
+
|
366
|
+
# Set the default argument type for the actions associated with
|
367
|
+
# clauses. All actions defined after this call will be passed
|
368
|
+
# arguments in the way specified here, unless overridden in the
|
369
|
+
# call to {Parser.clause}.
|
370
|
+
#
|
371
|
+
# @param [:array, :splat] type The default argument type.
|
372
|
+
#
|
373
|
+
# @return [void]
|
374
|
+
def default_arg_type(type)
|
375
|
+
@default_arg_type = type if type == :array or type == :splat
|
398
376
|
end
|
377
|
+
alias :dat :default_arg_type
|
399
378
|
|
400
379
|
# Adds productions and actions for parsing empty lists.
|
401
380
|
#
|
402
381
|
# @see CFG#empty_list_production
|
403
|
-
def
|
404
|
-
@grammar.
|
382
|
+
def build_list_production(symbol, list_elements, separator = '')
|
383
|
+
@grammar.build_list_production(symbol, list_elements, separator)
|
405
384
|
end
|
406
|
-
alias :
|
385
|
+
alias :list :build_list_production
|
407
386
|
|
408
387
|
# This function will print a description of the parser to the
|
409
388
|
# provided IO object.
|
@@ -418,13 +397,21 @@ module RLTK # :nodoc:
|
|
418
397
|
io.puts('###############')
|
419
398
|
io.puts
|
420
399
|
|
400
|
+
max_id_length = @grammar.productions(:id).length.to_s.length
|
401
|
+
|
421
402
|
# Print the productions.
|
422
403
|
@grammar.productions.each do |sym, productions|
|
404
|
+
|
405
|
+
max_rhs_length = productions.inject(0) { |m, p| if (len = p.to_s.length) > m then len else m end }
|
406
|
+
|
423
407
|
productions.each do |production|
|
424
|
-
|
408
|
+
p_string = production.to_s
|
409
|
+
|
410
|
+
io.print("\tProduction #{sprintf("%#{max_id_length}d", production.id)}: #{p_string}")
|
425
411
|
|
426
412
|
if (prec = @production_precs[production.id])
|
427
|
-
io.print(
|
413
|
+
io.print(' ' * (max_rhs_length - p_string.length))
|
414
|
+
io.print(" : (#{sprintf("%-5s", prec.first)}, #{prec.last})")
|
428
415
|
end
|
429
416
|
|
430
417
|
io.puts
|
@@ -438,11 +425,14 @@ module RLTK # :nodoc:
|
|
438
425
|
io.puts('##########')
|
439
426
|
io.puts
|
440
427
|
|
428
|
+
max_token_len = @grammar.terms.inject(0) { |m, t| if t.length > m then t.length else m end }
|
429
|
+
|
441
430
|
@grammar.terms.sort {|a,b| a.to_s <=> b.to_s }.each do |term|
|
442
431
|
io.print("\t#{term}")
|
443
432
|
|
444
433
|
if (prec = @token_precs[term])
|
445
|
-
io.print(
|
434
|
+
io.print(' ' * (max_token_len - term.length))
|
435
|
+
io.print(" : (#{sprintf("%-5s", prec.first)}, #{prec.last})")
|
446
436
|
end
|
447
437
|
|
448
438
|
io.puts
|
@@ -455,7 +445,7 @@ module RLTK # :nodoc:
|
|
455
445
|
io.puts('#####################')
|
456
446
|
io.puts
|
457
447
|
|
458
|
-
io.puts("\tStart symbol: #{@grammar.start_symbol}")
|
448
|
+
io.puts("\tStart symbol: #{@grammar.start_symbol}'")
|
459
449
|
io.puts
|
460
450
|
|
461
451
|
io.puts("\tTotal number of states: #{@states.length}")
|
@@ -476,7 +466,7 @@ module RLTK # :nodoc:
|
|
476
466
|
io.puts('###############')
|
477
467
|
io.puts
|
478
468
|
|
479
|
-
|
469
|
+
each_state do |state|
|
480
470
|
io.puts("State #{state.id}:")
|
481
471
|
io.puts
|
482
472
|
|
@@ -527,18 +517,15 @@ module RLTK # :nodoc:
|
|
527
517
|
# of states and their actions, and the resolution of conflicts
|
528
518
|
# using lookahead and precedence information.
|
529
519
|
#
|
530
|
-
# The *opts* hash may contain the following options, which are
|
531
|
-
# described in more detail in the main documentation:
|
532
|
-
#
|
533
|
-
# * :explain - To explain the parser or not.
|
534
|
-
# * :lookahead - To use lookahead info for conflict resolution.
|
535
|
-
# * :precedence - To use precedence info for conflict resolution.
|
536
|
-
# * :use - A file name or object that is used to load/save the parser.
|
537
|
-
#
|
538
520
|
# No calls to {Parser.production} may appear after the call to
|
539
521
|
# Parser.finalize.
|
540
522
|
#
|
541
|
-
# @param [Hash
|
523
|
+
# @param [Hash] opts Options describing how to finalize the parser.
|
524
|
+
#
|
525
|
+
# @option opts [Boolean,String,IO] :explain To explain the parser or not.
|
526
|
+
# @option opts [Boolean] :lookahead To use lookahead info for conflict resolution.
|
527
|
+
# @option opts [Boolean] :precedence To use precedence info for conflict resolution.
|
528
|
+
# @option opts [String,IO] :use A file name or object that is used to load/save the parser.
|
542
529
|
#
|
543
530
|
# @return [void]
|
544
531
|
def finalize(opts = {})
|
@@ -572,13 +559,14 @@ module RLTK # :nodoc:
|
|
572
559
|
return self.clean
|
573
560
|
end
|
574
561
|
|
575
|
-
# Grab all of the symbols that comprise the grammar
|
576
|
-
# the start symbol).
|
562
|
+
# Grab all of the symbols that comprise the grammar
|
563
|
+
# (besides the start symbol).
|
577
564
|
@symbols = @grammar.symbols << :ERROR
|
578
565
|
|
579
566
|
# Add our starting state to the state list.
|
580
|
-
|
581
|
-
|
567
|
+
@start_symbol = (@grammar.start_symbol.to_s + '\'').to_sym
|
568
|
+
start_production, _ = @grammar.production(@start_symbol, @grammar.start_symbol).first
|
569
|
+
start_state = State.new(@symbols, [start_production.to_item])
|
582
570
|
|
583
571
|
start_state.close(@grammar.productions)
|
584
572
|
|
@@ -586,12 +574,10 @@ module RLTK # :nodoc:
|
|
586
574
|
|
587
575
|
# Translate the precedence of productions from tokens to
|
588
576
|
# (associativity, precedence) pairs.
|
589
|
-
@production_precs.
|
590
|
-
@production_precs[id] = @token_precs[prec]
|
591
|
-
end
|
577
|
+
@production_precs.map! { |prec| @token_precs[prec] }
|
592
578
|
|
593
579
|
# Build the rest of the transition table.
|
594
|
-
|
580
|
+
each_state do |state|
|
595
581
|
#Transition states.
|
596
582
|
tstates = Hash.new { |h,k| h[k] = State.new(@symbols) }
|
597
583
|
|
@@ -622,23 +608,17 @@ module RLTK # :nodoc:
|
|
622
608
|
# Find the Accept and Reduce actions for this state.
|
623
609
|
state.each do |item|
|
624
610
|
if item.at_end?
|
625
|
-
if item.lhs ==
|
611
|
+
if item.lhs == @start_symbol
|
626
612
|
state.on(:EOS, Accept.new)
|
627
613
|
else
|
628
|
-
state.add_reduction(item.id)
|
614
|
+
state.add_reduction(@grammar.productions(:id)[item.id])
|
629
615
|
end
|
630
616
|
end
|
631
617
|
end
|
632
618
|
end
|
633
619
|
|
634
620
|
# Build the production.id -> production.lhs map.
|
635
|
-
@grammar.productions(:id).
|
636
|
-
id, production = pair
|
637
|
-
|
638
|
-
h[id] = production.lhs
|
639
|
-
|
640
|
-
h
|
641
|
-
end
|
621
|
+
@grammar.productions(:id).each { |id, production| @lh_sides[id] = production.lhs }
|
642
622
|
|
643
623
|
# Prune the parsing table for unnecessary reduce actions.
|
644
624
|
self.prune(opts[:lookahead], opts[:precedence])
|
@@ -667,8 +647,8 @@ module RLTK # :nodoc:
|
|
667
647
|
|
668
648
|
# Converts an object into an IO object as appropriate.
|
669
649
|
#
|
670
|
-
# @param [Object]
|
671
|
-
# @param [String]
|
650
|
+
# @param [Object] o Object to be converted into an IO object.
|
651
|
+
# @param [String] mode String representing the mode to open the IO object in.
|
672
652
|
#
|
673
653
|
# @return [IO, false] The IO object or false if a conversion wasn't possible.
|
674
654
|
def get_io(o, mode = 'w')
|
@@ -683,7 +663,20 @@ module RLTK # :nodoc:
|
|
683
663
|
end
|
684
664
|
end
|
685
665
|
|
686
|
-
#
|
666
|
+
# Iterate over the parser's states.
|
667
|
+
#
|
668
|
+
# @yieldparam [State] state One of the parser automaton's state objects
|
669
|
+
#
|
670
|
+
# @return [void]
|
671
|
+
def each_state
|
672
|
+
current_state = 0
|
673
|
+
while current_state < @states.count
|
674
|
+
yield @states.at(current_state)
|
675
|
+
current_state += 1
|
676
|
+
end
|
677
|
+
end
|
678
|
+
|
679
|
+
# @return [CFG] The grammar that can be parsed by this Parser.
|
687
680
|
def grammar
|
688
681
|
@grammar.clone
|
689
682
|
end
|
@@ -692,7 +685,7 @@ module RLTK # :nodoc:
|
|
692
685
|
# calculate the LALR(1) lookahead sets. Information about this
|
693
686
|
# grammar and its use can be found in the following paper:
|
694
687
|
#
|
695
|
-
# Simple Computation of LALR(1)
|
688
|
+
# Simple Computation of LALR(1) Lookahead Sets
|
696
689
|
# Manuel E. Bermudez and George Logothetis
|
697
690
|
# Information Processing Letters 31 - 1989
|
698
691
|
#
|
@@ -701,14 +694,14 @@ module RLTK # :nodoc:
|
|
701
694
|
if not @grammar_prime
|
702
695
|
@grammar_prime = CFG.new
|
703
696
|
|
704
|
-
|
697
|
+
each_state do |state|
|
705
698
|
state.each do |item|
|
706
699
|
lhs = "#{state.id}_#{item.next_symbol}".to_sym
|
707
700
|
|
708
701
|
next unless CFG::is_nonterminal?(item.next_symbol) and not @grammar_prime.productions.keys.include?(lhs)
|
709
702
|
|
710
703
|
@grammar.productions[item.next_symbol].each do |production|
|
711
|
-
rhs =
|
704
|
+
rhs = ''
|
712
705
|
|
713
706
|
cstate = state
|
714
707
|
|
@@ -729,9 +722,9 @@ module RLTK # :nodoc:
|
|
729
722
|
|
730
723
|
# Inform the parser core that a conflict has been detected.
|
731
724
|
#
|
732
|
-
# @param [Integer]
|
733
|
-
# @param [:RR, :SR]
|
734
|
-
# @param [Symbol]
|
725
|
+
# @param [Integer] state_id ID of the state where the conflict was encountered.
|
726
|
+
# @param [:RR, :SR] type Reduce/Reduce or Shift/Reduce conflict.
|
727
|
+
# @param [Symbol] sym Symbol that caused the conflict.
|
735
728
|
#
|
736
729
|
# @return [void]
|
737
730
|
def inform_conflict(state_id, type, sym)
|
@@ -742,7 +735,7 @@ module RLTK # :nodoc:
|
|
742
735
|
# are left-associative. Subsequent calls to this method will
|
743
736
|
# give their arguments higher precedence.
|
744
737
|
#
|
745
|
-
# @param [Array<Symbol>]
|
738
|
+
# @param [Array<Symbol>] symbols Symbols that are left associative.
|
746
739
|
#
|
747
740
|
# @return [void]
|
748
741
|
def left(*symbols)
|
@@ -756,7 +749,7 @@ module RLTK # :nodoc:
|
|
756
749
|
# This method is used to specify that the symbols in *symbols*
|
757
750
|
# are non-associative.
|
758
751
|
#
|
759
|
-
# @param [Array<Symbol>]
|
752
|
+
# @param [Array<Symbol>] symbols Symbols that are non-associative.
|
760
753
|
#
|
761
754
|
# @return [void]
|
762
755
|
def nonassoc(*symbols)
|
@@ -770,29 +763,28 @@ module RLTK # :nodoc:
|
|
770
763
|
# Adds productions and actions for parsing nonempty lists.
|
771
764
|
#
|
772
765
|
# @see CFG#nonempty_list_production
|
773
|
-
def
|
774
|
-
@grammar.
|
766
|
+
def build_nonempty_list_production(symbol, list_elements, separator = '')
|
767
|
+
@grammar.build_nonempty_list_production(symbol, list_elements, separator)
|
775
768
|
end
|
776
|
-
alias :nonempty_list :
|
769
|
+
alias :nonempty_list :build_nonempty_list_production
|
777
770
|
|
778
771
|
# This function is where actual parsing takes place. The
|
779
772
|
# _tokens_ argument must be an array of Token objects, the last
|
780
773
|
# of which has type EOS. By default this method will return the
|
781
|
-
# value computed by the first successful parse tree found.
|
782
|
-
# possible to adjust this behavior using the _opts_ hash as
|
783
|
-
# follows:
|
774
|
+
# value computed by the first successful parse tree found.
|
784
775
|
#
|
785
|
-
#
|
786
|
-
#
|
787
|
-
#
|
788
|
-
#
|
789
|
-
#
|
790
|
-
# Additional information for these options can be found in the
|
791
|
-
# main documentation.
|
776
|
+
# Additional information about the parsing options can be found in
|
777
|
+
# the main documentation.
|
778
|
+
#
|
779
|
+
# @param [Array<Token>] tokens Tokens to be parsed.
|
780
|
+
# @param [Hash] opts Options to use when parsing input.
|
792
781
|
#
|
793
|
-
# @
|
782
|
+
# @option opts [:first, :all] :accept Either :first or :all.
|
783
|
+
# @option opts [Object] :env The environment in which to evaluate the production action.
|
784
|
+
# @option opts [Boolean,String,IO] :parse_tree To print parse trees in the DOT language or not.
|
785
|
+
# @option opts [Boolean,String,IO] :verbose To be verbose or not.
|
794
786
|
#
|
795
|
-
# @return [Object, Array<Object>]
|
787
|
+
# @return [Object, Array<Object>] Result or results of parsing the given tokens.
|
796
788
|
def parse(tokens, opts = {})
|
797
789
|
# Get the full options hash.
|
798
790
|
opts = build_parse_opts(opts)
|
@@ -808,18 +800,18 @@ module RLTK # :nodoc:
|
|
808
800
|
stack_id = 0
|
809
801
|
|
810
802
|
# Error mode indicators.
|
811
|
-
error_mode
|
812
|
-
reduction_guard
|
803
|
+
error_mode = false
|
804
|
+
reduction_guard = false
|
813
805
|
|
814
806
|
# Our various list of stacks.
|
815
|
-
accepted
|
816
|
-
moving_on
|
817
|
-
processing
|
807
|
+
accepted = []
|
808
|
+
moving_on = []
|
809
|
+
processing = [ParseStack.new(stack_id += 1)]
|
818
810
|
|
819
811
|
# Iterate over the tokens. We don't proceed to the
|
820
812
|
# next token until every stack is done with the
|
821
813
|
# current one.
|
822
|
-
tokens.
|
814
|
+
tokens.each_with_index do |token, index|
|
823
815
|
# Check to make sure this token was seen in the
|
824
816
|
# grammar definition.
|
825
817
|
raise BadToken if not @symbols.include?(token.type)
|
@@ -937,8 +929,12 @@ module RLTK # :nodoc:
|
|
937
929
|
args, positions = stack.pop(pop_size)
|
938
930
|
opts[:env].set_positions(positions)
|
939
931
|
|
932
|
+
if not production_proc.selections.empty?
|
933
|
+
args = args.values_at(*production_proc.selections)
|
934
|
+
end
|
935
|
+
|
940
936
|
result =
|
941
|
-
if
|
937
|
+
if production_proc.arg_type == :array
|
942
938
|
opts[:env].instance_exec(args, &production_proc)
|
943
939
|
else
|
944
940
|
opts[:env].instance_exec(*args, &production_proc)
|
@@ -994,14 +990,14 @@ module RLTK # :nodoc:
|
|
994
990
|
|
995
991
|
v.puts("\n\n") if v
|
996
992
|
|
997
|
-
processing
|
998
|
-
moving_on
|
993
|
+
processing = moving_on
|
994
|
+
moving_on = []
|
999
995
|
|
1000
996
|
# If we don't have any active stacks at this point the
|
1001
997
|
# string isn't in the language.
|
1002
998
|
if opts[:accept] == :first and processing.length == 0
|
1003
999
|
v.close if v and v != $stdout
|
1004
|
-
raise NotInLanguage
|
1000
|
+
raise NotInLanguage.new(tokens[0...index], tokens[index], tokens[index.next..-1])
|
1005
1001
|
end
|
1006
1002
|
|
1007
1003
|
reduction_guard = false
|
@@ -1036,21 +1032,28 @@ module RLTK # :nodoc:
|
|
1036
1032
|
# Parser.clause. A precedence can be associated with this
|
1037
1033
|
# production by setting *precedence* to a terminal symbol.
|
1038
1034
|
#
|
1039
|
-
# @param [Symbol]
|
1040
|
-
# @param [String, nil] expression Right-hand side of the production.
|
1041
|
-
# @param [Symbol, nil]
|
1042
|
-
# @param [
|
1035
|
+
# @param [Symbol] symbol Left-hand side of the production.
|
1036
|
+
# @param [String, Symbol, nil] expression Right-hand side of the production.
|
1037
|
+
# @param [Symbol, nil] precedence Symbol representing the precedence of this production.
|
1038
|
+
# @param [:array, :splat] arg_type Method to use when passing arguments to the action.
|
1039
|
+
# @param [Proc] action Action associated with this production.
|
1043
1040
|
#
|
1044
1041
|
# @return [void]
|
1045
|
-
def production(symbol, expression = nil, precedence = nil, &action)
|
1042
|
+
def production(symbol, expression = nil, precedence = nil, arg_type = @default_arg_type, &action)
|
1046
1043
|
|
1047
1044
|
# Check the symbol.
|
1048
1045
|
if not (symbol.is_a?(Symbol) or symbol.is_a?(String)) or not CFG::is_nonterminal?(symbol)
|
1049
|
-
|
1046
|
+
raise ParserConstructionException, 'Production symbols must be Strings or Symbols and be in all lowercase.'
|
1050
1047
|
end
|
1051
1048
|
|
1052
|
-
@grammar.curr_lhs
|
1053
|
-
@curr_prec
|
1049
|
+
@grammar.curr_lhs = symbol.to_sym
|
1050
|
+
@curr_prec = precedence
|
1051
|
+
|
1052
|
+
orig_dat = nil
|
1053
|
+
if arg_type != @default_arg_type
|
1054
|
+
orig_dat = @default_arg_type
|
1055
|
+
@default_arg_type = arg_type
|
1056
|
+
end
|
1054
1057
|
|
1055
1058
|
if expression
|
1056
1059
|
self.clause(expression, precedence, &action)
|
@@ -1058,16 +1061,18 @@ module RLTK # :nodoc:
|
|
1058
1061
|
self.instance_exec(&action)
|
1059
1062
|
end
|
1060
1063
|
|
1061
|
-
@
|
1062
|
-
|
1064
|
+
@default_arg_type = orig_dat if not orig_dat.nil?
|
1065
|
+
|
1066
|
+
@grammar.curr_lhs = nil
|
1067
|
+
@curr_prec = nil
|
1063
1068
|
end
|
1064
1069
|
alias :p :production
|
1065
1070
|
|
1066
1071
|
# This method uses lookahead sets and precedence information to
|
1067
1072
|
# resolve conflicts and remove unnecessary reduce actions.
|
1068
1073
|
#
|
1069
|
-
# @param [Boolean]
|
1070
|
-
# @param [Boolean]
|
1074
|
+
# @param [Boolean] do_lookahead Prune based on lookahead sets or not.
|
1075
|
+
# @param [Boolean] do_precedence Prune based on precedence or not.
|
1071
1076
|
#
|
1072
1077
|
# @return [void]
|
1073
1078
|
def prune(do_lookahead, do_precedence)
|
@@ -1076,7 +1081,7 @@ module RLTK # :nodoc:
|
|
1076
1081
|
# If both options are false there is no pruning to do.
|
1077
1082
|
return if not (do_lookahead or do_precedence)
|
1078
1083
|
|
1079
|
-
|
1084
|
+
each_state do |state0|
|
1080
1085
|
|
1081
1086
|
#####################
|
1082
1087
|
# Lookahead Pruning #
|
@@ -1092,23 +1097,27 @@ module RLTK # :nodoc:
|
|
1092
1097
|
lookahead = Array.new
|
1093
1098
|
|
1094
1099
|
# Build the lookahead set.
|
1095
|
-
|
1100
|
+
each_state do |state1|
|
1096
1101
|
if self.check_reachability(state1, state0, production.rhs)
|
1097
1102
|
lookahead |= self.grammar_prime.follow_set("#{state1.id}_#{production.lhs}".to_sym)
|
1098
1103
|
end
|
1099
1104
|
end
|
1100
1105
|
|
1101
|
-
# Translate the G' follow symbols into G
|
1102
|
-
# symbols.
|
1103
|
-
lookahead = lookahead.map { |sym| sym.to_s.split('_').last.to_sym }.uniq
|
1106
|
+
# Translate the G' follow symbols into G
|
1107
|
+
# lookahead symbols.
|
1108
|
+
lookahead = lookahead.map { |sym| sym.to_s.split('_', 2).last.to_sym }.uniq
|
1104
1109
|
|
1105
1110
|
# Here we remove the unnecessary reductions.
|
1106
1111
|
# If there are error productions we need to
|
1107
1112
|
# scale back the amount of pruning done.
|
1108
|
-
|
1109
|
-
|
1110
|
-
|
1113
|
+
pruning_candidates = terms - lookahead
|
1114
|
+
|
1115
|
+
if terms.include?(:ERROR)
|
1116
|
+
pruning_candidates.each do |sym|
|
1117
|
+
state0.actions[sym].delete(reduction) if state0.conflict_on?(sym)
|
1111
1118
|
end
|
1119
|
+
else
|
1120
|
+
pruning_candidates.each { |sym| state0.actions[sym].delete(reduction) }
|
1112
1121
|
end
|
1113
1122
|
end
|
1114
1123
|
end
|
@@ -1153,8 +1162,8 @@ module RLTK # :nodoc:
|
|
1153
1162
|
# * The token is left associative and the current action is a Reduce
|
1154
1163
|
# * The token is right associative and the current action is a Shift
|
1155
1164
|
if prec > max_prec or (prec == max_prec and tassoc == (a.is_a?(Shift) ? :right : :left))
|
1156
|
-
max_prec
|
1157
|
-
selected_action
|
1165
|
+
max_prec = prec
|
1166
|
+
selected_action = a
|
1158
1167
|
|
1159
1168
|
elsif prec == max_prec and assoc == :nonassoc
|
1160
1169
|
raise ParserConstructionException, 'Non-associative token found during conflict resolution.'
|
@@ -1282,23 +1291,23 @@ module RLTK # :nodoc:
|
|
1282
1291
|
|
1283
1292
|
# Instantiate a new ParserStack object.
|
1284
1293
|
#
|
1285
|
-
# @param [Integer]
|
1286
|
-
# @param [Array<Object>]
|
1287
|
-
# @param [Array<Integer>]
|
1288
|
-
# @param [Array<Integer>]
|
1289
|
-
# @param [Array<Array<Integer>>]
|
1290
|
-
# @param [Array<Symbol>]
|
1291
|
-
# @param [Array<StreamPosition>]
|
1294
|
+
# @param [Integer] id ID for this parse stack. Used by GLR algorithm.
|
1295
|
+
# @param [Array<Object>] ostack Output stack. Holds results of {Reduce} and {Shift} actions.
|
1296
|
+
# @param [Array<Integer>] sstack State stack. Holds states that have been shifted due to {Shift} actions.
|
1297
|
+
# @param [Array<Integer>] nstack Node stack. Holds dot language IDs for nodes in the parse tree.
|
1298
|
+
# @param [Array<Array<Integer>>] connections Integer pairs representing edges in the parse tree.
|
1299
|
+
# @param [Array<Symbol>] labels Labels for nodes in the parse tree.
|
1300
|
+
# @param [Array<StreamPosition>] positions Position data for symbols that have been shifted.
|
1292
1301
|
def initialize(id, ostack = [], sstack = [0], nstack = [], connections = [], labels = [], positions = [])
|
1293
1302
|
@id = id
|
1294
1303
|
|
1295
|
-
@node_stack
|
1296
|
-
@output_stack
|
1297
|
-
@state_stack
|
1304
|
+
@node_stack = nstack
|
1305
|
+
@output_stack = ostack
|
1306
|
+
@state_stack = sstack
|
1298
1307
|
|
1299
|
-
@connections
|
1300
|
-
@labels
|
1301
|
-
@positions
|
1308
|
+
@connections = connections
|
1309
|
+
@labels = labels
|
1310
|
+
@positions = positions
|
1302
1311
|
end
|
1303
1312
|
|
1304
1313
|
# Branch this stack, effectively creating a new copy of its
|
@@ -1308,8 +1317,24 @@ module RLTK # :nodoc:
|
|
1308
1317
|
#
|
1309
1318
|
# @return [ParseStack]
|
1310
1319
|
def branch(new_id)
|
1311
|
-
|
1312
|
-
|
1320
|
+
# We have to do a deeper copy of the output stack to avoid
|
1321
|
+
# interactions between the Proc objects for the different
|
1322
|
+
# parsing paths.
|
1323
|
+
#
|
1324
|
+
# The begin/rescue block is needed because some classes
|
1325
|
+
# respond to `clone` but always raise an error.
|
1326
|
+
new_output_stack = @output_stack.map do |o|
|
1327
|
+
# Check to see if we can obtain a deep copy.
|
1328
|
+
if 0.respond_to?(:copy)
|
1329
|
+
o.copy
|
1330
|
+
|
1331
|
+
else
|
1332
|
+
begin o.clone rescue o end
|
1333
|
+
end
|
1334
|
+
end
|
1335
|
+
|
1336
|
+
ParseStack.new(new_id, new_output_stack, @state_stack.clone,
|
1337
|
+
@node_stack.clone, @connections.clone, @labels.clone, @positions.clone)
|
1313
1338
|
end
|
1314
1339
|
|
1315
1340
|
# @return [StreamPosition] Position data for the last symbol on the stack.
|
@@ -1347,7 +1372,7 @@ module RLTK # :nodoc:
|
|
1347
1372
|
#
|
1348
1373
|
# @param [Integer] n Number of objects to pop off the stack.
|
1349
1374
|
#
|
1350
|
-
# @return [Array
|
1375
|
+
# @return [Array(Object, StreamPosition)] Values popped from the output and positions stacks.
|
1351
1376
|
def pop(n = 1)
|
1352
1377
|
@state_stack.pop(n)
|
1353
1378
|
|
@@ -1406,27 +1431,27 @@ module RLTK # :nodoc:
|
|
1406
1431
|
# @return [Integer] State's ID.
|
1407
1432
|
attr_accessor :id
|
1408
1433
|
|
1409
|
-
# @return
|
1434
|
+
# @return [Array<CFG::Item>] Item objects that comprise this state
|
1410
1435
|
attr_reader :items
|
1411
1436
|
|
1412
|
-
# @return [Array<Action>]
|
1437
|
+
# @return [Hash{Symbol => Array<Action>}] Maps lookahead symbols to actions
|
1413
1438
|
attr_reader :actions
|
1414
1439
|
|
1415
1440
|
# Instantiate a new State object.
|
1416
1441
|
#
|
1417
|
-
# @param [Array<
|
1418
|
-
# @param [Array<CFG::Item>]
|
1442
|
+
# @param [Array<Symbol>] tokens Tokens that represent this state
|
1443
|
+
# @param [Array<CFG::Item>] items Items that make up this state
|
1419
1444
|
def initialize(tokens, items = [])
|
1420
|
-
@id
|
1421
|
-
@items
|
1422
|
-
@actions
|
1445
|
+
@id = nil
|
1446
|
+
@items = items
|
1447
|
+
@actions = tokens.inject(Hash.new) { |h, t| h[t] = Array.new; h }
|
1423
1448
|
end
|
1424
1449
|
|
1425
1450
|
# Compare one State to another. Two States are equal if they
|
1426
1451
|
# have the same items or, if the items have been cleaned, if
|
1427
1452
|
# the States have the same ID.
|
1428
1453
|
#
|
1429
|
-
# @param [State]
|
1454
|
+
# @param [State] other Another State to compare to
|
1430
1455
|
#
|
1431
1456
|
# @return [Boolean]
|
1432
1457
|
def ==(other)
|
@@ -1435,11 +1460,11 @@ module RLTK # :nodoc:
|
|
1435
1460
|
|
1436
1461
|
# Add a Reduce action to the state.
|
1437
1462
|
#
|
1438
|
-
# @param [
|
1463
|
+
# @param [Production] production Production used to perform the reduction
|
1439
1464
|
#
|
1440
1465
|
# @return [void]
|
1441
|
-
def add_reduction(
|
1442
|
-
action = Reduce.new(
|
1466
|
+
def add_reduction(production)
|
1467
|
+
action = Reduce.new(production)
|
1443
1468
|
|
1444
1469
|
# Reduce actions are not allowed for the ERROR terminal.
|
1445
1470
|
@actions.each { |k, v| if CFG::is_terminal?(k) and k != :ERROR then v << action end }
|
@@ -1507,7 +1532,11 @@ module RLTK # :nodoc:
|
|
1507
1532
|
#
|
1508
1533
|
# @return [void]
|
1509
1534
|
def each
|
1510
|
-
|
1535
|
+
current_item = 0
|
1536
|
+
while current_item < @items.count
|
1537
|
+
yield @items.at(current_item)
|
1538
|
+
current_item += 1
|
1539
|
+
end
|
1511
1540
|
end
|
1512
1541
|
|
1513
1542
|
# Specify an Action to perform when the input token is *symbol*.
|
@@ -1535,6 +1564,22 @@ module RLTK # :nodoc:
|
|
1535
1564
|
end
|
1536
1565
|
end
|
1537
1566
|
|
1567
|
+
# A subclass of Proc that indicates how it should be passed arguments
|
1568
|
+
# by the parser.
|
1569
|
+
class ProdProc < Proc
|
1570
|
+
# @return [:array, :splat] Method that should be used to pass arguments to this proc.
|
1571
|
+
attr_reader :arg_type
|
1572
|
+
|
1573
|
+
# @return [Array<Integer>] Mask for selection of tokens to pass to action. Empty mask means pass all.
|
1574
|
+
attr_reader :selections
|
1575
|
+
|
1576
|
+
def initialize(arg_type = :splat, selections = [])
|
1577
|
+
super()
|
1578
|
+
@arg_type = arg_type
|
1579
|
+
@selections = selections
|
1580
|
+
end
|
1581
|
+
end
|
1582
|
+
|
1538
1583
|
# The Action class is used to indicate what action the parser should
|
1539
1584
|
# take given a current state and input token.
|
1540
1585
|
class Action
|
@@ -1568,9 +1613,17 @@ module RLTK # :nodoc:
|
|
1568
1613
|
# The Reduce class indicates to the parser that it should reduce the
|
1569
1614
|
# input stack by the rule specified by Reduce.id.
|
1570
1615
|
class Reduce < Action
|
1616
|
+
|
1617
|
+
# @param [Production] production Production to reduce by
|
1618
|
+
def initialize(production)
|
1619
|
+
super(production.id)
|
1620
|
+
|
1621
|
+
@production = production
|
1622
|
+
end
|
1623
|
+
|
1571
1624
|
# @return [String] String representation of this action.
|
1572
1625
|
def to_s
|
1573
|
-
"Reduce by Production #{self.id}"
|
1626
|
+
"Reduce by Production #{self.id} : #{@production}"
|
1574
1627
|
end
|
1575
1628
|
end
|
1576
1629
|
|