rltk 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1354 @@
1
+ # Author: Chris Wailes <chris.wailes@gmail.com>
2
+ # Project: Ruby Language Toolkit
3
+ # Date: 2011/01/19
4
+ # Description: This file contains the base class for parsers that use RLTK.
5
+
6
+ ############
7
+ # Requires #
8
+ ############
9
+
10
+ # Ruby Language Toolkit
11
+ require 'rltk/cfg'
12
+
13
+ #######################
14
+ # Classes and Modules #
15
+ #######################
16
+
17
+ module RLTK # :nodoc:
18
+
19
# A BadToken exception indicates that a token was observed in the input
# stream that wasn't used in the grammar's definition.
#
# It derives from StandardError (rather than Exception) so that an ordinary
# +rescue+ clause catches it.
class BadToken < StandardError
  # Returns the fixed description of this error.
  def to_s
    'Unexpected token. Token not present in grammar definition.'
  end
end
26
+
27
# A NotInLanguage exception is raised whenever there is no valid parse tree
# for a given token stream. In other words, the input string is not in the
# defined language.
#
# It derives from StandardError (rather than Exception) so that an ordinary
# +rescue+ clause catches it.
class NotInLanguage < StandardError
  # Returns the fixed description of this error.
  def to_s
    'String not in language.'
  end
end
35
+
36
# An exception of this type is raised when the parser encountered an error
# that was handled by an error production.
#
# It derives from StandardError (rather than Exception) so that an ordinary
# +rescue+ clause catches it.
class HandledError < StandardError

  # The errors as reported by the parser.
  attr_reader :errors

  # The result that would have been returned by the call to _parse_.
  attr_reader :result

  # Instantiate a new HandledError object carrying _errors_ and the
  # _result_ of the parse.
  def initialize(errors, result)
    # No message is passed, so the default message (the class name) is kept.
    super()

    @errors = errors
    @result = result
  end
end
52
+
53
# Used for errors that occur during parser construction. Derives from
# StandardError so that an ordinary +rescue+ clause catches it.
class ParserConstructionError < StandardError; end
55
+
56
# Used for runtime errors that are the parser's fault. These should never
# be observed in the wild. Derives from StandardError so that an ordinary
# +rescue+ clause catches it.
class InternalParserError < StandardError; end
59
+
60
+ # The Parser class may be sub-classed to produce new parsers. These
61
+ # parsers have a lot of features, and are described in the main
62
+ # documentation.
63
+ class Parser
64
+
65
# Called when the Parser class is sub-classed, this method adds a
# ParserCore to the new class, and installs some needed class and
# instance methods.
def Parser.inherited(klass)
  klass.class_exec do
    @core = ParserCore.new

    # Returns this class's ParserCore object.
    def self.core
      @core
    end

    # Routes method calls to the new subclass to the ParserCore
    # object.
    def self.method_missing(method, *args, &proc)
      @core.send(method, *args, &proc)
    end

    # Keeps respond_to? in agreement with method_missing: the subclass
    # answers to whatever its ParserCore answers to.
    def self.respond_to_missing?(method, include_private = false)
      @core.respond_to?(method, include_private) || super
    end

    # Alias for RLTK::Parser::ParserCore.p that needs to be
    # manually connected.
    def self.p(*args, &proc)
      @core.p(*args, &proc)
    end

    # Parses the given token stream using a newly instantiated
    # environment (unless one is supplied under :env). See
    # ParserCore.parse for a description of the _opts_ option hash.
    def self.parse(tokens, opts = {})
      # Work on a copy so the caller's hash is not modified; otherwise a
      # hash reused across calls would carry the previous environment.
      opts = opts.dup
      opts[:env] ||= self::Environment.new

      @core.parse(tokens, opts)
    end

    # Instantiates a new parser and creates an environment to be
    # used for subsequent calls.
    def initialize
      @env = self.class::Environment.new
    end

    # Returns the environment used by the instantiated parser.
    def env
      @env
    end

    # Parses the given token stream using the encapsulated
    # environment. See ParserCore.parse for a description of
    # the _opts_ option hash.
    #
    # NOTE(review): the encapsulated environment is not reset here between
    # calls; confirm whether Environment#reset is expected to be invoked by
    # the caller.
    def parse(tokens, opts = {})
      self.class.core.parse(tokens, {:env => @env}.update(opts))
    end
  end
end
117
+
118
# All actions passed to ParserCore.rule and ParserCore.clause are
# evaluated inside an instance of the Environment class or its
# subclass (which must have the same name).
class Environment
  # Indicates if an error was encountered and handled.
  attr_accessor :he

  # A list of all objects added using the _error_ method.
  attr_reader :errors

  # Instantiate a new Environment object.
  def initialize
    # Start with an empty position list so that _pos_ returns nil, rather
    # than failing on nil, when called before any positions were set.
    @positions = []

    self.reset
  end

  # Adds an object to the list of errors.
  def error(o)
    @errors << o
  end

  # Returns the position object for the symbol at location n, indexed from
  # zero (negative indices count from the end), as last given to
  # _set_positions_.
  def pos(n)
    @positions[n]
  end

  # Reset any variables that need to be re-initialized between
  # parse calls: the error list is emptied and the handled-error flag
  # cleared.
  def reset
    @errors = Array.new
    @he = false
  end

  # Setter for the _positions_ array.
  def set_positions(positions)
    @positions = positions
  end
end
156
+
157
+ # The ParserCore class provides most of the functionality of the Parser
158
+ # class. A ParserCore is instantiated for each subclass of Parser,
159
+ # thereby allowing multiple parsers to be defined inside a single Ruby
160
+ # program.
161
+ class ParserCore
162
+
163
+ # The grammar that can be parsed by this ParserCore. The grammar
164
+ # is used internally and should not be manipulated outside of the
165
+ # ParserCore object.
166
+ attr_reader :grammar
167
+
168
# Instantiates a new ParserCore object with the needed data
# structures, and registers the grammar callback that supplies the
# default actions for productions reported with type :*, :+ or :'?'.
def initialize
  # Set the default argument handling policy.
  @args = :splat

  @curr_lhs  = nil
  @curr_prec = nil

  @grammar   = CFG.new
  @conflicts = Hash.new { |hash, key| hash[key] = Array.new }

  @lh_sides = Hash.new
  @procs    = Array.new
  @states   = Array.new

  # Variables for dealing with precedence.
  @prec_counts      = {:left => 0, :right => 0, :non => 0}
  @production_precs = Array.new
  @token_precs      = Hash.new

  @grammar.callback do |prod, type, num|
    first = (num == :first)

    # Right-hand side values arrive as separate (splatted) arguments.
    default_action =
      case type
      when :*
        first ? Proc.new { || [] } : Proc.new { |o, os| [o] + os }
      when :+
        first ? Proc.new { |o| [o] } : Proc.new { |o, os| [o] + os }
      when :'?'
        first ? Proc.new { || nil } : Proc.new { |o| o }
      end

    @procs[prod.id] = [default_action, prod.rhs.length]

    @production_precs[prod.id] = prod.last_terminal
  end
end
214
+
215
# If _state_ (or its equivalent) is not in the state list it is
# added and its ID is returned. If there is already a state
# that compares equal to _state_ in the state list, that state's index
# is returned and _state_ is discarded.
def add_state(state)
  existing = @states.index(state)
  return existing if existing

  # A new state's ID is its index in the state list.
  state.id = @states.length
  @states.push(state)

  @states.length - 1
end
230
+
231
# Calling this method will cause the parser to pass right-hand
# side values as arrays instead of splats. This method must be
# called before ANY calls to ParserCore.production; if the grammar
# already has productions the call does nothing and returns nil.
def array_args
  return unless @grammar.productions.length == 0

  @args = :array

  # Replace the grammar callback with one whose default actions take a
  # single array _v_ of right-hand side values.
  @grammar.callback do |p, type, num|
    @procs[p.id] =
      [if type == :*
        if num == :first
          Proc.new { |v| [] }
        else
          Proc.new { |v| [v[0]] + v[1] }
        end
      elsif type == :+
        if num == :first
          # Wrap the single element in a list, matching the splat-mode
          # action ([o]) and giving the recursive case below a list to
          # append to.
          Proc.new { |v| [v[0]] }
        else
          Proc.new { |v| [v[0]] + v[1] }
        end
      elsif type == :'?'
        if num == :first
          Proc.new { |v| nil }
        else
          Proc.new { |v| v[0] }
        end
      end, p.rhs.length]

    @production_precs[p.id] = p.last_terminal
  end
end
264
+
265
# Build a hash with the default options for ParserCore.finalize
# and then update it with the values from _opts_. The :explain entry
# of the result is always the value converted by _get_io_.
#
# The caller's _opts_ hash is left unmodified.
def build_finalize_opts(opts)
  merged = {
    :explain    => false,
    :lookahead  => true,
    :precedence => true,
    :use        => false
  }.update(opts)

  merged[:explain] = get_io(opts[:explain])

  merged
end
277
+
278
# Build a hash with the default options for ParserCore.parse and
# then update it with the values from _opts_. The :parse_tree and
# :verbose entries of the result are always the values converted by
# _get_io_.
#
# The caller's _opts_ hash is left unmodified, so it can be reused for
# later calls without carrying IO objects from an earlier one.
def build_parse_opts(opts)
  merged = {
    :accept     => :first,
    :env        => Environment.new,
    :parse_tree => false,
    :verbose    => false
  }.update(opts)

  merged[:parse_tree] = get_io(opts[:parse_tree])
  merged[:verbose]    = get_io(opts[:verbose])

  merged
end
291
+
292
# Checks the sanity of the constructed parser. Every non-terminal
# used in the grammar definition must appear on the left-hand side of
# one or more productions, and no state may hold an invalid action.
# Conflicts reported by a state for a terminal are recorded through
# _inform_conflict_. If a problem is encountered a
# ParserConstructionError is raised.
def check_sanity
  # Every non-terminal must be the left-hand side of some production.
  @grammar.nonterms.each do |sym|
    next if @lh_sides.values.include?(sym)

    raise ParserConstructionError, "Non-terminal #{sym} does not appear on the left-hand side of any production."
  end

  # Check the actions in each state.
  @states.each do |state|
    state.actions.each do |sym, actions|
      if CFG::is_terminal?(sym)
        # Actions for terminals.
        actions.each do |action|
          case action
          when Accept
            # Accepting is only valid on the end-of-stream symbol.
            unless sym == :EOS
              raise ParserConstructionError, "Accept action found for terminal #{sym} in state #{state.id}."
            end

          when GoTo, Reduce, Shift
            # Permitted action type; nothing to check.

          else
            raise ParserConstructionError, "Object of type #{action.class} found in actions for terminal " +
              "#{sym} in state #{state.id}."
          end
        end

        conflict = state.conflict_on?(sym)
        inform_conflict(state.id, conflict, sym) if conflict

      else
        # Actions for non-terminals: at most one, and it must be a GoTo.
        count = actions.length

        if count > 1
          raise ParserConstructionError, "State #{state.id} has multiple GoTo actions for non-terminal #{sym}."

        elsif count == 1 and not actions.first.is_a?(GoTo)
          raise ParserConstructionError, "State #{state.id} has non-GoTo action for non-terminal #{sym}."
        end
      end
    end
  end
end
341
+
342
# This method checks to see if the parser would be in parse state
# _dest_ after starting in state _start_ and reading _symbols_.
# Returns true or false.
def check_reachability(start, dest, symbols)
  current = start

  symbols.each do |sym|
    candidates = @states[current.id].on?(sym)

    # Only Shift actions move the parser forward on a terminal.
    candidates = candidates.select { |a| a.is_a?(Shift) } if CFG::is_terminal?(sym)

    # No way to consume this symbol from here: no path exists.
    return false if candidates.empty?

    # There can only be one Shift action for terminals and
    # one GoTo action for non-terminals, so the first action
    # is the only one in the list.
    current = @states[candidates.first.id]
  end

  current.id == dest.id
end
366
+
367
# Declares a new clause inside of a production. The right-hand
# side is specified by _expression_ and the precedence of this
# production can be changed by setting the _precedence_ argument
# to some terminal symbol.
#
# In splat mode (the default) an _action_ block is required and its
# arity must equal the number of right-hand side symbols; otherwise a
# ParserConstructionError is raised.
def clause(expression, precedence = nil, &action)
  # Use the curr_prec only if it isn't overridden for this
  # clause.
  precedence ||= @curr_prec

  production = @grammar.clause(expression)

  if @args == :splat
    # Without a block there is nothing whose arity could be checked;
    # report that as a construction error instead of failing on nil.
    if action.nil?
      raise ParserConstructionError, 'No action given for clause. An action block is required.'
    end

    # Check to make sure the action's arity matches the number
    # of symbols on the right-hand side.
    if action.arity != production.rhs.length
      raise ParserConstructionError, 'Incorrect number of arguments to action. Action arity must match the number of ' +
        'terminals and non-terminals in the clause.'
    end
  end

  # Add the action to our proc list.
  @procs[production.id] = [action, production.rhs.length]

  # If no precedence is specified use the precedence of the
  # last terminal in the production.
  @production_precs[production.id] = precedence || production.last_terminal
end
392
+
393
+ alias :c :clause
394
+
395
# Removes resources that were needed to generate the parser but
# aren't needed when actually parsing input. Returns the state list.
def clean
  # We've told the developer about conflicts by now.
  @conflicts = nil

  # Drop the grammar and the grammar'.
  @grammar       = nil
  @grammar_prime = nil

  # Drop precedence and bookkeeping information. The bookkeeping fields
  # are named @curr_lhs / @curr_prec, matching the constructor.
  @curr_lhs  = nil
  @curr_prec = nil

  @prec_counts      = nil
  @production_precs = nil
  @token_precs      = nil

  # Drop the items from each of the states.
  @states.each { |state| state.clean }
end
416
+
417
# This function will print a description of the parser to the
# provided IO object: the productions, the tokens, summary table
# information, and then the items, actions and conflicts of every
# state. The IO object is closed afterwards unless it is $stdout.
#
# Raises a ParserConstructionError when the grammar has already been
# dropped or no states have been built.
def explain(io)
  unless @grammar && !@states.empty?
    raise ParserConstructionError, 'Parser.explain called outside of finalize.'
  end

  io.puts("###############", "# Productions #", "###############")
  io.puts

  # Print the productions, grouped as the grammar stores them.
  @grammar.productions.each do |_sym, group|
    group.each do |prod|
      io.print("\tProduction #{prod.id}: #{prod.to_s}")

      prec = @production_precs[prod.id]
      io.print(" : (#{prec.first} , #{prec.last})") if prec

      io.puts
    end

    io.puts
  end

  io.puts("##########", "# Tokens #", "##########")
  io.puts

  # Print the terminals in name order, with precedence where declared.
  @grammar.terms.sort_by { |term| term.to_s }.each do |term|
    io.print("\t#{term}")

    prec = @token_precs[term]
    io.print(" : (#{prec.first}, #{prec.last})") if prec

    io.puts
  end

  io.puts

  io.puts("#####################", "# Table Information #", "#####################")
  io.puts

  io.puts("\tStart symbol: #{@grammar.start_symbol}")
  io.puts

  io.puts("\tTotal number of states: #{@states.length}")
  io.puts

  io.puts("\tTotal conflicts: #{@conflicts.values.flatten(1).length}")
  io.puts

  @conflicts.each do |state_id, found|
    io.puts("\tState #{state_id} has #{found.length} conflict(s)")
  end

  io.puts unless @conflicts.empty?

  # Print the parse table.
  io.puts("###############", "# Parse Table #", "###############")
  io.puts

  @states.each do |state|
    io.puts("State #{state.id}:")
    io.puts

    io.puts("\t# ITEMS #")

    # Width of the longest left-hand side; passed to each item's to_s.
    width = state.items.map { |item| item.lhs.to_s.length }.push(0).max

    state.each { |item| io.puts("\t#{item.to_s(width)}") }

    io.puts
    io.puts("\t# ACTIONS #")

    state.actions.keys.sort_by { |sym| sym.to_s }.each do |sym|
      state.actions[sym].each { |action| io.puts("\tOn #{sym} #{action}") }
    end

    io.puts
    io.puts("\t# CONFLICTS #")

    state_conflicts = @conflicts[state.id]

    if state_conflicts.empty?
      io.puts("\tNone\n\n")
    else
      state_conflicts.each do |type, sym|
        label = (type == :SR) ? "Shift/Reduce" : "Reduce/Reduce"

        io.puts("\t#{label} conflict on #{sym}")
      end

      io.puts
    end
  end

  # Close any IO objects that aren't $stdout.
  io.close if io.is_a?(IO) and io != $stdout
end
530
+
531
# This method will finalize the parser causing the construction
# of states and their actions, and the resolution of conflicts
# using lookahead and precedence information.
#
# The _opts_ hash may contain the following options, which are
# described in more detail in the main documentation:
#
# * :explain    - To explain the parser or not.
# * :lookahead  - To use lookahead info for conflict resolution.
# * :precedence - To use precedence info for conflict resolution.
# * :use        - A file name or object that is used to load/save the parser.
#
# No calls to ParserCore.production may appear after the call to
# ParserCore.finalize.
def finalize(opts = {})

  # Get the full options hash.
  opts = self.build_finalize_opts(opts)
  use  = opts[:use]

  # Get the name of the file in which the parser is defined.
  # NOTE(review): this relies on a fixed call depth and on the path
  # containing no ':' - confirm for other call paths and platforms.
  def_file = caller()[2].split(':')[0]

  # A saved parser is only loaded when it is newer than the file that
  # defines the parser.
  saved_is_fresh =
    if use.is_a?(String)
      File.exist?(use) and File.mtime(use) > File.mtime(def_file)
    elsif use.is_a?(File)
      use.mtime > File.mtime(def_file)
    else
      false
    end

  if saved_is_fresh
    # Un-marshal our saved data structures.
    io = self.get_io(use, 'rb')

    begin
      @lh_sides, @states, @symbols = Marshal.load(io)
    ensure
      # Only close what was opened here from a file name.
      io.close if use.is_a?(String)
    end

    # Remove any un-needed data and return.
    return self.clean
  end

  # Grab all of the symbols that comprise the grammar (besides
  # the start symbol).
  @symbols = @grammar.symbols << :ERROR

  # Add our starting state to the state list.
  start_production = @grammar.production(:start, @grammar.start_symbol.to_s).first
  start_state      = State.new(@symbols, [start_production.to_item])

  start_state.close(@grammar.productions)

  self.add_state(start_state)

  # Translate the precedence of productions from tokens to
  # (associativity, precedence) pairs.
  @production_precs.map! { |prec| @token_precs[prec] }

  # Build the rest of the transition table. States added by add_state
  # inside the loop are appended to @states and visited in turn.
  @states.each do |state|
    # Transition states.
    tstates = Hash.new { |h, k| h[k] = State.new(@symbols) }

    # Bin each item in this set into reachable transition
    # states.
    state.each do |item|
      if (next_symbol = item.next_symbol)
        tstates[next_symbol] << item.copy
      end
    end

    # For each transition state:
    #  1) Get transition symbol
    #  2) Advance dot
    #  3) Close it
    #  4) Get state id and add transition
    tstates.each do |symbol, tstate|
      tstate.each { |item| item.advance }

      tstate.close(@grammar.productions)

      id = self.add_state(tstate)

      # Add Goto and Shift actions.
      state.on(symbol, CFG::is_nonterminal?(symbol) ? GoTo.new(id) : Shift.new(id))
    end

    # Find the Accept and Reduce actions for this state.
    state.each do |item|
      if item.at_end?
        if item.lhs == :start
          state.on(:EOS, Accept.new)
        else
          state.add_reduction(item.id)
        end
      end
    end
  end

  # Build the production.id -> production.lhs map.
  @grammar.productions(:id).to_a.each do |id, production|
    @lh_sides[id] = production.lhs
  end

  # Prune the parsing table for unnecessary reduce actions.
  self.prune(opts[:lookahead], opts[:precedence])

  # Check the parser for inconsistencies.
  self.check_sanity

  # Print the table if requested.
  self.explain(opts[:explain]) if opts[:explain]

  # Remove any data that is no longer needed.
  self.clean

  # Store the parser's final data structures if requested.
  if use
    io = self.get_io(use, 'wb')

    begin
      Marshal.dump([@lh_sides, @states, @symbols], io)
    ensure
      # Close (and thereby flush) the file when it was opened here from a
      # file name; caller-supplied IO objects are left open.
      io.close if use.is_a?(String)
    end
  end
end
650
+
651
# Converts an object into an IO object as appropriate:
#
# * true      - $stdout
# * a String  - the file of that name, opened with _mode_
# * an IO     - the object itself
# * otherwise - false
def get_io(o, mode = 'w')
  case o
  when TrueClass then $stdout
  when String    then File.open(o, mode)
  when IO        then o
  else false
  end
end
663
+
664
# This method generates and memoizes the G' grammar used to
# calculate the LALR(1) lookahead sets. Information about this
# grammar and its use can be found in the following paper:
#
# Simple Computation of LALR(1) Lookahead Sets
# Manuel E. Bermudez and George Logothetis
# Information Processing Letters 31 - 1989
#
# Symbols of G' are named "<state id>_<symbol>".
def grammar_prime
  return @grammar_prime if @grammar_prime

  @grammar_prime = CFG.new

  @states.each do |state|
    state.each do |item|
      upcoming = item.next_symbol
      lhs      = "#{state.id}_#{upcoming}".to_sym

      # Only non-terminals after the dot yield productions, and each
      # left-hand side is expanded once.
      next unless CFG::is_nonterminal?(upcoming) and not @grammar_prime.productions.keys.include?(lhs)

      @grammar.productions[upcoming].each do |production|
        walker = state

        # Follow the right-hand side through the state table, naming each
        # symbol after the state it is read in.
        pieces = production.rhs.map do |symbol|
          piece  = "#{walker.id}_#{symbol} "
          walker = @states[walker.on?(symbol).first.id]

          piece
        end

        @grammar_prime.production(lhs, pieces.join)
      end
    end
  end

  @grammar_prime
end
700
+
701
# Inform the parser core that a conflict has been detected: a
# [type, sym] pair is appended to the conflict list of state _state_id_.
def inform_conflict(state_id, type, sym)
  @conflicts[state_id].push([type, sym])
end
705
+
706
# This method is used to specify that the symbols in _symbols_
# are left associative. Subsequent calls to this method will
# give their arguments higher precedence. Returns the symbols as an
# array of Symbols.
def left(*symbols)
  level = (@prec_counts[:left] += 1)
  names = symbols.map { |s| s.to_sym }

  names.each { |name| @token_precs[name] = [:left, level] }
end
716
+
717
# This method is used to specify that the symbols in _symbols_
# are non-associative. Each is recorded with the associativity tag :non
# and the next non-associative precedence level. Returns the symbols as
# an array of Symbols.
def nonassoc(*symbols)
  level = (@prec_counts[:non] += 1)
  names = symbols.map { |s| s.to_sym }

  names.each { |name| @token_precs[name] = [:non, level] }
end
726
+
727
# This function is where actual parsing takes place. The
# _tokens_ argument must be an array of Token objects, the last
# of which has type EOS. By default this method will return the
# value computed by the first successful parse tree found. It is
# possible to adjust this behavior using the _opts_ hash as
# follows:
#
# * :accept     - Either :first or :all.
# * :env        - The environment in which to evaluate the production actions.
# * :parse_tree - To print parse trees in the DOT language or not.
# * :verbose    - To be verbose or not.
#
# Additional information for these options can be found in the
# main documentation.
#
# Raises BadToken for a token type unknown to the parser, NotInLanguage
# when no stack survives in :first mode, and HandledError when the input
# was accepted after an error production was used. The verbose stream is
# closed on every exit path unless it is $stdout.
def parse(tokens, opts = {})
  # Get the full options hash.
  opts = self.build_parse_opts(opts)
  v    = opts[:verbose]

  if v
    v.puts("Input tokens:")
    v.puts(tokens.map { |t| t.type }.inspect)
    v.puts
  end

  # Stack IDs to keep track of them during parsing.
  stack_id = 0

  # Error mode indicators.
  error_mode      = false
  reduction_guard = false

  # Our various list of stacks.
  accepted   = []
  moving_on  = []
  processing = [ParseStack.new(stack_id += 1)]

  # Iterate over the tokens. We don't proceed to the
  # next token until every stack is done with the
  # current one.
  tokens.each do |token|
    # Check to make sure this token was seen in the
    # grammar definition.
    raise BadToken if not @symbols.include?(token.type)

    v.puts("Current token: #{token.type}#{if token.value then "(#{token.value})" end}") if v

    # Iterate over the stacks until each one is done.
    while (stack = processing.shift)
      # Get the available actions for this stack.
      actions = @states[stack.state].on?(token.type)

      if actions.empty?
        # If we are already in error mode and there
        # are no actions we skip this token.
        if error_mode
          moving_on << stack
          next
        end

        # We would be dropping the last stack so we
        # are going to go into error mode.
        if accepted.empty? and moving_on.empty? and processing.empty?
          # Try and find a valid error state.
          while stack.state
            if (actions = @states[stack.state].on?(:ERROR)).empty?
              # This state doesn't have an
              # error production. Moving on.
              stack.pop
            else
              # Enter the found error state.
              stack.push(actions.first.id, nil, :ERROR, token.position)

              break
            end
          end

          if stack.state
            # We found a valid error state.
            error_mode = reduction_guard = true
            opts[:env].he = true
            processing << stack

            v.puts('Invalid input encountered. Entering error handling mode.') if v
          else
            # No valid error states could be
            # found. Time to print a message
            # and leave.

            v.puts("No more actions for stack #{stack.id}. Dropping stack.") if v
          end
        else
          v.puts("No more actions for stack #{stack.id}. Dropping stack.") if v
        end

        next
      end

      # Make (stack, action) pairs, duplicating the
      # stack as necessary.
      pairs = [[stack, actions.pop]] + actions.map { |action| [stack.branch(stack_id += 1), action] }

      pairs.each do |cur_stack, action|
        if v
          v.puts
          v.puts('Current stack:')
          v.puts("\tID: #{cur_stack.id}")
          v.puts("\tState stack:\t#{cur_stack.state_stack.inspect}")
          v.puts("\tOutput Stack:\t#{cur_stack.output_stack.inspect}")
          v.puts
          v.puts("Action taken: #{action.to_s}")
        end

        if action.is_a?(Accept)
          if opts[:accept] == :all
            accepted << cur_stack
          else
            v.puts('Accepting input.') if v
            opts[:parse_tree].puts(cur_stack.tree) if opts[:parse_tree]

            if opts[:env].he
              raise HandledError.new(opts[:env].errors, cur_stack.result)
            else
              return cur_stack.result
            end
          end

        elsif action.is_a?(Reduce)
          # Get the production associated with this reduction.
          production_proc, pop_size = @procs[action.id]

          if not production_proc
            raise InternalParserError, "No production #{action.id} found."
          end

          args, positions = cur_stack.pop(pop_size)
          opts[:env].set_positions(positions)

          # Evaluate the action inside the environment, passing the
          # right-hand side values per the argument handling policy.
          result =
            if @args == :array
              opts[:env].instance_exec(args, &production_proc)
            else
              opts[:env].instance_exec(*args, &production_proc)
            end

          if (goto = @states[cur_stack.state].on?(@lh_sides[action.id]).first)

            v.puts("Going to state #{goto.id}.\n") if v

            pos0 = nil

            if args.empty?
              # Empty productions need to be
              # handled specially.
              pos0 = cur_stack.position

              pos0.stream_offset += pos0.length + 1
              pos0.line_offset   += pos0.length + 1

              pos0.length = 0
            else
              # Span from the start of the first symbol to the end of
              # the last one.
              pos0 = opts[:env].pos( 0)
              pos1 = opts[:env].pos(-1)

              pos0.length = (pos1.stream_offset + pos1.length) - pos0.stream_offset
            end

            cur_stack.push(goto.id, result, @lh_sides[action.id], pos0)
          else
            raise InternalParserError, "No GoTo action found in state #{cur_stack.state} " +
              "after reducing by production #{action.id}"
          end

          # This stack is NOT ready for the next
          # token.
          processing << cur_stack

          # Exit error mode if necessary.
          error_mode = false if error_mode and not reduction_guard

        elsif action.is_a?(Shift)
          cur_stack.push(action.id, token.value, token.type, token.position)

          # This stack is ready for the next
          # token.
          moving_on << cur_stack

          # Exit error mode.
          error_mode = false
        end
      end
    end

    v.puts("\n\n") if v

    processing = moving_on
    moving_on  = []

    # If we don't have any active stacks at this point the
    # string isn't in the language.
    if opts[:accept] == :first and processing.length == 0
      raise NotInLanguage
    end

    reduction_guard = false
  end

  # If we have reached this point we are accepting all parse
  # trees.
  v.puts("Accepting input with #{accepted.length} derivation(s).") if v

  accepted.each do |done|
    opts[:parse_tree].puts(done.tree)
  end if opts[:parse_tree]

  results = accepted.map { |done| done.result }

  if opts[:env].he
    raise HandledError.new(opts[:env].errors, results)
  else
    return results
  end
ensure
  # Close the verbose stream on every exit path (normal return or raised
  # exception) unless it is $stdout.
  v.close if v && v != $stdout
end
957
+
958
# Adds a new production to the parser with a left-hand value of
# _symbol_. If _expression_ is specified it is taken as the
# right-hand side of the production and _action_ is associated
# with the production. If _expression_ is nil then _action_ is
# evaluated and expected to make one or more calls to
# ParserCore.clause. A precedence can be associated with this
# production by setting _precedence_ to a terminal symbol.
#
# Raises a ParserConstructionError when _symbol_ is not a String or
# Symbol naming a non-terminal. Returns nil.
def production(symbol, expression = nil, precedence = nil, &action)

  # Check the symbol.
  if not (symbol.is_a?(Symbol) or symbol.is_a?(String)) or not CFG::is_nonterminal?(symbol)
    raise ParserConstructionError, 'Production symbols must be Strings or Symbols and be in all lowercase.'
  end

  @grammar.curr_lhs = symbol.to_sym
  @curr_prec        = precedence

  begin
    if expression
      clause(expression, precedence, &action)
    else
      instance_exec(&action)
    end
  ensure
    # Always clear the bookkeeping, even when the action raised, so a
    # failed definition cannot leak into later clauses.
    @grammar.curr_lhs = nil
    @curr_prec        = nil
  end

  nil
end
984
+
985
+ alias :p :production
986
+
987
# This method uses lookahead sets and precedence information to
# resolve conflicts and remove unnecessary reduce actions.
# _do_lookahead_ and _do_precedence_ switch the two pruning passes on
# or off; when both are false nothing is done.
def prune(do_lookahead, do_precedence)
  terms = @grammar.terms

  # If both options are false there is no pruning to do.
  return unless do_lookahead or do_precedence

  # If there are error productions the amount of lookahead pruning
  # is scaled back (see below).
  has_error_term = terms.include?(:ERROR)

  @states.each do |state0|

    #####################
    # Lookahead Pruning #
    #####################

    if do_lookahead
      # Find all of the reductions in this state.
      reductions = state0.actions.values.flatten.uniq.select { |a| a.is_a?(Reduce) }

      reductions.each do |reduction|
        production = @grammar.productions(:id)[reduction.id]

        # Build the lookahead set: the union of the G' follow sets for
        # every state from which reading the right-hand side leads here.
        follow = []

        @states.each do |state1|
          next unless check_reachability(state1, state0, production.rhs)

          follow |= grammar_prime.follow_set("#{state1.id}_#{production.lhs}".to_sym)
        end

        # Translate the G' follow symbols into G lookahead
        # symbols.
        lookahead = follow.map { |sym| sym.to_s.split('_').last.to_sym }.uniq

        # Here we remove the unnecessary reductions. With error
        # productions present a reduction is only removed for symbols
        # on which the state has a conflict.
        (terms - lookahead).each do |sym|
          if !has_error_term or state0.conflict_on?(sym)
            state0.actions[sym].delete(reduction)
          end
        end
      end
    end

    ########################################
    # Precedence and Associativity Pruning #
    ########################################

    next unless do_precedence

    state0.actions.each do |symbol, actions|

      # We are only interested in pruning actions
      # for terminal symbols.
      next unless CFG::is_terminal?(symbol)

      # Skip to the next one if there is no
      # possibility of a Shift/Reduce or
      # Reduce/Reduce conflict.
      next unless actions and actions.length > 1

      # Resolution needs a precedence for every Reduce action.
      # NOTE(review): the original expression also tested for the presence
      # of a Shift action, but because `and` binds looser than `=` that
      # test never reached the assigned value; that is kept as-is here.
      resolve_ok = actions.all? { |a| !a.is_a?(Reduce) or @production_precs[a.id] }

      next unless @token_precs[symbol] and resolve_ok

      max_prec        = 0
      selected_action = nil

      # Grab the associativity and precedence
      # for the input token.
      tassoc, tprec = @token_precs[symbol]

      actions.each do |a|
        is_shift    = a.is_a?(Shift)
        assoc, prec = is_shift ? [tassoc, tprec] : @production_precs[a.id]

        # If two actions have the same precedence we
        # will only replace the previous production if:
        #  * The token is left associative and the current action is a Reduce
        #  * The token is right associative and the current action is a Shift
        preferred_on_tie = (tassoc == (is_shift ? :right : :left))

        if prec > max_prec or (prec == max_prec and preferred_on_tie)
          max_prec        = prec
          selected_action = a

        # NOTE(review): the associativity tag compared here is :nonassoc,
        # while ParserCore#nonassoc records :non - confirm which is meant.
        elsif prec == max_prec and assoc == :nonassoc
          raise ParserConstructionError, 'Non-associative token found during conflict resolution.'
        end
      end

      state0.actions[symbol] = [selected_action]
    end
  end
end
1087
+
1088
# Declares every symbol in _symbols_ as right associative. Each call
# bumps the :right counter in @prec_counts first, so symbols passed in a
# later call are recorded with a higher precedence level than symbols
# from an earlier call.
#
# Returns the Array of symbols (converted with to_sym) that were recorded.
def right(*symbols)
  level = (@prec_counts[:right] += 1)

  # Convert everything up front so that a bad argument raises before
  # any precedence entry is written.
  names = symbols.map(&:to_sym)

  names.each { |name| @token_precs[name] = [:right, level] }
end
1098
+
1099
# Sets the starting symbol of the parser by forwarding _symbol_ to the
# underlying grammar object. Returns whatever @grammar.start returns.
def start(symbol)
  @grammar.start(symbol)
end
1103
+ end
1104
+
1105
# The ParseStack class is used by a ParserCore to keep track of state
# during parsing. Besides the state and output stacks it records the
# labels and edges of the parse tree so that it can be rendered in the
# DOT language.
class ParseStack
  # Identifier of this stack (used to name the DOT graph).
  attr_reader :id
  # Values produced so far; one entry per symbol on the stack.
  attr_reader :output_stack
  # Parser state IDs; the last element is the current state.
  attr_reader :state_stack

  # Instantiate a new ParseStack object. All arguments after _id_ are
  # optional and exist so that #branch can seed a copy.
  def initialize(id, ostack = [], sstack = [0], nstack = [], connections = [], labels = [], positions = [])
    @id = id

    @node_stack   = nstack
    @output_stack = ostack
    @state_stack  = sstack

    @connections = connections
    @labels      = labels
    @positions   = positions

    # Node indices removed by the most recent #pop. Initialised here so
    # that pushing a non-terminal before any pop (e.g. on a freshly
    # created or branched stack) adds no edges instead of failing on nil.
    @cbuffer = []
  end

  # Branch this stack, effectively creating a new copy of its internal
  # state. The copies are shallow (Array#clone).
  def branch(new_id)
    ParseStack.new(new_id, @output_stack.clone, @state_stack.clone, @node_stack.clone,
                   @connections.clone, @labels.clone, @positions.clone)
  end

  # Returns a copy of the position of the last symbol on the stack, or a
  # new StreamPosition when nothing has been pushed yet.
  def position
    if @positions.empty?
      StreamPosition.new
    else
      @positions.last.clone
    end
  end

  # Push new state and other information onto the stack. _node0_ is the
  # grammar symbol used as the label of the new tree node.
  def push(state, o, node0, position)
    @state_stack  << state
    @output_stack << o
    @node_stack   << @labels.length
    @labels       << node0
    @positions    << position

    # A non-terminal becomes the parent of every node removed by the
    # preceding pop.
    if CFG::is_nonterminal?(node0)
      @cbuffer.each do |node1|
        @connections << [@labels.length - 1, node1]
      end
    end
  end

  # Pop some number of objects off of the inside stacks. Returns a two
  # element Array: the values popped from the output stack and the
  # positions popped from the position stack.
  def pop(n = 1)
    @state_stack.pop(n)

    # Pop the node stack so that the proper edges can be added when the
    # production's left-hand side non-terminal is pushed onto the stack.
    @cbuffer = @node_stack.pop(n)

    [@output_stack.pop(n), @positions.pop(n)]
  end

  # Fetch the result stored in this ParseStack. Raises
  # InternalParserError unless exactly one object is left on the output
  # stack.
  def result
    if @output_stack.length == 1
      return @output_stack.last
    else
      raise InternalParserError, "The parsing stack should have 1 element on the output stack, not #{@output_stack.length}."
    end
  end

  # Return the current state of this ParseStack.
  def state
    @state_stack.last
  end

  # Return a string representing the parse tree in the DOT language.
  # Terminal nodes are drawn as boxes.
  def tree
    dot = "digraph tree#{@id} {\n"

    @labels.each_with_index do |label, i|
      dot << "\tnode#{i} [label=\"#{label}\""
      dot << " shape=box" if CFG::is_terminal?(label)
      dot << "];\n"
    end

    dot << "\n"

    @connections.each do |from, to|
      dot << "\tnode#{from} -> node#{to};\n"
    end

    dot << "}"
  end
end
1208
+
1209
# The State class is used to represent sets of items and actions to be
# used during parsing.
class State
  # The state's ID.
  attr_accessor :id
  # The CFG::Item objects that comprise this state (nil after #clean).
  attr_reader :items
  # Hash mapping each token symbol to the Array of Action objects that
  # should be taken when that input is observed.
  attr_reader :actions

  # Instantiate a new State object. Every symbol in _tokens_ gets its
  # own, initially empty, action list.
  def initialize(tokens, items = [])
    @id      = nil
    @items   = items
    @actions = tokens.each_with_object({}) { |token, table| table[token] = [] }
  end

  # Compare one State to another. Two States are equal if they have the
  # same items or, if the items of either have been cleaned, if the
  # States have the same ID.
  def ==(other)
    if items && other.items
      items == other.items
    else
      id == other.id
    end
  end

  # Add a Reduce action for _production_id_ to every terminal's action
  # list. The same Reduce object is shared by all of the lists.
  def add_reduction(production_id)
    action = Reduce.new(production_id)

    # Reduce actions are not allowed for the ERROR terminal.
    @actions.each do |symbol, list|
      list << action if CFG::is_terminal?(symbol) && symbol != :ERROR
    end
  end

  # Add a new item to this state. Objects that are not CFG::Item
  # instances and items already present are ignored.
  def append(item)
    @items << item if item.is_a?(CFG::Item) && !@items.include?(item)
  end

  alias :<< :append

  # Clean this State by removing the list of Item objects.
  def clean
    @items = nil
  end

  # Close this state using _productions_ (indexed by left-hand side
  # symbol). Items appended during the walk are visited as well, which
  # is what makes this a closure.
  def close(productions)
    each do |item|
      next_symbol = item.next_symbol

      if next_symbol && CFG::is_nonterminal?(next_symbol)
        productions[next_symbol].each { |production| self << production.to_item }
      end
    end
  end

  # Checks to see if there is a conflict in this state, given an input
  # of _sym_. Returns :SR if a shift/reduce conflict is detected and :RR
  # if a reduce/reduce conflict is detected. If no conflict is detected
  # nil is returned.
  def conflict_on?(sym)
    candidates = @actions[sym]

    reductions = candidates.count { |action| action.is_a?(Reduce) }
    shifts     = candidates.count { |action| action.is_a?(Shift) }

    if shifts == 1 && reductions > 0
      :SR
    elsif reductions > 1
      :RR
    end
  end

  # Iterate over the state's items. Returns an Enumerator when called
  # without a block.
  def each
    return enum_for(:each) unless block_given?

    @items.each { |item| yield item }
  end

  # Specify an Action to perform when the input token is _symbol_.
  # Raises ParserConstructionError if _symbol_ was not among the tokens
  # this state was created with.
  def on(symbol, action)
    if @actions.key?(symbol)
      @actions[symbol] << action
    else
      raise ParserConstructionError, "Attempting to set action for token (#{symbol}) not seen in grammar definition."
    end
  end

  # Returns a copy of the actions that should be taken when the input
  # token is _symbol_.
  def on?(symbol)
    @actions[symbol].clone
  end
end
1311
+
1312
# Base class for the entries of a state's action table. An Action tells
# the parser what to do for a given current state and input token; the
# meaning of its id is defined by each subclass.
class Action
  # The identifier carried by this action (nil when none was given).
  attr_reader :id

  # Create an action, optionally tagging it with _id_.
  def initialize(id = nil)
    @id = id
  end
end
1321
+
1322
# An Accept action tells the parser that it should accept the current
# parse tree.
class Accept < Action
  # Human-readable name of this action.
  def to_s
    'Accept'
  end
end
1329
+
1330
# A GoTo action tells the parser that it should go to the state whose
# ID is stored in GoTo.id.
class GoTo < Action
  # Human-readable description including the target state ID.
  def to_s
    "GoTo #{id}"
  end
end
1337
+
1338
# A Reduce action tells the parser that it should reduce the input
# stack by the production whose ID is stored in Reduce.id.
class Reduce < Action
  # Human-readable description including the production ID.
  def to_s
    "Reduce by Production #{id}"
  end
end
1345
+
1346
# A Shift action tells the parser that it should shift the current
# input token; Shift.id is reported as the state being shifted to.
class Shift < Action
  # Human-readable description including the target state ID.
  def to_s
    "Shift to State #{id}"
  end
end
1353
+ end
1354
+ end