rltk 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1354 @@
1
+ # Author: Chris Wailes <chris.wailes@gmail.com>
2
+ # Project: Ruby Language Toolkit
3
+ # Date: 2011/01/19
4
+ # Description: This file contains the base class for parsers that use RLTK.
5
+
6
+ ############
7
+ # Requires #
8
+ ############
9
+
10
+ # Ruby Language Toolkit
11
+ require 'rltk/cfg'
12
+
13
+ #######################
14
+ # Classes and Modules #
15
+ #######################
16
+
17
+ module RLTK # :nodoc:
18
+
19
# A BadToken exception indicates that a token was observed in the input
# stream that wasn't used in the grammar's definition.
#
# It derives from StandardError (rather than Exception) so that a plain
# +rescue+ clause is able to catch it.
class BadToken < StandardError
  # Returns the fixed, human readable description of this error.
  def to_s
    'Unexpected token. Token not present in grammar definition.'
  end
end
26
+
27
# A NotInLanguage exception is raised whenever there is no valid parse tree
# for a given token stream. In other words, the input string is not in the
# defined language.
#
# It derives from StandardError (rather than Exception) so that a plain
# +rescue+ clause is able to catch it.
class NotInLanguage < StandardError
  # Returns the fixed, human readable description of this error.
  def to_s
    'String not in language.'
  end
end
35
+
36
# An exception of this type is raised when the parser encountered an error
# that was handled by an error production.
#
# It derives from StandardError (rather than Exception) so that a plain
# +rescue+ clause is able to catch it.
class HandledError < StandardError

  # The errors as reported by the parser.
  attr_reader :errors

  # The result that would have been returned by the call to _parse_.
  attr_reader :result

  # Instantiate a new HandledError object carrying the reported _errors_
  # and the _result_ the parse produced despite them.
  def initialize(errors, result)
    # Let the exception machinery initialize itself; no custom message is
    # set, so the message stays the default one.
    super()

    @errors = errors
    @result = result
  end
end
52
+
53
# Used for errors that occur during parser construction. Derives from
# StandardError so that a plain +rescue+ clause is able to catch it.
class ParserConstructionError < StandardError; end
55
+
56
# Used for runtime errors that are the parser's fault. These should never
# be observed in the wild. Derives from StandardError so that a plain
# +rescue+ clause is able to catch it.
class InternalParserError < StandardError; end
59
+
60
+ # The Parser class may be sub-classed to produce new parsers. These
61
+ # parsers have a lot of features, and are described in the main
62
+ # documentation.
63
+ class Parser
64
+
65
# Called when the Parser class is sub-classed, this method adds a
# ParserCore to the new class, and installs some needed class and
# instance methods on _klass_.
def self.inherited(klass)
  klass.class_exec do
    @core = ParserCore.new

    # Returns this class's ParserCore object.
    def self.core
      @core
    end

    # Routes method calls to the new subclass to the ParserCore
    # object.
    def self.method_missing(method, *args, &proc)
      @core.send(method, *args, &proc)
    end

    # Keeps respond_to? consistent with method_missing. Private core
    # methods are included because method_missing forwards with send.
    def self.respond_to_missing?(method, include_private = false)
      @core.respond_to?(method, true) || super
    end

    # Alias for RLTK::Parser::ParserCore.p that needs to be
    # manually connected.
    def self.p(*args, &proc)
      @core.p(*args, &proc)
    end

    # Parses the given token stream using a newly instantiated
    # environment (unless one is supplied under :env). See
    # ParserCore.parse for a description of the _opts_ option hash.
    def self.parse(tokens, opts = {})
      # Work on a copy so the caller's hash is never modified; writing
      # the fresh environment into the caller's hash would make a reused
      # hash silently reuse the previous parse's environment.
      opts = opts.dup
      opts[:env] ||= self::Environment.new

      @core.parse(tokens, opts)
    end

    # Instantiates a new parser and creates an environment to be
    # used for subsequent calls.
    def initialize
      @env = self.class::Environment.new
    end

    # Returns the environment used by the instantiated parser.
    def env
      @env
    end

    # Parses the given token stream using the encapsulated
    # environment. See ParserCore.parse for a description of
    # the _opts_ option hash. An :env entry in _opts_ overrides the
    # encapsulated environment.
    def parse(tokens, opts = {})
      self.class.core.parse(tokens, {:env => @env}.update(opts))
    end
  end
end
117
+
118
# All actions passed to ParserCore.rule and ParserCore.clause are
# evaluated inside an instance of the Environment class or its
# subclass (which must have the same name).
class Environment
  # Flag telling whether an error was encountered and handled.
  attr_accessor :he

  # Every object that was registered through the _error_ method.
  attr_reader :errors

  # Builds a new Environment in its freshly reset state.
  def initialize
    reset
  end

  # Appends _o_ to the list of errors.
  def error(o)
    @errors.push(o)
  end

  # Looks up the StreamPosition object for the symbol at location _n_
  # (zero based) in the positions given to set_positions.
  def pos(n)
    @positions[n]
  end

  # Re-initializes the state that must not leak from one parse call into
  # the next: the error list and the handled-error flag.
  def reset
    @errors = []
    @he = false
  end

  # Stores the _positions_ array consulted by _pos_.
  def set_positions(positions)
    @positions = positions
  end
end
156
+
157
+ # The ParserCore class provides most of the functionality of the Parser
158
+ # class. A ParserCore is instantiated for each subclass of Parser,
159
+ # thereby allowing multiple parsers to be defined inside a single Ruby
160
+ # program.
161
+ class ParserCore
162
+
163
+ # The grammar that can be parsed by this ParserCore. The grammar
164
+ # is used internally and should not be manipulated outside of the
165
+ # ParserCore object.
166
+ attr_reader :grammar
167
+
168
# Builds a new ParserCore: sets up the empty bookkeeping structures,
# selects the default (:splat) argument handling policy and registers the
# grammar callback that supplies actions for the productions the grammar
# generates for the *, + and ? operators.
def initialize
  @curr_lhs  = nil
  @curr_prec = nil

  # Conflict lists are created lazily, one array per state id.
  @conflicts = Hash.new { |table, state_id| table[state_id] = [] }
  @grammar   = CFG.new

  @lh_sides = {}
  @procs    = []
  @states   = []

  # Variables for dealing with precedence.
  @prec_counts      = {:left => 0, :right => 0, :non => 0}
  @production_precs = []
  @token_precs      = {}

  # Right-hand side values are handed to actions as splats by default.
  @args = :splat

  @grammar.callback do |p, type, num|
    first = (num == :first)

    # An unknown _type_ yields no action (nil), as before.
    action =
      case type
      when :'*' then first ? proc { || [] }   : proc { |o, os| [o] + os }
      when :'+' then first ? proc { |o| [o] } : proc { |o, os| [o] + os }
      when :'?' then first ? proc { || nil }  : proc { |o| o }
      end

    # Each entry pairs the action with the number of values to pop.
    @procs[p.id] = [action, p.rhs.length]

    @production_precs[p.id] = p.last_terminal
  end
end
214
+
215
# Registers _state_ in the state list and returns its ID. When an equal
# state (as decided by ==) is already in the list, that state's index is
# returned and _state_ is discarded; otherwise _state_ is given the next
# free ID and appended.
def add_state(state)
  known = @states.index(state)
  return known if known

  state.id = @states.length
  @states << state

  # The state was appended last, so its index is the new last index.
  @states.length - 1
end
230
+
231
# Calling this method will cause the parser to pass right-hand
# side values as arrays instead of splats. This method must be
# called before ANY calls to ParserCore.production; once the grammar
# holds productions the call has no effect.
#
# It also replaces the grammar callback so that the actions generated for
# the *, + and ? operators accept a single array argument _v_.
def array_args
  return unless @grammar.productions.length == 0

  @args = :array

  @grammar.callback do |p, type, num|
    first = (num == :first)

    action =
      case type
      when :'*'
        first ? proc { |v| [] } : proc { |v| [v[0]] + v[1] }
      when :'+'
        # The first element must be wrapped in a list, exactly like the
        # splat version does with [o]; otherwise the following
        # [v[0]] + v[1] step would be handed a bare value instead of
        # a list.
        first ? proc { |v| [v[0]] } : proc { |v| [v[0]] + v[1] }
      when :'?'
        first ? proc { |v| nil } : proc { |v| v[0] }
      end

    # Each entry pairs the action with the number of values to pop.
    @procs[p.id] = [action, p.rhs.length]

    @production_precs[p.id] = p.last_terminal
  end
end
264
+
265
# Build a hash with the default options for ParserCore.finalize
# and then update it with the values from _opts_. The :explain entry of
# the result is converted with get_io.
#
# A new hash is returned; _opts_ itself is left untouched so callers can
# safely reuse (or freeze) the hash they pass in.
def build_finalize_opts(opts)
  merged = {
    :explain    => false,
    :lookahead  => true,
    :precedence => true,
    :use        => false
  }.update(opts)

  merged[:explain] = get_io(merged[:explain])

  merged
end
277
+
278
# Build a hash with the default options for ParserCore.parse and
# then update it with the values from _opts_. The :parse_tree and
# :verbose entries of the result are converted with get_io.
#
# A new hash is returned; _opts_ itself is left untouched so callers can
# safely reuse (or freeze) the hash they pass in.
def build_parse_opts(opts)
  merged = {
    :accept     => :first,
    :parse_tree => false,
    :verbose    => false
  }.update(opts)

  # Only build a default environment when the caller did not supply one.
  merged[:env] ||= Environment.new

  merged[:parse_tree] = get_io(merged[:parse_tree])
  merged[:verbose]    = get_io(merged[:verbose])

  merged
end
291
+
292
# Verifies that the constructed parser is sane. Two things are checked:
# every non-terminal of the grammar appears on the left-hand side of at
# least one production, and no state holds an invalid action. Conflicts
# found on terminals are reported through inform_conflict. A
# ParserConstructionError is raised as soon as a problem is found.
def check_sanity
  # Every non-terminal must be the left-hand side of some production.
  @grammar.nonterms.each do |nonterm|
    next if @lh_sides.values.include?(nonterm)

    raise ParserConstructionError, "Non-terminal #{nonterm} does not appear on the left-hand side of any production."
  end

  # Inspect the actions of every state.
  @states.each do |state|
    state.actions.each do |sym, actions|
      if CFG::is_terminal?(sym)
        # Actions registered for a terminal.
        actions.each do |action|
          if action.is_a?(Accept)
            # Accepting is only legal on the end-of-stream token.
            next if sym == :EOS

            raise ParserConstructionError, "Accept action found for terminal #{sym} in state #{state.id}."
          end

          next if action.is_a?(GoTo) or action.is_a?(Reduce) or action.is_a?(Shift)

          raise ParserConstructionError, "Object of type #{action.class} found in actions for terminal " +
            "#{sym} in state #{state.id}."
        end

        conflict = state.conflict_on?(sym)
        inform_conflict(state.id, conflict, sym) if conflict

      elsif actions.length > 1
        # A non-terminal may carry at most one action ...
        raise ParserConstructionError, "State #{state.id} has multiple GoTo actions for non-terminal #{sym}."

      elsif actions.length == 1 and not actions.first.is_a?(GoTo)
        # ... and that action has to be a GoTo.
        raise ParserConstructionError, "State #{state.id} has non-GoTo action for non-terminal #{sym}."
      end
    end
  end
end
341
+
342
# Tells whether the parser would end up in parse state _dest_ after
# starting in state _start_ and reading _symbols_ in order. Terminals are
# followed through their Shift action only; non-terminals through whatever
# action the state lists for them.
def check_reachability(start, dest, symbols)
  current = start

  symbols.each do |sym|
    candidates = @states[current.id].on?(sym)
    candidates = candidates.select { |a| a.is_a?(Shift) } if CFG::is_terminal?(sym)

    # Nothing to follow for this symbol: the path does not exist.
    return false if candidates.empty?

    # There can only be one Shift action for terminals and one GoTo
    # action for non-terminals, so the first candidate is the only one.
    current = @states[candidates.first.id]
  end

  current.id == dest.id
end
366
+
367
# Declares a new clause inside of a production. The right-hand
# side is specified by _expression_ and the precedence of this
# production can be changed by setting the _precedence_ argument
# to some terminal symbol. _action_ is the block evaluated when the
# clause is reduced.
#
# Raises ParserConstructionError when, in splat mode, no action is given
# or the action's arity does not match the number of right-hand side
# symbols.
def clause(expression, precedence = nil, &action)
  # Use the curr_prec only if it isn't overridden for this clause.
  precedence ||= @curr_prec

  production = @grammar.clause(expression)

  if @args == :splat
    # Without this guard a missing block surfaces as a NoMethodError on
    # nil instead of a construction error.
    if action.nil?
      raise ParserConstructionError, 'No action given for clause. Every clause requires an action block.'
    end

    # Check to make sure the action's arity matches the number
    # of symbols on the right-hand side.
    if action.arity != production.rhs.length
      raise ParserConstructionError, 'Incorrect number of arguments to action. Action arity must match the number of ' +
        'terminals and non-terminals in the clause.'
    end
  end

  # Add the action to our proc list, paired with the number of values it
  # consumes.
  @procs[production.id] = [action, production.rhs.length]

  # If no precedence is specified use the precedence of the
  # last terminal in the production.
  @production_precs[production.id] = precedence || production.last_terminal
end
392
+
393
+ alias :c :clause
394
+
395
# Removes resources that were needed to generate the parser but
# aren't needed when actually parsing input.
def clean
  # We've told the developer about conflicts by now.
  @conflicts = nil

  # Drop the grammar and the grammar'.
  @grammar       = nil
  @grammar_prime = nil

  # Drop precedence and bookkeeping information. These are the variables
  # set up by initialize (@curr_lhs / @curr_prec); clearing differently
  # spelled names would leave the real ones untouched.
  @curr_lhs  = nil
  @curr_prec = nil

  @prec_counts      = nil
  @production_precs = nil
  @token_precs      = nil

  # Drop the items from each of the states.
  @states.each { |state| state.clean }
end
416
+
417
# Prints a description of the parser (productions, tokens, table
# statistics and the parse table itself) to the IO object _io_. The
# object is closed afterwards when it is an IO other than $stdout.
#
# Raises ParserConstructionError when the grammar is gone or no states
# exist, i.e. when called outside of finalize.
def explain(io)
  unless @grammar && !@states.empty?
    raise ParserConstructionError, 'Parser.explain called outside of finalize.'
  end

  io.puts("###############")
  io.puts("# Productions #")
  io.puts("###############")
  io.puts

  # One group of lines per left-hand side, each followed by a blank line.
  @grammar.productions.each do |_lhs, productions|
    productions.each do |production|
      io.print("\tProduction #{production.id}: #{production.to_s}")

      prec = @production_precs[production.id]
      io.print(" : (#{prec.first} , #{prec.last})") if prec

      io.puts
    end

    io.puts
  end

  io.puts("##########")
  io.puts("# Tokens #")
  io.puts("##########")
  io.puts

  # Terminals in alphabetical order, with their precedence when known.
  @grammar.terms.sort_by { |term| term.to_s }.each do |term|
    io.print("\t#{term}")

    prec = @token_precs[term]
    io.print(" : (#{prec.first}, #{prec.last})") if prec

    io.puts
  end

  io.puts

  io.puts("#####################")
  io.puts("# Table Information #")
  io.puts("#####################")
  io.puts

  io.puts("\tStart symbol: #{@grammar.start_symbol}")
  io.puts

  io.puts("\tTotal number of states: #{@states.length}")
  io.puts

  conflict_total = @conflicts.values.inject(0) { |sum, list| sum + list.length }
  io.puts("\tTotal conflicts: #{conflict_total}")
  io.puts

  @conflicts.each do |state_id, conflicts|
    io.puts("\tState #{state_id} has #{conflicts.length} conflict(s)")
  end

  io.puts unless @conflicts.empty?

  # Print the parse table.
  io.puts("###############")
  io.puts("# Parse Table #")
  io.puts("###############")
  io.puts

  @states.each do |state|
    io.puts("State #{state.id}:")
    io.puts

    io.puts("\t# ITEMS #")

    # Width of the longest left-hand side, handed to the items' to_s.
    widest = state.items.map { |item| item.lhs.to_s.length }.max || 0

    state.each { |item| io.puts("\t#{item.to_s(widest)}") }

    io.puts
    io.puts("\t# ACTIONS #")

    state.actions.keys.sort_by { |key| key.to_s }.each do |sym|
      state.actions[sym].each { |action| io.puts("\tOn #{sym} #{action}") }
    end

    io.puts
    io.puts("\t# CONFLICTS #")

    state_conflicts = @conflicts[state.id]

    if state_conflicts.length == 0
      io.puts("\tNone\n\n")
    else
      state_conflicts.each do |type, sym|
        label = (type == :SR) ? "Shift/Reduce" : "Reduce/Reduce"

        io.puts("\t#{label} conflict on #{sym}")
      end

      io.puts
    end
  end

  # Close any IO objects that aren't $stdout.
  io.close if io.is_a?(IO) and io != $stdout
end
530
+
531
# This method will finalize the parser causing the construction
# of states and their actions, and the resolution of conflicts
# using lookahead and precedence information.
#
# The _opts_ hash may contain the following options, which are
# described in more detail in the main documentation:
#
# * :explain - To explain the parser or not.
# * :lookahead - To use lookahead info for conflict resolution.
# * :precedence - To use precedence info for conflict resolution.
# * :use - A file name or object that is used to load/save the parser.
#
# No calls to ParserCore.production may appear after the call to
# ParserCore.finalize.
def finalize(opts = {})
  # Get the full options hash.
  opts = build_finalize_opts(opts)

  # Get the name of the file in which the parser is defined.
  def_file = caller()[2].split(':')[0]

  # Check to make sure we can load the necessary information
  # from the specified object: it has to be newer than the file that
  # defines the parser.
  if opts[:use] and (
      (opts[:use].is_a?(String) and File.exist?(opts[:use]) and File.mtime(opts[:use]) > File.mtime(def_file)) or
      (opts[:use].is_a?(File) and opts[:use].mtime > File.mtime(def_file))
    )

    # Un-marshal our saved data structures. Marshal data is binary, so
    # the file is opened in binary mode.
    #
    # NOTE(review): Marshal.load must only ever be given trusted data;
    # the :use file is expected to be a cache written by this method.
    source = get_io(opts[:use], 'rb')

    begin
      @lh_sides, @states, @symbols = Marshal.load(source)
    ensure
      # Only close what was opened here; a File handed in by the caller
      # remains the caller's responsibility.
      source.close if opts[:use].is_a?(String)
    end

    # Remove any un-needed data and return.
    return clean
  end

  # Grab all of the symbols that comprise the grammar (besides
  # the start symbol).
  @symbols = @grammar.symbols << :ERROR

  # Add our starting state to the state list.
  start_production = @grammar.production(:start, @grammar.start_symbol.to_s).first
  start_state      = State.new(@symbols, [start_production.to_item])

  start_state.close(@grammar.productions)

  add_state(start_state)

  # Translate the precedence of productions from tokens to
  # (associativity, precedence) pairs.
  @production_precs.each_with_index do |prec, id|
    @production_precs[id] = @token_precs[prec]
  end

  # Build the rest of the transition table. add_state appends to
  # @states while this loop runs, so newly discovered states are visited
  # by the same iteration.
  @states.each do |state|
    # Transition states.
    tstates = Hash.new { |h, k| h[k] = State.new(@symbols) }

    # Bin each item in this set into reachable transition states.
    state.each do |item|
      if (next_symbol = item.next_symbol)
        tstates[next_symbol] << item.copy
      end
    end

    # For each transition state:
    #  1) Get transition symbol
    #  2) Advance dot
    #  3) Close it
    #  4) Get state id and add transition
    tstates.each do |symbol, tstate|
      tstate.each { |item| item.advance }

      tstate.close(@grammar.productions)

      id = add_state(tstate)

      # Add Goto and Shift actions.
      state.on(symbol, CFG::is_nonterminal?(symbol) ? GoTo.new(id) : Shift.new(id))
    end

    # Find the Accept and Reduce actions for this state.
    state.each do |item|
      next unless item.at_end?

      if item.lhs == :start
        state.on(:EOS, Accept.new)
      else
        state.add_reduction(item.id)
      end
    end
  end

  # Build the production.id -> production.lhs map.
  @grammar.productions(:id).each do |id, production|
    @lh_sides[id] = production.lhs
  end

  # Prune the parsing table for unnecessary reduce actions.
  prune(opts[:lookahead], opts[:precedence])

  # Check the parser for inconsistencies.
  check_sanity

  # Print the table if requested.
  explain(opts[:explain]) if opts[:explain]

  # Remove any data that is no longer needed.
  clean

  # Store the parser's final data structures if requested.
  if opts[:use]
    sink = get_io(opts[:use], 'wb')

    begin
      Marshal.dump([@lh_sides, @states, @symbols], sink)
    ensure
      if opts[:use].is_a?(String)
        # The file was opened here, so it is closed here; this also
        # guarantees the dump actually reaches the disk.
        sink.close
      elsif sink.respond_to?(:flush)
        sink.flush
      end
    end
  end
end
650
+
651
# Converts an object into an IO object as appropriate:
#
# * +true+ selects $stdout.
# * A String is treated as a file name and opened with _mode_.
# * An IO, or any IO-like object that responds to +puts+ (for example a
#   StringIO or a Tempfile, neither of which is an IO subclass), is
#   returned unchanged.
# * Anything else yields +false+.
def get_io(o, mode = 'w')
  if o.is_a?(TrueClass)
    $stdout
  elsif o.is_a?(String)
    File.open(o, mode)
  elsif o.is_a?(IO) or o.respond_to?(:puts)
    o
  else
    false
  end
end
663
+
664
# Generates (once) and returns the G' grammar used to calculate the
# LALR(1) lookahead sets. Every symbol of G' is a symbol of the original
# grammar prefixed with the ID of the state it is read in. Information
# about this grammar and its use can be found in the following paper:
#
# Simple Computation of LALR(1) Lookahead Sets
# Manuel E. Bermudez and George Logothetis
# Information Processing Letters 31 - 1989
def grammar_prime
  # Memoized: build the grammar on first use only.
  return @grammar_prime if @grammar_prime

  @grammar_prime = CFG.new

  @states.each do |state|
    state.each do |item|
      upcoming = item.next_symbol
      lhs      = "#{state.id}_#{upcoming}".to_sym

      # Only non-terminals that have not been expanded for this state yet
      # produce new productions.
      next unless CFG::is_nonterminal?(upcoming) and not @grammar_prime.productions.keys.include?(lhs)

      @grammar.productions[upcoming].each do |production|
        rhs    = ""
        walker = state

        # Follow the production's symbols through the parse table,
        # tagging each one with the state it is read in.
        production.rhs.each do |symbol|
          rhs += "#{walker.id}_#{symbol} "

          walker = @states[walker.on?(symbol).first.id]
        end

        @grammar_prime.production(lhs, rhs)
      end
    end
  end

  @grammar_prime
end
700
+
701
# Records that a conflict of kind _type_ on symbol _sym_ was detected in
# the state identified by _state_id_.
def inform_conflict(state_id, type, sym)
  @conflicts[state_id].push([type, sym])
end
705
+
706
# Declares the symbols in _symbols_ to be left associative. Every call
# uses a precedence level one higher than the previous call to this
# method, so later declarations bind tighter.
def left(*symbols)
  @prec_counts[:left] += 1
  level = @prec_counts[:left]

  names = symbols.map { |s| s.to_sym }
  names.each { |name| @token_precs[name] = [:left, level] }
end
716
+
717
# Declares the symbols in _symbols_ to be non-associative. They are
# recorded with the associativity tag :non and a precedence level one
# higher than the previous call to this method used.
def nonassoc(*symbols)
  @prec_counts[:non] += 1
  level = @prec_counts[:non]

  names = symbols.map { |s| s.to_sym }
  names.each { |name| @token_precs[name] = [:non, level] }
end
726
+
727
# This function is where actual parsing takes place. The
# _tokens_ argument must be an array of Token objects, the last
# of which has type EOS. By default this method will return the
# value computed by the first successful parse tree found. It is
# possible to adjust this behavior using the _opts_ hash as
# follows:
#
# * :accept - Either :first or :all.
# * :env - The environment in which to evaluate the production actions.
# * :parse_tree - To print parse trees in the DOT language or not.
# * :verbose - To be verbose or not.
#
# Additional information for these options can be found in the
# main documentation.
#
# Raises BadToken for a token type the parser does not know,
# NotInLanguage when (with :accept => :first) no stack survives a token,
# and HandledError when the input was accepted after error recovery.
def parse(tokens, opts = {})
  # Get the full options hash.
  opts    = self.build_parse_opts(opts)
  v       = opts[:verbose]
  env     = opts[:env]
  tree_io = opts[:parse_tree]

  if v
    v.puts("Input tokens:")
    v.puts(tokens.map { |t| t.type }.inspect)
    v.puts
  end

  # Counter used to hand out IDs to the stacks created while parsing.
  stack_id = 0

  # Error mode indicators.
  error_mode      = false
  reduction_guard = false

  # accepted:   stacks that reached an Accept action (:accept => :all).
  # moving_on:  stacks that are done with the current token.
  # processing: stacks that still have to handle the current token.
  accepted   = []
  moving_on  = []
  processing = [ParseStack.new(stack_id += 1)]

  # Iterate over the tokens. We don't proceed to the next token until
  # every stack is done with the current one.
  tokens.each do |token|
    # Make sure this token was seen in the grammar definition.
    raise BadToken unless @symbols.include?(token.type)

    if v
      value_note = token.value ? "(#{token.value})" : ''
      v.puts("Current token: #{token.type}#{value_note}")
    end

    # Iterate over the stacks until each one is done.
    while (stack = processing.shift)
      # Get the available actions for this stack.
      actions = @states[stack.state].on?(token.type)

      if actions.empty?
        # Already in error mode and no actions: skip this token.
        if error_mode
          moving_on << stack
          next
        end

        # Error recovery is only attempted when dropping this stack
        # would leave no stack at all.
        last_stack = accepted.empty? && moving_on.empty? && processing.empty?

        if last_stack
          # Unwind the stack until a state with an error production
          # shows up (or the stack runs out of states).
          while stack.state
            error_actions = @states[stack.state].on?(:ERROR)

            unless error_actions.empty?
              # Enter the found error state.
              stack.push(error_actions.first.id, nil, :ERROR, token.position)
              break
            end

            # This state doesn't have an error production. Moving on.
            stack.pop
          end
        end

        if last_stack && stack.state
          # We found a valid error state.
          error_mode = reduction_guard = true
          env.he = true
          processing << stack

          v.puts('Invalid input encountered. Entering error handling mode.') if v
        else
          # Either other stacks are still alive or no error state could
          # be found: this stack is simply dropped.
          v.puts("No more actions for stack #{stack.id}. Dropping stack.") if v
        end

        next
      end

      # Make (stack, action) pairs, duplicating the stack as necessary:
      # the last action keeps the original stack, every other action
      # gets a branch of it.
      primary = actions.pop
      pairs   = [[stack, primary]] + actions.map { |alt| [stack.branch(stack_id += 1), alt] }

      pairs.each do |cur, action|
        if v
          v.puts
          v.puts('Current stack:')
          v.puts("\tID: #{cur.id}")
          v.puts("\tState stack:\t#{cur.state_stack.inspect}")
          v.puts("\tOutput Stack:\t#{cur.output_stack.inspect}")
          v.puts
          v.puts("Action taken: #{action.to_s}")
        end

        case action
        when Accept
          if opts[:accept] == :all
            accepted << cur
          else
            v.puts('Accepting input.') if v
            tree_io.puts(cur.tree) if tree_io

            # An accepted parse that needed error recovery is still
            # reported as an error, carrying the result along.
            raise HandledError.new(env.errors, cur.result) if env.he

            return cur.result
          end

        when Reduce
          # Get the production associated with this reduction.
          production_proc, pop_size = @procs[action.id]

          unless production_proc
            raise InternalParserError, "No production #{action.id} found."
          end

          args, positions = cur.pop(pop_size)
          env.set_positions(positions)

          # Evaluate the action inside the environment, honoring the
          # argument handling policy.
          result =
            if @args == :array
              env.instance_exec(args, &production_proc)
            else
              env.instance_exec(*args, &production_proc)
            end

          lhs  = @lh_sides[action.id]
          goto = @states[cur.state].on?(lhs).first

          unless goto
            raise InternalParserError, "No GoTo action found in state #{cur.state} " +
              "after reducing by production #{action.id}"
          end

          v.puts("Going to state #{goto.id}.\n") if v

          if args.empty?
            # Empty productions need to be handled specially: the
            # stack's position object is moved past its own extent and
            # given zero length.
            pos0 = cur.position

            pos0.stream_offset += pos0.length + 1
            pos0.line_offset   += pos0.length + 1

            pos0.length = 0
          else
            # Stretch the first symbol's position so that it covers
            # everything up to the end of the last symbol.
            pos0 = env.pos( 0)
            pos1 = env.pos(-1)

            pos0.length = (pos1.stream_offset + pos1.length) - pos0.stream_offset
          end

          cur.push(goto.id, result, lhs, pos0)

          # This stack is NOT ready for the next token.
          processing << cur

          # Exit error mode if necessary.
          error_mode = false if error_mode and not reduction_guard

        when Shift
          cur.push(action.id, token.value, token.type, token.position)

          # This stack is ready for the next token.
          moving_on << cur

          # Exit error mode.
          error_mode = false
        end
      end
    end

    v.puts("\n\n") if v

    processing = moving_on
    moving_on  = []

    # If we don't have any active stacks at this point the string isn't
    # in the language.
    if opts[:accept] == :first and processing.length == 0
      v.close if v and v != $stdout
      raise NotInLanguage
    end

    reduction_guard = false
  end

  # If we have reached this point we are accepting all parse trees.
  if v
    v.puts("Accepting input with #{accepted.length} derivation(s).")

    v.close if v != $stdout
  end

  accepted.each { |done| tree_io.puts(done.tree) } if tree_io

  results = accepted.map { |done| done.result }

  raise HandledError.new(env.errors, results) if env.he

  return results
end
957
+
958
# Adds a new production to the parser with a left-hand value of
# _symbol_. If _expression_ is specified it is taken as the
# right-hand side of the production and _action_ is associated
# with the production. If _expression_ is nil then _action_ is
# evaluated and expected to make one or more calls to
# ParserCore.clause. A precedence can be associated with this
# production by setting _precedence_ to a terminal symbol.
#
# Raises ParserConstructionError when _symbol_ is not a String or Symbol
# naming a non-terminal.
def production(symbol, expression = nil, precedence = nil, &action)
  # Check the symbol.
  unless (symbol.is_a?(Symbol) || symbol.is_a?(String)) && CFG::is_nonterminal?(symbol)
    raise ParserConstructionError, 'Production symbols must be Strings or Symbols and be in all lowercase.'
  end

  @grammar.curr_lhs = symbol.to_sym
  @curr_prec        = precedence

  begin
    if expression
      clause(expression, precedence, &action)
    else
      instance_exec(&action)
    end
  ensure
    # Always reset the bookkeeping, even when the clause or the action
    # raised, so a failed production cannot leak its left-hand side or
    # precedence into the next one.
    @grammar.curr_lhs = nil
    @curr_prec        = nil
  end

  nil
end
984
+
985
+ alias :p :production
986
+
987
# This method uses lookahead sets and precedence information to
# resolve conflicts and remove unnecessary reduce actions.
#
# _do_lookahead_ enables the pruning of reductions by LALR(1) lookahead
# sets; _do_precedence_ enables conflict resolution by precedence and
# associativity. When both are false nothing is done.
def prune(do_lookahead, do_precedence)
  terms = @grammar.terms

  # If both options are false there is no pruning to do.
  return if not (do_lookahead or do_precedence)

  @states.each do |state0|

    #####################
    # Lookahead Pruning #
    #####################

    if do_lookahead
      # Find all of the reductions in this state.
      reductions = state0.actions.values.flatten.uniq.select { |a| a.is_a?(Reduce) }

      reductions.each do |reduction|
        production = @grammar.productions(:id)[reduction.id]

        lookahead = []

        # Build the lookahead set from every state that reaches state0
        # by reading the production's right-hand side.
        @states.each do |state1|
          if check_reachability(state1, state0, production.rhs)
            lookahead |= grammar_prime.follow_set("#{state1.id}_#{production.lhs}".to_sym)
          end
        end

        # Translate the G' follow symbols into G lookahead symbols. G'
        # symbols look like "<state id>_<symbol>", so only the first
        # underscore separates the two parts; splitting on every
        # underscore would truncate symbol names that contain one.
        lookahead = lookahead.map { |sym| sym.to_s.split('_', 2).last.to_sym }.uniq

        # Here we remove the unnecessary reductions.
        # If there are error productions we need to
        # scale back the amount of pruning done.
        (terms - lookahead).each do |sym|
          if not (terms.include?(:ERROR) and not state0.conflict_on?(sym))
            state0.actions[sym].delete(reduction)
          end
        end
      end
    end

    ########################################
    # Precedence and Associativity Pruning #
    ########################################

    if do_precedence
      state0.actions.each do |symbol, actions|

        # We are only interested in pruning actions
        # for terminal symbols.
        next unless CFG::is_terminal?(symbol)

        # Skip to the next one if there is no
        # possibility of a Shift/Reduce or
        # Reduce/Reduce conflict.
        next unless actions and actions.length > 1

        # Resolution is possible when every Reduce action has a
        # precedence.
        #
        # NOTE(review): the original statement continued with
        # "and <some action is a Shift>", but because `and` binds
        # looser than `=` that part never influenced the result. The
        # effective behavior is kept here; confirm whether a Shift was
        # meant to be required.
        resolve_ok = actions.all? { |a| !a.is_a?(Reduce) || @production_precs[a.id] }

        if @token_precs[symbol] and resolve_ok
          max_prec        = 0
          selected_action = nil

          # Grab the associativity and precedence
          # for the input token.
          tassoc, tprec = @token_precs[symbol]

          actions.each do |a|
            assoc, prec = a.is_a?(Shift) ? [tassoc, tprec] : @production_precs[a.id]

            # If two actions have the same precedence we
            # will only replace the previous production if:
            #  * The token is left associative and the current action is a Reduce
            #  * The token is right associative and the current action is a Shift
            if prec > max_prec or (prec == max_prec and tassoc == (a.is_a?(Shift) ? :right : :left))
              max_prec        = prec
              selected_action = a

            # NOTE(review): non-associative tokens are recorded with the
            # tag :non (see nonassoc), so this comparison against
            # :nonassoc presumably never matches. Left unchanged because
            # matching it would make existing grammars fail to build;
            # confirm the intended behavior.
            elsif prec == max_prec and assoc == :nonassoc
              raise ParserConstructionError, 'Non-associative token found during conflict resolution.'

            end
          end

          state0.actions[symbol] = [selected_action]
        end
      end
    end
  end
end
1087
+
1088
+ # This method is used to specify that the symbols in _symbols_
1089
+ # are right associative. Subsequent calls to this method will
1090
+ # give their arguments higher precedence.
1091
# Declares every symbol in _symbols_ to be right associative.  Each call
# increments the :right counter in @prec_counts and records the new
# level for the given symbols in @token_precs, so later calls receive
# higher precedence levels.  Returns the symbols converted with to_sym.
def right(*symbols)
  @prec_counts[:right] += 1
  level = @prec_counts[:right]

  symbols.map(&:to_sym).each { |token| @token_precs[token] = [:right, level] }
end
1098
+
1099
+ # Changes the starting symbol of the parser.
1100
# Forwards _symbol_ to @grammar.start, which selects the grammar's
# starting symbol, and returns whatever that call returns.
def start(symbol)
  @grammar.start(symbol)
end
1103
+ end
1104
+
1105
+ # The ParseStack class is used by a ParserCore to keep track of state
1106
+ # during parsing.
1107
class ParseStack
  # The ID given to this stack at construction time.
  attr_reader :id
  # The values produced so far, one per symbol on the stack.
  attr_reader :output_stack
  # The parser states pushed so far.
  attr_reader :state_stack

  # Instantiate a new ParseStack object.
  #
  # _ostack_, _sstack_ and _nstack_ are the initial output, state and
  # node stacks.  _connections_, _labels_ and _positions_ hold the
  # parse tree edges, the parse tree node labels and the stream
  # positions recorded so far.
  def initialize(id, ostack = [], sstack = [0], nstack = [], connections = [], labels = [], positions = [])
    @id = id

    @node_stack   = nstack
    @output_stack = ostack
    @state_stack  = sstack

    @connections = connections
    @labels      = labels
    @positions   = positions

    # Nodes removed by the most recent pop.  Starting with an empty
    # list lets a non-terminal be pushed before any pop has happened
    # without failing on nil.
    @cbuffer = []
  end

  # Branch this stack, effectively creating a new copy of its internal
  # state under the ID _new_id_.  The arrays are cloned, so the two
  # stacks can be modified independently afterwards.
  def branch(new_id)
    ParseStack.new(new_id, @output_stack.clone, @state_stack.clone, @node_stack.clone,
                   @connections.clone, @labels.clone, @positions.clone)
  end

  # Returns a copy of the position of the last symbol on the stack, or
  # a new StreamPosition if no positions have been recorded.
  def position
    @positions.empty? ? StreamPosition.new : @positions.last.clone
  end

  # Push a new state and its associated information onto the stack.
  #
  # _state_ is the state to enter, _o_ the output value, _node0_ the
  # grammar symbol used as the parse tree label and _position_ the
  # stream position of the symbol.
  def push(state, o, node0, position)
    @state_stack  << state
    @output_stack << o
    @node_stack   << @labels.length
    @labels       << node0
    @positions    << position

    # A non-terminal becomes the parent of the nodes removed by the
    # preceding pop.
    if CFG::is_nonterminal?(node0)
      parent = @labels.length - 1
      @cbuffer.each { |child| @connections << [parent, child] }
    end
  end

  # Pop _n_ objects off of the internal stacks.  Returns a two element
  # array holding the values popped from the output stack and the
  # values popped from the position stack.
  def pop(n = 1)
    @state_stack.pop(n)

    # Remember the popped nodes so that the proper edges can be added
    # when the production's left-hand side non-terminal is pushed
    # onto the stack.
    @cbuffer = @node_stack.pop(n)

    [@output_stack.pop(n), @positions.pop(n)]
  end

  # Fetch the result stored in this ParseStack.  Raises
  # InternalParserError unless exactly one object is left on the
  # output stack.
  def result
    if @output_stack.length == 1
      @output_stack.last
    else
      raise InternalParserError, "The parsing stack should have 1 element on the output stack, not #{@output_stack.length}."
    end
  end

  # Return the current state of this ParseStack.
  def state
    @state_stack.last
  end

  # Return a string representing the parse tree in the DOT language.
  # Terminal nodes are drawn as boxes.
  def tree
    dot = "digraph tree#{@id} {\n"

    @labels.each_with_index do |label, i|
      dot << "\tnode#{i} [label=\"#{label}\""
      dot << " shape=box" if CFG::is_terminal?(label)
      dot << "];\n"
    end

    dot << "\n"

    @connections.each do |from, to|
      dot << "\tnode#{from} -> node#{to};\n"
    end

    dot << "}"
  end
end
1208
+
1209
+ # The State class is used to represent sets of items and actions to be
1210
+ # used during parsing.
1211
class State
  # The state's ID.
  attr_accessor :id
  # The CFG::Item objects that comprise this state (nil once cleaned).
  attr_reader :items
  # Maps each token to the list of Action objects to take when that
  # token is observed.
  attr_reader :actions

  # Build a new State with an empty action list for every token in
  # _tokens_ and with _items_ as its item list.
  def initialize(tokens, items = [])
    @id      = nil
    @items   = items
    @actions = {}

    tokens.each { |token| @actions[token] = [] }
  end

  # Two States are equal when their item lists are equal.  If either
  # State has had its items cleaned the IDs are compared instead.
  def ==(other)
    return items == other.items if items && other.items

    id == other.id
  end

  # Add a Reduce action for _production_id_ to every terminal of this
  # state except the ERROR terminal, for which reductions are not
  # allowed.
  def add_reduction(production_id)
    reduction = Reduce.new(production_id)

    @actions.each do |token, list|
      list << reduction if CFG::is_terminal?(token) && token != :ERROR
    end
  end

  # Add _item_ to this state.  Objects that are not CFG::Item
  # instances and items that are already present are ignored.
  def append(item)
    @items << item if item.is_a?(CFG::Item) && !@items.include?(item)
  end

  alias :<< :append

  # Clean this State by dropping its list of Item objects.
  def clean
    @items = nil
  end

  # Close this state using _productions_.  Items appended during the
  # walk are visited by the same walk, so the closure is computed in a
  # single pass.
  def close(productions)
    each do |item|
      upcoming = item.next_symbol
      next unless upcoming && CFG::is_nonterminal?(upcoming)

      productions[upcoming].each { |production| self << production.to_item }
    end
  end

  # Reports the kind of conflict this state has for the input _sym_:
  # :SR for a shift/reduce conflict, :RR for a reduce/reduce conflict
  # and nil when there is none.
  def conflict_on?(sym)
    candidates = @actions[sym]

    reduce_count = candidates.count { |action| action.is_a?(Reduce) }
    shift_count  = candidates.count { |action| !action.is_a?(Reduce) && action.is_a?(Shift) }

    return :SR if shift_count == 1 && reduce_count > 0
    return :RR if reduce_count > 1

    nil
  end

  # Yield each of the state's items in turn.
  def each
    @items.each do |item|
      yield item
    end
  end

  # Register _action_ as an action to perform when the input token is
  # _symbol_.  Raises ParserConstructionError if _symbol_ is not one of
  # the tokens this state was created with.
  def on(symbol, action)
    unless @actions.key?(symbol)
      raise ParserConstructionError, "Attempting to set action for token (#{symbol}) not seen in grammar definition."
    end

    @actions[symbol] << action
  end

  # Returns a copy of the list of actions to take when the input token
  # is _symbol_.
  def on?(symbol)
    @actions[symbol].clone
  end
end
1311
+
1312
+ # The Action class is used to indicate what action the parser should
1313
+ # take given a current state and input token.
1314
class Action
  # The identifier attached to this action.  What it refers to depends
  # on the subclass (for example a state or a production).
  attr_reader :id

  # Create an action carrying _id_, which defaults to nil.
  def initialize(id = nil)
    @id = id
  end
end
1321
+
1322
+ # The Accept class indicates to the parser that it should accept the
1323
+ # current parse tree.
1324
class Accept < Action
  # Human readable name of this action.
  def to_s
    'Accept'
  end
end
1329
+
1330
+ # The GoTo class indicates to the parser that it should goto the state
1331
+ # specified by GoTo.id.
1332
class GoTo < Action
  # Human readable description that includes the ID stored in this
  # action.
  def to_s
    "GoTo #{id}"
  end
end
1337
+
1338
+ # The Reduce class indicates to the parser that it should reduce the
1339
+ # input stack by the rule specified by Reduce.id.
1340
class Reduce < Action
  # Human readable description that includes the ID stored in this
  # action.
  def to_s
    "Reduce by Production #{id}"
  end
end
1345
+
1346
+ # The Shift class indicates to the parser that it should shift the
1347
+ # current input token.
1348
class Shift < Action
  # Human readable description that includes the ID stored in this
  # action.
  def to_s
    "Shift to State #{id}"
  end
end
1353
+ end
1354
+ end