rltk3 3.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +7 -0
  2. data/AUTHORS +1 -0
  3. data/LICENSE +27 -0
  4. data/README.md +852 -0
  5. data/Rakefile +197 -0
  6. data/lib/rltk/ast.rb +573 -0
  7. data/lib/rltk/cfg.rb +683 -0
  8. data/lib/rltk/cg/basic_block.rb +157 -0
  9. data/lib/rltk/cg/bindings.rb +151 -0
  10. data/lib/rltk/cg/builder.rb +1127 -0
  11. data/lib/rltk/cg/context.rb +48 -0
  12. data/lib/rltk/cg/contractor.rb +51 -0
  13. data/lib/rltk/cg/execution_engine.rb +194 -0
  14. data/lib/rltk/cg/function.rb +237 -0
  15. data/lib/rltk/cg/generated_bindings.rb +8118 -0
  16. data/lib/rltk/cg/generic_value.rb +95 -0
  17. data/lib/rltk/cg/instruction.rb +519 -0
  18. data/lib/rltk/cg/llvm.rb +150 -0
  19. data/lib/rltk/cg/memory_buffer.rb +75 -0
  20. data/lib/rltk/cg/module.rb +451 -0
  21. data/lib/rltk/cg/pass_manager.rb +252 -0
  22. data/lib/rltk/cg/support.rb +29 -0
  23. data/lib/rltk/cg/target.rb +230 -0
  24. data/lib/rltk/cg/triple.rb +58 -0
  25. data/lib/rltk/cg/type.rb +554 -0
  26. data/lib/rltk/cg/value.rb +1272 -0
  27. data/lib/rltk/cg.rb +32 -0
  28. data/lib/rltk/lexer.rb +372 -0
  29. data/lib/rltk/lexers/calculator.rb +44 -0
  30. data/lib/rltk/lexers/ebnf.rb +38 -0
  31. data/lib/rltk/parser.rb +1702 -0
  32. data/lib/rltk/parsers/infix_calc.rb +43 -0
  33. data/lib/rltk/parsers/postfix_calc.rb +34 -0
  34. data/lib/rltk/parsers/prefix_calc.rb +34 -0
  35. data/lib/rltk/token.rb +90 -0
  36. data/lib/rltk/version.rb +11 -0
  37. data/lib/rltk.rb +16 -0
  38. data/test/cg/tc_basic_block.rb +83 -0
  39. data/test/cg/tc_control_flow.rb +191 -0
  40. data/test/cg/tc_function.rb +54 -0
  41. data/test/cg/tc_generic_value.rb +33 -0
  42. data/test/cg/tc_instruction.rb +256 -0
  43. data/test/cg/tc_llvm.rb +25 -0
  44. data/test/cg/tc_math.rb +88 -0
  45. data/test/cg/tc_module.rb +89 -0
  46. data/test/cg/tc_transforms.rb +68 -0
  47. data/test/cg/tc_type.rb +69 -0
  48. data/test/cg/tc_value.rb +151 -0
  49. data/test/cg/ts_cg.rb +23 -0
  50. data/test/tc_ast.rb +332 -0
  51. data/test/tc_cfg.rb +164 -0
  52. data/test/tc_lexer.rb +216 -0
  53. data/test/tc_parser.rb +711 -0
  54. data/test/tc_token.rb +34 -0
  55. data/test/ts_rltk.rb +47 -0
  56. metadata +317 -0
@@ -0,0 +1,1702 @@
1
+ # Author: Chris Wailes <chris.wailes@gmail.com>
2
+ # Project: Ruby Language Toolkit
3
+ # Date: 2011/01/19
4
+ # Description: This file contains the base class for parsers that use RLTK.
5
+
6
+ ############
7
+ # Requires #
8
+ ############
9
+
10
+ # Ruby Language Toolkit
11
+ require 'rltk/cfg'
12
+
13
+ #######################
14
+ # Classes and Modules #
15
+ #######################
16
+
17
+ # The RLTK root module
18
+ module RLTK
19
# A BadToken error indicates that a token was observed in the input stream
# that wasn't used in the grammar's definition.
class BadToken < StandardError
  # @return [Object] The token that triggered this error.
  attr_reader :token

  # @param [Object] token The token that is not part of the grammar.
  def initialize(token)
    @token = token
  end

  # @return [String] String representation of the error.
  def to_s
    "Unexpected token: #{@token.inspect}. Token not present in grammar definition."
  end
end
31
+
32
# A NotInLanguage error is raised whenever there is no valid parse tree
# for a given token stream.  In other words, the input string is not in the
# defined language.
class NotInLanguage < StandardError
  class << self
    # @return [Integer, :all] Number of tokens of context shown by {#to_s}
    #   when no explicit length is given; falls back to 100 if never set.
    def default_context_length
      @default_context_length || 100
    end

    # @param [Integer, :all] v New default context length.
    attr_writer :default_context_length
  end

  # @return [Array<Token>] List of tokens that have been successfully parsed
  attr_reader :seen

  # @return [Token] Token that caused the parser to stop
  attr_reader :current

  # @return [Array<Token>] List of tokens that have yet to be seen
  attr_reader :remaining

  # @param [Array<Token>]  seen            Tokens that have been successfully parsed
  # @param [Token]         current         Token that caused the parser to stop
  # @param [Array<Token>]  remaining       Tokens that have yet to be seen
  # @param [Integer, :all] context_length  How many tokens on each side to include in the message
  def initialize(seen, current, remaining, context_length = self.class.default_context_length)
    @seen           = seen
    @current        = current
    @remaining      = remaining
    @context_length = context_length
  end

  # Builds the message from at most *context_length* tokens on either side
  # of the offending token (or every token when the length is :all).
  #
  # @return [String] String representation of the error.
  def to_s
    if @context_length == :all
      seen      = @seen
      remaining = @remaining
    else
      # Array#last / Array#first clamp to the array size, so a short token
      # list is shown in full instead of collapsing to nil, and both sides
      # show the same number of tokens.
      seen      = @seen.last(@context_length)
      remaining = @remaining.first(@context_length)
    end

    "String not in language.  Token info:\n\tSeen: #{seen}\n\tCurrent: #{@current}\n\tRemaining: #{remaining}"
  end
end
73
+
74
# Raised when the parser hit an error that was recovered from by an error
# production.
class HandledError < StandardError
  # The errors as reported by the parser.
  #
  # @return [Array<Object>]
  attr_reader :errors

  # The result that would have been returned by the call to *parse*.
  #
  # @return [Object]
  attr_reader :result

  # Instantiate a new HandledError object with *errors*.
  #
  # @param [Array<Object>]  errors  Errors added to the parsing environment by calls to {Parser::Environment#error}.
  # @param [Object]         result  Object resulting from parsing Tokens before the error occurred.
  def initialize(errors, result)
    @errors, @result = errors, result
  end
end
95
+
96
# Used for exceptions that occur during parser construction.
class ParserConstructionException < Exception
end
98
+
99
# Used for runtime exceptions that are the parser's fault.  These should
# never be observed in the wild.
class InternalParserException < Exception
end
102
+
103
# Used to indicate that a parser is empty or hasn't been finalized.
class UselessParserException < Exception
  # Builds the exception with its fixed error message.
  def initialize
    super('Parser has not been finalized.')
  end
end
110
+
111
+ # The Parser class may be sub-classed to produce new parsers. These
112
+ # parsers have a lot of features, and are described in the main
113
+ # documentation.
114
+ class Parser
115
+ # @return [Environment] Environment used by the instantiated parser.
116
+ attr_reader :env
117
+
118
+ #################
119
+ # Class Methods #
120
+ #################
121
+
122
+ class << self
123
# The overridden new prevents un-finalized parsers from being
# instantiated: @symbols is only populated by finalize, so a falsy value
# means the parser tables have not been built.
#
# @raise [UselessParserException] If the parser has not been finalized.
def new(*args)
  raise UselessParserException unless @symbols

  super(*args)
end
132
+
133
# Installs instance class variables into a class and registers the grammar
# callback that records actions for productions generated by the CFG
# (optional, list, and non-empty list helpers).
#
# @return [void]
def install_icvars
  # Book-keeping for the production currently being defined.
  @curr_lhs  = nil
  @curr_prec = nil

  # Grammar, automaton, and conflict tables.
  @conflicts     = Hash.new { |table, state_id| table[state_id] = Array.new }
  @grammar       = CFG.new
  @grammar_prime = nil
  @lh_sides      = Hash.new
  @procs         = Array.new
  @states        = Array.new

  # Stays nil until finalize runs; Parser.new checks this.
  @symbols = nil

  # Variables for dealing with precedence.
  @prec_counts      = { :left => 0, :right => 0, :non => 0 }
  @production_precs = Array.new
  @token_precs      = Hash.new
  @token_hooks      = Hash.new { |table, token| table[token] = [] }

  # Set the default argument handling policy.  Valid values
  # are :array and :splat.
  @default_arg_type = :splat

  @grammar.callback do |type, which, p, sels = []|
    generated_action =
      case type
      when :optional
        case which
        when :empty then ProdProc.new { || nil }
        else             ProdProc.new { |o| o }
        end

      when :elp
        case which
        when :empty then ProdProc.new { || [] }
        else             ProdProc.new { |prime| prime }
        end

      when :nelp
        case which
        when :single
          ProdProc.new { |el| [el] }

        when :multiple
          # The first value is the list built so far; the rest form the
          # new element that gets appended to it.
          ProdProc.new(:splat, sels) do |*syms|
            el = syms[1..-1]
            syms.first << (el.length == 1 ? el.first : el)
          end

        else
          ProdProc.new { |*el| el.length == 1 ? el.first : el }
        end
      end

    # Each entry pairs the action with the number of symbols to pop.
    @procs[p.id] = [generated_action, p.rhs.length]

    @production_precs[p.id] = p.last_terminal
  end
end
196
+
197
# Hook invoked when this class is sub-classed; it installs the necessary
# instance class variables on the new subclass.
#
# @param [Class] klass The newly created subclass.
#
# @return [void]
def inherited(klass)
  klass.install_icvars
end
204
+
205
# If *state* (or its equivalent) is not in the state list it is
# added and its ID is returned.  If there is already a state
# equal to *state* in the state list, that state's ID is
# returned and *state* is discarded.
#
# @param [State] state State to add to the parser.
#
# @return [Integer] The ID of the state.
def add_state(state)
  existing_id = @states.index(state)
  return existing_id if existing_id

  # New states are appended, so the ID is the current list length.
  new_id   = @states.length
  state.id = new_id
  @states << state

  new_id
end
224
+
225
# Build a hash with the default options for Parser.finalize
# and then update it with the values from *opts*.  The caller's
# hash is left untouched; the :explain value is converted with get_io.
#
# @param [Hash{Symbol => Object}] opts Hash containing options for finalize.
#
# @return [Hash{Symbol => Object}]
def build_finalize_opts(opts)
  merged = {
    explain:    false,
    lookahead:  true,
    precedence: true,
    use:        false
  }.merge(opts)

  # Convert on the merged copy so the hash passed in is never mutated.
  merged[:explain] = get_io(merged[:explain])

  merged
end
private :build_finalize_opts
242
+
243
# Build a hash with the default options for Parser.parse and
# then update it with the values from *opts*.  The caller's hash
# is left untouched; :parse_tree and :verbose are converted with
# get_io.
#
# @param [Hash{Symbol => Object}] opts Hash containing options for parse.
#
# @return [Hash{Symbol => Object}]
def build_parse_opts(opts)
  merged = {
    accept:     :first,
    parse_tree: false,
    verbose:    false
  }.merge(opts)

  # Only build a fresh environment when the caller did not supply one.
  merged[:env] = self::Environment.new unless merged.key?(:env)

  # Convert on the merged copy so the hash passed in is never mutated.
  merged[:parse_tree] = get_io(merged[:parse_tree])
  merged[:verbose]    = get_io(merged[:verbose])

  merged
end
private :build_parse_opts
261
+
262
# Checks the sanity of the constructed parser: every non-terminal used in
# the grammar must appear on the left-hand side of some production, and no
# state may contain invalid actions.  Conflicts found on terminals are
# reported through inform_conflict.
#
# @raise [ParserConstructionException] If a problem is encountered.
#
# @return [void]
def check_sanity
  # Every non-terminal must be the left-hand side of some production.
  @grammar.nonterms.each do |nonterm|
    next if @lh_sides.values.include?(nonterm)

    raise ParserConstructionException, "Non-terminal #{nonterm} does not appear on the left-hand side of any production."
  end

  # Check the actions in each state.
  each_state do |state|
    state.actions.each do |sym, actions|
      unless CFG::is_terminal?(sym)
        # Non-terminals may carry at most one action, and it must be a GoTo.
        if actions.length > 1
          raise ParserConstructionException, "State #{state.id} has multiple GoTo actions for non-terminal #{sym}."
        elsif actions.length == 1 and not actions.first.is_a?(GoTo)
          raise ParserConstructionException, "State #{state.id} has non-GoTo action for non-terminal #{sym}."
        end

        next
      end

      # Terminals: Accept is only legal on :EOS; everything else must be
      # a GoTo, Reduce, or Shift.
      actions.each do |action|
        if action.is_a?(Accept)
          if sym != :EOS
            raise ParserConstructionException, "Accept action found for terminal #{sym} in state #{state.id}."
          end
        elsif not (action.is_a?(GoTo) or action.is_a?(Reduce) or action.is_a?(Shift))
          raise ParserConstructionException,
                "Object of type #{action.class} found in actions for terminal #{sym} in state #{state.id}."
        end
      end

      conflict = state.conflict_on?(sym)
      self.inform_conflict(state.id, conflict, sym) if conflict
    end
  end
end
313
+
314
# This method checks to see if the parser would be in parse state
# *dest* after starting in state *start* and reading *symbols*.
#
# @param [#id]            start    State to start from (only its id is used).
# @param [#id]            dest     State expected after reading *symbols*.
# @param [Array<Symbol>]  symbols  Grammar symbols.
#
# @return [Boolean] If *dest* is reached from *start* after reading *symbols*.
def check_reachability(start, dest, symbols)
  position = start

  symbols.each do |sym|
    candidates = @states[position.id].on?(sym)
    candidates = candidates.select { |a| a.is_a?(Shift) } if CFG::is_terminal?(sym)

    # No way to consume this symbol: the path does not exist.
    return false if candidates.empty?

    # There can only be one Shift action for terminals and
    # one GoTo action for non-terminals, so the first action
    # is the only one in the list.
    position = @states[candidates.first.id]
  end

  position.id == dest.id
end
344
+
345
# Declares a new clause inside of a production.  The right-hand
# side is specified by *expression* and the precedence of this
# production can be changed by setting the *precedence* argument
# to some terminal symbol.
#
# @param [String, Symbol]  expression  Right-hand side of a production.
# @param [Symbol]          precedence  Symbol representing the precedence of this production.
# @param [:array, :splat]  arg_type    Method to use when passing arguments to the action.
# @param [Proc]            action      Action to be taken when the production is reduced.
#
# @raise [ParserConstructionException] If *arg_type* is :splat and the
#   action's arity does not match the clause.
#
# @return [void]
def clause(expression, precedence = nil, arg_type = @default_arg_type, &action)
  # The current precedence applies unless this clause overrides it.
  precedence ||= @curr_prec

  production, selections = @grammar.clause(expression)
  rhs_size = production.rhs.length

  # The action must take one parameter per selected symbol, or one per
  # right-hand side symbol when no selections were made.
  expected_arity = selections.empty? ? rhs_size : selections.length

  if arg_type == :splat and action.arity != expected_arity
    raise ParserConstructionException,
          "Incorrect number of action parameters.  Expected #{expected_arity} but got #{action.arity}." \
          '  Action arity must match the number of terminals and non-terminals in the clause.'
  end

  # Record the action together with the number of symbols to pop.
  @procs[production.id] = [ProdProc.new(arg_type, selections, &action), rhs_size]

  # Without an explicit precedence, fall back to the precedence of the
  # last terminal in the production.
  @production_precs[production.id] = precedence || production.last_terminal
end
alias :c :clause
380
+
381
# Removes resources that were needed to generate the parser but
# aren't needed when actually parsing input.
#
# @return [void]
def clean
  # We've told the developer about conflicts by now.
  @conflicts = nil

  # Drop the grammar and the grammar'.
  @grammar       = nil
  @grammar_prime = nil

  # Drop precedence and bookkeeping information.  These must be the same
  # variable names that install_icvars and clause use (@curr_*).
  @curr_lhs  = nil
  @curr_prec = nil

  @prec_counts      = nil
  @production_precs = nil
  @token_precs      = nil

  # Drop the items from each of the states.
  each_state { |state| state.clean }
end
404
+
405
# Set the default argument type for the actions associated with
# clauses.  All actions defined after this call will be passed
# arguments in the way specified here, unless overridden in the
# call to {Parser.clause}.  Any value other than :array or :splat
# is ignored.
#
# @param [:array, :splat] type The default argument type.
#
# @return [void]
def default_arg_type(type)
  return unless type == :array || type == :splat

  @default_arg_type = type
end
alias :dat :default_arg_type
417
+
418
# Adds productions and actions for parsing empty lists by delegating
# to the grammar object.
#
# @see CFG#empty_list_production
def build_list_production(symbol, list_elements, separator = '')
  @grammar.build_list_production(symbol, list_elements, separator)
end
alias :list :build_list_production
425
+
426
# This function will print a description of the parser to the
# provided IO object: productions, tokens, table statistics, and the
# full parse table.  The IO object is closed afterwards unless it is
# $stdout.
#
# @param [IO] io Input/Output object used for printing the parser's explanation.
#
# @raise [ParserConstructionException] If there is no grammar or no states
#   to describe (i.e. when called outside of finalize).
#
# @return [void]
def explain(io)
  unless @grammar and not @states.empty?
    raise ParserConstructionException, 'Parser.explain called outside of finalize.'
  end

  io.puts('###############', '# Productions #', '###############')
  io.puts

  # Width needed to right-align production IDs.
  id_width = @grammar.productions(:id).length.to_s.length

  # Print the productions.
  @grammar.productions.each do |sym, productions|
    widest = productions.inject(0) do |best, prod|
      len = prod.to_s.length
      len > best ? len : best
    end

    productions.each do |production|
      text      = production.to_s
      id_column = sprintf("%#{id_width}d", production.id)

      io.print("\tProduction #{id_column}: #{text}")

      prec = @production_precs[production.id]
      if prec
        io.print(' ' * (widest - text.length))
        io.print(" : (#{sprintf("%-5s", prec.first)}, #{prec.last})")
      end

      io.puts
    end

    io.puts
  end

  io.puts('##########', '# Tokens #', '##########')
  io.puts

  widest_token = @grammar.terms.inject(0) { |best, t| t.length > best ? t.length : best }

  @grammar.terms.sort { |a, b| a.to_s <=> b.to_s }.each do |term|
    io.print("\t#{term}")

    prec = @token_precs[term]
    if prec
      io.print(' ' * (widest_token - term.length))
      io.print(" : (#{sprintf("%-5s", prec.first)}, #{prec.last})")
    end

    io.puts
  end

  io.puts

  io.puts('#####################', '# Table Information #', '#####################')
  io.puts

  io.puts("\tStart symbol: #{@grammar.start_symbol}'")
  io.puts

  io.puts("\tTotal number of states: #{@states.length}")
  io.puts

  io.puts("\tTotal conflicts: #{@conflicts.values.flatten(1).length}")
  io.puts

  @conflicts.each do |state_id, conflicts|
    io.puts("\tState #{state_id} has #{conflicts.length} conflict(s)")
  end

  io.puts unless @conflicts.empty?

  # Print the parse table.
  io.puts('###############', '# Parse Table #', '###############')
  io.puts

  each_state do |state|
    io.puts("State #{state.id}:")
    io.puts

    io.puts("\t# ITEMS #")
    lhs_width = state.items.inject(0) do |best, item|
      len = item.lhs.to_s.length
      len > best ? len : best
    end

    state.each { |item| io.puts("\t#{item.to_s(lhs_width)}") }

    io.puts
    io.puts("\t# ACTIONS #")

    state.actions.keys.sort { |a, b| a.to_s <=> b.to_s }.each do |sym|
      state.actions[sym].each { |action| io.puts("\tOn #{sym} #{action}") }
    end

    io.puts
    io.puts("\t# CONFLICTS #")

    state_conflicts = @conflicts[state.id]

    if state_conflicts.length == 0
      io.puts("\tNone\n\n")
    else
      state_conflicts.each do |type, sym|
        label = type == :SR ? "Shift/Reduce" : "Reduce/Reduce"

        io.print("\t#{label} conflict")
        io.puts(" on #{sym}")
      end

      io.puts
    end
  end

  # Close any IO objects that aren't $stdout.
  io.close if io.is_a?(IO) and io != $stdout
end
554
+
555
# This method will finalize the parser causing the construction
# of states and their actions, and the resolution of conflicts
# using lookahead and precedence information.
#
# No calls to {Parser.production} may appear after the call to
# Parser.finalize.
#
# When :use names a saved parser that is newer than the file defining
# the parser, the tables are loaded from it instead of being rebuilt;
# otherwise the tables are built and, if :use is set, saved to it.
#
# @param [Hash] opts Options describing how to finalize the parser.
#
# @option opts [Boolean,String,IO]  :explain     To explain the parser or not.
# @option opts [Boolean]            :lookahead   To use lookahead info for conflict resolution.
# @option opts [Boolean]            :precedence  To use precedence info for conflict resolution.
# @option opts [String,IO]          :use         A file name or object that is used to load/save the parser.
#
# @raise [ParserConstructionException] If the grammar has no productions.
#
# @return [void]
def finalize(opts = {})
  if @grammar.productions.empty?
    raise ParserConstructionException,
          "Parser has no productions.  Cowardly refusing to construct an empty parser."
  end

  # Get the full options hash.
  opts = build_finalize_opts(opts)
  use  = opts[:use]

  # Get the name of the file in which the parser is defined.  This relies
  # on the call-stack depth, so it must stay directly in this method.
  #
  # FIXME: See why this is failing for the simple ListParser example.
  def_file = caller()[2].split(':')[0] if use

  # Check to make sure we can load the necessary information
  # from the specified object.
  if use and (
       (use.is_a?(String) and File.exist?(use) and File.mtime(use) > File.mtime(def_file)) or
       (use.is_a?(File) and use.mtime > File.mtime(def_file))
     )

    file = get_io(use, 'r')

    begin
      # Un-marshal our saved data structures.
      #
      # NOTE(review): Marshal.load can instantiate arbitrary objects; the
      # :use file must come from a trusted source.
      file.flock(File::LOCK_SH)
      @lh_sides, @states, @symbols = Marshal.load(file)
    ensure
      # Release the lock and close the file if we opened it, even when
      # loading fails.
      file.flock(File::LOCK_UN)
      file.close if use.is_a?(String)
    end

    # Remove any un-needed data and return.
    return clean
  end

  # Grab all of the symbols that comprise the grammar
  # (besides the start symbol).
  @symbols = @grammar.symbols << :ERROR

  # Add our starting state to the state list.
  @start_symbol       = (@grammar.start_symbol.to_s + '\'').to_sym
  start_production, _ = @grammar.production(@start_symbol, @grammar.start_symbol).first
  start_state         = State.new(@symbols, [start_production.to_item])

  start_state.close(@grammar.productions)

  add_state(start_state)

  # Translate the precedence of productions from tokens to
  # (associativity, precedence) pairs.
  @production_precs.map! { |prec| @token_precs[prec] }

  # Build the rest of the transition table.  each_state also visits the
  # states appended by add_state while the loop is running.
  each_state do |state|
    # Transition states.
    tstates = Hash.new { |h, k| h[k] = State.new(@symbols) }

    # Bin each item in this set into reachable transition
    # states.
    state.each do |item|
      if (next_symbol = item.next_symbol)
        tstates[next_symbol] << item.copy
      end
    end

    # For each transition state:
    #  1) Get transition symbol
    #  2) Advance dot
    #  3) Close it
    #  4) Get state id and add transition
    tstates.each do |symbol, tstate|
      tstate.each { |item| item.advance }

      tstate.close(@grammar.productions)

      id = add_state(tstate)

      # Add Goto and Shift actions.
      state.on(symbol, CFG::is_nonterminal?(symbol) ? GoTo.new(id) : Shift.new(id))
    end

    # Find the Accept and Reduce actions for this state.
    state.each do |item|
      if item.at_end?
        if item.lhs == @start_symbol
          state.on(:EOS, Accept.new)
        else
          state.add_reduction(@grammar.productions(:id)[item.id])
        end
      end
    end
  end

  # Build the production.id -> production.lhs map.
  @grammar.productions(:id).each { |id, production| @lh_sides[id] = production.lhs }

  # Prune the parsing table for unnecessary reduce actions.
  prune(opts[:lookahead], opts[:precedence])

  # Check the parser for inconsistencies.
  check_sanity

  # Print the table if requested.
  explain(opts[:explain]) if opts[:explain]

  # Remove any data that is no longer needed.
  clean

  # Store the parser's final data structures if requested.
  if use
    io = get_io(use)

    begin
      io.flock(File::LOCK_EX) if io.is_a?(File)
      Marshal.dump([@lh_sides, @states, @symbols], io)
    ensure
      # Release the lock and close the IO object if we opened it, even
      # when dumping fails.
      io.flock(File::LOCK_UN) if io.is_a?(File)
      io.close if use.is_a?(String)
    end
  end

  nil
end
691
+
692
# Converts an object into an IO object as appropriate: true maps to
# $stdout, a String is opened as a file in *mode*, and an IO is
# returned unchanged.
#
# @param [Object]  o     Object to be converted into an IO object.
# @param [String]  mode  String representing the mode to open the IO object in.
#
# @return [IO, false] The IO object or false if a conversion wasn't possible.
def get_io(o, mode = 'w')
  case o
  when TrueClass then $stdout
  when String    then File.open(o, mode)
  when IO        then o
  else                false
  end
end
709
+
710
# Iterate over the parser's states by index.  The list length is
# re-read on every pass, so states appended while iterating are
# visited as well.
#
# @yieldparam [State] state One of the parser automaton's state objects
#
# @return [void]
def each_state
  index = 0

  until index >= @states.length
    yield @states[index]
    index += 1
  end
end
722
+
723
# Hands out a clone of the grammar rather than the stored object itself.
#
# @return [CFG] The grammar that can be parsed by this Parser.
def grammar
  @grammar.clone
end
727
+
728
# This method generates and memoizes the G' grammar used to
# calculate the LALR(1) lookahead sets.  Information about this
# grammar and its use can be found in the following paper:
#
# Simple Computation of LALR(1) Lookahead Sets
# Manuel E. Bermudez and George Logothetis
# Information Processing Letters 31 - 1989
#
# Symbols in G' are named "<state id>_<symbol>".
#
# @return [CFG]
def grammar_prime
  return @grammar_prime if @grammar_prime

  @grammar_prime = CFG.new

  each_state do |state|
    state.each do |item|
      lhs = "#{state.id}_#{item.next_symbol}".to_sym

      # Only non-terminals that have not been handled for this state yet.
      next unless CFG::is_nonterminal?(item.next_symbol) and not @grammar_prime.productions.keys.include?(lhs)

      @grammar.productions[item.next_symbol].each do |production|
        # Walk the automaton along the right-hand side, tagging each
        # symbol with the state it is read in.
        walker = state

        rhs = production.rhs.map do |symbol|
          tagged = "#{walker.id}_#{symbol} "
          walker = @states[walker.on?(symbol).first.id]
          tagged
        end.join

        @grammar_prime.production(lhs, rhs)
      end
    end
  end

  @grammar_prime
end
766
+
767
# Inform the parser core that a conflict has been detected by recording
# a [type, sym] pair under the state's ID.
#
# @param [Integer]   state_id  ID of the state where the conflict was encountered.
# @param [:RR, :SR]  type      Reduce/Reduce or Shift/Reduce conflict.
# @param [Symbol]    sym       Symbol that caused the conflict.
#
# @return [void]
def inform_conflict(state_id, type, sym)
  @conflicts[state_id] << [type, sym]
end
777
+
778
# This method is used to specify that the symbols in *symbols*
# are left-associative.  Subsequent calls to this method will
# give their arguments higher precedence.
#
# @param [Array<Symbol>] symbols Symbols that are left associative.
#
# @return [void]
def left(*symbols)
  @prec_counts[:left] += 1
  level = @prec_counts[:left]

  symbols.map { |s| s.to_sym }.each { |sym| @token_precs[sym] = [:left, level] }
end
792
+
793
# This method is used to specify that the symbols in *symbols*
# are non-associative.  Each call uses the next :non precedence level.
#
# @param [Array<Symbol>] symbols Symbols that are non-associative.
#
# @return [void]
def nonassoc(*symbols)
  @prec_counts[:non] += 1
  level = @prec_counts[:non]

  symbols.map { |s| s.to_sym }.each { |sym| @token_precs[sym] = [:non, level] }
end
806
+
807
# Adds productions and actions for parsing nonempty lists by
# delegating to the grammar object.
#
# @see CFG#nonempty_list_production
def build_nonempty_list_production(symbol, list_elements, separator = '')
  @grammar.build_nonempty_list_production(symbol, list_elements, separator)
end
alias :nonempty_list :build_nonempty_list_production
814
+
815
+ # This function is where actual parsing takes place. The
816
+ # _tokens_ argument must be an array of Token objects, the last
817
+ # of which has type EOS. By default this method will return the
818
+ # value computed by the first successful parse tree found.
819
+ #
820
+ # Additional information about the parsing options can be found in
821
+ # the main documentation.
822
+ #
823
+ # @param [Array<Token>] tokens Tokens to be parsed.
824
+ # @param [Hash] opts Options to use when parsing input.
825
+ #
826
+ # @option opts [:first, :all] :accept Either :first or :all.
827
+ # @option opts [Object] :env The environment in which to evaluate the production action.
828
+ # @option opts [Boolean,String,IO] :parse_tree To print parse trees in the DOT language or not.
829
+ # @option opts [Boolean,String,IO] :verbose To be verbose or not.
830
+ #
831
+ # @return [Object, Array<Object>] Result or results of parsing the given tokens.
832
+ def parse(tokens, opts = {})
833
+ # Get the full options hash.
834
+ opts = build_parse_opts(opts)
835
+ v = opts[:verbose]
836
+
837
+ if opts[:verbose]
838
+ v.puts("Input tokens:")
839
+ v.puts(tokens.map { |t| t.type }.inspect)
840
+ v.puts
841
+ end
842
+
843
+ # Stack IDs to keep track of them during parsing.
844
+ stack_id = 0
845
+
846
+ # Error mode indicators.
847
+ error_mode = false
848
+ reduction_guard = false
849
+
850
+ # Our various list of stacks.
851
+ accepted = []
852
+ moving_on = []
853
+ processing = [ParseStack.new(stack_id += 1)]
854
+
855
+ # Iterate over the tokens. We don't proceed to the
856
+ # next token until every stack is done with the
857
+ # current one.
858
+ tokens.each_with_index do |token, index|
859
+ # Check to make sure this token was seen in the
860
+ # grammar definition.
861
+ raise BadToken.new(token) if not @symbols.include?(token.type)
862
+
863
+ v.puts("Current token: #{token.type}#{if token.value then "(#{token.value})" end}") if v
864
+
865
+ # Iterate over the stacks until each one is done.
866
+ while (stack = processing.shift)
867
+ # Execute any token hooks in this stack's environment.
868
+ @token_hooks[token.type].each { |hook| opts[:env].instance_exec(&hook)}
869
+
870
+ # Get the available actions for this stack.
871
+ actions = @states[stack.state].on?(token.type)
872
+
873
+ if actions.empty?
874
+ # If we are already in error mode and there
875
+ # are no actions we skip this token.
876
+ if error_mode
877
+ v.puts("Discarding token: #{token.type}#{if token.value then "(#{token.value})" end}") if v
878
+
879
+ # Add the current token to the array
880
+ # that corresponds to the output value
881
+ # for the ERROR token.
882
+ stack.output_stack.last << token
883
+
884
+ moving_on << stack
885
+ next
886
+ end
887
+
888
+ # We would be dropping the last stack so we
889
+ # are going to go into error mode.
890
+ if accepted.empty? and moving_on.empty? and processing.empty?
891
+
892
+ if v
893
+ v.puts
894
+ v.puts('Current stack:')
895
+ v.puts("\tID: #{stack.id}")
896
+ v.puts("\tState stack:\t#{stack.state_stack.inspect}")
897
+ v.puts("\tOutput Stack:\t#{stack.output_stack.inspect}")
898
+ v.puts
899
+ end
900
+
901
+ # Try and find a valid error state.
902
+ while stack.state
903
+ if (actions = @states[stack.state].on?(:ERROR)).empty?
904
+ # This state doesn't have an
905
+ # error production. Moving on.
906
+ stack.pop
907
+ else
908
+ # Enter the found error state.
909
+ stack.push(actions.first.id, [token], :ERROR, token.position)
910
+
911
+ break
912
+ end
913
+ end
914
+
915
+ if stack.state
916
+ # We found a valid error state.
917
+ error_mode = reduction_guard = true
918
+ opts[:env].he = true
919
+ moving_on << stack
920
+
921
+ if v
922
+ v.puts('Invalid input encountered. Entering error handling mode.')
923
+ v.puts("Discarding token: #{token.type}#{if token.value then "(#{token.value})" end}")
924
+ end
925
+ else
926
+ # No valid error states could be
927
+ # found. Time to print a message
928
+ # and leave.
929
+
930
+ v.puts("No more actions for stack #{stack.id}. Dropping stack.") if v
931
+ end
932
+ else
933
+ v.puts("No more actions for stack #{stack.id}. Dropping stack.") if v
934
+ end
935
+
936
+ next
937
+ end
938
+
939
+ # Make (stack, action) pairs, duplicating the
940
+ # stack as necessary.
941
+ pairs = [[stack, actions.pop]] + actions.map {|action| [stack.branch(stack_id += 1), action] }
942
+
943
+ pairs.each do |new_stack, action|
944
+ if v
945
+ v.puts
946
+ v.puts('Current stack:')
947
+ v.puts("\tID: #{new_stack.id}")
948
+ v.puts("\tState stack:\t#{new_stack.state_stack.inspect}")
949
+ v.puts("\tOutput Stack:\t#{new_stack.output_stack.inspect}")
950
+ v.puts
951
+ v.puts("Action taken: #{action.to_s}")
952
+ end
953
+
954
+ if action.is_a?(Accept)
955
+ if opts[:accept] == :all
956
+ accepted << new_stack
957
+ else
958
+ v.puts('Accepting input.') if v
959
+ opts[:parse_tree].puts(new_stack.tree) if opts[:parse_tree]
960
+
961
+ if opts[:env].he
962
+ raise HandledError.new(opts[:env].errors, new_stack.result)
963
+ else
964
+ return new_stack.result
965
+ end
966
+ end
967
+
968
+ elsif action.is_a?(Reduce)
969
+ # Get the production associated with this reduction.
970
+ production_proc, pop_size = @procs[action.id]
971
+
972
+ if not production_proc
973
+ raise InternalParserException, "No production #{action.id} found."
974
+ end
975
+
976
+ args, positions = new_stack.pop(pop_size)
977
+ opts[:env].set_positions(positions)
978
+
979
+ if not production_proc.selections.empty?
980
+ args = args.values_at(*production_proc.selections)
981
+ end
982
+
983
+ result =
984
+ if production_proc.arg_type == :array
985
+ opts[:env].instance_exec(args, &production_proc)
986
+ else
987
+ opts[:env].instance_exec(*args, &production_proc)
988
+ end
989
+
990
+ if (goto = @states[new_stack.state].on?(@lh_sides[action.id]).first)
991
+
992
+ v.puts("Going to state #{goto.id}.\n") if v
993
+
994
+ pos0 = nil
995
+
996
+ if args.empty?
997
+ # Empty productions need to be
998
+ # handled specially.
999
+ pos0 = new_stack.position
1000
+
1001
+ pos0.stream_offset += pos0.length + 1
1002
+ pos0.line_offset += pos0.length + 1
1003
+
1004
+ pos0.length = 0
1005
+ else
1006
+ pos0 = opts[:env].pos( 0)
1007
+ pos1 = opts[:env].pos(-1)
1008
+
1009
+ pos0.length = (pos1.stream_offset + pos1.length) - pos0.stream_offset
1010
+ end
1011
+
1012
+ new_stack.push(goto.id, result, @lh_sides[action.id], pos0)
1013
+ else
1014
+ raise InternalParserException, "No GoTo action found in state #{stack.state} " +
1015
+ "after reducing by production #{action.id}"
1016
+ end
1017
+
1018
+ # This stack is NOT ready for the next
1019
+ # token.
1020
+ processing << new_stack
1021
+
1022
+ # Exit error mode if necessary.
1023
+ error_mode = false if error_mode and not reduction_guard
1024
+
1025
+ elsif action.is_a?(Shift)
1026
+ new_stack.push(action.id, token.value, token.type, token.position)
1027
+
1028
+ # This stack is ready for the next
1029
+ # token.
1030
+ moving_on << new_stack
1031
+
1032
+ # Exit error mode.
1033
+ error_mode = false
1034
+ end
1035
+ end
1036
+ end
1037
+
1038
+ v.puts("\n\n") if v
1039
+
1040
+ processing = moving_on
1041
+ moving_on = []
1042
+
1043
+ # If we don't have any active stacks at this point the
1044
+ # string isn't in the language.
1045
+ if opts[:accept] == :first and processing.length == 0
1046
+ v.close if v and v != $stdout
1047
+ raise NotInLanguage.new(tokens[0...index], tokens[index], tokens[index.next..-1])
1048
+ end
1049
+
1050
+ reduction_guard = false
1051
+ end
1052
+
1053
+ # If we have reached this point we are accepting all parse
1054
+ # trees.
1055
+ if v
1056
+ v.puts("Accepting input with #{accepted.length} derivation(s).")
1057
+
1058
+ v.close if v != $stdout
1059
+ end
1060
+
1061
+ accepted.each do |stack|
1062
+ opts[:parse_tree].puts(stack.tree)
1063
+ end if opts[:parse_tree]
1064
+
1065
+ results = accepted.map { |stack| stack.result }
1066
+
1067
+ if opts[:env].he
1068
+ raise HandledError.new(opts[:env].errors, results)
1069
+ else
1070
+ return results
1071
+ end
1072
+ end
1073
+
1074
+ # Adds a new production to the parser with a left-hand value of
1075
+ # *symbol*. If *expression* is specified it is taken as the
1076
+ # right-hand side of the production and *action* is associated
1077
+ # with the production. If *expression* is nil then *action* is
1078
+ # evaluated and expected to make one or more calls to
1079
+ # Parser.clause. A precedence can be associated with this
1080
+ # production by setting *precedence* to a terminal symbol.
1081
+ #
1082
+ # @param [Symbol] symbol Left-hand side of the production.
1083
+ # @param [String, Symbol, nil] expression Right-hand side of the production.
1084
+ # @param [Symbol, nil] precedence Symbol representing the precedence of this production.
1085
+ # @param [:array, :splat] arg_type Method to use when passing arguments to the action.
1086
+ # @param [Proc] action Action associated with this production.
1087
+ #
1088
+ # @return [void]
1089
def production(symbol, expression = nil, precedence = nil, arg_type = @default_arg_type, &action)
  # The left-hand side must be a String or Symbol that the CFG module
  # recognises as a non-terminal.
  acceptable_type = symbol.is_a?(Symbol) || symbol.is_a?(String)
  unless acceptable_type && CFG::is_nonterminal?(symbol)
    raise ParserConstructionException, 'Production symbols must be Strings or Symbols and be in all lowercase.'
  end

  # Record the production being defined so that nested clause calls can
  # pick up the left-hand side and precedence.
  @grammar.curr_lhs = symbol.to_sym
  @curr_prec = precedence

  # Temporarily override the default argument type when the caller asked
  # for a different one; remember the previous value so it can be restored.
  saved_arg_type = nil
  unless arg_type == @default_arg_type
    saved_arg_type = @default_arg_type
    @default_arg_type = arg_type
  end

  if expression
    # Single right-hand side given directly: register it as a clause.
    clause(expression, precedence, &action)
  else
    # No expression: the block itself is expected to call clause.
    instance_exec(&action)
  end

  @default_arg_type = saved_arg_type unless saved_arg_type.nil?

  # Clear the per-production state again.
  @grammar.curr_lhs = nil
  @curr_prec = nil
end
1116
+ alias :p :production
1117
+
1118
+ # This method uses lookahead sets and precedence information to
1119
+ # resolve conflicts and remove unnecessary reduce actions.
1120
+ #
1121
+ # @param [Boolean] do_lookahead Prune based on lookahead sets or not.
1122
+ # @param [Boolean] do_precedence Prune based on precedence or not.
1123
+ #
1124
+ # @return [void]
1125
def prune(do_lookahead, do_precedence)
  # If both options are false there is no pruning to do.
  return unless do_lookahead || do_precedence

  terms = @grammar.terms

  each_state do |state0|

    #####################
    # Lookahead Pruning #
    #####################

    if do_lookahead
      # Find all of the reductions in this state.
      reductions = state0.actions.values.flatten.uniq.select { |a| a.is_a?(Reduce) }

      reductions.each do |reduction|
        production = @grammar.productions(:id)[reduction.id]

        # Build the lookahead set from the follow sets of every state
        # from which state0 is reachable via the production's
        # right-hand side.
        lookahead = []
        each_state do |state1|
          if self.check_reachability(state1, state0, production.rhs)
            lookahead |= self.grammar_prime.follow_set("#{state1.id}_#{production.lhs}".to_sym)
          end
        end

        # Translate the G' follow symbols ("<state id>_<symbol>") into
        # G lookahead symbols by dropping the state prefix.
        lookahead = lookahead.map { |sym| sym.to_s.split('_', 2).last.to_sym }.uniq

        # Here we remove the unnecessary reductions. If there are
        # error productions we need to scale back the amount of
        # pruning done: only prune where there is an actual conflict.
        pruning_candidates = terms - lookahead
        only_on_conflict = terms.include?(:ERROR)

        pruning_candidates.each do |sym|
          next if only_on_conflict && !state0.conflict_on?(sym)

          state0.actions[sym].delete(reduction)
        end
      end
    end

    ########################################
    # Precedence and Associativity Pruning #
    ########################################

    if do_precedence
      state0.actions.each do |symbol, actions|

        # We are only interested in pruning actions for terminal
        # symbols.
        next unless CFG::is_terminal?(symbol)

        # Skip to the next one if there is no possibility of a
        # conflict.
        next unless actions && actions.length > 1

        # Precedence can only settle the conflict when every Reduce has
        # a production precedence AND at least one Shift is involved.
        #
        # NOTE: this used to be written as
        #   resolve_ok = <inject> and <inject>
        # which Ruby parses as `(resolve_ok = <inject>) and <inject>`,
        # silently discarding the Shift requirement.
        all_reductions_ranked = actions.all? { |a| !a.is_a?(Reduce) || @production_precs[a.id] }
        has_shift = actions.any? { |a| a.is_a?(Shift) }
        resolve_ok = all_reductions_ranked && has_shift

        if @token_precs[symbol] && resolve_ok
          max_prec = 0
          selected_action = nil

          # Grab the associativity and precedence for the input token.
          tassoc, tprec = @token_precs[symbol]

          actions.each do |a|
            shifting = a.is_a?(Shift)
            assoc, prec = shifting ? [tassoc, tprec] : @production_precs[a.id]

            # On equal precedence the current action only replaces
            # the previous choice if:
            # * The token is left associative and the action is a Reduce
            # * The token is right associative and the action is a Shift
            wins_tie = tassoc == (shifting ? :right : :left)

            if prec > max_prec || (prec == max_prec && wins_tie)
              max_prec = prec
              selected_action = a

            elsif prec == max_prec && assoc == :nonassoc
              raise ParserConstructionException, 'Non-associative token found during conflict resolution.'

            end
          end

          # Replacing the value of an existing key is safe while
          # iterating over the hash.
          state0.actions[symbol] = [selected_action]
        end
      end
    end
  end
end
1227
+
1228
+ # This method is used to specify that the symbols in _symbols_
1229
+ # are right associative. Subsequent calls to this method will
1230
+ # give their arguments higher precedence.
1231
+ #
1232
+ # @param [Array<Symbol>] symbols Symbols that are right-associative.
1233
+ #
1234
+ # @return [void]
1235
def right(*symbols)
  # Each call bumps the right-associative counter; the new count becomes
  # the precedence level shared by every symbol in this call.
  @prec_counts[:right] += 1
  level = @prec_counts[:right]

  symbols.map(&:to_sym).each { |name| @token_precs[name] = [:right, level] }
end
1242
+
1243
+ # Changes the starting symbol of the parser.
1244
+ #
1245
+ # @param [Symbol] symbol The starting symbol of the grammar.
1246
+ #
1247
+ # @return [void]
1248
def start(symbol)
  # Simply forwards the new start symbol to the underlying grammar.
  @grammar.start(symbol)
end
1251
+
1252
+ # Add a hook that is executed whenever *sym* is seen.
1253
+ #
1254
+ # The *sym* must be a terminal symbol.
1255
+ #
1256
+ # @param [Symbol] sym Symbol to hook into
1257
+ # @param [Proc] proc Code to execute when the block is seen
1258
+ #
1259
+ # @return [void]
1260
def token_hook(sym, &proc)
  # Hooks are only accepted for terminal symbols. The error message used
  # to claim the opposite ("non-terminal"), contradicting this check.
  unless CFG::is_terminal?(sym)
    raise 'Method token_hook expects `sym` to be a terminal.'
  end

  # Append the hook to the list kept for this symbol.
  @token_hooks[sym] << proc
end
1267
+ end
1268
+
1269
+ ####################
1270
+ # Instance Methods #
1271
+ ####################
1272
+
1273
+ # Instantiates a new parser and creates an environment to be
1274
+ # used for subsequent calls.
1275
def initialize
  # Look the Environment constant up through the concrete parser class so
  # that the lookup starts at self.class rather than at this file's scope.
  environment_class = self.class::Environment
  @env = environment_class.new
end
1278
+
1279
+ # Parses the given token stream using the encapsulated environment.
1280
+ #
1281
+ # @see .parse
1282
def parse(tokens, opts = {})
  # Start from this instance's environment and let caller-supplied
  # options (including an explicit :env) take priority.
  merged_opts = { :env => @env }
  merged_opts.update(opts)

  self.class.parse(tokens, merged_opts)
end
1285
+
1286
+ ################################
1287
+
1288
+ # All actions passed to Parser.production and Parser.clause are
1289
+ # evaluated inside an instance of the Environment class or its
1290
+ # subclass (which must have the same name).
1291
class Environment
  # Flag recording whether an error was encountered and handled.
  #
  # @return [Boolean]
  attr_accessor :he

  # Every object registered through the *error* method.
  #
  # @return [Array<Object>]
  attr_reader :errors

  # Build a fresh Environment in its reset state.
  def initialize
    reset
  end

  # Record an object in the list of errors.
  #
  # @param [Object] o Object describing the error.
  #
  # @return [Array<Object>] The list of errors, including *o*.
  def error(o)
    @errors.push(o)
  end

  # Look up the position of the symbol at index n (zero based) in the
  # array most recently handed to set_positions.
  #
  # @param [Integer] n Index for symbol position.
  #
  # @return [StreamPosition] Position of symbol at index n.
  def pos(n)
    @positions[n]
  end

  # Re-initialize the state that must not leak from one parse call to
  # the next: the error list and the handled-error flag.
  #
  # @return [void]
  def reset
    @errors = []
    @he = false
  end

  # Store the positions array used by pos.
  #
  # @param [Array<StreamPosition>] positions
  #
  # @return [Array<StreamPosition>] The same array of positions.
  def set_positions(positions)
    @positions = positions
  end
end
1342
+
1343
+ # The ParseStack class is used by a Parser to keep track of state
1344
+ # during parsing.
1345
class ParseStack
  # @return [Integer] ID of this parse stack.
  attr_reader :id

  # @return [Array<Object>] Array of objects produced by {Reduce} actions.
  attr_reader :output_stack

  # @return [Array<Integer>] Array of states used when performing {Reduce} actions.
  attr_reader :state_stack

  # Instantiate a new ParseStack object.
  #
  # @param [Integer]                id           ID for this parse stack. Used by GLR algorithm.
  # @param [Array<Object>]          ostack       Output stack. Holds results of {Reduce} and {Shift} actions.
  # @param [Array<Integer>]         sstack       State stack. Holds states that have been shifted due to {Shift} actions.
  # @param [Array<Integer>]         nstack       Node stack. Holds dot language IDs for nodes in the parse tree.
  # @param [Array<Array<Integer>>]  connections  Integer pairs representing edges in the parse tree.
  # @param [Array<Symbol>]          labels       Labels for nodes in the parse tree.
  # @param [Array<StreamPosition>]  positions    Position data for symbols that have been shifted.
  def initialize(id, ostack = [], sstack = [0], nstack = [], connections = [], labels = [], positions = [])
    @id = id

    @node_stack   = nstack
    @output_stack = ostack
    @state_stack  = sstack

    @connections = connections
    @labels      = labels
    @positions   = positions
  end

  # Branch this stack, effectively creating a new copy of its
  # internal state.
  #
  # @param [Integer] new_id ID for the new ParseStack.
  #
  # @return [ParseStack]
  def branch(new_id)
    # We have to do a deeper copy of the output stack to avoid
    # interactions between the Proc objects for the different
    # parsing paths.
    #
    # The begin/rescue block is needed because some classes
    # respond to `clone` but always raise an error.
    new_output_stack = @output_stack.map do |o|
      # Check to see if we can obtain a deep copy. The element itself
      # must be asked; this previously queried the integer literal 0,
      # so the decision never depended on the element.
      if o.respond_to?(:copy)
        o.copy

      else
        begin o.clone rescue o end
      end
    end

    ParseStack.new(new_id, new_output_stack, @state_stack.clone,
      @node_stack.clone, @connections.clone, @labels.clone, @positions.clone)
  end

  # @return [StreamPosition] Position data for the last symbol on the stack.
  def position
    if @positions.empty?
      StreamPosition.new
    else
      # Hand out a clone so callers can modify it without touching the
      # stored position.
      @positions.last.clone
    end
  end

  # Push new state and other information onto the stack.
  #
  # @param [Integer]         state     ID of the shifted state.
  # @param [Object]          o         Value of Token that caused the shift.
  # @param [Symbol]          node0     Label for node in parse tree.
  # @param [StreamPosition]  position  Position token that got shifted.
  #
  # @return [void]
  def push(state, o, node0, position)
    @state_stack  << state
    @output_stack << o
    @node_stack   << @labels.length
    # Terminals with a value are labelled "TYPE(value)".
    @labels       << if CFG::is_terminal?(node0) and o then node0.to_s + "(#{o})" else node0 end
    @positions    << position

    if CFG::is_nonterminal?(node0)
      # Connect the new non-terminal node to the nodes removed by the
      # most recent call to pop.
      @cbuffer.each do |node1|
        @connections << [@labels.length - 1, node1]
      end
    end
  end

  # Pop some number of objects off of the inside stacks.
  #
  # @param [Integer] n Number of object to pop off the stack.
  #
  # @return [Array(Object, StreamPosition)] Values popped from the output and positions stacks.
  def pop(n = 1)
    @state_stack.pop(n)

    # Pop the node stack so that the proper edges can be added
    # when the production's left-hand side non-terminal is
    # pushed onto the stack.
    @cbuffer = @node_stack.pop(n)

    [@output_stack.pop(n), @positions.pop(n)]
  end

  # Fetch the result stored in this ParseStack. If there is more
  # than one object left on the output stack there is an error.
  #
  # @raise [InternalParserException] If the output stack does not hold exactly one element.
  #
  # @return [Object] The end result of this parse stack.
  def result
    if @output_stack.length == 1
      return @output_stack.last
    else
      raise InternalParserException, "The parsing stack should have 1 element on the output stack, not #{@output_stack.length}."
    end
  end

  # @return [Integer] Current state of this ParseStack.
  def state
    @state_stack.last
  end

  # @return [String] Representation of the parse tree in the DOT language.
  def tree
    # One node declaration per label; terminals are drawn as boxes.
    nodes = @labels.each_with_index.map do |label, i|
      shape = CFG::is_terminal?(label) ? ' shape=box' : ''
      "\tnode#{i} [label=\"#{label}\"#{shape}];\n"
    end

    edges = @connections.map { |from, to| "\tnode#{from} -> node#{to};\n" }

    "digraph tree#{@id} {\n#{nodes.join}\n#{edges.join}}"
  end
end
1490
+
1491
+ # The State class is used to represent sets of items and actions to be
1492
+ # used during parsing.
1493
class State
  # @return [Integer] State's ID.
  attr_accessor :id

  # @return [Array<CFG::Item>] Item objects that comprise this state
  attr_reader :items

  # @return [Hash{Symbol => Array<Action>}] Maps lookahead symbols to actions
  attr_reader :actions

  # Build a State with an empty action list for every known token.
  #
  # @param [Array<Symbol>]     tokens  Tokens that represent this state
  # @param [Array<CFG::Item>]  items   Items that make up this state
  def initialize(tokens, items = [])
    @id      = nil
    @items   = items
    @actions = tokens.each_with_object({}) { |token, table| table[token] = [] }
  end

  # Equality test. States are compared by their items while both still
  # have them; once either has been cleaned the IDs are compared instead.
  #
  # @param [State] other Another State to compare to
  #
  # @return [Boolean]
  def ==(other)
    return items == other.items if items && other.items

    id == other.id
  end

  # Register a Reduce action for *production* on every terminal symbol
  # except ERROR.
  #
  # @param [Production] production Production used to perform the reduction
  #
  # @return [void]
  def add_reduction(production)
    reduction = Reduce.new(production)

    # Reduce actions are not allowed for the ERROR terminal.
    @actions.each do |sym, list|
      list << reduction if CFG::is_terminal?(sym) && sym != :ERROR
    end
  end

  # Add *item* to this state unless it is not a CFG::Item or is already
  # present.
  #
  # @param [CFG::Item] item Item to add to this state.
  def append(item)
    @items << item if item.is_a?(CFG::Item) && !@items.include?(item)
  end
  alias :<< :append

  # Drop the list of {CFG::Item} objects held by this State.
  #
  # @return [void]
  def clean
    @items = nil
  end

  # Close this state using *productions*: for every item whose next
  # symbol is a non-terminal, the items of that symbol's productions are
  # appended (and, being appended, are visited in turn).
  #
  # @param [Array<CFG::Production>] productions Productions used to close this state.
  #
  # @return [void]
  def close(productions)
    each do |item|
      upcoming = item.next_symbol
      next unless upcoming && CFG::is_nonterminal?(upcoming)

      productions[upcoming].each { |p| self << p.to_item }
    end
  end

  # Report the kind of conflict, if any, this state has on input *sym*:
  # :SR for exactly one shift plus at least one reduction, :RR for more
  # than one reduction, nil otherwise.
  #
  # @param [Symbol] sym Symbol to check for conflicts on.
  #
  # @return [:SR, :RR, nil]
  def conflict_on?(sym)
    reduce_count = 0
    shift_count  = 0

    @actions[sym].each do |action|
      case action
      when Reduce then reduce_count += 1
      when Shift  then shift_count += 1
      end
    end

    if shift_count == 1 && reduce_count > 0
      :SR
    elsif reduce_count > 1
      :RR
    end
  end

  # Yield each of the state's items. The length is re-read on every
  # step, so items appended during iteration are visited as well.
  #
  # @return [void]
  def each
    index = 0
    until index >= @items.count
      yield @items.at(index)
      index += 1
    end
  end

  # Add an Action to perform when the input token is *symbol*.
  #
  # @param [Symbol] symbol Symbol to add action for.
  # @param [Action] action Action for symbol.
  #
  # @raise [ParserConstructionException] If *symbol* was not among the tokens given at construction.
  #
  # @return [void]
  def on(symbol, action)
    unless @actions.key?(symbol)
      raise ParserConstructionException, "Attempting to set action for token (#{symbol}) not seen in grammar definition."
    end

    @actions[symbol] << action
  end

  # Fetch a copy of the actions to take when the input token is
  # *symbol*; changing the returned array does not alter the state.
  #
  # @param [Symbol] symbol Symbol we want the actions for.
  #
  # @return [Array<Action>] Actions that should be taken.
  def on?(symbol)
    @actions[symbol].clone
  end
end
1629
+
1630
+ # A subclass of Proc that indicates how it should be passed arguments
1631
+ # by the parser.
1632
class ProdProc < Proc
  # @return [:array, :splat] Method that should be used to pass arguments to this proc.
  attr_reader :arg_type

  # @return [Array<Integer>] Mask for selection of tokens to pass to action. Empty mask means pass all.
  attr_reader :selections

  # Wrap the given block, remembering how the parser should pass
  # arguments to it.
  #
  # @param [:array, :splat]  arg_type    Argument passing method.
  # @param [Array<Integer>]  selections  Indices of the arguments to pass on.
  def initialize(arg_type = :splat, selections = [])
    # Explicit empty parentheses: the block is still forwarded to Proc,
    # but none of our own arguments are.
    super()

    @arg_type, @selections = arg_type, selections
  end
end
1645
+
1646
+ # The Action class is used to indicate what action the parser should
1647
+ # take given a current state and input token.
1648
class Action
  # @return [Integer] ID of this action.
  attr_reader :id

  # Store the ID carried by this action; subclasses decide what the ID
  # refers to.
  #
  # @param [Integer] id ID of this action.
  def initialize(id = nil)
    @id = id
  end
end
1657
+
1658
+ # The Accept class indicates to the parser that it should accept the
1659
+ # current parse tree.
1660
class Accept < Action
  # @return [String] String representation of this action.
  def to_s
    'Accept'
  end
end
1666
+
1667
+ # The GoTo class indicates to the parser that it should goto the state
1668
+ # specified by GoTo.id.
1669
class GoTo < Action
  # @return [String] String representation of this action, naming the target state ID.
  def to_s
    "GoTo #{id}"
  end
end
1675
+
1676
+ # The Reduce class indicates to the parser that it should reduce the
1677
+ # input stack by the rule specified by Reduce.id.
1678
class Reduce < Action

  # The action's ID is taken from the production's ID; the production
  # itself is kept for display purposes.
  #
  # @param [Production] production Production to reduce by
  def initialize(production)
    super(production.id)

    @production = production
  end

  # @return [String] String representation of this action.
  def to_s
    "Reduce by Production #{id} : #{@production}"
  end
end
1692
+
1693
+ # The Shift class indicates to the parser that it should shift the
1694
+ # current input token.
1695
class Shift < Action
  # @return [String] String representation of this action, naming the target state ID.
  def to_s
    "Shift to State #{id}"
  end
end
1701
+ end
1702
+ end