rltk3 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +7 -0
  2. data/AUTHORS +1 -0
  3. data/LICENSE +27 -0
  4. data/README.md +852 -0
  5. data/Rakefile +197 -0
  6. data/lib/rltk/ast.rb +573 -0
  7. data/lib/rltk/cfg.rb +683 -0
  8. data/lib/rltk/cg/basic_block.rb +157 -0
  9. data/lib/rltk/cg/bindings.rb +151 -0
  10. data/lib/rltk/cg/builder.rb +1127 -0
  11. data/lib/rltk/cg/context.rb +48 -0
  12. data/lib/rltk/cg/contractor.rb +51 -0
  13. data/lib/rltk/cg/execution_engine.rb +194 -0
  14. data/lib/rltk/cg/function.rb +237 -0
  15. data/lib/rltk/cg/generated_bindings.rb +8118 -0
  16. data/lib/rltk/cg/generic_value.rb +95 -0
  17. data/lib/rltk/cg/instruction.rb +519 -0
  18. data/lib/rltk/cg/llvm.rb +150 -0
  19. data/lib/rltk/cg/memory_buffer.rb +75 -0
  20. data/lib/rltk/cg/module.rb +451 -0
  21. data/lib/rltk/cg/pass_manager.rb +252 -0
  22. data/lib/rltk/cg/support.rb +29 -0
  23. data/lib/rltk/cg/target.rb +230 -0
  24. data/lib/rltk/cg/triple.rb +58 -0
  25. data/lib/rltk/cg/type.rb +554 -0
  26. data/lib/rltk/cg/value.rb +1272 -0
  27. data/lib/rltk/cg.rb +32 -0
  28. data/lib/rltk/lexer.rb +372 -0
  29. data/lib/rltk/lexers/calculator.rb +44 -0
  30. data/lib/rltk/lexers/ebnf.rb +38 -0
  31. data/lib/rltk/parser.rb +1702 -0
  32. data/lib/rltk/parsers/infix_calc.rb +43 -0
  33. data/lib/rltk/parsers/postfix_calc.rb +34 -0
  34. data/lib/rltk/parsers/prefix_calc.rb +34 -0
  35. data/lib/rltk/token.rb +90 -0
  36. data/lib/rltk/version.rb +11 -0
  37. data/lib/rltk.rb +16 -0
  38. data/test/cg/tc_basic_block.rb +83 -0
  39. data/test/cg/tc_control_flow.rb +191 -0
  40. data/test/cg/tc_function.rb +54 -0
  41. data/test/cg/tc_generic_value.rb +33 -0
  42. data/test/cg/tc_instruction.rb +256 -0
  43. data/test/cg/tc_llvm.rb +25 -0
  44. data/test/cg/tc_math.rb +88 -0
  45. data/test/cg/tc_module.rb +89 -0
  46. data/test/cg/tc_transforms.rb +68 -0
  47. data/test/cg/tc_type.rb +69 -0
  48. data/test/cg/tc_value.rb +151 -0
  49. data/test/cg/ts_cg.rb +23 -0
  50. data/test/tc_ast.rb +332 -0
  51. data/test/tc_cfg.rb +164 -0
  52. data/test/tc_lexer.rb +216 -0
  53. data/test/tc_parser.rb +711 -0
  54. data/test/tc_token.rb +34 -0
  55. data/test/ts_rltk.rb +47 -0
  56. metadata +317 -0
@@ -0,0 +1,1702 @@
1
+ # Author: Chris Wailes <chris.wailes@gmail.com>
2
+ # Project: Ruby Language Toolkit
3
+ # Date: 2011/01/19
4
+ # Description: This file contains the base class for parsers that use RLTK.
5
+
6
+ ############
7
+ # Requires #
8
+ ############
9
+
10
+ # Ruby Language Toolkit
11
+ require 'rltk/cfg'
12
+
13
+ #######################
14
+ # Classes and Modules #
15
+ #######################
16
+
17
+ # The RLTK root module
18
+ module RLTK
19
+ # A BadToken error indicates that a token was observed in the input stream
20
+ # that wasn't used in the grammar's definition.
21
class BadToken < StandardError
  # Remember the offending token so it can be shown in the message.
  #
  # @param [Object] token Token that is not part of the grammar definition.
  def initialize(token)
    @token = token
  end

  # @return [String] String representation of the error.
  def to_s
    shown = @token.inspect

    "Unexpected token: #{shown}. Token not present in grammar definition."
  end
end
31
+
32
+ # A NotInLanguage error is raised whenever there is no valid parse tree
33
+ # for a given token stream. In other words, the input string is not in the
34
+ # defined language.
35
class NotInLanguage < StandardError
  class << self
    # Number of tokens of context shown on each side of the failing
    # token when no explicit length is passed to the constructor.
    #
    # @return [Integer, :all] Configured length, or 100 if none was set.
    def default_context_length
      @default_context_length || 100
    end

    # @param [Integer, :all] v New default context length.
    def default_context_length=(v)
      @default_context_length = v
    end
  end

  # @return [Array<Token>] List of tokens that have been successfully parsed
  attr_reader :seen

  # @return [Token] Token that caused the parser to stop
  attr_reader :current

  # @return [Array<Token>] List of tokens that have yet to be seen
  attr_reader :remaining

  # @param [Array<Token>]  seen            Tokens that have been successfully parsed
  # @param [Token]         current         Token that caused the parser to stop
  # @param [Array<Token>]  remaining       Tokens that have yet to be seen
  # @param [Integer, :all] context_length  How many tokens of context to_s prints on each side
  def initialize(seen, current, remaining, context_length = self.class.default_context_length)
    @seen           = seen
    @current        = current
    @remaining      = remaining
    @context_length = context_length
  end

  # @return [String] String representation of the error.
  def to_s
    if @context_length == :all
      seen      = @seen
      remaining = @remaining
    else
      # Array#last / Array#first clamp to the array's size.  Slicing with
      # [-n..-1] returned nil whenever fewer than n tokens had been seen,
      # and [0..n] returned n + 1 tokens instead of n.
      count     = [@context_length, 0].max
      seen      = @seen.last(count)
      remaining = @remaining.first(count)
    end

    "String not in language. Token info:\n\tSeen: #{seen}\n\tCurrent: #{@current}\n\tRemaining: #{remaining}"
  end
end
73
+
74
+ # An error of this type is raised when the parser encountered an error that
75
+ # was handled by an error production.
76
class HandledError < StandardError
  # The errors as reported by the parser.
  #
  # @return [Array<Object>]
  attr_reader :errors

  # The result that would have been returned by the call to *parse*.
  #
  # @return [Object]
  attr_reader :result

  # Build a HandledError carrying both the collected errors and the value
  # the parse produced despite them.
  #
  # @param [Array<Object>]  errors  Errors collected while parsing.
  # @param [Object]         result  Value produced by the parse.
  def initialize(errors, result)
    @errors, @result = errors, result
  end
end
95
+
96
+ # Used for exceptions that occur during parser construction.
97
class ParserConstructionException < Exception
  # No behaviour of its own.  NOTE: it descends from Exception, not
  # StandardError, so a bare `rescue` clause will not catch it.
end
98
+
99
+ # Used for runtime exceptions that are the parser's fault. These should
100
+ # never be observed in the wild.
101
class InternalParserException < Exception
  # No behaviour of its own.  NOTE: it descends from Exception, not
  # StandardError, so a bare `rescue` clause will not catch it.
end
102
+
103
+ # Used to indicate that a parser is empty or hasn't been finalized.
104
class UselessParserException < Exception
  # Sets the fixed error message for this exception; takes no arguments.
  def initialize
    super 'Parser has not been finalized.'
  end
end
110
+
111
+ # The Parser class may be sub-classed to produce new parsers. These
112
+ # parsers have a lot of features, and are described in the main
113
+ # documentation.
114
+ class Parser
115
+ # @return [Environment] Environment used by the instantiated parser.
116
+ attr_reader :env
117
+
118
+ #################
119
+ # Class Methods #
120
+ #################
121
+
122
+ class << self
123
+ # The overridden new prevents un-finalized parsers from being
124
+ # instantiated.
125
def new(*args)
  # @symbols starts out nil (see install_icvars) and is only assigned by
  # finalize, so a falsy value means the parser is not ready for use.
  raise UselessParserException unless @symbols

  super(*args)
end
132
+
133
+ # Installs instance class variables into a class.
134
+ #
135
+ # @return [void]
136
def install_icvars
  @curr_lhs  = nil
  @curr_prec = nil

  # Conflicts are bucketed by state ID; each bucket is created on demand.
  @conflicts     = Hash.new { |table, state_id| table[state_id] = [] }
  @grammar       = CFG.new
  @grammar_prime = nil

  @lh_sides = {}
  @procs    = []
  @states   = []

  # Stays nil until finalize runs; Parser.new checks this.
  @symbols = nil

  # Variables for dealing with precedence.
  @prec_counts      = { :left => 0, :right => 0, :non => 0 }
  @production_precs = []
  @token_precs      = {}
  @token_hooks      = Hash.new { |table, type| table[type] = [] }

  # Default argument handling policy.  Valid values are :array and
  # :splat.
  @default_arg_type = :splat

  # The grammar invokes this callback with a production it created itself;
  # register an action and a precedence for that production here.
  @grammar.callback do |type, which, p, sels = []|
    action =
      case type
      when :optional
        if which == :empty
          ProdProc.new { || nil }
        else
          ProdProc.new { |o| o }
        end

      when :elp
        if which == :empty
          ProdProc.new { || [] }
        else
          ProdProc.new { |prime| prime }
        end

      when :nelp
        if which == :single
          ProdProc.new { |el| [el] }

        elsif which == :multiple
          ProdProc.new(:splat, sels) do |*syms|
            # syms.first is the list built so far; the rest is the
            # newly parsed element (or elements).
            el = syms[1..-1]
            syms.first << (el.length == 1 ? el.first : el)
          end

        else
          ProdProc.new { |*el| el.length == 1 ? el.first : el }
        end
      end

    # Each @procs entry pairs the action with the production's
    # right-hand side length.
    @procs[p.id] = [action, p.rhs.length]

    @production_precs[p.id] = p.last_terminal
  end
end
196
+
197
+ # Called when the Parser class is sub-classed; it installs
198
+ # necessary instance class variables.
199
+ #
200
+ # @return [void]
201
def inherited(klass)
  # Every subclass gets its own fresh set of class-level instance
  # variables instead of sharing the parent's.
  klass.install_icvars
end
204
+
205
+ # If *state* (or its equivalent) is not in the state list it is
206
+ # added and its ID is returned. If there is already a state
207
+ # with the same items as *state* in the state list its ID is
208
+ # returned and *state* is discarded.
209
+ #
210
+ # @param [State] state State to add to the parser.
211
+ #
212
+ # @return [Integer] The ID of the state.
213
def add_state(state)
  # Array#index compares with ==, so an equal state that is already
  # registered wins and the new object is dropped.
  known_id = @states.index(state)
  return known_id if known_id

  # A new state takes the next free slot; its ID is its index in the list.
  fresh_id = @states.length
  state.id = fresh_id
  @states.push(state)

  fresh_id
end
224
+
225
+ # Build a hash with the default options for Parser.finalize
226
+ # and then update it with the values from *opts*.
227
+ #
228
+ # @param [Hash{Symbol => Object}] opts Hash containing options for finalize.
229
+ #
230
+ # @return [Hash{Symbol => Object}]
231
def build_finalize_opts(opts)
  # Resolve :explain into an IO object (or false) up front, but store it
  # only in the hash we return.  Writing it back into *opts* would change
  # the caller's hash, so a hash reused for a second finalize call would
  # carry an already-resolved IO object instead of the original value.
  explain_io = get_io(opts[:explain])

  defaults = {
    explain:    false,
    lookahead:  true,
    precedence: true,
    use:        false
  }

  defaults.merge(opts).merge(explain: explain_io)
end
241
+ private :build_finalize_opts
242
+
243
+ # Build a hash with the default options for Parser.parse and
244
+ # then update it with the values from *opts*.
245
+ #
246
+ # @param [Hash{Symbol => Object}] opts Hash containing options for parse.
247
+ #
248
+ # @return [Hash{Symbol => Object}]
249
def build_parse_opts(opts)
  # Resolve the IO-like options first, without writing them back into
  # *opts*: the caller's hash must stay untouched so it can be reused for
  # later parse calls.
  parse_tree_io = get_io(opts[:parse_tree])
  verbose_io    = get_io(opts[:verbose])

  defaults = {
    accept:     :first,
    env:        self::Environment.new,
    parse_tree: false,
    verbose:    false
  }

  defaults.merge(opts).merge(parse_tree: parse_tree_io, verbose: verbose_io)
end
260
+ private :build_parse_opts
261
+
262
+ # This method is used to (surprise) check the sanity of the
263
+ # constructed parser. It checks to make sure all non-terminals
264
+ # used in the grammar definition appear on the left-hand side of
265
+ # one or more productions, and that none of the parser's states
266
+ # have invalid actions. If a problem is encountered a
267
+ # ParserConstructionException is raised.
268
+ #
269
+ # @return [void]
270
def check_sanity
  # Every non-terminal has to be the left-hand side of some production.
  @grammar.nonterms.each do |sym|
    next if @lh_sides.values.include?(sym)

    raise ParserConstructionException, "Non-terminal #{sym} does not appear on the left-hand side of any production."
  end

  # Walk the action table of every state.
  each_state do |state|
    state.actions.each do |sym, actions|
      if CFG::is_terminal?(sym)
        # Terminals: Accept is only legal on :EOS, and nothing other than
        # Accept/GoTo/Reduce/Shift objects may appear.
        actions.each do |action|
          case action
          when Accept
            next if sym == :EOS

            raise ParserConstructionException, "Accept action found for terminal #{sym} in state #{state.id}."

          when GoTo, Reduce, Shift
            next

          else
            raise ParserConstructionException, "Object of type #{action.class} found in actions for terminal " +
              "#{sym} in state #{state.id}."
          end
        end

        # Record (rather than raise on) any conflict left on this symbol.
        conflict = state.conflict_on?(sym)
        inform_conflict(state.id, conflict, sym) if conflict

      elsif actions.length > 1
        # Non-terminals: at most one action, and it must be a GoTo.
        raise ParserConstructionException, "State #{state.id} has multiple GoTo actions for non-terminal #{sym}."

      elsif actions.length == 1 and not actions.first.is_a?(GoTo)
        raise ParserConstructionException, "State #{state.id} has non-GoTo action for non-terminal #{sym}."
      end
    end
  end
end
313
+
314
+ # This method checks to see if the parser would be in parse state
315
+ # *dest* after starting in state *start* and reading *symbols*.
316
+ #
317
+ # @param [State] start State in which the walk begins.
318
+ # @param [State] dest State the walk is expected to end in.
319
+ # @param [Array<Symbol>] symbols Grammar symbols.
320
+ #
321
+ # @return [Boolean] If the destination symbol is reachable from the start symbol after reading *symbols*.
322
def check_reachability(start, dest, symbols)
  here = start

  symbols.each do |sym|
    candidates = @states[here.id].on?(sym)

    # For terminals only Shift actions move the automaton forward.
    candidates = candidates.select { |act| act.is_a?(Shift) } if CFG::is_terminal?(sym)

    # Nothing to follow on this symbol: the path is broken.
    return false if candidates.empty?

    # There can only be one Shift action for a terminal and one GoTo
    # action for a non-terminal, so the first candidate is the only one.
    here = @states[candidates.first.id]
  end

  here.id == dest.id
end
344
+
345
+ # Declares a new clause inside of a production. The right-hand
346
+ # side is specified by *expression* and the precedence of this
347
+ # production can be changed by setting the *precedence* argument
348
+ # to some terminal symbol.
349
+ #
350
+ # @param [String, Symbol] expression Right-hand side of a production.
351
+ # @param [Symbol] precedence Symbol representing the precedence of this production.
352
+ # @param [:array, :splat] arg_type Method to use when passing arguments to the action.
353
+ # @param [Proc] action Action to be taken when the production is reduced.
354
+ #
355
+ # @return [void]
356
def clause(expression, precedence = nil, arg_type = @default_arg_type, &action)
  # Fall back to the current precedence when this clause does not name
  # one of its own.
  precedence = @curr_prec unless precedence

  production, selections = @grammar.clause(expression)

  # In :splat mode the action receives one argument per selected symbol,
  # or one per right-hand side symbol when nothing was selected, so its
  # arity has to match exactly.
  expected_arity =
    if selections.empty?
      production.rhs.length
    else
      selections.length
    end

  if arg_type == :splat and action.arity != expected_arity
    raise ParserConstructionException,
      "Incorrect number of action parameters. Expected #{expected_arity} but got #{action.arity}." +
      ' Action arity must match the number of terminals and non-terminals in the clause.'
  end

  # Register the action, paired with the right-hand side length.
  wrapped = ProdProc.new(arg_type, selections, &action)
  @procs[production.id] = [wrapped, production.rhs.length]

  # Without an explicit precedence, the production's last terminal
  # decides.
  @production_precs[production.id] = precedence || production.last_terminal
end
379
+ alias :c :clause
380
+
381
+ # Removes resources that were needed to generate the parser but
382
+ # aren't needed when actually parsing input.
383
+ #
384
+ # @return [void]
385
def clean
  # We've told the developer about conflicts by now.
  @conflicts = nil

  # Drop the grammar and the grammar'.
  @grammar       = nil
  @grammar_prime = nil

  # Drop precedence and bookkeeping information.  These are the variables
  # set up by install_icvars and read by clause; the previous spelling
  # (@cur_lhs / @cur_prec) assigned two unrelated variables and left the
  # real ones untouched.
  @curr_lhs  = nil
  @curr_prec = nil

  @prec_counts      = nil
  @production_precs = nil
  @token_precs      = nil

  # Drop the items from each of the states.
  each_state { |state| state.clean }
end
404
+
405
+ # Set the default argument type for the actions associated with
406
+ # clauses. All actions defined after this call will be passed
407
+ # arguments in the way specified here, unless overridden in the
408
+ # call to {Parser.clause}.
409
+ #
410
+ # @param [:array, :splat] type The default argument type.
411
+ #
412
+ # @return [void]
413
def default_arg_type(type)
  # Only the two supported policies are accepted; any other value is
  # ignored and the current default is left unchanged.
  @default_arg_type = type if [:array, :splat].include?(type)
end
416
+ alias :dat :default_arg_type
417
+
418
+ # Adds productions and actions for parsing empty lists.
419
+ #
420
+ # @see CFG#empty_list_production
421
def build_list_production(symbol, list_elements, separator = '')
  # Pure delegation: the grammar object does the actual work.
  forwarded = [symbol, list_elements, separator]

  @grammar.build_list_production(*forwarded)
end
424
+ alias :list :build_list_production
425
+
426
+ # This function will print a description of the parser to the
427
+ # provided IO object.
428
+ #
429
+ # @param [IO] io Input/Output object used for printing the parser's explanation.
430
+ #
431
+ # @return [void]
432
def explain(io)
  # The explanation needs both the grammar and the built states; clean
  # sets the grammar to nil, so after that there is nothing to print.
  if !@grammar || @states.empty?
    raise ParserConstructionException, 'Parser.explain called outside of finalize.'
  end

  # Prints a three-line banner followed by a blank line.
  heading = lambda do |rule, title|
    io.puts(rule)
    io.puts(title)
    io.puts(rule)
    io.puts
  end

  heading.call('###############', '# Productions #')

  max_id_length = @grammar.productions(:id).length.to_s.length

  # Print the productions, grouped by left-hand side; precedence
  # annotations are aligned within each group.
  @grammar.productions.each do |_sym, productions|
    max_rhs_length = productions.map { |prod| prod.to_s.length }.max || 0

    productions.each do |production|
      p_string = production.to_s

      io.print("\tProduction #{sprintf("%#{max_id_length}d", production.id)}: #{p_string}")

      prec = @production_precs[production.id]
      if prec
        io.print(' ' * (max_rhs_length - p_string.length))
        io.print(" : (#{sprintf("%-5s", prec.first)}, #{prec.last})")
      end

      io.puts
    end

    io.puts
  end

  heading.call('##########', '# Tokens #')

  max_token_len = @grammar.terms.map { |t| t.length }.max || 0

  # Terminals in alphabetical order, each with its precedence if it has one.
  @grammar.terms.sort_by { |t| t.to_s }.each do |term|
    io.print("\t#{term}")

    prec = @token_precs[term]
    if prec
      io.print(' ' * (max_token_len - term.length))
      io.print(" : (#{sprintf("%-5s", prec.first)}, #{prec.last})")
    end

    io.puts
  end

  io.puts

  heading.call('#####################', '# Table Information #')

  io.puts("\tStart symbol: #{@grammar.start_symbol}'")
  io.puts

  io.puts("\tTotal number of states: #{@states.length}")
  io.puts

  io.puts("\tTotal conflicts: #{@conflicts.values.flatten(1).length}")
  io.puts

  @conflicts.each do |state_id, conflicts|
    io.puts("\tState #{state_id} has #{conflicts.length} conflict(s)")
  end

  io.puts unless @conflicts.empty?

  # Print the parse table.
  heading.call('###############', '# Parse Table #')

  each_state do |state|
    io.puts("State #{state.id}:")
    io.puts

    io.puts("\t# ITEMS #")

    # Widest left-hand side in this state, passed to Item#to_s.
    lhs_width = state.items.map { |item| item.lhs.to_s.length }.max || 0

    state.each { |item| io.puts("\t#{item.to_s(lhs_width)}") }

    io.puts
    io.puts("\t# ACTIONS #")

    state.actions.keys.sort_by { |key| key.to_s }.each do |sym|
      state.actions[sym].each { |action| io.puts("\tOn #{sym} #{action}") }
    end

    io.puts
    io.puts("\t# CONFLICTS #")

    state_conflicts = @conflicts[state.id]

    if state_conflicts.empty?
      io.puts("\tNone\n\n")
    else
      state_conflicts.each do |type, sym|
        label = type == :SR ? "Shift/Reduce" : "Reduce/Reduce"

        io.print("\t#{label} conflict")
        io.puts(" on #{sym}")
      end

      io.puts
    end
  end

  # Close any IO objects that aren't $stdout.
  io.close if io.is_a?(IO) and io != $stdout
end
554
+
555
+ # This method will finalize the parser causing the construction
556
+ # of states and their actions, and the resolution of conflicts
557
+ # using lookahead and precedence information.
558
+ #
559
+ # No calls to {Parser.production} may appear after the call to
560
+ # Parser.finalize.
561
+ #
562
+ # @param [Hash] opts Options describing how to finalize the parser.
563
+ #
564
+ # @option opts [Boolean,String,IO] :explain To explain the parser or not.
565
+ # @option opts [Boolean] :lookahead To use lookahead info for conflict resolution.
566
+ # @option opts [Boolean] :precedence To use precedence info for conflict resolution.
567
+ # @option opts [String,IO] :use A file name or object that is used to load/save the parser.
568
+ #
569
+ # @return [void]
570
def finalize(opts = {})
  if @grammar.productions.empty?
    raise ParserConstructionException,
      "Parser has no productions. Cowardly refusing to construct an empty parser."
  end

  # Get the full options hash.
  opts = build_finalize_opts(opts)

  # Get the name of the file in which the parser is defined.
  #
  # FIXME: See why this is failing for the simple ListParser example.
  #
  # NOTE: this lookup depends on the call depth, so it must stay directly
  # inside finalize and not be moved into a helper.
  def_file = caller()[2].split(':')[0] if opts[:use]

  # Load the saved tables only when the cache is newer than the file that
  # defines the parser.
  if opts[:use] && (
    (opts[:use].is_a?(String) && File.exist?(opts[:use]) && File.mtime(opts[:use]) > File.mtime(def_file)) ||
    (opts[:use].is_a?(File) && opts[:use].mtime > File.mtime(def_file))
  )

    file = get_io(opts[:use], 'r')

    begin
      file.flock(File::LOCK_SH)

      begin
        # Un-marshal our saved data structures.
        #
        # SECURITY: Marshal.load can instantiate arbitrary objects; the
        # :use file must come from a trusted location.
        @lh_sides, @states, @symbols = Marshal.load(file)
      ensure
        # Release the lock even if loading fails.
        file.flock(File::LOCK_UN)
      end
    ensure
      # Close the file if we opened it, whether or not loading succeeded.
      file.close if opts[:use].is_a?(String)
    end

    # Remove any un-needed data and return.
    return clean
  end

  # Grab all of the symbols that comprise the grammar
  # (besides the start symbol).
  @symbols = @grammar.symbols << :ERROR

  # Add our starting state to the state list.
  @start_symbol       = (@grammar.start_symbol.to_s + '\'').to_sym
  start_production, _ = @grammar.production(@start_symbol, @grammar.start_symbol).first
  start_state         = State.new(@symbols, [start_production.to_item])

  start_state.close(@grammar.productions)

  add_state(start_state)

  # Translate the precedence of productions from tokens to
  # (associativity, precedence) pairs.
  @production_precs.map! { |prec| @token_precs[prec] }

  # Build the rest of the transition table.  each_state also visits the
  # states appended while the loop is running.
  each_state do |state|
    # Transition states.
    tstates = Hash.new { |h, k| h[k] = State.new(@symbols) }

    # Bin each item in this set into reachable transition states.
    state.each do |item|
      if (next_symbol = item.next_symbol)
        tstates[next_symbol] << item.copy
      end
    end

    # For each transition state:
    #  1) Get transition symbol
    #  2) Advance dot
    #  3) Close it
    #  4) Get state id and add transition
    tstates.each do |symbol, tstate|
      tstate.each { |item| item.advance }

      tstate.close(@grammar.productions)

      id = add_state(tstate)

      # Add Goto and Shift actions.
      state.on(symbol, CFG::is_nonterminal?(symbol) ? GoTo.new(id) : Shift.new(id))
    end

    # Find the Accept and Reduce actions for this state.
    state.each do |item|
      if item.at_end?
        if item.lhs == @start_symbol
          state.on(:EOS, Accept.new)
        else
          state.add_reduction(@grammar.productions(:id)[item.id])
        end
      end
    end
  end

  # Build the production.id -> production.lhs map.
  @grammar.productions(:id).each { |id, production| @lh_sides[id] = production.lhs }

  # Prune the parsing table for unnecessary reduce actions.
  prune(opts[:lookahead], opts[:precedence])

  # Check the parser for inconsistencies.
  check_sanity

  # Print the table if requested.
  explain(opts[:explain]) if opts[:explain]

  # Remove any data that is no longer needed.
  clean

  # Store the parser's final data structures if requested.
  if opts[:use]
    io = get_io(opts[:use])

    begin
      io.flock(File::LOCK_EX) if io.is_a?(File)
      Marshal.dump([@lh_sides, @states, @symbols], io)
    ensure
      # Unlock, and close the IO object if we opened it, even when
      # dumping fails.
      io.flock(File::LOCK_UN) if io.is_a?(File)
      io.close if opts[:use].is_a?(String)
    end
  end

  nil
end
691
+
692
+ # Converts an object into an IO object as appropriate.
693
+ #
694
+ # @param [Object] o Object to be converted into an IO object.
695
+ # @param [String] mode String representing the mode to open the IO object in.
696
+ #
697
+ # @return [IO, false] The IO object or false if a conversion wasn't possible.
698
def get_io(o, mode = 'w')
  case o
  when TrueClass then $stdout            # `true` means "use standard output"
  when String    then File.open(o, mode) # a String is treated as a file name
  when IO        then o                  # already an IO object: pass it through
  else                false              # anything else cannot be converted
  end
end
709
+
710
+ # Iterate over the parser's states.
711
+ #
712
+ # @yieldparam [State] state One of the parser automaton's state objects
713
+ #
714
+ # @return [void]
715
def each_state
  # The bound is re-read on every pass, so states appended to @states
  # while iterating are visited as well.
  position = 0

  until position >= @states.count
    yield @states.at(position)
    position += 1
  end
end
722
+
723
+ # @return [CFG] The grammar that can be parsed by this Parser.
724
def grammar
  # Return a clone rather than the parser's own grammar object.
  copy = @grammar.clone
  copy
end
727
+
728
+ # This method generates and memoizes the G' grammar used to
729
+ # calculate the LALR(1) lookahead sets. Information about this
730
+ # grammar and its use can be found in the following paper:
731
+ #
732
+ # Simple Computation of LALR(1) Lookahead Sets
733
+ # Manuel E. Bermudez and George Logothetis
734
+ # Information Processing Letters 31 - 1989
735
+ #
736
+ # @return [CFG]
737
def grammar_prime
  # Memoized: build G' only on the first call.
  return @grammar_prime if @grammar_prime

  @grammar_prime = CFG.new

  each_state do |state|
    state.each do |item|
      # Symbols of G' have the form "<state id>_<grammar symbol>".
      lhs = "#{state.id}_#{item.next_symbol}".to_sym

      # Only non-terminals after the dot matter, and each left-hand side
      # is expanded just once.
      next if !CFG::is_nonterminal?(item.next_symbol) || @grammar_prime.productions.keys.include?(lhs)

      @grammar.productions[item.next_symbol].each do |production|
        # Follow the automaton along the right-hand side, tagging every
        # symbol with the state it is read in.
        cstate = state

        rhs = production.rhs.inject('') do |text, symbol|
          piece  = "#{cstate.id}_#{symbol} "
          cstate = @states[cstate.on?(symbol).first.id]

          text + piece
        end

        @grammar_prime.production(lhs, rhs)
      end
    end
  end

  @grammar_prime
end
766
+
767
+ # Inform the parser core that a conflict has been detected.
768
+ #
769
+ # @param [Integer] state_id ID of the state where the conflict was encountered.
770
+ # @param [:RR, :SR] type Reduce/Reduce or Shift/Reduce conflict.
771
+ # @param [Symbol] sym Symbol that caused the conflict.
772
+ #
773
+ # @return [void]
774
def inform_conflict(state_id, type, sym)
  # Conflicts are stored per state as [type, symbol] pairs.
  entry = [type, sym]

  @conflicts[state_id].push(entry)
end
777
+
778
+ # This method is used to specify that the symbols in *symbols*
779
+ # are left-associative. Subsequent calls to this method will
780
+ # give their arguments higher precedence.
781
+ #
782
+ # @param [Array<Symbol>] symbols Symbols that are left associative.
783
+ #
784
+ # @return [void]
785
def left(*symbols)
  # Each call opens a new precedence level, one higher than the level
  # opened by the previous call to this method.
  @prec_counts[:left] += 1
  level = @prec_counts[:left]

  symbols.map(&:to_sym).each { |name| @token_precs[name] = [:left, level] }
end
792
+
793
+ # This method is used to specify that the symbols in *symbols*
794
+ # are non-associative.
795
+ #
796
+ # @param [Array<Symbol>] symbols Symbols that are non-associative.
797
+ #
798
+ # @return [void]
799
def nonassoc(*symbols)
  # Each call opens a new precedence level in the :non counter.
  @prec_counts[:non] += 1
  level = @prec_counts[:non]

  symbols.map(&:to_sym).each { |name| @token_precs[name] = [:non, level] }
end
806
+
807
+ # Adds productions and actions for parsing nonempty lists.
808
+ #
809
+ # @see CFG#nonempty_list_production
810
def build_nonempty_list_production(symbol, list_elements, separator = '')
  # Pure delegation: the grammar object does the actual work.
  forwarded = [symbol, list_elements, separator]

  @grammar.build_nonempty_list_production(*forwarded)
end
813
+ alias :nonempty_list :build_nonempty_list_production
814
+
815
+ # This function is where actual parsing takes place. The
816
+ # _tokens_ argument must be an array of Token objects, the last
817
+ # of which has type EOS. By default this method will return the
818
+ # value computed by the first successful parse tree found.
819
+ #
820
+ # Additional information about the parsing options can be found in
821
+ # the main documentation.
822
+ #
823
+ # @param [Array<Token>] tokens Tokens to be parsed.
824
+ # @param [Hash] opts Options to use when parsing input.
825
+ #
826
+ # @option opts [:first, :all] :accept Either :first or :all.
827
+ # @option opts [Object] :env The environment in which to evaluate the production action.
828
+ # @option opts [Boolean,String,IO] :parse_tree To print parse trees in the DOT language or not.
829
+ # @option opts [Boolean,String,IO] :verbose To be verbose or not.
830
+ #
831
+ # @return [Object, Array<Object>] Result or results of parsing the given tokens.
832
+ def parse(tokens, opts = {})
833
+ # Get the full options hash.
834
+ opts = build_parse_opts(opts)
835
+ v = opts[:verbose]
836
+
837
+ if opts[:verbose]
838
+ v.puts("Input tokens:")
839
+ v.puts(tokens.map { |t| t.type }.inspect)
840
+ v.puts
841
+ end
842
+
843
+ # Stack IDs to keep track of them during parsing.
844
+ stack_id = 0
845
+
846
+ # Error mode indicators.
847
+ error_mode = false
848
+ reduction_guard = false
849
+
850
+ # Our various list of stacks.
851
+ accepted = []
852
+ moving_on = []
853
+ processing = [ParseStack.new(stack_id += 1)]
854
+
855
+ # Iterate over the tokens. We don't proceed to the
856
+ # next token until every stack is done with the
857
+ # current one.
858
+ tokens.each_with_index do |token, index|
859
+ # Check to make sure this token was seen in the
860
+ # grammar definition.
861
+ raise BadToken.new(token) if not @symbols.include?(token.type)
862
+
863
+ v.puts("Current token: #{token.type}#{if token.value then "(#{token.value})" end}") if v
864
+
865
+ # Iterate over the stacks until each one is done.
866
+ while (stack = processing.shift)
867
+ # Execute any token hooks in this stack's environment.
868
+ @token_hooks[token.type].each { |hook| opts[:env].instance_exec(&hook)}
869
+
870
+ # Get the available actions for this stack.
871
+ actions = @states[stack.state].on?(token.type)
872
+
873
+ if actions.empty?
874
+ # If we are already in error mode and there
875
+ # are no actions we skip this token.
876
+ if error_mode
877
+ v.puts("Discarding token: #{token.type}#{if token.value then "(#{token.value})" end}") if v
878
+
879
+ # Add the current token to the array
880
+ # that corresponds to the output value
881
+ # for the ERROR token.
882
+ stack.output_stack.last << token
883
+
884
+ moving_on << stack
885
+ next
886
+ end
887
+
888
+ # We would be dropping the last stack so we
889
+ # are going to go into error mode.
890
+ if accepted.empty? and moving_on.empty? and processing.empty?
891
+
892
+ if v
893
+ v.puts
894
+ v.puts('Current stack:')
895
+ v.puts("\tID: #{stack.id}")
896
+ v.puts("\tState stack:\t#{stack.state_stack.inspect}")
897
+ v.puts("\tOutput Stack:\t#{stack.output_stack.inspect}")
898
+ v.puts
899
+ end
900
+
901
+ # Try and find a valid error state.
902
+ while stack.state
903
+ if (actions = @states[stack.state].on?(:ERROR)).empty?
904
+ # This state doesn't have an
905
+ # error production. Moving on.
906
+ stack.pop
907
+ else
908
+ # Enter the found error state.
909
+ stack.push(actions.first.id, [token], :ERROR, token.position)
910
+
911
+ break
912
+ end
913
+ end
914
+
915
+ if stack.state
916
+ # We found a valid error state.
917
+ error_mode = reduction_guard = true
918
+ opts[:env].he = true
919
+ moving_on << stack
920
+
921
+ if v
922
+ v.puts('Invalid input encountered. Entering error handling mode.')
923
+ v.puts("Discarding token: #{token.type}#{if token.value then "(#{token.value})" end}")
924
+ end
925
+ else
926
+ # No valid error states could be
927
+ # found. Time to print a message
928
+ # and leave.
929
+
930
+ v.puts("No more actions for stack #{stack.id}. Dropping stack.") if v
931
+ end
932
+ else
933
+ v.puts("No more actions for stack #{stack.id}. Dropping stack.") if v
934
+ end
935
+
936
+ next
937
+ end
938
+
939
+ # Make (stack, action) pairs, duplicating the
940
+ # stack as necessary.
941
+ pairs = [[stack, actions.pop]] + actions.map {|action| [stack.branch(stack_id += 1), action] }
942
+
943
+ pairs.each do |new_stack, action|
944
+ if v
945
+ v.puts
946
+ v.puts('Current stack:')
947
+ v.puts("\tID: #{new_stack.id}")
948
+ v.puts("\tState stack:\t#{new_stack.state_stack.inspect}")
949
+ v.puts("\tOutput Stack:\t#{new_stack.output_stack.inspect}")
950
+ v.puts
951
+ v.puts("Action taken: #{action.to_s}")
952
+ end
953
+
954
+ if action.is_a?(Accept)
955
+ if opts[:accept] == :all
956
+ accepted << new_stack
957
+ else
958
+ v.puts('Accepting input.') if v
959
+ opts[:parse_tree].puts(new_stack.tree) if opts[:parse_tree]
960
+
961
+ if opts[:env].he
962
+ raise HandledError.new(opts[:env].errors, new_stack.result)
963
+ else
964
+ return new_stack.result
965
+ end
966
+ end
967
+
968
+ elsif action.is_a?(Reduce)
969
+ # Get the production associated with this reduction.
970
+ production_proc, pop_size = @procs[action.id]
971
+
972
+ if not production_proc
973
+ raise InternalParserException, "No production #{action.id} found."
974
+ end
975
+
976
+ args, positions = new_stack.pop(pop_size)
977
+ opts[:env].set_positions(positions)
978
+
979
+ if not production_proc.selections.empty?
980
+ args = args.values_at(*production_proc.selections)
981
+ end
982
+
983
+ result =
984
+ if production_proc.arg_type == :array
985
+ opts[:env].instance_exec(args, &production_proc)
986
+ else
987
+ opts[:env].instance_exec(*args, &production_proc)
988
+ end
989
+
990
+ if (goto = @states[new_stack.state].on?(@lh_sides[action.id]).first)
991
+
992
+ v.puts("Going to state #{goto.id}.\n") if v
993
+
994
+ pos0 = nil
995
+
996
+ if args.empty?
997
+ # Empty productions need to be
998
+ # handled specially.
999
+ pos0 = new_stack.position
1000
+
1001
+ pos0.stream_offset += pos0.length + 1
1002
+ pos0.line_offset += pos0.length + 1
1003
+
1004
+ pos0.length = 0
1005
+ else
1006
+ pos0 = opts[:env].pos( 0)
1007
+ pos1 = opts[:env].pos(-1)
1008
+
1009
+ pos0.length = (pos1.stream_offset + pos1.length) - pos0.stream_offset
1010
+ end
1011
+
1012
+ new_stack.push(goto.id, result, @lh_sides[action.id], pos0)
1013
+ else
1014
+ raise InternalParserException, "No GoTo action found in state #{stack.state} " +
1015
+ "after reducing by production #{action.id}"
1016
+ end
1017
+
1018
+ # This stack is NOT ready for the next
1019
+ # token.
1020
+ processing << new_stack
1021
+
1022
+ # Exit error mode if necessary.
1023
+ error_mode = false if error_mode and not reduction_guard
1024
+
1025
+ elsif action.is_a?(Shift)
1026
+ new_stack.push(action.id, token.value, token.type, token.position)
1027
+
1028
+ # This stack is ready for the next
1029
+ # token.
1030
+ moving_on << new_stack
1031
+
1032
+ # Exit error mode.
1033
+ error_mode = false
1034
+ end
1035
+ end
1036
+ end
1037
+
1038
+ v.puts("\n\n") if v
1039
+
1040
+ processing = moving_on
1041
+ moving_on = []
1042
+
1043
+ # If we don't have any active stacks at this point the
1044
+ # string isn't in the language.
1045
+ if opts[:accept] == :first and processing.length == 0
1046
+ v.close if v and v != $stdout
1047
+ raise NotInLanguage.new(tokens[0...index], tokens[index], tokens[index.next..-1])
1048
+ end
1049
+
1050
+ reduction_guard = false
1051
+ end
1052
+
1053
+ # If we have reached this point we are accepting all parse
1054
+ # trees.
1055
+ if v
1056
+ v.puts("Accepting input with #{accepted.length} derivation(s).")
1057
+
1058
+ v.close if v != $stdout
1059
+ end
1060
+
1061
+ accepted.each do |stack|
1062
+ opts[:parse_tree].puts(stack.tree)
1063
+ end if opts[:parse_tree]
1064
+
1065
+ results = accepted.map { |stack| stack.result }
1066
+
1067
+ if opts[:env].he
1068
+ raise HandledError.new(opts[:env].errors, results)
1069
+ else
1070
+ return results
1071
+ end
1072
+ end
1073
+
1074
+ # Adds a new production to the parser with a left-hand value of
1075
+ # *symbol*. If *expression* is specified it is taken as the
1076
+ # right-hand side of the production and *action* is associated
1077
+ # with the production. If *expression* is nil then *action* is
1078
+ # evaluated and expected to make one or more calls to
1079
+ # Parser.clause. A precedence can be associated with this
1080
+ # production by setting *precedence* to a terminal symbol.
1081
+ #
1082
+ # @param [Symbol] symbol Left-hand side of the production.
1083
+ # @param [String, Symbol, nil] expression Right-hand side of the production.
1084
+ # @param [Symbol, nil] precedence Symbol representing the precedence of this production.
1085
+ # @param [:array, :splat] arg_type Method to use when passing arguments to the action.
1086
+ # @param [Proc] action Action associated with this production.
1087
+ #
1088
+ # @return [void]
1089
def production(symbol, expression = nil, precedence = nil, arg_type = @default_arg_type, &action)
  # The left-hand side must be a String or Symbol that names a
  # non-terminal (i.e. is written in all lowercase).
  unless (symbol.is_a?(Symbol) || symbol.is_a?(String)) && CFG::is_nonterminal?(symbol)
    raise ParserConstructionException, 'Production symbols must be Strings or Symbols and be in all lowercase.'
  end

  @grammar.curr_lhs = symbol.to_sym
  @curr_prec        = precedence

  # Temporarily override the default argument type for the clauses
  # defined by this production.  The previous value is remembered
  # only when it is actually being replaced.
  saved_arg_type = nil
  if arg_type != @default_arg_type
    saved_arg_type    = @default_arg_type
    @default_arg_type = arg_type
  end

  begin
    if expression
      # Single right-hand side given directly.
      self.clause(expression, precedence, &action)
    else
      # The block is expected to call Parser.clause itself.
      self.instance_exec(&action)
    end
  ensure
    # Always undo the per-production state, even when the clause or
    # the action block raises.  Otherwise a failed production would
    # leak its argument type, left-hand side and precedence into
    # every production defined afterwards.
    @default_arg_type = saved_arg_type unless saved_arg_type.nil?

    @grammar.curr_lhs = nil
    @curr_prec        = nil
  end

  # This method is documented as returning nothing.
  nil
end
1116
+ alias :p :production
1117
+
1118
+ # This method uses lookahead sets and precedence information to
1119
+ # resolve conflicts and remove unnecessary reduce actions.
1120
+ #
1121
+ # @param [Boolean] do_lookahead Prune based on lookahead sets or not.
1122
+ # @param [Boolean] do_precedence Prune based on precedence or not.
1123
+ #
1124
+ # @return [void]
1125
def prune(do_lookahead, do_precedence)
  # If both options are false there is no pruning to do.
  return unless do_lookahead || do_precedence

  terms = @grammar.terms

  each_state do |state0|

    #####################
    # Lookahead Pruning #
    #####################

    if do_lookahead
      # Find all of the reductions in this state.
      reductions = state0.actions.values.flatten.uniq.select { |a| a.is_a?(Reduce) }

      reductions.each do |reduction|
        production = @grammar.productions(:id)[reduction.id]

        lookahead = []

        # Build the lookahead set from every state that can reach
        # state0 by following the production's right-hand side.
        each_state do |state1|
          if self.check_reachability(state1, state0, production.rhs)
            lookahead |= self.grammar_prime.follow_set("#{state1.id}_#{production.lhs}".to_sym)
          end
        end

        # Translate the G' follow symbols ("<state id>_<symbol>")
        # into G lookahead symbols.
        lookahead = lookahead.map { |sym| sym.to_s.split('_', 2).last.to_sym }.uniq

        # Here we remove the unnecessary reductions.  If there are
        # error productions we need to scale back the amount of
        # pruning done: only prune where a conflict exists.
        pruning_candidates = terms - lookahead

        if terms.include?(:ERROR)
          pruning_candidates.each do |sym|
            state0.actions[sym].delete(reduction) if state0.conflict_on?(sym)
          end
        else
          pruning_candidates.each { |sym| state0.actions[sym].delete(reduction) }
        end
      end
    end

    ########################################
    # Precedence and Associativity Pruning #
    ########################################

    if do_precedence
      state0.actions.each do |symbol, actions|

        # We are only interested in pruning actions for terminal
        # symbols.
        next unless CFG::is_terminal?(symbol)

        # Skip to the next one if there is no possibility of a
        # Shift/Reduce or Reduce/Reduce conflict.
        next unless actions && actions.length > 1

        # Precedence can only settle a conflict when every Reduce
        # has precedence information AND a Shift takes part in the
        # conflict.  (The original expression joined these two
        # checks with `and`, which binds more loosely than `=`, so
        # the Shift requirement was silently discarded.)
        reductions_have_prec = actions.all? { |a| !a.is_a?(Reduce) || @production_precs[a.id] }
        shift_present        = actions.any? { |a| a.is_a?(Shift) }
        resolve_ok           = reductions_have_prec && shift_present

        if @token_precs[symbol] && resolve_ok
          max_prec        = 0
          selected_action = nil

          # Grab the associativity and precedence for the input
          # token.
          tassoc, tprec = @token_precs[symbol]

          actions.each do |a|
            assoc, prec = a.is_a?(Shift) ? [tassoc, tprec] : @production_precs[a.id]

            # If two actions have the same precedence we will only
            # replace the previous action if:
            #  * The token is left associative and the current action is a Reduce
            #  * The token is right associative and the current action is a Shift
            if prec > max_prec || (prec == max_prec && tassoc == (a.is_a?(Shift) ? :right : :left))
              max_prec        = prec
              selected_action = a

            elsif prec == max_prec && assoc == :nonassoc
              raise ParserConstructionException, 'Non-associative token found during conflict resolution.'
            end
          end

          state0.actions[symbol] = [selected_action]
        end
      end
    end
  end
end
1227
+
1228
+ # This method is used to specify that the symbols in _symbols_
1229
+ # are right associative. Subsequent calls to this method will
1230
+ # give their arguments higher precedence.
1231
+ #
1232
+ # @param [Array<Symbol>] symbols Symbols that are right-associative.
1233
+ #
1234
+ # @return [void]
1235
def right(*symbols)
  # Each call opens a new, higher precedence level for the
  # right-associative symbols.
  @prec_counts[:right] += 1
  level = @prec_counts[:right]

  # Normalize every argument to a Symbol first, then record the
  # associativity/precedence pair for each of them.
  symbols.map(&:to_sym).each do |terminal|
    @token_precs[terminal] = [:right, level]
  end
end
1242
+
1243
+ # Changes the starting symbol of the parser.
1244
+ #
1245
+ # @param [Symbol] symbol The starting symbol of the grammar.
1246
+ #
1247
+ # @return [void]
1248
def start(symbol)
  # Delegate to the grammar, which owns the start symbol.
  @grammar.start(symbol)
end
1251
+
1252
+ # Add a hook that is executed whenever *sym* is seen.
1253
+ #
1254
+ # The *sym* must be a terminal symbol.
1255
+ #
1256
+ # @param [Symbol] sym Symbol to hook into
1257
+ # @param [Proc] proc Code to execute when the symbol is seen
1258
+ #
1259
+ # @return [void]
1260
def token_hook(sym, &proc)
  # Hooks may only be attached to terminal symbols.  The message
  # used to claim the opposite ("non-terminal"), contradicting both
  # this check and the method's documentation.
  unless CFG::is_terminal?(sym)
    raise 'Method token_hook expects `sym` to be a terminal symbol.'
  end

  @token_hooks[sym] << proc
end
1267
+ end
1268
+
1269
+ ####################
1270
+ # Instance Methods #
1271
+ ####################
1272
+
1273
+ # Instantiates a new parser and creates an environment to be
1274
+ # used for subsequent calls.
1275
def initialize
  # Look the Environment class up through the concrete parser class
  # and build the environment used by later calls.
  environment_class = self.class::Environment
  @env = environment_class.new
end
1278
+
1279
+ # Parses the given token stream using the encapsulated environment.
1280
+ #
1281
+ # @see .parse
1282
def parse(tokens, opts = {})
  # The instance's environment is the default; an explicit :env in
  # *opts* takes priority over it.
  merged_opts = { :env => @env }.merge(opts)

  self.class.parse(tokens, merged_opts)
end
1285
+
1286
+ ################################
1287
+
1288
+ # All actions passed to Parser.production and Parser.clause are
1289
+ # evaluated inside an instance of the Environment class or its
1290
+ # subclass (which must have the same name).
1291
class Environment
  # Flag recording whether an error was encountered and handled.
  #
  # @return [Boolean]
  attr_accessor :he

  # Every object that has been recorded through the *error* method.
  #
  # @return [Array<Object>]
  attr_reader :errors

  # Build a new Environment in its freshly reset state.
  def initialize
    reset
  end

  # Record an object in the list of errors.
  #
  # @param [Object] o Object describing the error.
  #
  # @return [void]
  def error(o)
    @errors.push(o)
  end

  # Look up the StreamPosition of the symbol at index n (zero based)
  # in the positions most recently handed to *set_positions*.
  #
  # @param [Integer] n Index for symbol position.
  #
  # @return [StreamPosition] Position of symbol at index n.
  def pos(n)
    @positions[n]
  end

  # Re-initialize the state that must not carry over from one parse
  # call to the next: the error list and the handled-error flag.
  #
  # @return [void]
  def reset
    @errors = []
    @he     = false
  end

  # Store the *positions* array used by *pos*.
  #
  # @param [Array<StreamPosition>] positions
  #
  # @return [Array<StreamPosition>] The same array of positions.
  def set_positions(positions)
    @positions = positions
  end
end
1342
+
1343
+ # The ParseStack class is used by a Parser to keep track of state
1344
+ # during parsing.
1345
class ParseStack
  # @return [Integer] ID of this parse stack.
  attr_reader :id

  # @return [Array<Object>] Array of objects produced by {Reduce} actions.
  attr_reader :output_stack

  # @return [Array<Integer>] Array of states used when performing {Reduce} actions.
  attr_reader :state_stack

  # Instantiate a new ParseStack object.
  #
  # @param [Integer]                id           ID for this parse stack.  Used by GLR algorithm.
  # @param [Array<Object>]          ostack       Output stack.  Holds results of {Reduce} and {Shift} actions.
  # @param [Array<Integer>]         sstack       State stack.  Holds states that have been shifted due to {Shift} actions.
  # @param [Array<Integer>]         nstack       Node stack.  Holds dot language IDs for nodes in the parse tree.
  # @param [Array<Array<Integer>>]  connections  Integer pairs representing edges in the parse tree.
  # @param [Array<Symbol>]          labels       Labels for nodes in the parse tree.
  # @param [Array<StreamPosition>]  positions    Position data for symbols that have been shifted.
  def initialize(id, ostack = [], sstack = [0], nstack = [], connections = [], labels = [], positions = [])
    @id = id

    @node_stack   = nstack
    @output_stack = ostack
    @state_stack  = sstack

    @connections = connections
    @labels      = labels
    @positions   = positions
  end

  # Branch this stack, effectively creating a new copy of its
  # internal state.
  #
  # @param [Integer] new_id ID for the new ParseStack.
  #
  # @return [ParseStack]
  def branch(new_id)
    # We have to do a deeper copy of the output stack to avoid
    # interactions between the Proc objects for the different
    # parsing paths.
    new_output_stack = @output_stack.map do |o|
      # Prefer a real deep copy when the object offers one.  The
      # check must be made on the element itself; it previously
      # tested the Integer literal 0, so this path was never taken.
      if o.respond_to?(:copy)
        o.copy
      else
        # The begin/rescue block is needed because some classes
        # respond to `clone` but always raise an error.
        begin
          o.clone
        rescue
          o
        end
      end
    end

    ParseStack.new(new_id, new_output_stack, @state_stack.clone,
                   @node_stack.clone, @connections.clone, @labels.clone, @positions.clone)
  end

  # @return [StreamPosition] Position data for the last symbol on the stack.
  def position
    if @positions.empty?
      StreamPosition.new
    else
      # A clone is returned so callers may modify it freely.
      @positions.last.clone
    end
  end

  # Push new state and other information onto the stack.
  #
  # @param [Integer]         state     ID of the shifted state.
  # @param [Object]          o         Value of Token that caused the shift.
  # @param [Symbol]          node0     Label for node in parse tree.
  # @param [StreamPosition]  position  Position token that got shifted.
  #
  # @return [void]
  def push(state, o, node0, position)
    @state_stack  << state
    @output_stack << o
    @node_stack   << @labels.length
    @labels       << if CFG::is_terminal?(node0) and o then node0.to_s + "(#{o})" else node0 end
    @positions    << position

    # A non-terminal is pushed right after a reduction; connect it
    # to the nodes that the preceding pop removed.
    if CFG::is_nonterminal?(node0)
      @cbuffer.each do |node1|
        @connections << [@labels.length - 1, node1]
      end
    end
  end

  # Pop some number of objects off of the inside stacks.
  #
  # @param [Integer] n Number of objects to pop off the stack.
  #
  # @return [Array(Object, StreamPosition)] Values popped from the output and positions stacks.
  def pop(n = 1)
    @state_stack.pop(n)

    # Pop the node stack so that the proper edges can be added
    # when the production's left-hand side non-terminal is
    # pushed onto the stack.
    @cbuffer = @node_stack.pop(n)

    [@output_stack.pop(n), @positions.pop(n)]
  end

  # Fetch the result stored in this ParseStack.  If there is more
  # than one object left on the output stack there is an error.
  #
  # @raise [InternalParserException] If the output stack does not hold exactly one object.
  #
  # @return [Object] The end result of this parse stack.
  def result
    if @output_stack.length == 1
      return @output_stack.last
    else
      raise InternalParserException, "The parsing stack should have 1 element on the output stack, not #{@output_stack.length}."
    end
  end

  # @return [Integer] Current state of this ParseStack.
  def state
    @state_stack.last
  end

  # @return [String] Representation of the parse tree in the DOT language.
  def tree
    dot = "digraph tree#{@id} {\n"

    # One node per label; terminals are drawn as boxes.
    @labels.each_with_index do |label, i|
      dot << "\tnode#{i} [label=\"#{label}\""
      dot << " shape=box" if CFG::is_terminal?(label)
      dot << "];\n"
    end

    dot << "\n"

    @connections.each do |from, to|
      dot << "\tnode#{from} -> node#{to};\n"
    end

    dot << "}"
  end
end
1490
+
1491
+ # The State class is used to represent sets of items and actions to be
1492
+ # used during parsing.
1493
class State
  # @return [Integer] State's ID.
  attr_accessor :id

  # @return [Array<CFG::Item>] Item objects that comprise this state
  attr_reader :items

  # @return [Hash{Symbol => Array<Action>}] Maps lookahead symbols to actions
  attr_reader :actions

  # Build a State with an empty action list for every token.
  #
  # @param [Array<Symbol>]     tokens  Tokens that represent this state
  # @param [Array<CFG::Item>]  items   Items that make up this state
  def initialize(tokens, items = [])
    @id      = nil
    @items   = items
    @actions = tokens.each_with_object({}) { |token, table| table[token] = [] }
  end

  # Equality check.  While both States still hold their items the
  # items are compared; once either has been cleaned the IDs are
  # compared instead.
  #
  # @param [State] other Another State to compare to
  #
  # @return [Boolean]
  def ==(other)
    return items == other.items if items && other.items

    id == other.id
  end

  # Register a Reduce action for *production* under every terminal
  # symbol of this state except ERROR.
  #
  # @param [Production] production Production used to perform the reduction
  #
  # @return [void]
  def add_reduction(production)
    reduction = Reduce.new(production)

    # Reduce actions are not allowed for the ERROR terminal.
    @actions.each do |symbol, action_list|
      action_list << reduction if CFG::is_terminal?(symbol) && symbol != :ERROR
    end
  end

  # Add *item* to this state unless it is not a CFG::Item or is
  # already present.
  #
  # @param [CFG::Item] item Item to add to this state.
  def append(item)
    return unless item.is_a?(CFG::Item)

    @items << item unless @items.include?(item)
  end
  alias_method :<<, :append

  # Clean this State by dropping its list of {CFG::Item} objects.
  #
  # @return [void]
  def clean
    @items = nil
  end

  # Close this state using *productions*: for each item whose next
  # symbol is a non-terminal, the items of that symbol's productions
  # are appended (and in turn visited by the same iteration).
  #
  # @param [Array<CFG::Production>] productions Productions used to close this state.
  #
  # @return [void]
  def close(productions)
    each do |item|
      upcoming = item.next_symbol
      next unless upcoming && CFG::is_nonterminal?(upcoming)

      productions[upcoming].each { |production| self << production.to_item }
    end
  end

  # Report the kind of conflict, if any, that this state has for an
  # input of *sym*: :SR for shift/reduce, :RR for reduce/reduce and
  # nil when there is none.
  #
  # @param [Symbol] sym Symbol to check for conflicts on.
  #
  # @return [:SR, :RR, nil]
  def conflict_on?(sym)
    candidates = @actions[sym]

    reductions = candidates.count { |action| action.is_a?(Reduce) }
    shifts     = candidates.count { |action| !action.is_a?(Reduce) && action.is_a?(Shift) }

    return :SR if shifts == 1 && reductions > 0

    :RR if reductions > 1
  end

  # Iterate over the state's items.  The list is walked by index and
  # its size re-read on every step, so items appended while
  # iterating are visited as well.
  #
  # @return [void]
  def each
    index = 0

    until index >= @items.count
      yield @items[index]
      index += 1
    end
  end

  # Specify an Action to perform when the input token is *symbol*.
  #
  # @param [Symbol] symbol Symbol to add action for.
  # @param [Action] action Action for symbol.
  #
  # @raise [ParserConstructionException] If *symbol* is not one of this state's tokens.
  #
  # @return [void]
  def on(symbol, action)
    unless @actions.key?(symbol)
      raise ParserConstructionException, "Attempting to set action for token (#{symbol}) not seen in grammar definition."
    end

    @actions[symbol] << action
  end

  # Returns a copy of the actions that should be taken when the
  # input token is *symbol*.
  #
  # @param [Symbol] symbol Symbol we want the actions for.
  #
  # @return [Array<Action>] Actions that should be taken.
  def on?(symbol)
    @actions[symbol].clone
  end
end
1629
+
1630
+ # A subclass of Proc that indicates how it should be passed arguments
1631
+ # by the parser.
1632
class ProdProc < Proc
  # arg_type   -- [:array, :splat] how the parser should pass arguments to this proc.
  # selections -- [Array<Integer>] mask selecting which tokens reach the action;
  #               an empty mask means pass all of them.
  attr_reader :arg_type, :selections

  # @param [:array, :splat]  arg_type    Argument passing method.
  # @param [Array<Integer>]  selections  Token selection mask.
  def initialize(arg_type = :splat, selections = [])
    # No positional arguments are forwarded to Proc's initializer.
    super()

    @arg_type, @selections = arg_type, selections
  end
end
1645
+
1646
+ # The Action class is used to indicate what action the parser should
1647
+ # take given a current state and input token.
1648
class Action
  # Identifier carried by this action.  The subclasses interpret it
  # as a state ID or a production ID.
  #
  # @return [Integer] ID of this action.
  attr_reader :id

  # @param [Integer, nil] id ID of this action; defaults to nil.
  def initialize(id = nil)
    @id = id
  end
end
1657
+
1658
+ # The Accept class indicates to the parser that it should accept the
1659
+ # current parse tree.
1660
class Accept < Action
  # @return [String] Human readable name of this action.
  def to_s
    'Accept'
  end
end
1666
+
1667
+ # The GoTo class indicates to the parser that it should goto the state
1668
+ # specified by GoTo.id.
1669
class GoTo < Action
  # @return [String] Human readable form naming the target state.
  def to_s
    "GoTo #{id}"
  end
end
1675
+
1676
+ # The Reduce class indicates to the parser that it should reduce the
1677
+ # input stack by the rule specified by Reduce.id.
1678
class Reduce < Action
  # Build a Reduce action whose ID is the ID of *production*; the
  # production itself is kept for use by *to_s*.
  #
  # @param [Production] production Production to reduce by
  def initialize(production)
    super(production.id)

    @production = production
  end

  # @return [String] Human readable form naming the production.
  def to_s
    "Reduce by Production #{id} : #{@production}"
  end
end
1692
+
1693
+ # The Shift class indicates to the parser that it should shift the
1694
+ # current input token.
1695
class Shift < Action
  # @return [String] Human readable form naming the target state.
  def to_s
    "Shift to State #{id}"
  end
end
1701
+ end
1702
+ end