rltk 2.2.1 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +12 -12
  3. data/README.md +458 -285
  4. data/Rakefile +99 -92
  5. data/lib/rltk/ast.rb +221 -126
  6. data/lib/rltk/cfg.rb +218 -239
  7. data/lib/rltk/cg/basic_block.rb +1 -1
  8. data/lib/rltk/cg/bindings.rb +9 -26
  9. data/lib/rltk/cg/builder.rb +40 -8
  10. data/lib/rltk/cg/context.rb +1 -1
  11. data/lib/rltk/cg/contractor.rb +51 -0
  12. data/lib/rltk/cg/execution_engine.rb +45 -8
  13. data/lib/rltk/cg/function.rb +12 -2
  14. data/lib/rltk/cg/generated_bindings.rb +2541 -575
  15. data/lib/rltk/cg/generic_value.rb +2 -2
  16. data/lib/rltk/cg/instruction.rb +104 -83
  17. data/lib/rltk/cg/llvm.rb +44 -3
  18. data/lib/rltk/cg/memory_buffer.rb +22 -5
  19. data/lib/rltk/cg/module.rb +85 -36
  20. data/lib/rltk/cg/old_generated_bindings.rb +6152 -0
  21. data/lib/rltk/cg/pass_manager.rb +87 -43
  22. data/lib/rltk/cg/support.rb +2 -4
  23. data/lib/rltk/cg/target.rb +158 -28
  24. data/lib/rltk/cg/triple.rb +8 -8
  25. data/lib/rltk/cg/type.rb +69 -25
  26. data/lib/rltk/cg/value.rb +107 -66
  27. data/lib/rltk/cg.rb +16 -17
  28. data/lib/rltk/lexer.rb +21 -11
  29. data/lib/rltk/lexers/calculator.rb +1 -1
  30. data/lib/rltk/lexers/ebnf.rb +8 -7
  31. data/lib/rltk/parser.rb +300 -247
  32. data/lib/rltk/parsers/infix_calc.rb +1 -1
  33. data/lib/rltk/parsers/postfix_calc.rb +2 -2
  34. data/lib/rltk/parsers/prefix_calc.rb +2 -2
  35. data/lib/rltk/token.rb +1 -2
  36. data/lib/rltk/version.rb +3 -3
  37. data/lib/rltk.rb +6 -6
  38. data/test/cg/tc_basic_block.rb +83 -0
  39. data/test/cg/tc_control_flow.rb +191 -0
  40. data/test/cg/tc_function.rb +54 -0
  41. data/test/cg/tc_generic_value.rb +33 -0
  42. data/test/cg/tc_instruction.rb +256 -0
  43. data/test/cg/tc_llvm.rb +25 -0
  44. data/test/cg/tc_math.rb +88 -0
  45. data/test/cg/tc_module.rb +89 -0
  46. data/test/cg/tc_transforms.rb +68 -0
  47. data/test/cg/tc_type.rb +69 -0
  48. data/test/cg/tc_value.rb +151 -0
  49. data/test/cg/ts_cg.rb +23 -0
  50. data/test/tc_ast.rb +105 -8
  51. data/test/tc_cfg.rb +63 -48
  52. data/test/tc_lexer.rb +84 -96
  53. data/test/tc_parser.rb +224 -52
  54. data/test/tc_token.rb +6 -6
  55. data/test/ts_rltk.rb +12 -15
  56. metadata +149 -75
  57. data/lib/rltk/cg/generated_extended_bindings.rb +0 -287
  58. data/lib/rltk/util/abstract_class.rb +0 -25
  59. data/lib/rltk/util/monkeys.rb +0 -129
data/lib/rltk/parser.rb CHANGED
@@ -14,8 +14,8 @@ require 'rltk/cfg'
14
14
  # Classes and Modules #
15
15
  #######################
16
16
 
17
- module RLTK # :nodoc:
18
-
17
+ # The RLTK root module
18
+ module RLTK
19
19
  # A BadToken error indicates that a token was observed in the input stream
20
20
  # that wasn't used in the grammar's definition.
21
21
  class BadToken < StandardError
@@ -29,9 +29,28 @@ module RLTK # :nodoc:
29
29
  # for a given token stream. In other words, the input string is not in the
30
30
  # defined language.
31
31
  class NotInLanguage < StandardError
32
+
33
+ # @return [Array<Token>] List of tokens that have been successfully parsed
34
+ attr_reader :seen
35
+
36
+ # @return [Token] Token that caused the parser to stop
37
+ attr_reader :current
38
+
39
+ # @return [Array<Token>] List of tokens that have yet to be seen
40
+ attr_reader :remaining
41
+
42
+ # @param [Array<Token>] seen Tokens that have been successfully parsed
43
+ # @param [Token] current Token that caused the parser to stop
44
+ # @param [Array<Token>] remaining Tokens that have yet to be seen
45
+ def initialize(seen, current, remaining)
46
+ @seen = seen
47
+ @current = current
48
+ @remaining = remaining
49
+ end
50
+
32
51
  # @return [String] String representation of the error.
33
52
  def to_s
34
- 'String not in language.'
53
+ "String not in language. Token info:\n\tSeen: #{@seen}\n\tCurrent: #{@current}\n\tRemaining: #{@remaining}"
35
54
  end
36
55
  end
37
56
 
@@ -80,57 +99,53 @@ module RLTK # :nodoc:
80
99
  #
81
100
  # @return [void]
82
101
  def install_icvars
83
- @curr_lhs = nil
84
- @curr_prec = nil
102
+ @curr_lhs = nil
103
+ @curr_prec = nil
85
104
 
86
- @conflicts = Hash.new {|h, k| h[k] = Array.new}
87
- @grammar = CFG.new
105
+ @conflicts = Hash.new {|h, k| h[k] = Array.new}
106
+ @grammar = CFG.new
88
107
 
89
- @lh_sides = Hash.new
90
- @procs = Array.new
91
- @states = Array.new
108
+ @lh_sides = Hash.new
109
+ @procs = Array.new
110
+ @states = Array.new
92
111
 
93
112
  # Variables for dealing with precedence.
94
- @prec_counts = {:left => 0, :right => 0, :non => 0}
95
- @production_precs = Array.new
96
- @token_precs = Hash.new
113
+ @prec_counts = {:left => 0, :right => 0, :non => 0}
114
+ @production_precs = Array.new
115
+ @token_precs = Hash.new
97
116
 
98
- # Set the default argument handling policy.
99
- @args = :splat
117
+ # Set the default argument handling policy. Valid values
118
+ # are :array and :splat.
119
+ @default_arg_type = :splat
100
120
 
101
- @grammar.callback do |p, type, num|
102
- @procs[p.id] =
103
- [
121
+ @grammar.callback do |type, which, p, sels = []|
122
+ @procs[p.id] = [
104
123
  case type
105
- when :*
106
- case num
107
- when :first then Proc.new { || [] }
108
- else Proc.new { |os, o| os << o }
109
- end
110
-
111
- when :+
112
- case num
113
- when :first then Proc.new { |o| [o] }
114
- else Proc.new { |os, o| os << o }
115
- end
116
-
117
- when :'?'
118
- case num
119
- when :first then Proc.new { || nil }
120
- else Proc.new { |o| o }
124
+ when :optional
125
+ case which
126
+ when :empty then ProdProc.new { || nil }
127
+ else ProdProc.new { |o| o }
121
128
  end
122
129
 
123
130
  when :elp
124
- case num
125
- when :first then Proc.new { || [] }
126
- else Proc.new { |prime| prime }
131
+ case which
132
+ when :empty then ProdProc.new { || [] }
133
+ else ProdProc.new { |prime| prime }
127
134
  end
128
135
 
129
136
  when :nelp
130
- case num
131
- when :first then Proc.new { |el| [el] }
132
- when :second then Proc.new { |els, _, el| els + [el] }
133
- else Proc.new { |*el| if el.length == 1 then el.first else el end }
137
+ case which
138
+ when :single
139
+ ProdProc.new { |el| [el] }
140
+
141
+ when :multiple
142
+ ProdProc.new(:splat, sels) do |*syms|
143
+ el = syms[1..-1]
144
+ syms.first << (el.length == 1 ? el.first : el)
145
+ end
146
+
147
+ else
148
+ ProdProc.new { |*el| el.length == 1 ? el.first : el }
134
149
  end
135
150
  end,
136
151
  p.rhs.length
@@ -168,58 +183,6 @@ module RLTK # :nodoc:
168
183
  end
169
184
  end
170
185
 
171
- # Calling this method will cause the parser to pass right-hand
172
- # side values as arrays instead of splats. This method must be
173
- # called before ANY calls to Parser.production.
174
- #
175
- # @return [void]
176
- def array_args
177
- if @grammar.productions.length == 0
178
- @args = :array
179
-
180
- @grammar.callback do |p, type, num|
181
- @procs[p.id] =
182
- [
183
- case type
184
- when :*
185
- case num
186
- when :first then Proc.new { |v| [] }
187
- else Proc.new { |v| v[0] << v[1] }
188
- end
189
-
190
- when :+
191
- case num
192
- when :first then Proc.new { |v| [v[0]] }
193
- else Proc.new { |v| v[0] << v[1] }
194
- end
195
-
196
- when :'?'
197
- case num
198
- when :first then Proc.new { |v| nil }
199
- else Proc.new { |v| v[0] }
200
- end
201
-
202
- when :elp
203
- case num
204
- when :first then Proc.new { |v| [] }
205
- else Proc.new { |v| v[0] }
206
- end
207
-
208
- when :nelp
209
- case num
210
- when :first then Proc.new { |v| v }
211
- when :second then Proc.new { |v| v[0] + [v[2]] }
212
- else Proc.new { |v| if v.length == 1 then v.first else v end }
213
- end
214
- end,
215
- p.rhs.length
216
- ]
217
-
218
- @production_precs[p.id] = p.last_terminal
219
- end
220
- end
221
- end
222
-
223
186
  # Build a hash with the default options for Parser.finalize
224
187
  # and then update it with the values from *opts*.
225
188
  #
@@ -230,10 +193,10 @@ module RLTK # :nodoc:
230
193
  opts[:explain] = self.get_io(opts[:explain])
231
194
 
232
195
  {
233
- :explain => false,
234
- :lookahead => true,
235
- :precedence => true,
236
- :use => false
196
+ explain: false,
197
+ lookahead: true,
198
+ precedence: true,
199
+ use: false
237
200
  }.update(opts)
238
201
  end
239
202
  private :build_finalize_opts
@@ -245,14 +208,14 @@ module RLTK # :nodoc:
245
208
  #
246
209
  # @return [Hash{Symbol => Object}]
247
210
  def build_parse_opts(opts)
248
- opts[:parse_tree] = self.get_io(opts[:parse_tree])
249
- opts[:verbose] = self.get_io(opts[:verbose])
211
+ opts[:parse_tree] = self.get_io(opts[:parse_tree])
212
+ opts[:verbose] = self.get_io(opts[:verbose])
250
213
 
251
214
  {
252
- :accept => :first,
253
- :env => self::Environment.new,
254
- :parse_tree => false,
255
- :verbose => false
215
+ accept: :first,
216
+ env: self::Environment.new,
217
+ parse_tree: false,
218
+ verbose: false
256
219
  }.update(opts)
257
220
  end
258
221
  private :build_parse_opts
@@ -275,7 +238,7 @@ module RLTK # :nodoc:
275
238
  end
276
239
 
277
240
  # Check the actions in each state.
278
- @states.each do |state|
241
+ each_state do |state|
279
242
  state.actions.each do |sym, actions|
280
243
  if CFG::is_terminal?(sym)
281
244
  # Here we check actions for terminals.
@@ -312,14 +275,14 @@ module RLTK # :nodoc:
312
275
  # This method checks to see if the parser would be in parse state
313
276
  # *dest* after starting in state *start* and reading *symbols*.
314
277
  #
315
- # @param [Symbol] start Symbol representing a CFG production.
316
- # @param [Symbol] dest Symbol representing a CFG production.
317
- # @param [Array<Symbol>] symbols Grammar symbols.
278
+ # @param [Symbol] start Symbol representing a CFG production.
279
+ # @param [Symbol] dest Symbol representing a CFG production.
280
+ # @param [Array<Symbol>] symbols Grammar symbols.
318
281
  #
319
282
  # @return [Boolean] If the destination symbol is reachable from the start symbol after reading *symbols*.
320
283
  def check_reachability(start, dest, symbols)
321
- path_exists = true
322
- cur_state = start
284
+ path_exists = true
285
+ cur_state = start
323
286
 
324
287
  symbols.each do |sym|
325
288
 
@@ -345,27 +308,30 @@ module RLTK # :nodoc:
345
308
  # production can be changed by setting the *precedence* argument
346
309
  # to some terminal symbol.
347
310
  #
348
- # @param [String] expression Right-hand side of a production.
349
- # @param [Symbol] precedence Symbol representing the precedence of this production.
350
- # @param [Proc] action Action to be taken when the production is reduced.
311
+ # @param [String, Symbol] expression Right-hand side of a production.
312
+ # @param [Symbol] precedence Symbol representing the precedence of this production.
313
+ # @param [:array, :splat] arg_type Method to use when passing arguments to the action.
314
+ # @param [Proc] action Action to be taken when the production is reduced.
351
315
  #
352
316
  # @return [void]
353
- def clause(expression, precedence = nil, &action)
317
+ def clause(expression, precedence = nil, arg_type = @default_arg_type, &action)
354
318
  # Use the curr_prec only if it isn't overridden for this
355
319
  # clause.
356
320
  precedence ||= @curr_prec
357
321
 
358
- production = @grammar.clause(expression)
322
+ production, selections = @grammar.clause(expression)
359
323
 
360
324
  # Check to make sure the action's arity matches the number
361
325
  # of symbols on the right-hand side.
362
- if @args == :splat and action.arity != production.rhs.length
363
- raise ParserConstructionException, 'Incorrect number of arguments to action. Action arity must match the number of ' +
364
- 'terminals and non-terminals in the clause.'
326
+ expected_arity = (selections.empty? ? production.rhs.length : selections.length)
327
+ if arg_type == :splat and action.arity != expected_arity
328
+ raise ParserConstructionException,
329
+ "Incorrect number of action parameters. Expected #{expected_arity} but got #{action.arity}." +
330
+ ' Action arity must match the number of terminals and non-terminals in the clause.'
365
331
  end
366
332
 
367
333
  # Add the action to our proc list.
368
- @procs[production.id] = [action, production.rhs.length]
334
+ @procs[production.id] = [ProdProc.new(arg_type, selections, &action), production.rhs.length]
369
335
 
370
336
  # If no precedence is specified use the precedence of the
371
337
  # last terminal in the production.
@@ -382,28 +348,41 @@ module RLTK # :nodoc:
382
348
  @conflicts = nil
383
349
 
384
350
  # Drop the grammar and the grammar'.
385
- @grammar = nil
386
- @grammar_prime = nil
351
+ @grammar = nil
352
+ @grammar_prime = nil
387
353
 
388
354
  # Drop precedence and bookkeeping information.
389
- @cur_lhs = nil
390
- @cur_prec = nil
355
+ @cur_lhs = nil
356
+ @cur_prec = nil
391
357
 
392
- @prec_counts = nil
393
- @production_precs = nil
394
- @token_precs = nil
358
+ @prec_counts = nil
359
+ @production_precs = nil
360
+ @token_precs = nil
395
361
 
396
362
  # Drop the items from each of the states.
397
- @states.each { |state| state.clean }
363
+ each_state { |state| state.clean }
364
+ end
365
+
366
+ # Set the default argument type for the actions associated with
367
+ # clauses. All actions defined after this call will be passed
368
+ # arguments in the way specified here, unless overridden in the
369
+ # call to {Parser.clause}.
370
+ #
371
+ # @param [:array, :splat] type The default argument type.
372
+ #
373
+ # @return [void]
374
+ def default_arg_type(type)
375
+ @default_arg_type = type if type == :array or type == :splat
398
376
  end
377
+ alias :dat :default_arg_type
399
378
 
400
379
  # Adds productions and actions for parsing empty lists.
401
380
  #
402
381
  # @see CFG#empty_list_production
403
- def empty_list_production(symbol, list_elements, separator)
404
- @grammar.empty_list(symbol, list_elements, separator)
382
+ def build_list_production(symbol, list_elements, separator = '')
383
+ @grammar.build_list_production(symbol, list_elements, separator)
405
384
  end
406
- alias :empty_list :empty_list_production
385
+ alias :list :build_list_production
407
386
 
408
387
  # This function will print a description of the parser to the
409
388
  # provided IO object.
@@ -418,13 +397,21 @@ module RLTK # :nodoc:
418
397
  io.puts('###############')
419
398
  io.puts
420
399
 
400
+ max_id_length = @grammar.productions(:id).length.to_s.length
401
+
421
402
  # Print the productions.
422
403
  @grammar.productions.each do |sym, productions|
404
+
405
+ max_rhs_length = productions.inject(0) { |m, p| if (len = p.to_s.length) > m then len else m end }
406
+
423
407
  productions.each do |production|
424
- io.print("\tProduction #{production.id}: #{production.to_s}")
408
+ p_string = production.to_s
409
+
410
+ io.print("\tProduction #{sprintf("%#{max_id_length}d", production.id)}: #{p_string}")
425
411
 
426
412
  if (prec = @production_precs[production.id])
427
- io.print(" : (#{prec.first} , #{prec.last})")
413
+ io.print(' ' * (max_rhs_length - p_string.length))
414
+ io.print(" : (#{sprintf("%-5s", prec.first)}, #{prec.last})")
428
415
  end
429
416
 
430
417
  io.puts
@@ -438,11 +425,14 @@ module RLTK # :nodoc:
438
425
  io.puts('##########')
439
426
  io.puts
440
427
 
428
+ max_token_len = @grammar.terms.inject(0) { |m, t| if t.length > m then t.length else m end }
429
+
441
430
  @grammar.terms.sort {|a,b| a.to_s <=> b.to_s }.each do |term|
442
431
  io.print("\t#{term}")
443
432
 
444
433
  if (prec = @token_precs[term])
445
- io.print(" : (#{prec.first}, #{prec.last})")
434
+ io.print(' ' * (max_token_len - term.length))
435
+ io.print(" : (#{sprintf("%-5s", prec.first)}, #{prec.last})")
446
436
  end
447
437
 
448
438
  io.puts
@@ -455,7 +445,7 @@ module RLTK # :nodoc:
455
445
  io.puts('#####################')
456
446
  io.puts
457
447
 
458
- io.puts("\tStart symbol: #{@grammar.start_symbol}")
448
+ io.puts("\tStart symbol: #{@grammar.start_symbol}'")
459
449
  io.puts
460
450
 
461
451
  io.puts("\tTotal number of states: #{@states.length}")
@@ -476,7 +466,7 @@ module RLTK # :nodoc:
476
466
  io.puts('###############')
477
467
  io.puts
478
468
 
479
- @states.each do |state|
469
+ each_state do |state|
480
470
  io.puts("State #{state.id}:")
481
471
  io.puts
482
472
 
@@ -527,18 +517,15 @@ module RLTK # :nodoc:
527
517
  # of states and their actions, and the resolution of conflicts
528
518
  # using lookahead and precedence information.
529
519
  #
530
- # The *opts* hash may contain the following options, which are
531
- # described in more detail in the main documentation:
532
- #
533
- # * :explain - To explain the parser or not.
534
- # * :lookahead - To use lookahead info for conflict resolution.
535
- # * :precedence - To use precedence info for conflict resolution.
536
- # * :use - A file name or object that is used to load/save the parser.
537
- #
538
520
  # No calls to {Parser.production} may appear after the call to
539
521
  # Parser.finalize.
540
522
  #
541
- # @param [Hash{Symbol => Object}] opts Options describing how to finalize the parser.
523
+ # @param [Hash] opts Options describing how to finalize the parser.
524
+ #
525
+ # @option opts [Boolean,String,IO] :explain To explain the parser or not.
526
+ # @option opts [Boolean] :lookahead To use lookahead info for conflict resolution.
527
+ # @option opts [Boolean] :precedence To use precedence info for conflict resolution.
528
+ # @option opts [String,IO] :use A file name or object that is used to load/save the parser.
542
529
  #
543
530
  # @return [void]
544
531
  def finalize(opts = {})
@@ -572,13 +559,14 @@ module RLTK # :nodoc:
572
559
  return self.clean
573
560
  end
574
561
 
575
- # Grab all of the symbols that comprise the grammar (besides
576
- # the start symbol).
562
+ # Grab all of the symbols that comprise the grammar
563
+ # (besides the start symbol).
577
564
  @symbols = @grammar.symbols << :ERROR
578
565
 
579
566
  # Add our starting state to the state list.
580
- start_production = @grammar.production(:start, @grammar.start_symbol.to_s).first
581
- start_state = State.new(@symbols, [start_production.to_item])
567
+ @start_symbol = (@grammar.start_symbol.to_s + '\'').to_sym
568
+ start_production, _ = @grammar.production(@start_symbol, @grammar.start_symbol).first
569
+ start_state = State.new(@symbols, [start_production.to_item])
582
570
 
583
571
  start_state.close(@grammar.productions)
584
572
 
@@ -586,12 +574,10 @@ module RLTK # :nodoc:
586
574
 
587
575
  # Translate the precedence of productions from tokens to
588
576
  # (associativity, precedence) pairs.
589
- @production_precs.each_with_index do |prec, id|
590
- @production_precs[id] = @token_precs[prec]
591
- end
577
+ @production_precs.map! { |prec| @token_precs[prec] }
592
578
 
593
579
  # Build the rest of the transition table.
594
- @states.each do |state|
580
+ each_state do |state|
595
581
  #Transition states.
596
582
  tstates = Hash.new { |h,k| h[k] = State.new(@symbols) }
597
583
 
@@ -622,23 +608,17 @@ module RLTK # :nodoc:
622
608
  # Find the Accept and Reduce actions for this state.
623
609
  state.each do |item|
624
610
  if item.at_end?
625
- if item.lhs == :start
611
+ if item.lhs == @start_symbol
626
612
  state.on(:EOS, Accept.new)
627
613
  else
628
- state.add_reduction(item.id)
614
+ state.add_reduction(@grammar.productions(:id)[item.id])
629
615
  end
630
616
  end
631
617
  end
632
618
  end
633
619
 
634
620
  # Build the production.id -> production.lhs map.
635
- @grammar.productions(:id).to_a.inject(@lh_sides) do |h, pair|
636
- id, production = pair
637
-
638
- h[id] = production.lhs
639
-
640
- h
641
- end
621
+ @grammar.productions(:id).each { |id, production| @lh_sides[id] = production.lhs }
642
622
 
643
623
  # Prune the parsing table for unnecessary reduce actions.
644
624
  self.prune(opts[:lookahead], opts[:precedence])
@@ -667,8 +647,8 @@ module RLTK # :nodoc:
667
647
 
668
648
  # Converts an object into an IO object as appropriate.
669
649
  #
670
- # @param [Object] o Object to be converted into an IO object.
671
- # @param [String] mode String representing the mode to open the IO object in.
650
+ # @param [Object] o Object to be converted into an IO object.
651
+ # @param [String] mode String representing the mode to open the IO object in.
672
652
  #
673
653
  # @return [IO, false] The IO object or false if a conversion wasn't possible.
674
654
  def get_io(o, mode = 'w')
@@ -683,7 +663,20 @@ module RLTK # :nodoc:
683
663
  end
684
664
  end
685
665
 
686
- # @return [CFG] The grammar that can be parsed by this Parser.
666
+ # Iterate over the parser's states.
667
+ #
668
+ # @yieldparam [State] state One of the parser automaton's state objects
669
+ #
670
+ # @return [void]
671
+ def each_state
672
+ current_state = 0
673
+ while current_state < @states.count
674
+ yield @states.at(current_state)
675
+ current_state += 1
676
+ end
677
+ end
678
+
679
+ # @return [CFG] The grammar that can be parsed by this Parser.
687
680
  def grammar
688
681
  @grammar.clone
689
682
  end
@@ -692,7 +685,7 @@ module RLTK # :nodoc:
692
685
  # calculate the LALR(1) lookahead sets. Information about this
693
686
  # grammar and its use can be found in the following paper:
694
687
  #
695
- # Simple Computation of LALR(1) Lookahed Sets
688
+ # Simple Computation of LALR(1) Lookahead Sets
696
689
  # Manuel E. Bermudez and George Logothetis
697
690
  # Information Processing Letters 31 - 1989
698
691
  #
@@ -701,14 +694,14 @@ module RLTK # :nodoc:
701
694
  if not @grammar_prime
702
695
  @grammar_prime = CFG.new
703
696
 
704
- @states.each do |state|
697
+ each_state do |state|
705
698
  state.each do |item|
706
699
  lhs = "#{state.id}_#{item.next_symbol}".to_sym
707
700
 
708
701
  next unless CFG::is_nonterminal?(item.next_symbol) and not @grammar_prime.productions.keys.include?(lhs)
709
702
 
710
703
  @grammar.productions[item.next_symbol].each do |production|
711
- rhs = ""
704
+ rhs = ''
712
705
 
713
706
  cstate = state
714
707
 
@@ -729,9 +722,9 @@ module RLTK # :nodoc:
729
722
 
730
723
  # Inform the parser core that a conflict has been detected.
731
724
  #
732
- # @param [Integer] state_id ID of the state where the conflict was encountered.
733
- # @param [:RR, :SR] type Reduce/Reduce or Shift/Reduce conflict.
734
- # @param [Symbol] sym Symbol that caused the conflict.
725
+ # @param [Integer] state_id ID of the state where the conflict was encountered.
726
+ # @param [:RR, :SR] type Reduce/Reduce or Shift/Reduce conflict.
727
+ # @param [Symbol] sym Symbol that caused the conflict.
735
728
  #
736
729
  # @return [void]
737
730
  def inform_conflict(state_id, type, sym)
@@ -742,7 +735,7 @@ module RLTK # :nodoc:
742
735
  # are left-associative. Subsequent calls to this method will
743
736
  # give their arguments higher precedence.
744
737
  #
745
- # @param [Array<Symbol>] symbols Symbols that are left associative.
738
+ # @param [Array<Symbol>] symbols Symbols that are left associative.
746
739
  #
747
740
  # @return [void]
748
741
  def left(*symbols)
@@ -756,7 +749,7 @@ module RLTK # :nodoc:
756
749
  # This method is used to specify that the symbols in *symbols*
757
750
  # are non-associative.
758
751
  #
759
- # @param [Array<Symbol>] symbols Symbols that are non-associative.
752
+ # @param [Array<Symbol>] symbols Symbols that are non-associative.
760
753
  #
761
754
  # @return [void]
762
755
  def nonassoc(*symbols)
@@ -770,29 +763,28 @@ module RLTK # :nodoc:
770
763
  # Adds productions and actions for parsing nonempty lists.
771
764
  #
772
765
  # @see CFG#nonempty_list_production
773
- def nonempty_list_production(symbol, list_elements, separator)
774
- @grammar.nonempty_list(symbol, list_elements, separator)
766
+ def build_nonempty_list_production(symbol, list_elements, separator = '')
767
+ @grammar.build_nonempty_list_production(symbol, list_elements, separator)
775
768
  end
776
- alias :nonempty_list :nonempty_list_production
769
+ alias :nonempty_list :build_nonempty_list_production
777
770
 
778
771
  # This function is where actual parsing takes place. The
779
772
  # _tokens_ argument must be an array of Token objects, the last
780
773
  # of which has type EOS. By default this method will return the
781
- # value computed by the first successful parse tree found. It is
782
- # possible to adjust this behavior using the _opts_ hash as
783
- # follows:
774
+ # value computed by the first successful parse tree found.
784
775
  #
785
- # * :accept - Either :first or :all.
786
- # * :env - The environment in which to evaluate the production actions.
787
- # * :parse_tree - To print parse trees in the DOT language or not.
788
- # * :verbose - To be verbose or not.
789
- #
790
- # Additional information for these options can be found in the
791
- # main documentation.
776
+ # Additional information about the parsing options can be found in
777
+ # the main documentation.
778
+ #
779
+ # @param [Array<Token>] tokens Tokens to be parsed.
780
+ # @param [Hash] opts Options to use when parsing input.
792
781
  #
793
- # @param [Array<Token>] tokens Tokens to be parsed.
782
+ # @option opts [:first, :all] :accept Either :first or :all.
783
+ # @option opts [Object] :env The environment in which to evaluate the production action.
784
+ # @option opts [Boolean,String,IO] :parse_tree To print parse trees in the DOT language or not.
785
+ # @option opts [Boolean,String,IO] :verbose To be verbose or not.
794
786
  #
795
- # @return [Object, Array<Object>] Result or results of parsing the given tokens.
787
+ # @return [Object, Array<Object>] Result or results of parsing the given tokens.
796
788
  def parse(tokens, opts = {})
797
789
  # Get the full options hash.
798
790
  opts = build_parse_opts(opts)
@@ -808,18 +800,18 @@ module RLTK # :nodoc:
808
800
  stack_id = 0
809
801
 
810
802
  # Error mode indicators.
811
- error_mode = false
812
- reduction_guard = false
803
+ error_mode = false
804
+ reduction_guard = false
813
805
 
814
806
  # Our various list of stacks.
815
- accepted = []
816
- moving_on = []
817
- processing = [ParseStack.new(stack_id += 1)]
807
+ accepted = []
808
+ moving_on = []
809
+ processing = [ParseStack.new(stack_id += 1)]
818
810
 
819
811
  # Iterate over the tokens. We don't procede to the
820
812
  # next token until every stack is done with the
821
813
  # current one.
822
- tokens.each do |token|
814
+ tokens.each_with_index do |token, index|
823
815
  # Check to make sure this token was seen in the
824
816
  # grammar definition.
825
817
  raise BadToken if not @symbols.include?(token.type)
@@ -937,8 +929,12 @@ module RLTK # :nodoc:
937
929
  args, positions = stack.pop(pop_size)
938
930
  opts[:env].set_positions(positions)
939
931
 
932
+ if not production_proc.selections.empty?
933
+ args = args.values_at(*production_proc.selections)
934
+ end
935
+
940
936
  result =
941
- if @args == :array
937
+ if production_proc.arg_type == :array
942
938
  opts[:env].instance_exec(args, &production_proc)
943
939
  else
944
940
  opts[:env].instance_exec(*args, &production_proc)
@@ -994,14 +990,14 @@ module RLTK # :nodoc:
994
990
 
995
991
  v.puts("\n\n") if v
996
992
 
997
- processing = moving_on
998
- moving_on = []
993
+ processing = moving_on
994
+ moving_on = []
999
995
 
1000
996
  # If we don't have any active stacks at this point the
1001
997
  # string isn't in the language.
1002
998
  if opts[:accept] == :first and processing.length == 0
1003
999
  v.close if v and v != $stdout
1004
- raise NotInLanguage
1000
+ raise NotInLanguage.new(tokens[0...index], tokens[index], tokens[index.next..-1])
1005
1001
  end
1006
1002
 
1007
1003
  reduction_guard = false
@@ -1036,21 +1032,28 @@ module RLTK # :nodoc:
1036
1032
  # Parser.clause. A precedence can be associated with this
1037
1033
  # production by setting *precedence* to a terminal symbol.
1038
1034
  #
1039
- # @param [Symbol] symbol Left-hand side of the production.
1040
- # @param [String, nil] expression Right-hand side of the production.
1041
- # @param [Symbol, nil] precedence Symbol representing the precedence of this produciton.
1042
- # @param [Proc] action Action associated with this production.
1035
+ # @param [Symbol] symbol Left-hand side of the production.
1036
+ # @param [String, Symbol, nil] expression Right-hand side of the production.
1037
+ # @param [Symbol, nil] precedence Symbol representing the precedence of this produciton.
1038
+ # @param [:array, :splat] arg_type Method to use when passing arguments to the action.
1039
+ # @param [Proc] action Action associated with this production.
1043
1040
  #
1044
1041
  # @return [void]
1045
- def production(symbol, expression = nil, precedence = nil, &action)
1042
+ def production(symbol, expression = nil, precedence = nil, arg_type = @default_arg_type, &action)
1046
1043
 
1047
1044
  # Check the symbol.
1048
1045
  if not (symbol.is_a?(Symbol) or symbol.is_a?(String)) or not CFG::is_nonterminal?(symbol)
1049
- riase ParserConstructionException, 'Production symbols must be Strings or Symbols and be in all lowercase.'
1046
+ raise ParserConstructionException, 'Production symbols must be Strings or Symbols and be in all lowercase.'
1050
1047
  end
1051
1048
 
1052
- @grammar.curr_lhs = symbol.to_sym
1053
- @curr_prec = precedence
1049
+ @grammar.curr_lhs = symbol.to_sym
1050
+ @curr_prec = precedence
1051
+
1052
+ orig_dat = nil
1053
+ if arg_type != @default_arg_type
1054
+ orig_dat = @default_arg_type
1055
+ @default_arg_type = arg_type
1056
+ end
1054
1057
 
1055
1058
  if expression
1056
1059
  self.clause(expression, precedence, &action)
@@ -1058,16 +1061,18 @@ module RLTK # :nodoc:
1058
1061
  self.instance_exec(&action)
1059
1062
  end
1060
1063
 
1061
- @grammar.curr_lhs = nil
1062
- @curr_prec = nil
1064
+ @default_arg_type = orig_dat if not orig_dat.nil?
1065
+
1066
+ @grammar.curr_lhs = nil
1067
+ @curr_prec = nil
1063
1068
  end
1064
1069
  alias :p :production
1065
1070
 
1066
1071
  # This method uses lookahead sets and precedence information to
1067
1072
  # resolve conflicts and remove unnecessary reduce actions.
1068
1073
  #
1069
- # @param [Boolean] do_lookahead Prune based on lookahead sets or not.
1070
- # @param [Boolean] do_precedence Prune based on precedence or not.
1074
+ # @param [Boolean] do_lookahead Prune based on lookahead sets or not.
1075
+ # @param [Boolean] do_precedence Prune based on precedence or not.
1071
1076
  #
1072
1077
  # @return [void]
1073
1078
  def prune(do_lookahead, do_precedence)
@@ -1076,7 +1081,7 @@ module RLTK # :nodoc:
1076
1081
  # If both options are false there is no pruning to do.
1077
1082
  return if not (do_lookahead or do_precedence)
1078
1083
 
1079
- @states.each do |state0|
1084
+ each_state do |state0|
1080
1085
 
1081
1086
  #####################
1082
1087
  # Lookahead Pruning #
@@ -1092,23 +1097,27 @@ module RLTK # :nodoc:
1092
1097
  lookahead = Array.new
1093
1098
 
1094
1099
  # Build the lookahead set.
1095
- @states.each do |state1|
1100
+ each_state do |state1|
1096
1101
  if self.check_reachability(state1, state0, production.rhs)
1097
1102
  lookahead |= self.grammar_prime.follow_set("#{state1.id}_#{production.lhs}".to_sym)
1098
1103
  end
1099
1104
  end
1100
1105
 
1101
- # Translate the G' follow symbols into G lookahead
1102
- # symbols.
1103
- lookahead = lookahead.map { |sym| sym.to_s.split('_').last.to_sym }.uniq
1106
+ # Translate the G' follow symbols into G
1107
+ # lookahead symbols.
1108
+ lookahead = lookahead.map { |sym| sym.to_s.split('_', 2).last.to_sym }.uniq
1104
1109
 
1105
1110
  # Here we remove the unnecessary reductions.
1106
1111
  # If there are error productions we need to
1107
1112
  # scale back the amount of pruning done.
1108
- (terms - lookahead).each do |sym|
1109
- if not (terms.include?(:ERROR) and not state0.conflict_on?(sym))
1110
- state0.actions[sym].delete(reduction)
1113
+ pruning_candidates = terms - lookahead
1114
+
1115
+ if terms.include?(:ERROR)
1116
+ pruning_candidates.each do |sym|
1117
+ state0.actions[sym].delete(reduction) if state0.conflict_on?(sym)
1111
1118
  end
1119
+ else
1120
+ pruning_candidates.each { |sym| state0.actions[sym].delete(reduction) }
1112
1121
  end
1113
1122
  end
1114
1123
  end
@@ -1153,8 +1162,8 @@ module RLTK # :nodoc:
1153
1162
  # * The token is left associative and the current action is a Reduce
1154
1163
  # * The token is right associative and the current action is a Shift
1155
1164
  if prec > max_prec or (prec == max_prec and tassoc == (a.is_a?(Shift) ? :right : :left))
1156
- max_prec = prec
1157
- selected_action = a
1165
+ max_prec = prec
1166
+ selected_action = a
1158
1167
 
1159
1168
  elsif prec == max_prec and assoc == :nonassoc
1160
1169
  raise ParserConstructionException, 'Non-associative token found during conflict resolution.'
@@ -1282,23 +1291,23 @@ module RLTK # :nodoc:
1282
1291
 
1283
1292
  # Instantiate a new ParseStack object.
1284
1293
  #
1285
- # @param [Integer] id ID for this parse stack. Used by GLR algorithm.
1286
- # @param [Array<Object>] ostack Output stack. Holds results of {Reduce} and {Shift} actions.
1287
- # @param [Array<Integer>] sstack State stack. Holds states that have been shifted due to {Shift} actions.
1288
- # @param [Array<Integer>] nstack Node stack. Holds dot language IDs for nodes in the parse tree.
1289
- # @param [Array<Array<Integer>>] connections Integer pairs representing edges in the parse tree.
1290
- # @param [Array<Symbol>] labels Labels for nodes in the parse tree.
1291
- # @param [Array<StreamPosition>] positions Position data for symbols that have been shifted.
1294
+ # @param [Integer] id ID for this parse stack. Used by GLR algorithm.
1295
+ # @param [Array<Object>] ostack Output stack. Holds results of {Reduce} and {Shift} actions.
1296
+ # @param [Array<Integer>] sstack State stack. Holds states that have been shifted due to {Shift} actions.
1297
+ # @param [Array<Integer>] nstack Node stack. Holds dot language IDs for nodes in the parse tree.
1298
+ # @param [Array<Array<Integer>>] connections Integer pairs representing edges in the parse tree.
1299
+ # @param [Array<Symbol>] labels Labels for nodes in the parse tree.
1300
+ # @param [Array<StreamPosition>] positions Position data for symbols that have been shifted.
1292
1301
  def initialize(id, ostack = [], sstack = [0], nstack = [], connections = [], labels = [], positions = [])
1293
1302
  @id = id
1294
1303
 
1295
- @node_stack = nstack
1296
- @output_stack = ostack
1297
- @state_stack = sstack
1304
+ @node_stack = nstack
1305
+ @output_stack = ostack
1306
+ @state_stack = sstack
1298
1307
 
1299
- @connections = connections
1300
- @labels = labels
1301
- @positions = positions
1308
+ @connections = connections
1309
+ @labels = labels
1310
+ @positions = positions
1302
1311
  end
1303
1312
 
1304
1313
  # Branch this stack, effectively creating a new copy of its
@@ -1308,8 +1317,24 @@ module RLTK # :nodoc:
1308
1317
  #
1309
1318
  # @return [ParseStack]
1310
1319
  def branch(new_id)
1311
- ParseStack.new(new_id, @output_stack.clone, @state_stack.clone, @node_stack.clone,
1312
- @connections.clone, @labels.clone, @positions.clone)
1320
+ # We have to do a deeper copy of the output stack to avoid
1321
+ # interactions between the Proc objects for the different
1322
+ # parsing paths.
1323
+ #
1324
+ # The begin/rescue block is needed because some classes
1325
+ # respond to `clone` but always raise an error.
1326
+ new_output_stack = @output_stack.map do |o|
1327
+ # Check to see if we can obtain a deep copy.
1328
+ if 0.respond_to?(:copy)
1329
+ o.copy
1330
+
1331
+ else
1332
+ begin o.clone rescue o end
1333
+ end
1334
+ end
1335
+
1336
+ ParseStack.new(new_id, new_output_stack, @state_stack.clone,
1337
+ @node_stack.clone, @connections.clone, @labels.clone, @positions.clone)
1313
1338
  end
1314
1339
 
1315
1340
  # @return [StreamPosition] Position data for the last symbol on the stack.
@@ -1347,7 +1372,7 @@ module RLTK # :nodoc:
1347
1372
  #
1348
1373
  # @param [Integer] n Number of objects to pop off the stack.
1349
1374
  #
1350
- # @return [Array<Array<Object, StreamPosition>>] Values popped from the output and positions stacks.
1375
+ # @return [Array(Object, StreamPosition)] Values popped from the output and positions stacks.
1351
1376
  def pop(n = 1)
1352
1377
  @state_stack.pop(n)
1353
1378
 
@@ -1406,27 +1431,27 @@ module RLTK # :nodoc:
1406
1431
  # @return [Integer] State's ID.
1407
1432
  attr_accessor :id
1408
1433
 
1409
- # @return [Array<CFG::Item>] Item objects that comprise this state.
1434
+ # @return [Array<CFG::Item>] Item objects that comprise this state
1410
1435
  attr_reader :items
1411
1436
 
1412
- # @return [Array<Action>] Action objects that represent the actions that should be taken when various inputs are observed.
1437
+ # @return [Hash{Symbol => Array<Action>}] Maps lookahead symbols to actions
1413
1438
  attr_reader :actions
1414
1439
 
1415
1440
  # Instantiate a new State object.
1416
1441
  #
1417
- # @param [Array<Token>] tokens Tokens that represent this state.
1418
- # @param [Array<CFG::Item>] items Items that make up this state.
1442
+ # @param [Array<Symbol>] tokens Tokens that represent this state
1443
+ # @param [Array<CFG::Item>] items Items that make up this state
1419
1444
  def initialize(tokens, items = [])
1420
- @id = nil
1421
- @items = items
1422
- @actions = tokens.inject(Hash.new) { |h, t| h[t] = Array.new; h }
1445
+ @id = nil
1446
+ @items = items
1447
+ @actions = tokens.inject(Hash.new) { |h, t| h[t] = Array.new; h }
1423
1448
  end
1424
1449
 
1425
1450
  # Compare one State to another. Two States are equal if they
1426
1451
  # have the same items or, if the items have been cleaned, if
1427
1452
  # the States have the same ID.
1428
1453
  #
1429
- # @param [State] other Another State to compare to.
1454
+ # @param [State] other Another State to compare to
1430
1455
  #
1431
1456
  # @return [Boolean]
1432
1457
  def ==(other)
@@ -1435,11 +1460,11 @@ module RLTK # :nodoc:
1435
1460
 
1436
1461
  # Add a Reduce action to the state.
1437
1462
  #
1438
- # @param [Integer] production_id ID of production to add to this state.
1463
+ # @param [Production] production Production used to perform the reduction
1439
1464
  #
1440
1465
  # @return [void]
1441
- def add_reduction(production_id)
1442
- action = Reduce.new(production_id)
1466
+ def add_reduction(production)
1467
+ action = Reduce.new(production)
1443
1468
 
1444
1469
  # Reduce actions are not allowed for the ERROR terminal.
1445
1470
  @actions.each { |k, v| if CFG::is_terminal?(k) and k != :ERROR then v << action end }
@@ -1507,7 +1532,11 @@ module RLTK # :nodoc:
1507
1532
  #
1508
1533
  # @return [void]
1509
1534
  def each
1510
- @items.each {|item| yield item}
1535
+ current_item = 0
1536
+ while current_item < @items.count
1537
+ yield @items.at(current_item)
1538
+ current_item += 1
1539
+ end
1511
1540
  end
1512
1541
 
1513
1542
  # Specify an Action to perform when the input token is *symbol*.
@@ -1535,6 +1564,22 @@ module RLTK # :nodoc:
1535
1564
  end
1536
1565
  end
1537
1566
 
1567
+ # A subclass of Proc that indicates how it should be passed arguments
1568
+ # by the parser.
1569
+ class ProdProc < Proc
1570
+ # @return [:array, :splat] Method that should be used to pass arguments to this proc.
1571
+ attr_reader :arg_type
1572
+
1573
+ # @return [Array<Integer>] Mask for selection of tokens to pass to action. Empty mask means pass all.
1574
+ attr_reader :selections
1575
+
1576
+ def initialize(arg_type = :splat, selections = [])
1577
+ super()
1578
+ @arg_type = arg_type
1579
+ @selections = selections
1580
+ end
1581
+ end
1582
+
1538
1583
  # The Action class is used to indicate what action the parser should
1539
1584
  # take given a current state and input token.
1540
1585
  class Action
@@ -1568,9 +1613,17 @@ module RLTK # :nodoc:
1568
1613
  # The Reduce class indicates to the parser that it should reduce the
1569
1614
  # input stack by the rule specified by Reduce.id.
1570
1615
  class Reduce < Action
1616
+
1617
+ # @param [Production] production Production to reduce by
1618
+ def initialize(production)
1619
+ super(production.id)
1620
+
1621
+ @production = production
1622
+ end
1623
+
1571
1624
  # @return [String] String representation of this action.
1572
1625
  def to_s
1573
- "Reduce by Production #{self.id}"
1626
+ "Reduce by Production #{self.id} : #{@production}"
1574
1627
  end
1575
1628
  end
1576
1629