rltk 2.2.1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +12 -12
  3. data/README.md +458 -285
  4. data/Rakefile +99 -92
  5. data/lib/rltk/ast.rb +221 -126
  6. data/lib/rltk/cfg.rb +218 -239
  7. data/lib/rltk/cg/basic_block.rb +1 -1
  8. data/lib/rltk/cg/bindings.rb +9 -26
  9. data/lib/rltk/cg/builder.rb +40 -8
  10. data/lib/rltk/cg/context.rb +1 -1
  11. data/lib/rltk/cg/contractor.rb +51 -0
  12. data/lib/rltk/cg/execution_engine.rb +45 -8
  13. data/lib/rltk/cg/function.rb +12 -2
  14. data/lib/rltk/cg/generated_bindings.rb +2541 -575
  15. data/lib/rltk/cg/generic_value.rb +2 -2
  16. data/lib/rltk/cg/instruction.rb +104 -83
  17. data/lib/rltk/cg/llvm.rb +44 -3
  18. data/lib/rltk/cg/memory_buffer.rb +22 -5
  19. data/lib/rltk/cg/module.rb +85 -36
  20. data/lib/rltk/cg/old_generated_bindings.rb +6152 -0
  21. data/lib/rltk/cg/pass_manager.rb +87 -43
  22. data/lib/rltk/cg/support.rb +2 -4
  23. data/lib/rltk/cg/target.rb +158 -28
  24. data/lib/rltk/cg/triple.rb +8 -8
  25. data/lib/rltk/cg/type.rb +69 -25
  26. data/lib/rltk/cg/value.rb +107 -66
  27. data/lib/rltk/cg.rb +16 -17
  28. data/lib/rltk/lexer.rb +21 -11
  29. data/lib/rltk/lexers/calculator.rb +1 -1
  30. data/lib/rltk/lexers/ebnf.rb +8 -7
  31. data/lib/rltk/parser.rb +300 -247
  32. data/lib/rltk/parsers/infix_calc.rb +1 -1
  33. data/lib/rltk/parsers/postfix_calc.rb +2 -2
  34. data/lib/rltk/parsers/prefix_calc.rb +2 -2
  35. data/lib/rltk/token.rb +1 -2
  36. data/lib/rltk/version.rb +3 -3
  37. data/lib/rltk.rb +6 -6
  38. data/test/cg/tc_basic_block.rb +83 -0
  39. data/test/cg/tc_control_flow.rb +191 -0
  40. data/test/cg/tc_function.rb +54 -0
  41. data/test/cg/tc_generic_value.rb +33 -0
  42. data/test/cg/tc_instruction.rb +256 -0
  43. data/test/cg/tc_llvm.rb +25 -0
  44. data/test/cg/tc_math.rb +88 -0
  45. data/test/cg/tc_module.rb +89 -0
  46. data/test/cg/tc_transforms.rb +68 -0
  47. data/test/cg/tc_type.rb +69 -0
  48. data/test/cg/tc_value.rb +151 -0
  49. data/test/cg/ts_cg.rb +23 -0
  50. data/test/tc_ast.rb +105 -8
  51. data/test/tc_cfg.rb +63 -48
  52. data/test/tc_lexer.rb +84 -96
  53. data/test/tc_parser.rb +224 -52
  54. data/test/tc_token.rb +6 -6
  55. data/test/ts_rltk.rb +12 -15
  56. metadata +149 -75
  57. data/lib/rltk/cg/generated_extended_bindings.rb +0 -287
  58. data/lib/rltk/util/abstract_class.rb +0 -25
  59. data/lib/rltk/util/monkeys.rb +0 -129
data/lib/rltk/parser.rb CHANGED
@@ -14,8 +14,8 @@ require 'rltk/cfg'
14
14
  # Classes and Modules #
15
15
  #######################
16
16
 
17
- module RLTK # :nodoc:
18
-
17
+ # The RLTK root module
18
+ module RLTK
19
19
  # A BadToken error indicates that a token was observed in the input stream
20
20
  # that wasn't used in the grammar's definition.
21
21
  class BadToken < StandardError
@@ -29,9 +29,28 @@ module RLTK # :nodoc:
29
29
  # for a given token stream. In other words, the input string is not in the
30
30
  # defined language.
31
31
  class NotInLanguage < StandardError
32
+
33
+ # @return [Array<Token>] List of tokens that have been successfully parsed
34
+ attr_reader :seen
35
+
36
+ # @return [Token] Token that caused the parser to stop
37
+ attr_reader :current
38
+
39
+ # @return [Array<Token>] List of tokens that have yet to be seen
40
+ attr_reader :remaining
41
+
42
+ # @param [Array<Token>] seen Tokens that have been successfully parsed
43
+ # @param [Token] current Token that caused the parser to stop
44
+ # @param [Array<Token>] remaining Tokens that have yet to be seen
45
+ def initialize(seen, current, remaining)
46
+ @seen = seen
47
+ @current = current
48
+ @remaining = remaining
49
+ end
50
+
32
51
  # @return [String] String representation of the error.
33
52
  def to_s
34
- 'String not in language.'
53
+ "String not in language. Token info:\n\tSeen: #{@seen}\n\tCurrent: #{@current}\n\tRemaining: #{@remaining}"
35
54
  end
36
55
  end
37
56
 
@@ -80,57 +99,53 @@ module RLTK # :nodoc:
80
99
  #
81
100
  # @return [void]
82
101
  def install_icvars
83
- @curr_lhs = nil
84
- @curr_prec = nil
102
+ @curr_lhs = nil
103
+ @curr_prec = nil
85
104
 
86
- @conflicts = Hash.new {|h, k| h[k] = Array.new}
87
- @grammar = CFG.new
105
+ @conflicts = Hash.new {|h, k| h[k] = Array.new}
106
+ @grammar = CFG.new
88
107
 
89
- @lh_sides = Hash.new
90
- @procs = Array.new
91
- @states = Array.new
108
+ @lh_sides = Hash.new
109
+ @procs = Array.new
110
+ @states = Array.new
92
111
 
93
112
  # Variables for dealing with precedence.
94
- @prec_counts = {:left => 0, :right => 0, :non => 0}
95
- @production_precs = Array.new
96
- @token_precs = Hash.new
113
+ @prec_counts = {:left => 0, :right => 0, :non => 0}
114
+ @production_precs = Array.new
115
+ @token_precs = Hash.new
97
116
 
98
- # Set the default argument handling policy.
99
- @args = :splat
117
+ # Set the default argument handling policy. Valid values
118
+ # are :array and :splat.
119
+ @default_arg_type = :splat
100
120
 
101
- @grammar.callback do |p, type, num|
102
- @procs[p.id] =
103
- [
121
+ @grammar.callback do |type, which, p, sels = []|
122
+ @procs[p.id] = [
104
123
  case type
105
- when :*
106
- case num
107
- when :first then Proc.new { || [] }
108
- else Proc.new { |os, o| os << o }
109
- end
110
-
111
- when :+
112
- case num
113
- when :first then Proc.new { |o| [o] }
114
- else Proc.new { |os, o| os << o }
115
- end
116
-
117
- when :'?'
118
- case num
119
- when :first then Proc.new { || nil }
120
- else Proc.new { |o| o }
124
+ when :optional
125
+ case which
126
+ when :empty then ProdProc.new { || nil }
127
+ else ProdProc.new { |o| o }
121
128
  end
122
129
 
123
130
  when :elp
124
- case num
125
- when :first then Proc.new { || [] }
126
- else Proc.new { |prime| prime }
131
+ case which
132
+ when :empty then ProdProc.new { || [] }
133
+ else ProdProc.new { |prime| prime }
127
134
  end
128
135
 
129
136
  when :nelp
130
- case num
131
- when :first then Proc.new { |el| [el] }
132
- when :second then Proc.new { |els, _, el| els + [el] }
133
- else Proc.new { |*el| if el.length == 1 then el.first else el end }
137
+ case which
138
+ when :single
139
+ ProdProc.new { |el| [el] }
140
+
141
+ when :multiple
142
+ ProdProc.new(:splat, sels) do |*syms|
143
+ el = syms[1..-1]
144
+ syms.first << (el.length == 1 ? el.first : el)
145
+ end
146
+
147
+ else
148
+ ProdProc.new { |*el| el.length == 1 ? el.first : el }
134
149
  end
135
150
  end,
136
151
  p.rhs.length
@@ -168,58 +183,6 @@ module RLTK # :nodoc:
168
183
  end
169
184
  end
170
185
 
171
- # Calling this method will cause the parser to pass right-hand
172
- # side values as arrays instead of splats. This method must be
173
- # called before ANY calls to Parser.production.
174
- #
175
- # @return [void]
176
- def array_args
177
- if @grammar.productions.length == 0
178
- @args = :array
179
-
180
- @grammar.callback do |p, type, num|
181
- @procs[p.id] =
182
- [
183
- case type
184
- when :*
185
- case num
186
- when :first then Proc.new { |v| [] }
187
- else Proc.new { |v| v[0] << v[1] }
188
- end
189
-
190
- when :+
191
- case num
192
- when :first then Proc.new { |v| [v[0]] }
193
- else Proc.new { |v| v[0] << v[1] }
194
- end
195
-
196
- when :'?'
197
- case num
198
- when :first then Proc.new { |v| nil }
199
- else Proc.new { |v| v[0] }
200
- end
201
-
202
- when :elp
203
- case num
204
- when :first then Proc.new { |v| [] }
205
- else Proc.new { |v| v[0] }
206
- end
207
-
208
- when :nelp
209
- case num
210
- when :first then Proc.new { |v| v }
211
- when :second then Proc.new { |v| v[0] + [v[2]] }
212
- else Proc.new { |v| if v.length == 1 then v.first else v end }
213
- end
214
- end,
215
- p.rhs.length
216
- ]
217
-
218
- @production_precs[p.id] = p.last_terminal
219
- end
220
- end
221
- end
222
-
223
186
  # Build a hash with the default options for Parser.finalize
224
187
  # and then update it with the values from *opts*.
225
188
  #
@@ -230,10 +193,10 @@ module RLTK # :nodoc:
230
193
  opts[:explain] = self.get_io(opts[:explain])
231
194
 
232
195
  {
233
- :explain => false,
234
- :lookahead => true,
235
- :precedence => true,
236
- :use => false
196
+ explain: false,
197
+ lookahead: true,
198
+ precedence: true,
199
+ use: false
237
200
  }.update(opts)
238
201
  end
239
202
  private :build_finalize_opts
@@ -245,14 +208,14 @@ module RLTK # :nodoc:
245
208
  #
246
209
  # @return [Hash{Symbol => Object}]
247
210
  def build_parse_opts(opts)
248
- opts[:parse_tree] = self.get_io(opts[:parse_tree])
249
- opts[:verbose] = self.get_io(opts[:verbose])
211
+ opts[:parse_tree] = self.get_io(opts[:parse_tree])
212
+ opts[:verbose] = self.get_io(opts[:verbose])
250
213
 
251
214
  {
252
- :accept => :first,
253
- :env => self::Environment.new,
254
- :parse_tree => false,
255
- :verbose => false
215
+ accept: :first,
216
+ env: self::Environment.new,
217
+ parse_tree: false,
218
+ verbose: false
256
219
  }.update(opts)
257
220
  end
258
221
  private :build_parse_opts
@@ -275,7 +238,7 @@ module RLTK # :nodoc:
275
238
  end
276
239
 
277
240
  # Check the actions in each state.
278
- @states.each do |state|
241
+ each_state do |state|
279
242
  state.actions.each do |sym, actions|
280
243
  if CFG::is_terminal?(sym)
281
244
  # Here we check actions for terminals.
@@ -312,14 +275,14 @@ module RLTK # :nodoc:
312
275
  # This method checks to see if the parser would be in parse state
313
276
  # *dest* after starting in state *start* and reading *symbols*.
314
277
  #
315
- # @param [Symbol] start Symbol representing a CFG production.
316
- # @param [Symbol] dest Symbol representing a CFG production.
317
- # @param [Array<Symbol>] symbols Grammar symbols.
278
+ # @param [Symbol] start Symbol representing a CFG production.
279
+ # @param [Symbol] dest Symbol representing a CFG production.
280
+ # @param [Array<Symbol>] symbols Grammar symbols.
318
281
  #
319
282
  # @return [Boolean] If the destination symbol is reachable from the start symbol after reading *symbols*.
320
283
  def check_reachability(start, dest, symbols)
321
- path_exists = true
322
- cur_state = start
284
+ path_exists = true
285
+ cur_state = start
323
286
 
324
287
  symbols.each do |sym|
325
288
 
@@ -345,27 +308,30 @@ module RLTK # :nodoc:
345
308
  # production can be changed by setting the *precedence* argument
346
309
  # to some terminal symbol.
347
310
  #
348
- # @param [String] expression Right-hand side of a production.
349
- # @param [Symbol] precedence Symbol representing the precedence of this production.
350
- # @param [Proc] action Action to be taken when the production is reduced.
311
+ # @param [String, Symbol] expression Right-hand side of a production.
312
+ # @param [Symbol] precedence Symbol representing the precedence of this production.
313
+ # @param [:array, :splat] arg_type Method to use when passing arguments to the action.
314
+ # @param [Proc] action Action to be taken when the production is reduced.
351
315
  #
352
316
  # @return [void]
353
- def clause(expression, precedence = nil, &action)
317
+ def clause(expression, precedence = nil, arg_type = @default_arg_type, &action)
354
318
  # Use the curr_prec only if it isn't overridden for this
355
319
  # clause.
356
320
  precedence ||= @curr_prec
357
321
 
358
- production = @grammar.clause(expression)
322
+ production, selections = @grammar.clause(expression)
359
323
 
360
324
  # Check to make sure the action's arity matches the number
361
325
  # of symbols on the right-hand side.
362
- if @args == :splat and action.arity != production.rhs.length
363
- raise ParserConstructionException, 'Incorrect number of arguments to action. Action arity must match the number of ' +
364
- 'terminals and non-terminals in the clause.'
326
+ expected_arity = (selections.empty? ? production.rhs.length : selections.length)
327
+ if arg_type == :splat and action.arity != expected_arity
328
+ raise ParserConstructionException,
329
+ "Incorrect number of action parameters. Expected #{expected_arity} but got #{action.arity}." +
330
+ ' Action arity must match the number of terminals and non-terminals in the clause.'
365
331
  end
366
332
 
367
333
  # Add the action to our proc list.
368
- @procs[production.id] = [action, production.rhs.length]
334
+ @procs[production.id] = [ProdProc.new(arg_type, selections, &action), production.rhs.length]
369
335
 
370
336
  # If no precedence is specified use the precedence of the
371
337
  # last terminal in the production.
@@ -382,28 +348,41 @@ module RLTK # :nodoc:
382
348
  @conflicts = nil
383
349
 
384
350
  # Drop the grammar and the grammar'.
385
- @grammar = nil
386
- @grammar_prime = nil
351
+ @grammar = nil
352
+ @grammar_prime = nil
387
353
 
388
354
  # Drop precedence and bookkeeping information.
389
- @cur_lhs = nil
390
- @cur_prec = nil
355
+ @cur_lhs = nil
356
+ @cur_prec = nil
391
357
 
392
- @prec_counts = nil
393
- @production_precs = nil
394
- @token_precs = nil
358
+ @prec_counts = nil
359
+ @production_precs = nil
360
+ @token_precs = nil
395
361
 
396
362
  # Drop the items from each of the states.
397
- @states.each { |state| state.clean }
363
+ each_state { |state| state.clean }
364
+ end
365
+
366
+ # Set the default argument type for the actions associated with
367
+ # clauses. All actions defined after this call will be passed
368
+ # arguments in the way specified here, unless overridden in the
369
+ # call to {Parser.clause}.
370
+ #
371
+ # @param [:array, :splat] type The default argument type.
372
+ #
373
+ # @return [void]
374
+ def default_arg_type(type)
375
+ @default_arg_type = type if type == :array or type == :splat
398
376
  end
377
+ alias :dat :default_arg_type
399
378
 
400
379
  # Adds productions and actions for parsing empty lists.
401
380
  #
402
381
  # @see CFG#build_list_production
403
- def empty_list_production(symbol, list_elements, separator)
404
- @grammar.empty_list(symbol, list_elements, separator)
382
+ def build_list_production(symbol, list_elements, separator = '')
383
+ @grammar.build_list_production(symbol, list_elements, separator)
405
384
  end
406
- alias :empty_list :empty_list_production
385
+ alias :list :build_list_production
407
386
 
408
387
  # This function will print a description of the parser to the
409
388
  # provided IO object.
@@ -418,13 +397,21 @@ module RLTK # :nodoc:
418
397
  io.puts('###############')
419
398
  io.puts
420
399
 
400
+ max_id_length = @grammar.productions(:id).length.to_s.length
401
+
421
402
  # Print the productions.
422
403
  @grammar.productions.each do |sym, productions|
404
+
405
+ max_rhs_length = productions.inject(0) { |m, p| if (len = p.to_s.length) > m then len else m end }
406
+
423
407
  productions.each do |production|
424
- io.print("\tProduction #{production.id}: #{production.to_s}")
408
+ p_string = production.to_s
409
+
410
+ io.print("\tProduction #{sprintf("%#{max_id_length}d", production.id)}: #{p_string}")
425
411
 
426
412
  if (prec = @production_precs[production.id])
427
- io.print(" : (#{prec.first} , #{prec.last})")
413
+ io.print(' ' * (max_rhs_length - p_string.length))
414
+ io.print(" : (#{sprintf("%-5s", prec.first)}, #{prec.last})")
428
415
  end
429
416
 
430
417
  io.puts
@@ -438,11 +425,14 @@ module RLTK # :nodoc:
438
425
  io.puts('##########')
439
426
  io.puts
440
427
 
428
+ max_token_len = @grammar.terms.inject(0) { |m, t| if t.length > m then t.length else m end }
429
+
441
430
  @grammar.terms.sort {|a,b| a.to_s <=> b.to_s }.each do |term|
442
431
  io.print("\t#{term}")
443
432
 
444
433
  if (prec = @token_precs[term])
445
- io.print(" : (#{prec.first}, #{prec.last})")
434
+ io.print(' ' * (max_token_len - term.length))
435
+ io.print(" : (#{sprintf("%-5s", prec.first)}, #{prec.last})")
446
436
  end
447
437
 
448
438
  io.puts
@@ -455,7 +445,7 @@ module RLTK # :nodoc:
455
445
  io.puts('#####################')
456
446
  io.puts
457
447
 
458
- io.puts("\tStart symbol: #{@grammar.start_symbol}")
448
+ io.puts("\tStart symbol: #{@grammar.start_symbol}'")
459
449
  io.puts
460
450
 
461
451
  io.puts("\tTotal number of states: #{@states.length}")
@@ -476,7 +466,7 @@ module RLTK # :nodoc:
476
466
  io.puts('###############')
477
467
  io.puts
478
468
 
479
- @states.each do |state|
469
+ each_state do |state|
480
470
  io.puts("State #{state.id}:")
481
471
  io.puts
482
472
 
@@ -527,18 +517,15 @@ module RLTK # :nodoc:
527
517
  # of states and their actions, and the resolution of conflicts
528
518
  # using lookahead and precedence information.
529
519
  #
530
- # The *opts* hash may contain the following options, which are
531
- # described in more detail in the main documentation:
532
- #
533
- # * :explain - To explain the parser or not.
534
- # * :lookahead - To use lookahead info for conflict resolution.
535
- # * :precedence - To use precedence info for conflict resolution.
536
- # * :use - A file name or object that is used to load/save the parser.
537
- #
538
520
  # No calls to {Parser.production} may appear after the call to
539
521
  # Parser.finalize.
540
522
  #
541
- # @param [Hash{Symbol => Object}] opts Options describing how to finalize the parser.
523
+ # @param [Hash] opts Options describing how to finalize the parser.
524
+ #
525
+ # @option opts [Boolean,String,IO] :explain To explain the parser or not.
526
+ # @option opts [Boolean] :lookahead To use lookahead info for conflict resolution.
527
+ # @option opts [Boolean] :precedence To use precedence info for conflict resolution.
528
+ # @option opts [String,IO] :use A file name or object that is used to load/save the parser.
542
529
  #
543
530
  # @return [void]
544
531
  def finalize(opts = {})
@@ -572,13 +559,14 @@ module RLTK # :nodoc:
572
559
  return self.clean
573
560
  end
574
561
 
575
- # Grab all of the symbols that comprise the grammar (besides
576
- # the start symbol).
562
+ # Grab all of the symbols that comprise the grammar
563
+ # (besides the start symbol).
577
564
  @symbols = @grammar.symbols << :ERROR
578
565
 
579
566
  # Add our starting state to the state list.
580
- start_production = @grammar.production(:start, @grammar.start_symbol.to_s).first
581
- start_state = State.new(@symbols, [start_production.to_item])
567
+ @start_symbol = (@grammar.start_symbol.to_s + '\'').to_sym
568
+ start_production, _ = @grammar.production(@start_symbol, @grammar.start_symbol).first
569
+ start_state = State.new(@symbols, [start_production.to_item])
582
570
 
583
571
  start_state.close(@grammar.productions)
584
572
 
@@ -586,12 +574,10 @@ module RLTK # :nodoc:
586
574
 
587
575
  # Translate the precedence of productions from tokens to
588
576
  # (associativity, precedence) pairs.
589
- @production_precs.each_with_index do |prec, id|
590
- @production_precs[id] = @token_precs[prec]
591
- end
577
+ @production_precs.map! { |prec| @token_precs[prec] }
592
578
 
593
579
  # Build the rest of the transition table.
594
- @states.each do |state|
580
+ each_state do |state|
595
581
  #Transition states.
596
582
  tstates = Hash.new { |h,k| h[k] = State.new(@symbols) }
597
583
 
@@ -622,23 +608,17 @@ module RLTK # :nodoc:
622
608
  # Find the Accept and Reduce actions for this state.
623
609
  state.each do |item|
624
610
  if item.at_end?
625
- if item.lhs == :start
611
+ if item.lhs == @start_symbol
626
612
  state.on(:EOS, Accept.new)
627
613
  else
628
- state.add_reduction(item.id)
614
+ state.add_reduction(@grammar.productions(:id)[item.id])
629
615
  end
630
616
  end
631
617
  end
632
618
  end
633
619
 
634
620
  # Build the production.id -> production.lhs map.
635
- @grammar.productions(:id).to_a.inject(@lh_sides) do |h, pair|
636
- id, production = pair
637
-
638
- h[id] = production.lhs
639
-
640
- h
641
- end
621
+ @grammar.productions(:id).each { |id, production| @lh_sides[id] = production.lhs }
642
622
 
643
623
  # Prune the parsing table for unnecessary reduce actions.
644
624
  self.prune(opts[:lookahead], opts[:precedence])
@@ -667,8 +647,8 @@ module RLTK # :nodoc:
667
647
 
668
648
  # Converts an object into an IO object as appropriate.
669
649
  #
670
- # @param [Object] o Object to be converted into an IO object.
671
- # @param [String] mode String representing the mode to open the IO object in.
650
+ # @param [Object] o Object to be converted into an IO object.
651
+ # @param [String] mode String representing the mode to open the IO object in.
672
652
  #
673
653
  # @return [IO, false] The IO object or false if a conversion wasn't possible.
674
654
  def get_io(o, mode = 'w')
@@ -683,7 +663,20 @@ module RLTK # :nodoc:
683
663
  end
684
664
  end
685
665
 
686
- # @return [CFG] The grammar that can be parsed by this Parser.
666
+ # Iterate over the parser's states.
667
+ #
668
+ # @yieldparam [State] state One of the parser automaton's state objects
669
+ #
670
+ # @return [void]
671
+ def each_state
672
+ current_state = 0
673
+ while current_state < @states.count
674
+ yield @states.at(current_state)
675
+ current_state += 1
676
+ end
677
+ end
678
+
679
+ # @return [CFG] The grammar that can be parsed by this Parser.
687
680
  def grammar
688
681
  @grammar.clone
689
682
  end
@@ -692,7 +685,7 @@ module RLTK # :nodoc:
692
685
  # calculate the LALR(1) lookahead sets. Information about this
693
686
  # grammar and its use can be found in the following paper:
694
687
  #
695
- # Simple Computation of LALR(1) Lookahed Sets
688
+ # Simple Computation of LALR(1) Lookahead Sets
696
689
  # Manuel E. Bermudez and George Logothetis
697
690
  # Information Processing Letters 31 - 1989
698
691
  #
@@ -701,14 +694,14 @@ module RLTK # :nodoc:
701
694
  if not @grammar_prime
702
695
  @grammar_prime = CFG.new
703
696
 
704
- @states.each do |state|
697
+ each_state do |state|
705
698
  state.each do |item|
706
699
  lhs = "#{state.id}_#{item.next_symbol}".to_sym
707
700
 
708
701
  next unless CFG::is_nonterminal?(item.next_symbol) and not @grammar_prime.productions.keys.include?(lhs)
709
702
 
710
703
  @grammar.productions[item.next_symbol].each do |production|
711
- rhs = ""
704
+ rhs = ''
712
705
 
713
706
  cstate = state
714
707
 
@@ -729,9 +722,9 @@ module RLTK # :nodoc:
729
722
 
730
723
  # Inform the parser core that a conflict has been detected.
731
724
  #
732
- # @param [Integer] state_id ID of the state where the conflict was encountered.
733
- # @param [:RR, :SR] type Reduce/Reduce or Shift/Reduce conflict.
734
- # @param [Symbol] sym Symbol that caused the conflict.
725
+ # @param [Integer] state_id ID of the state where the conflict was encountered.
726
+ # @param [:RR, :SR] type Reduce/Reduce or Shift/Reduce conflict.
727
+ # @param [Symbol] sym Symbol that caused the conflict.
735
728
  #
736
729
  # @return [void]
737
730
  def inform_conflict(state_id, type, sym)
@@ -742,7 +735,7 @@ module RLTK # :nodoc:
742
735
  # are left-associative. Subsequent calls to this method will
743
736
  # give their arguments higher precedence.
744
737
  #
745
- # @param [Array<Symbol>] symbols Symbols that are left associative.
738
+ # @param [Array<Symbol>] symbols Symbols that are left associative.
746
739
  #
747
740
  # @return [void]
748
741
  def left(*symbols)
@@ -756,7 +749,7 @@ module RLTK # :nodoc:
756
749
  # This method is used to specify that the symbols in *symbols*
757
750
  # are non-associative.
758
751
  #
759
- # @param [Array<Symbol>] symbols Symbols that are non-associative.
752
+ # @param [Array<Symbol>] symbols Symbols that are non-associative.
760
753
  #
761
754
  # @return [void]
762
755
  def nonassoc(*symbols)
@@ -770,29 +763,28 @@ module RLTK # :nodoc:
770
763
  # Adds productions and actions for parsing nonempty lists.
771
764
  #
772
765
  # @see CFG#build_nonempty_list_production
773
- def nonempty_list_production(symbol, list_elements, separator)
774
- @grammar.nonempty_list(symbol, list_elements, separator)
766
+ def build_nonempty_list_production(symbol, list_elements, separator = '')
767
+ @grammar.build_nonempty_list_production(symbol, list_elements, separator)
775
768
  end
776
- alias :nonempty_list :nonempty_list_production
769
+ alias :nonempty_list :build_nonempty_list_production
777
770
 
778
771
  # This function is where actual parsing takes place. The
779
772
  # _tokens_ argument must be an array of Token objects, the last
780
773
  # of which has type EOS. By default this method will return the
781
- # value computed by the first successful parse tree found. It is
782
- # possible to adjust this behavior using the _opts_ hash as
783
- # follows:
774
+ # value computed by the first successful parse tree found.
784
775
  #
785
- # * :accept - Either :first or :all.
786
- # * :env - The environment in which to evaluate the production actions.
787
- # * :parse_tree - To print parse trees in the DOT language or not.
788
- # * :verbose - To be verbose or not.
789
- #
790
- # Additional information for these options can be found in the
791
- # main documentation.
776
+ # Additional information about the parsing options can be found in
777
+ # the main documentation.
778
+ #
779
+ # @param [Array<Token>] tokens Tokens to be parsed.
780
+ # @param [Hash] opts Options to use when parsing input.
792
781
  #
793
- # @param [Array<Token>] tokens Tokens to be parsed.
782
+ # @option opts [:first, :all] :accept Either :first or :all.
783
+ # @option opts [Object] :env The environment in which to evaluate the production action.
784
+ # @option opts [Boolean,String,IO] :parse_tree To print parse trees in the DOT language or not.
785
+ # @option opts [Boolean,String,IO] :verbose To be verbose or not.
794
786
  #
795
- # @return [Object, Array<Object>] Result or results of parsing the given tokens.
787
+ # @return [Object, Array<Object>] Result or results of parsing the given tokens.
796
788
  def parse(tokens, opts = {})
797
789
  # Get the full options hash.
798
790
  opts = build_parse_opts(opts)
@@ -808,18 +800,18 @@ module RLTK # :nodoc:
808
800
  stack_id = 0
809
801
 
810
802
  # Error mode indicators.
811
- error_mode = false
812
- reduction_guard = false
803
+ error_mode = false
804
+ reduction_guard = false
813
805
 
814
806
  # Our various list of stacks.
815
- accepted = []
816
- moving_on = []
817
- processing = [ParseStack.new(stack_id += 1)]
807
+ accepted = []
808
+ moving_on = []
809
+ processing = [ParseStack.new(stack_id += 1)]
818
810
 
819
811
  # Iterate over the tokens. We don't proceed to the
820
812
  # next token until every stack is done with the
821
813
  # current one.
822
- tokens.each do |token|
814
+ tokens.each_with_index do |token, index|
823
815
  # Check to make sure this token was seen in the
824
816
  # grammar definition.
825
817
  raise BadToken if not @symbols.include?(token.type)
@@ -937,8 +929,12 @@ module RLTK # :nodoc:
937
929
  args, positions = stack.pop(pop_size)
938
930
  opts[:env].set_positions(positions)
939
931
 
932
+ if not production_proc.selections.empty?
933
+ args = args.values_at(*production_proc.selections)
934
+ end
935
+
940
936
  result =
941
- if @args == :array
937
+ if production_proc.arg_type == :array
942
938
  opts[:env].instance_exec(args, &production_proc)
943
939
  else
944
940
  opts[:env].instance_exec(*args, &production_proc)
@@ -994,14 +990,14 @@ module RLTK # :nodoc:
994
990
 
995
991
  v.puts("\n\n") if v
996
992
 
997
- processing = moving_on
998
- moving_on = []
993
+ processing = moving_on
994
+ moving_on = []
999
995
 
1000
996
  # If we don't have any active stacks at this point the
1001
997
  # string isn't in the language.
1002
998
  if opts[:accept] == :first and processing.length == 0
1003
999
  v.close if v and v != $stdout
1004
- raise NotInLanguage
1000
+ raise NotInLanguage.new(tokens[0...index], tokens[index], tokens[index.next..-1])
1005
1001
  end
1006
1002
 
1007
1003
  reduction_guard = false
@@ -1036,21 +1032,28 @@ module RLTK # :nodoc:
1036
1032
  # Parser.clause. A precedence can be associated with this
1037
1033
  # production by setting *precedence* to a terminal symbol.
1038
1034
  #
1039
- # @param [Symbol] symbol Left-hand side of the production.
1040
- # @param [String, nil] expression Right-hand side of the production.
1041
- # @param [Symbol, nil] precedence Symbol representing the precedence of this produciton.
1042
- # @param [Proc] action Action associated with this production.
1035
+ # @param [Symbol] symbol Left-hand side of the production.
1036
+ # @param [String, Symbol, nil] expression Right-hand side of the production.
1037
+ # @param [Symbol, nil] precedence Symbol representing the precedence of this production.
1038
+ # @param [:array, :splat] arg_type Method to use when passing arguments to the action.
1039
+ # @param [Proc] action Action associated with this production.
1043
1040
  #
1044
1041
  # @return [void]
1045
- def production(symbol, expression = nil, precedence = nil, &action)
1042
+ def production(symbol, expression = nil, precedence = nil, arg_type = @default_arg_type, &action)
1046
1043
 
1047
1044
  # Check the symbol.
1048
1045
  if not (symbol.is_a?(Symbol) or symbol.is_a?(String)) or not CFG::is_nonterminal?(symbol)
1049
- riase ParserConstructionException, 'Production symbols must be Strings or Symbols and be in all lowercase.'
1046
+ raise ParserConstructionException, 'Production symbols must be Strings or Symbols and be in all lowercase.'
1050
1047
  end
1051
1048
 
1052
- @grammar.curr_lhs = symbol.to_sym
1053
- @curr_prec = precedence
1049
+ @grammar.curr_lhs = symbol.to_sym
1050
+ @curr_prec = precedence
1051
+
1052
+ orig_dat = nil
1053
+ if arg_type != @default_arg_type
1054
+ orig_dat = @default_arg_type
1055
+ @default_arg_type = arg_type
1056
+ end
1054
1057
 
1055
1058
  if expression
1056
1059
  self.clause(expression, precedence, &action)
@@ -1058,16 +1061,18 @@ module RLTK # :nodoc:
1058
1061
  self.instance_exec(&action)
1059
1062
  end
1060
1063
 
1061
- @grammar.curr_lhs = nil
1062
- @curr_prec = nil
1064
+ @default_arg_type = orig_dat if not orig_dat.nil?
1065
+
1066
+ @grammar.curr_lhs = nil
1067
+ @curr_prec = nil
1063
1068
  end
1064
1069
  alias :p :production
1065
1070
 
1066
1071
  # This method uses lookahead sets and precedence information to
1067
1072
  # resolve conflicts and remove unnecessary reduce actions.
1068
1073
  #
1069
- # @param [Boolean] do_lookahead Prune based on lookahead sets or not.
1070
- # @param [Boolean] do_precedence Prune based on precedence or not.
1074
+ # @param [Boolean] do_lookahead Prune based on lookahead sets or not.
1075
+ # @param [Boolean] do_precedence Prune based on precedence or not.
1071
1076
  #
1072
1077
  # @return [void]
1073
1078
  def prune(do_lookahead, do_precedence)
@@ -1076,7 +1081,7 @@ module RLTK # :nodoc:
1076
1081
  # If both options are false there is no pruning to do.
1077
1082
  return if not (do_lookahead or do_precedence)
1078
1083
 
1079
- @states.each do |state0|
1084
+ each_state do |state0|
1080
1085
 
1081
1086
  #####################
1082
1087
  # Lookahead Pruning #
@@ -1092,23 +1097,27 @@ module RLTK # :nodoc:
1092
1097
  lookahead = Array.new
1093
1098
 
1094
1099
  # Build the lookahead set.
1095
- @states.each do |state1|
1100
+ each_state do |state1|
1096
1101
  if self.check_reachability(state1, state0, production.rhs)
1097
1102
  lookahead |= self.grammar_prime.follow_set("#{state1.id}_#{production.lhs}".to_sym)
1098
1103
  end
1099
1104
  end
1100
1105
 
1101
- # Translate the G' follow symbols into G lookahead
1102
- # symbols.
1103
- lookahead = lookahead.map { |sym| sym.to_s.split('_').last.to_sym }.uniq
1106
+ # Translate the G' follow symbols into G
1107
+ # lookahead symbols.
1108
+ lookahead = lookahead.map { |sym| sym.to_s.split('_', 2).last.to_sym }.uniq
1104
1109
 
1105
1110
  # Here we remove the unnecessary reductions.
1106
1111
  # If there are error productions we need to
1107
1112
  # scale back the amount of pruning done.
1108
- (terms - lookahead).each do |sym|
1109
- if not (terms.include?(:ERROR) and not state0.conflict_on?(sym))
1110
- state0.actions[sym].delete(reduction)
1113
+ pruning_candidates = terms - lookahead
1114
+
1115
+ if terms.include?(:ERROR)
1116
+ pruning_candidates.each do |sym|
1117
+ state0.actions[sym].delete(reduction) if state0.conflict_on?(sym)
1111
1118
  end
1119
+ else
1120
+ pruning_candidates.each { |sym| state0.actions[sym].delete(reduction) }
1112
1121
  end
1113
1122
  end
1114
1123
  end
@@ -1153,8 +1162,8 @@ module RLTK # :nodoc:
1153
1162
  # * The token is left associative and the current action is a Reduce
1154
1163
  # * The token is right associative and the current action is a Shift
1155
1164
  if prec > max_prec or (prec == max_prec and tassoc == (a.is_a?(Shift) ? :right : :left))
1156
- max_prec = prec
1157
- selected_action = a
1165
+ max_prec = prec
1166
+ selected_action = a
1158
1167
 
1159
1168
  elsif prec == max_prec and assoc == :nonassoc
1160
1169
  raise ParserConstructionException, 'Non-associative token found during conflict resolution.'
@@ -1282,23 +1291,23 @@ module RLTK # :nodoc:
1282
1291
 
1283
1292
  # Instantiate a new ParseStack object.
1284
1293
  #
1285
- # @param [Integer] id ID for this parse stack. Used by GLR algorithm.
1286
- # @param [Array<Object>] ostack Output stack. Holds results of {Reduce} and {Shift} actions.
1287
- # @param [Array<Integer>] sstack State stack. Holds states that have been shifted due to {Shift} actions.
1288
- # @param [Array<Integer>] nstack Node stack. Holds dot language IDs for nodes in the parse tree.
1289
- # @param [Array<Array<Integer>>] connections Integer pairs representing edges in the parse tree.
1290
- # @param [Array<Symbol>] labels Labels for nodes in the parse tree.
1291
- # @param [Array<StreamPosition>] positions Position data for symbols that have been shifted.
1294
+ # @param [Integer] id ID for this parse stack. Used by GLR algorithm.
1295
+ # @param [Array<Object>] ostack Output stack. Holds results of {Reduce} and {Shift} actions.
1296
+ # @param [Array<Integer>] sstack State stack. Holds states that have been shifted due to {Shift} actions.
1297
+ # @param [Array<Integer>] nstack Node stack. Holds dot language IDs for nodes in the parse tree.
1298
+ # @param [Array<Array<Integer>>] connections Integer pairs representing edges in the parse tree.
1299
+ # @param [Array<Symbol>] labels Labels for nodes in the parse tree.
1300
+ # @param [Array<StreamPosition>] positions Position data for symbols that have been shifted.
1292
1301
  def initialize(id, ostack = [], sstack = [0], nstack = [], connections = [], labels = [], positions = [])
1293
1302
  @id = id
1294
1303
 
1295
- @node_stack = nstack
1296
- @output_stack = ostack
1297
- @state_stack = sstack
1304
+ @node_stack = nstack
1305
+ @output_stack = ostack
1306
+ @state_stack = sstack
1298
1307
 
1299
- @connections = connections
1300
- @labels = labels
1301
- @positions = positions
1308
+ @connections = connections
1309
+ @labels = labels
1310
+ @positions = positions
1302
1311
  end
1303
1312
 
1304
1313
  # Branch this stack, effectively creating a new copy of its
@@ -1308,8 +1317,24 @@ module RLTK # :nodoc:
1308
1317
  #
1309
1318
  # @return [ParseStack]
1310
1319
  def branch(new_id)
1311
- ParseStack.new(new_id, @output_stack.clone, @state_stack.clone, @node_stack.clone,
1312
- @connections.clone, @labels.clone, @positions.clone)
1320
+ # We have to do a deeper copy of the output stack to avoid
1321
+ # interactions between the Proc objects for the different
1322
+ # parsing paths.
1323
+ #
1324
+ # The begin/rescue block is needed because some classes
1325
+ # respond to `clone` but always raise an error.
1326
+ new_output_stack = @output_stack.map do |o|
1327
+ # Check to see if we can obtain a deep copy.
1328
+ if o.respond_to?(:copy)
1329
+ o.copy
1330
+
1331
+ else
1332
+ begin o.clone rescue o end
1333
+ end
1334
+ end
1335
+
1336
+ ParseStack.new(new_id, new_output_stack, @state_stack.clone,
1337
+ @node_stack.clone, @connections.clone, @labels.clone, @positions.clone)
1313
1338
  end
1314
1339
 
1315
1340
  # @return [StreamPosition] Position data for the last symbol on the stack.
@@ -1347,7 +1372,7 @@ module RLTK # :nodoc:
1347
1372
  #
1348
1373
  # @param [Integer] n Number of objects to pop off the stack.
1349
1374
  #
1350
- # @return [Array<Array<Object, StreamPosition>>] Values popped from the output and positions stacks.
1375
+ # @return [Array(Object, StreamPosition)] Values popped from the output and positions stacks.
1351
1376
  def pop(n = 1)
1352
1377
  @state_stack.pop(n)
1353
1378
 
@@ -1406,27 +1431,27 @@ module RLTK # :nodoc:
1406
1431
  # @return [Integer] State's ID.
1407
1432
  attr_accessor :id
1408
1433
 
1409
- # @return [Array<CFG::Item>] Item objects that comprise this state.
1434
+ # @return [Array<CFG::Item>] Item objects that comprise this state
1410
1435
  attr_reader :items
1411
1436
 
1412
- # @return [Array<Action>] Action objects that represent the actions that should be taken when various inputs are observed.
1437
+ # @return [Hash{Symbol => Array<Action>}] Maps lookahead symbols to actions
1413
1438
  attr_reader :actions
1414
1439
 
1415
1440
  # Instantiate a new State object.
1416
1441
  #
1417
- # @param [Array<Token>] tokens Tokens that represent this state.
1418
- # @param [Array<CFG::Item>] items Items that make up this state.
1442
+ # @param [Array<Symbol>] tokens Tokens that represent this state
1443
+ # @param [Array<CFG::Item>] items Items that make up this state
1419
1444
  def initialize(tokens, items = [])
1420
- @id = nil
1421
- @items = items
1422
- @actions = tokens.inject(Hash.new) { |h, t| h[t] = Array.new; h }
1445
+ @id = nil
1446
+ @items = items
1447
+ @actions = tokens.inject(Hash.new) { |h, t| h[t] = Array.new; h }
1423
1448
  end
1424
1449
 
1425
1450
  # Compare one State to another. Two States are equal if they
1426
1451
  # have the same items or, if the items have been cleaned, if
1427
1452
  # the States have the same ID.
1428
1453
  #
1429
- # @param [State] other Another State to compare to.
1454
+ # @param [State] other Another State to compare to
1430
1455
  #
1431
1456
  # @return [Boolean]
1432
1457
  def ==(other)
@@ -1435,11 +1460,11 @@ module RLTK # :nodoc:
1435
1460
 
1436
1461
  # Add a Reduce action to the state.
1437
1462
  #
1438
- # @param [Integer] production_id ID of production to add to this state.
1463
+ # @param [Production] production Production used to perform the reduction
1439
1464
  #
1440
1465
  # @return [void]
1441
- def add_reduction(production_id)
1442
- action = Reduce.new(production_id)
1466
+ def add_reduction(production)
1467
+ action = Reduce.new(production)
1443
1468
 
1444
1469
  # Reduce actions are not allowed for the ERROR terminal.
1445
1470
  @actions.each { |k, v| if CFG::is_terminal?(k) and k != :ERROR then v << action end }
@@ -1507,7 +1532,11 @@ module RLTK # :nodoc:
1507
1532
  #
1508
1533
  # @return [void]
1509
1534
  def each
1510
- @items.each {|item| yield item}
1535
+ current_item = 0
1536
+ while current_item < @items.count
1537
+ yield @items.at(current_item)
1538
+ current_item += 1
1539
+ end
1511
1540
  end
1512
1541
 
1513
1542
  # Specify an Action to perform when the input token is *symbol*.
@@ -1535,6 +1564,22 @@ module RLTK # :nodoc:
1535
1564
  end
1536
1565
  end
1537
1566
 
1567
+ # A subclass of Proc that indicates how it should be passed arguments
1568
+ # by the parser.
1569
+ class ProdProc < Proc
1570
+ # @return [:array, :splat] Method that should be used to pass arguments to this proc.
1571
+ attr_reader :arg_type
1572
+
1573
+ # @return [Array<Integer>] Mask for selection of tokens to pass to action. Empty mask means pass all.
1574
+ attr_reader :selections
1575
+
1576
+ def initialize(arg_type = :splat, selections = [])
1577
+ super()
1578
+ @arg_type = arg_type
1579
+ @selections = selections
1580
+ end
1581
+ end
1582
+
1538
1583
  # The Action class is used to indicate what action the parser should
1539
1584
  # take given a current state and input token.
1540
1585
  class Action
@@ -1568,9 +1613,17 @@ module RLTK # :nodoc:
1568
1613
  # The Reduce class indicates to the parser that it should reduce the
1569
1614
  # input stack by the rule specified by Reduce.id.
1570
1615
  class Reduce < Action
1616
+
1617
+ # @param [Production] production Production to reduce by
1618
+ def initialize(production)
1619
+ super(production.id)
1620
+
1621
+ @production = production
1622
+ end
1623
+
1571
1624
  # @return [String] String representation of this action.
1572
1625
  def to_s
1573
- "Reduce by Production #{self.id}"
1626
+ "Reduce by Production #{self.id} : #{@production}"
1574
1627
  end
1575
1628
  end
1576
1629