redparse 0.8.4 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -0
  2. data/COPYING.LGPL +503 -158
  3. data/History.txt +192 -0
  4. data/Makefile +9 -0
  5. data/README.txt +72 -39
  6. data/bin/redparse +108 -14
  7. data/lib/miniredparse.rb +1543 -0
  8. data/lib/redparse.rb +971 -105
  9. data/lib/redparse/ReduceWithsFor_RedParse_1_8.rb +17412 -0
  10. data/lib/redparse/ReduceWithsFor_RedParse_1_9.rb +17633 -0
  11. data/lib/redparse/babynodes.rb +17 -0
  12. data/lib/redparse/babyparser.rb +17 -0
  13. data/lib/redparse/cache.rb +290 -6
  14. data/lib/redparse/compile.rb +6 -97
  15. data/lib/redparse/decisiontree.rb +1 -1
  16. data/lib/redparse/float_accurate_to_s.rb +30 -6
  17. data/lib/redparse/generate.rb +18 -0
  18. data/lib/redparse/node.rb +415 -124
  19. data/lib/redparse/parse_tree_server.rb +20 -2
  20. data/lib/redparse/problemfiles.rb +1 -1
  21. data/lib/redparse/pthelper.rb +17 -31
  22. data/lib/redparse/reg_more_sugar.rb +1 -1
  23. data/lib/redparse/replacing/parse_tree.rb +30 -0
  24. data/lib/redparse/replacing/ripper.rb +20 -0
  25. data/lib/redparse/replacing/ruby_parser.rb +28 -0
  26. data/lib/redparse/ripper.rb +393 -0
  27. data/lib/redparse/ripper_sexp.rb +153 -0
  28. data/lib/redparse/stackableclasses.rb +113 -0
  29. data/lib/redparse/version.rb +18 -1
  30. data/redparse.gemspec +29 -9
  31. data/rplt.txt +31 -0
  32. data/test/data/hd_with_blank_string.rb +3 -0
  33. data/test/data/pt_known_output.rb +13273 -0
  34. data/test/data/wp.pp +0 -0
  35. data/test/generate_parse_tree_server_rc.rb +17 -0
  36. data/test/rp-locatetest.rb +2 -2
  37. data/test/test_1.9.rb +338 -35
  38. data/test/test_all.rb +22 -3
  39. data/test/test_part.rb +32 -0
  40. data/test/test_redparse.rb +396 -74
  41. data/test/test_xform_tree.rb +18 -0
  42. data/test/unparse_1.9_exceptions.txt +85 -0
  43. data/test/unparse_1.9_exceptions.txt.old +81 -0
  44. metadata +71 -46
  45. data/Rakefile +0 -35
@@ -1,6 +1,6 @@
1
1
  =begin
2
2
  redparse - a ruby parser written in ruby
3
- Copyright (C) 2008,2009 Caleb Clausen
3
+ Copyright (C) 2008,2009, 2012, 2016 Caleb Clausen
4
4
 
5
5
  This program is free software: you can redistribute it and/or modify
6
6
  it under the terms of the GNU Lesser General Public License as published by
@@ -20,6 +20,8 @@
20
20
 
21
21
  require 'forwardable'
22
22
 
23
+ require 'digest/sha2'
24
+
23
25
  begin
24
26
  require 'rubygems'
25
27
  rescue LoadError=>e
@@ -33,14 +35,39 @@ require 'reglookab'
33
35
  require "redparse/node"
34
36
  #require "redparse/decisiontree"
35
37
  require "redparse/reg_more_sugar"
36
- require "redparse/generate"
38
+ #require "redparse/generate"
37
39
  require "redparse/cache"
38
40
  #require "redparse/compile"
39
41
 
40
42
  class RedParse
43
+
44
+
45
+
46
+ alias :dump :inspect # preserve old inspect functionality
41
47
 
42
- ####### generic stuff for parsing any(?) language
48
+ # irb friendly #inspect/#to_s
49
+ def to_s
50
+ mods=class<<self; ancestors; end.reject{|k| !k.name }-self.class.ancestors
51
+ mods=mods.map{|mod| mod.name }.join('+')
52
+ mods="+"<<mods unless mods.empty?
53
+ input=@input||@lexer.input
54
+ "#<#{self.class.name}#{mods}: [#{input.inspect}]>"
55
+ end
56
+
57
+ alias :inspect :to_s
58
+
59
+ def pretty_stack max=nil
60
+ target=@stack
61
+ target=target[-max..-1] if max and max<target.size
62
+
63
+ target.map{|n|
64
+ res=n.inspect
65
+ res<<"\n" unless res[-1]=="\n"
66
+ res
67
+ }
68
+ end
43
69
 
70
+ ####### generic stuff for parsing any(?) language
44
71
  # include Nodes
45
72
  class StackMonkey
46
73
  def initialize(name,first_changed_index,and_expect_node,options={},&monkey_code)
@@ -119,12 +146,16 @@ end
119
146
  #index of data at which to start matching
120
147
  i=@stack.size-1 #-1 because last element of @stack is always lookahead
121
148
 
149
+ =begin was, but now done by expanded_RULES
122
150
  #I could call this a JIT compiler, but that's a bit grandiose....
123
151
  #more of a JIT pre-processor
124
152
  compiled_rule=@compiled_rules[rule]||=
125
153
  rule.map{|pattern|
126
154
  String|Regexp===pattern ? KW(pattern) : pattern
127
155
  }
156
+ =end
157
+ assert(rule.grep(String|Regexp|Reg::Subseq|Reg::LookAhead|Reg::LookBack|Proc).empty?)
158
+ compiled_rule=rule
128
159
 
129
160
  #what's the minimum @stack size this rule could match?
130
161
  rule_min_size=@min_sizes[compiled_rule]||=
@@ -198,12 +229,417 @@ end
198
229
  return true #let caller know we found a match
199
230
 
200
231
 
201
- rescue Exception=>e
232
+ rescue Exception #=>e
202
233
  #puts "error (#{e}) while executing rule: #{rule.inspect}"
203
234
  #puts e.backtrace.join("\n")
204
235
  raise
205
236
  end
206
237
 
238
+
239
+ def coalesce_loop(klass=nil,ident=nil,klass2=nil,ident2=nil)
240
+ eligible=rules.reverse.map!{|rule| can_coalesce?(rule,klass,ident,klass2,ident2)&&rule }
241
+ i=rules.size
242
+ eligible.map!{|rule|
243
+ i-=1
244
+ next unless rule
245
+ if @size_cache
246
+ @size_cache[[i,rule.right]]||=1
247
+ @size_cache[[i,rule.right]]+=1
248
+ end
249
+ coalesce rule, i, klass,ident,klass2,ident2
250
+ }
251
+ eligible.compact!
252
+ @size_cache[klass2 ? [klass,ident,klass2,ident2] : ident ? ident : klass]= eligible.size if @size_cache
253
+
254
+ @empty_reduce_withs+=1 if defined? @empty_reduce_withs and eligible.size.zero?
255
+
256
+ return eligible
257
+ end
258
+
259
+ def can_coalesce? rule,klass=nil,ident=nil,klass2=nil,ident2=nil
260
+ Reg::Transform===rule or fail
261
+ node_type= rule.right
262
+ rule=rule.left.subregs.dup
263
+ rule.pop if Proc|::Reg::LookAhead===rule.last
264
+ rule[0]=rule[0].subregs[0] if ::Reg::LookBack===rule[0]
265
+
266
+ =begin was, but now done by expanded_RULES
267
+ #I could call this a JIT compiler, but that's a bit grandiose....
268
+ #more of a JIT pre-processor
269
+ compiled_rule=@compiled_rules[rule]||=
270
+ rule.map{|pattern|
271
+ String|Regexp===pattern ? KW(pattern) : pattern
272
+ }
273
+ =end
274
+ assert(rule.grep(String|Regexp|Reg::Subseq|Reg::LookAhead|Reg::LookBack|Proc).empty?)
275
+
276
+
277
+ return false if klass && !can_combine?(rule,klass,ident)
278
+ return false if klass2 && !can_combine2?(rule,klass2,ident2,-2)
279
+ warn "plain lit matches #{node_type}" if klass==LiteralNode and klass2.nil?
280
+ return true
281
+ end
282
+
283
+ def coalesce rule,rulenum,klass=nil,ident=nil,klass2=nil,ident2=nil
284
+ #last 4 params aren't actually neeeded anymore
285
+
286
+ @coalesce_result||=[]
287
+ result=@coalesce_result[rulenum]
288
+ return result if result
289
+
290
+ #dissect the rule
291
+ Reg::Transform===rule or fail
292
+ node_type= rule.right
293
+ rule=rule.left.subregs.dup
294
+ lookahead_processor=(rule.pop if Proc|::Reg::LookAhead===rule.last)
295
+ lookback=rule[0]=rule[0].subregs[0] if ::Reg::LookBack===rule[0]
296
+
297
+ assert @rules[rulenum].right==node_type
298
+
299
+ if klass==VarNode and klass2==KeywordToken
300
+ #warn "can_combine2? about to fail"
301
+ end
302
+
303
+ needends=0
304
+ result=["\n##{mui node_type}\n"]
305
+
306
+ #index of data at which to start matching
307
+ result<<"i=@stack.size-1 ##{mui node_type}\n#-1 because last element of @stack is always lookahead\n"
308
+
309
+ =begin was, but now done by expanded_RULES
310
+ #I could call this a JIT compiler, but that's a bit grandiose....
311
+ #more of a JIT pre-processor
312
+ compiled_rule=@compiled_rules[rule]||=
313
+ rule.map{|pattern|
314
+ String|Regexp===pattern ? KW(pattern) : pattern
315
+ }
316
+ =end
317
+ assert(rule.grep(String|Regexp|Reg::Subseq|Reg::LookAhead|Reg::LookBack|Proc).empty?)
318
+ compiled_rule=rule
319
+
320
+ return if klass && !can_combine?(compiled_rule,klass,ident) #should never happen
321
+ return if klass2 && !can_combine2?(compiled_rule,klass2,ident2,-2) #should never happen
322
+
323
+ #what's the minimum @stack size this rule could match?
324
+ rule_min_size=@min_sizes[compiled_rule]||=
325
+ compiled_rule.inject(0){|sum,pattern|
326
+ sum + pattern.itemrange.begin
327
+ }
328
+ if rule_min_size > 1
329
+ needends+=1
330
+ result<<"if i>=#{rule_min_size}\n"
331
+ min_i=rule_min_size
332
+ end
333
+ #@@has_loop||=[]
334
+ #@@has_optional||=[]
335
+ has_loop=#@@has_loop[rulenum]||=
336
+ compiled_rule.find{|x| x.itemrange.last.to_f.infinite? }
337
+ has_optional=#@@has_optional[rulenum]||=
338
+ compiled_rule.find{|x| x.itemrange.first.zero? }
339
+
340
+ if Class===node_type and has_loop||has_optional
341
+ result<<"matching=[]\n"
342
+ need_matching=true
343
+ end
344
+
345
+ j=compiled_rule.size
346
+ #actually try to match rule elements against each @stack element in turn
347
+ first1=true
348
+ compiled_rule.reverse_each{|matcher|
349
+ j-=1
350
+ result<<"i.zero? and fail\n" unless min_i && min_i>0 or first1
351
+ first1=false
352
+ #is this matcher optional? looping?
353
+ maximum= matcher.itemrange.last
354
+ minimum= matcher.itemrange.first
355
+ loop= maximum.to_f.infinite?
356
+ optional=minimum.zero?
357
+ fail "looping matcher with finite maximum not supported" if maximum>1 and !loop
358
+ if need_matching
359
+ success="matching.unshift item"
360
+ loopsuccess="target.unshift item"
361
+ optfail="matching.unshift nil"
362
+
363
+ result<<"matching.unshift target=[]\n" if loop
364
+ end
365
+ is_lookback=matcher .equal? lookback
366
+ if loop or optional
367
+ matcher=matcher.subregs[0]
368
+ fail "lookback is not a scalar" if is_lookback
369
+ end
370
+
371
+ itemget="@stack[i-=1]"
372
+ itemget="(item=#{itemget})" if success
373
+ test="#{ref_to matcher,rulenum,j}===#{itemget} #try match of #{mui matcher}"
374
+ p [:misparse_start, matcher] if node_type===MisparsedNode and j.zero?
375
+ matcher= ~ (matcher.subregs[0]|NilClass) if Reg::Not===matcher
376
+ if matcher===nil and j.zero?
377
+ warn "rule ##{rulenum}(>>#{node_type}) can match nil at start; might match emptiness before start of stack"
378
+ end
379
+ if !loop
380
+ fail unless maximum==1
381
+ min_i-=1 if min_i
382
+ result<<<<-END
383
+ if #{test}
384
+ #{success if !is_lookback}
385
+ END
386
+ optional ? result<<<<-END : needends+=1
387
+ else
388
+ #ignore optional match fail
389
+ #but bump the data position back up, since the latest datum
390
+ #didn't actually match anything.
391
+ i+=1
392
+ #{optfail}
393
+ end
394
+ END
395
+ else
396
+ min_i=nil
397
+ if minimum<10
398
+ needends+=minimum
399
+ result<<<<-END*minimum
400
+ if #{test}
401
+ #{loopsuccess}
402
+ END
403
+ result<<<<-END
404
+ while #{test}
405
+ #{loopsuccess}
406
+ end
407
+ #but bump the data position back up, since the latest datum
408
+ #didn't actually match anything.
409
+ i+=1
410
+ END
411
+ else
412
+ needends+=1
413
+ result<<<<-END
414
+ #{"n=#{minimum}" unless need_matching}
415
+ while #{test}
416
+ #{loopsuccess || "n-=1"}
417
+ end
418
+ if #{need_matching ? "target.size>=minimum" : "n<=0"} then
419
+ #but bump the data position back up, since the latest datum
420
+ #didn't actually match anything.
421
+ i+=1
422
+ END
423
+ end
424
+
425
+ end
426
+ }
427
+
428
+ #give lookahead matcher (if any) a chance to fail the match
429
+ result<<case lookahead_processor
430
+ when ::Reg::LookAhead
431
+ action_idx=compiled_rule.size+1
432
+ needends+=1
433
+ "if #{ref_to lookahead_processor.subregs[0],rulenum,compiled_rule.size}===@stack.last ##{mui lookahead_processor.subregs[0] }\n"
434
+ when Proc
435
+ action_idx=compiled_rule.size+1
436
+ needends+=1
437
+ "if #{ref_to lookahead_processor,rulenum,compiled_rule.size}[self,@stack.last] ##{mui lookahead_processor}\n"
438
+ else ''
439
+ end
440
+
441
+ #if there was a lookback item, don't include it in the matched set
442
+ #result<<"matching.shift\n" if lookback and need_matching
443
+
444
+ need_return=true
445
+
446
+ #replace matching elements in @stack with node type found
447
+ result<<
448
+ case node_type
449
+ when Class
450
+ #if there was a lookback item, don't include it in the new node
451
+ <<-END
452
+ #{"i+=1" if lookback}
453
+ matchrange= i...-1 #what elems in @stack were matched?
454
+ #{"matching=@stack.slice! matchrange" unless need_matching}
455
+ node=#{ref_to node_type,rulenum,action_idx||rule.size}.create(*matching) ##{mui node_type}
456
+ node.startline||=#{need_matching ? "@stack[i]" : "matching.first"}.startline
457
+ node.endline=@endline
458
+ #{need_matching ? "@stack[matchrange]=[node]" : "@stack.insert i,node" }
459
+ END
460
+ when Proc,StackMonkey; ref_to(node_type,rulenum,action_idx||rule.size)+"[@stack] ##{mui node_type}\n"
461
+ when :shift; need_return=false; "return 0\n"
462
+ when :accept,:error; need_return=false; "throw :ParserDone\n"
463
+ else fail
464
+ end
465
+
466
+ result<<"return true #let caller know we found a match\n" if need_return
467
+ result<<"end;"*needends
468
+ result<<"\n"
469
+
470
+ return @coalesce_result[rulenum]=result
471
+ rescue Exception #=>e
472
+ #puts "error (#{e}) while executing rule: #{rule.inspect}"
473
+ #puts e.backtrace.join("\n")
474
+ raise
475
+ end
476
+
477
+ @@ref_to_cache={}
478
+ @@ref_to_cache_by_id={}
479
+ @@ref_to_idx=-1
480
+ def ref_to obj,i,j
481
+ assert j<=0x3FF
482
+ if Module===obj and obj.name
483
+ return obj.name
484
+ elsif ref=@@ref_to_cache_by_id[obj.__id__] || @@ref_to_cache[(i<<10)+j]
485
+ return ref
486
+ else
487
+ @@ref_to_rules||=
488
+ rules.map{|rule|
489
+ rule.left.subregs.map{|pat|
490
+ case pat
491
+ when String,Regexp #not needed anymore...?
492
+ RedParse::KW(pat)
493
+ when Reg::LookBack,Reg::LookAhead,Reg::Repeat #Reg::Repeat should be handled already by now
494
+ pat.subregs[0]
495
+ #subseqs handled already
496
+ else pat
497
+ end
498
+ }<<rule.right
499
+ }
500
+
501
+ @ref_to_code||=[]
502
+ name="@@ref_#{@@ref_to_idx+=1}"
503
+ #eval "#{name}=obj"
504
+ unless @@ref_to_rules[i][j]==obj
505
+ warn "ref_to mismatch"
506
+ end
507
+ @ref_to_code<<"#{name}=rules[#{i}][#{j}]"
508
+ @ref_to_code<<"warn_unless_equal #@@ref_to_idx,mui(#{name}),#{squote mui( obj )}"
509
+ @@ref_to_cache[(i<<10)+j]=name
510
+ @@ref_to_cache_by_id[obj.__id__]=name
511
+ end
512
+ end
513
+
514
+ module ReduceWithUtils
515
+ #a version of inspect that is especially likely to be stable;
516
+ #no embedded addresses and ivar order is always the same
517
+ def matcher_unique_inspect(m)
518
+ result=m.inspect
519
+ return result unless /\A#<[A-Z]/===result
520
+ "#<#{m.class}: "+
521
+ m.instance_variables.sort.map{|iv|
522
+ val=m.instance_variable_get(iv).inspect
523
+ val.gsub!(/#<(Proc|(?:Stack|Delete)Monkey):(?:0[xX])?[0-9a-fA-F]+/){ "#<#$1:" }
524
+ iv.to_s+"="+val
525
+ }.join(" ")+">"
526
+ end
527
+ alias mui matcher_unique_inspect
528
+
529
+ def squote(str)
530
+ "'#{str.gsub(/['\\]/){|ch| %[\\]+ch }}'"
531
+ end
532
+
533
+ @@unequal_parser_ref_vars=0
534
+ @@line_mismatch_parser_ref_vars=0
535
+ def warn_unless_equal i,ref,orig
536
+ return if ref==orig
537
+ msg="expected @ref_#{i} to == #{squote orig}, saw #{squote ref}"
538
+ ref=ref.gsub(/\.rb:\d+>/,".rb:X>")
539
+ orig=orig.gsub(/\.rb:\d+>/,".rb:X>")
540
+ count=
541
+ if ref==orig
542
+ msg="@ref_#{i} differed in line nums"
543
+ warn "more @ref_ vars differed in line nums..." if @@line_mismatch_parser_ref_vars==1
544
+ @@line_mismatch_parser_ref_vars+=1
545
+ else
546
+ @@unequal_parser_ref_vars+=1
547
+ end
548
+ warn msg if 1==count
549
+ end
550
+ end
551
+ include ReduceWithUtils
552
+
553
+ def classes_matched_by(matcher)
554
+ result=[]
555
+ worklist=[matcher]
556
+ begin
557
+ case x=worklist.shift
558
+ when Reg::And,Reg::Or; worklist.concat x.subregs
559
+ when Class; result<<x
560
+ end
561
+ end until worklist.empty?
562
+ return [Object] if result.empty?
563
+ return result
564
+ end
565
+
566
+
567
+
568
+ def can_combine? rule,klass,ident
569
+ rule.reverse_each{|matcher|
570
+ if Reg::Repeat===matcher
571
+ optional= matcher.times.first==0
572
+ matcher=matcher.subregs[0]
573
+ end
574
+ if ident
575
+ return true if matcher===klass.new(ident)
576
+ optional ? next : break
577
+ end
578
+
579
+ =begin was
580
+ orlist= Reg::Or===matcher ? matcher.subregs : [matcher]
581
+ orlist.map!{|m|
582
+ classes=(Reg::And===m ? m.subregs : [m]).grep(Class)
583
+ case classes.size
584
+ when 0; return true
585
+ when 1
586
+ else warn "multiple classes in matcher #{matcher}"
587
+ end
588
+ classes if classes.all?{|k| klass<=k }
589
+ }
590
+ return true if orlist.compact.flatten[0]
591
+ =end
592
+ return true if classes_matched_by(matcher).any?{|k| klass<=k }
593
+
594
+ break unless optional
595
+ }
596
+ return false
597
+ end
598
+
599
+ def can_combine2? rule,klass,ident,index=-1
600
+ #very similar to can_combine?, just above
601
+ #i think can_combine2? with 3 params is equiv to can_combine?
602
+ #so, the two should be merged
603
+ index=-index
604
+ rule_max_size=rule.inject(0){|sum,pattern|
605
+ sum + pattern.itemrange.end
606
+ }
607
+ return true if rule_max_size<index
608
+ min=max=0
609
+ rule.reverse_each{|matcher|
610
+ break if index<min
611
+ if Reg::Repeat===matcher
612
+ #optional= matcher.times.first==0
613
+ min+=matcher.times.first
614
+ max+=matcher.times.last
615
+ matcher=matcher.subregs[0]
616
+ else
617
+ min+=1
618
+ max+=1
619
+ end
620
+ next if index>max
621
+ if ident
622
+ return true if matcher===klass.new(ident)
623
+ next #was: optional ? next : break
624
+ end
625
+ =begin was
626
+ orlist= Reg::Or===matcher ? matcher.subregs : [matcher]
627
+ orlist.map!{|m|
628
+ classes=(Reg::And===m ? m.subregs : [m]).grep(Class)
629
+ case classes.size
630
+ when 0; return true
631
+ when 1
632
+ else warn "multiple classes in matcher #{matcher}: #{classes.inspect}"
633
+ end
634
+ classes if classes.all?{|k| klass<=k }
635
+ }
636
+ return true if orlist.compact.flatten[0]
637
+ =end
638
+ return true if classes_matched_by(matcher).any?{|k| klass<=k }
639
+ }
640
+ return false
641
+ end
642
+
207
643
  class ParseError<RuntimeError
208
644
  def initialize(msg,stack)
209
645
  super(msg)
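
The hunk above replaces the generic rule interpreter with code generation: can_coalesce?/coalesce turn each declarative rule into a string of Ruby that tests the stack directly and builds the node, and ref_to pins matcher objects into class variables so that generated source can refer to them. A minimal sketch of the rule-to-source idea, using made-up names and plain classes in place of Reg matchers:

    class ToyRuleCompiler
      # rule: array of classes to match, deepest stack element first;
      # node_class_name: name of the node class the reduction builds.
      def compile(rule, node_class_name)
        checks = rule.each_with_index.map { |klass, j|
          "#{klass} === @stack[i + #{j}]"
        }.join(" &&\n           ")
        <<-GENERATED
          # generated reducer for #{node_class_name}
          i = @stack.size - 1 - #{rule.size}  # -1: last stack slot is the lookahead
          if i >= 0 &&
             #{checks}
            matched = @stack.slice!(i, #{rule.size})
            @stack.insert(i, #{node_class_name}.new(*matched))
            return true
          end
          false
        GENERATED
      end
    end

    # ToyRuleCompiler.new.compile([Integer, String], "PairNode") returns Ruby
    # source that could later be eval'd into a specialized reduce_* method.
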
@@ -234,7 +670,7 @@ end
234
670
  end
235
671
 
236
672
  #try all possible reductions
237
- def reduce
673
+ def old_slow_reduce
238
674
  shift=nil
239
675
  @rules.reverse_each{|rule|
240
676
  shift=evaluate(rule) and break
@@ -242,20 +678,279 @@ end
242
678
  return shift
243
679
  end
244
680
 
245
- def parse
681
+ HASHED_REDUCER=!ENV['REDUCE_INTERPRETER']
246
682
 
247
- #hack, so StringToken can know what parser its called from
248
- #so it can use it to parse inclusions
249
- oldparser=Thread.current[:$RedParse_parser]
250
- Thread.current[:$RedParse_parser]||=self
683
+ @@rules_compile_cache={}
251
684
 
252
- return @cached_result if defined? @cached_result
685
+ #try all possible reductions
686
+ def reduce
687
+ code=@@rules_compile_cache[class<<self; ancestors end.reject{|k| !k.name}<<@rubyversion]||=coalesce_loop().join
688
+ code= <<-END
689
+ class RedParse
690
+ def (Thread.current['$RedParse_instance']).reduce
691
+ #{code}
692
+ return nil
693
+ end
694
+ end
695
+ END
253
696
 
254
- @rules||=expanded_RULES()
255
- # @inputs||=enumerate_exemplars
697
+ f=Tempfile.new("reduce")
698
+ Thread.current['$RedParse_instance']=self
699
+ p [:code_hash, code.hash]
700
+ f.write code
701
+ f.flush
702
+ load f.path
256
703
 
257
- @stack=[StartToken.new, get_token]
258
- #last token on @stack is always implicitly the lookahead
704
+ reduce
705
+ ensure f.close if f
706
+ end if !HASHED_REDUCER
707
+
708
+
709
+ # include StackableClasses
710
+
711
+ Punc2name={
712
+ "("=>"lparen", ")"=>"rparen",
713
+ "["=>"lbracket", "]"=>"rbracket",
714
+ "{"=>"lbrace", "}"=>"rbrace",
715
+ ","=>"comma",
716
+ ";"=>"semicolon",
717
+ "::"=>"double_colon",
718
+ "."=>"dot",
719
+ "?"=>"question_mark", ":"=>"colon",
720
+ "="=>"equals",
721
+ "|"=>"pipe",
722
+ "<<"=>"leftleft", ">>"=>"rightright",
723
+ "=>"=>"arrow",
724
+ "->"=>"stabby",
725
+ "rhs,"=>"rhs_comma",
726
+ "lhs,"=>"lhs_comma",
727
+ "||="=>"or_equals",
728
+ "&&="=>"and_equals",
729
+ }
730
+
731
+
732
+ RUBYUNOPERATORS=::RubyLexer::RUBYUNOPERATORS
733
+ RUBYBINOPERATORS=::RubyLexer::RUBYBINOPERATORS
734
+ RUBYSYMOPERATORS=::RubyLexer::RUBYSYMOPERATORS
735
+ RUBYNONSYMOPERATORS=::RubyLexer::RUBYNONSYMOPERATORS
736
+ OPERATORS=RUBYUNOPERATORS-%w[~@ !@]+RUBYBINOPERATORS+RUBYNONSYMOPERATORS+
737
+ %w[while until if unless rescue and or not unary* unary& rescue3 lhs* rhs*]
738
+ OPERATORS.uniq!
739
+ RUBYKEYWORDLIST=(
740
+ RubyLexer::RUBYKEYWORDLIST+Punc2name.keys+
741
+ RUBYSYMOPERATORS+RUBYNONSYMOPERATORS
742
+ ).uniq
743
+
744
+ def rubyoperatorlist; OPERATORS end
745
+ def rubykeywordlist; RUBYKEYWORDLIST end
746
+
747
+ class KeywordToken
748
+ def reducer_method(stack)
749
+ :"reduce_with_tos_KeywordToken_#@ident"
750
+ end
751
+ def reducer_ident
752
+ :"KeywordToken_#@ident"
753
+ end
754
+ end
755
+
756
+ class OperatorToken
757
+ def reducer_ident
758
+ :"OperatorToken_#@ident"
759
+ end
760
+ end
761
+
762
+ class ValueNode
763
+ def reducer_method(stack)
764
+ :"reduce_with_tos_#{stack[-3].reducer_ident}_then_#{reducer_ident}"
765
+ end
766
+ end
767
+
768
+ def parser_identity
769
+ #what is the relationship between this method and #signature?
770
+ #can the two be combined?
771
+ result=class<<self; ancestors end.reject{|k| !k.name}
772
+ result.reject!{|k| !!((::RedParse<k)..false) }
773
+ result.reject!{|k| k.name[/^(?:RedParse::)?ReduceWiths/] }
774
+ result.reverse!
775
+ result.push @rubyversion
776
+ #@rubyversion in identity is a hack; should have RedParse1_9 module instead
777
+ end
778
+
779
+ def code_for_reduce_with ident, code
780
+ code=coalesce_loop(*code) if Array===code
781
+ ident.gsub!(/[\\']/){|x| "\\"+x}
782
+ code=code.join
783
+ @reduce_with_defns+=1
784
+ if name=@reduce_with_cache[code]
785
+ @reduce_with_aliases+=1
786
+ "alias :'reduce_with_tos_#{ident}' :'#{name}'\n"
787
+ else
788
+ @reduce_with_cache[code]=name="reduce_with_tos_#{ident}"
789
+ ["define_method('", name ,"') do\n", code ,"\nnil\nend\n"]
790
+ end
791
+ end
792
+
793
+ def addl_node_containers; [] end
794
+
795
+ def write_reduce_withs path=nil
796
+ return unless HASHED_REDUCER
797
+ start=Time.now
798
+ @size_cache={}
799
+ identity=parser_identity
800
+ @reduce_with_cache={}
801
+ @reduce_with_aliases=0
802
+ @empty_reduce_withs=@reduce_with_defns=0
803
+
804
+ expanded_RULES()
805
+ shortnames=[] #[[],[]]
806
+ list=[self.class,*addl_node_containers].map{|mod|
807
+ mod.constants.select{|k|
808
+ /(?:Node|Token)$/===k.to_s
809
+ }.map{|k|
810
+ mod.const_get k
811
+ }
812
+ }.flatten.grep(Class).uniq
813
+ #list=STACKABLE_CLASSES()
814
+ list -= [KeywordToken,ImplicitParamListStartToken,ImplicitParamListEndToken,
815
+ Token,WToken,NewlineToken,DecoratorToken,Node,ValueNode]
816
+ list.reject!{|x| IgnoreToken>=x and not /(^|:)AssignmentRhs/===x.name}
817
+ exprclasses,list=list.partition{|k| k<=ValueNode }
818
+ fail unless list.include? StartToken
819
+ indexcode=list.map{|klass|
820
+ shortname=klass.to_s[/[^:]+$/]
821
+ warn "empty reducer_ident for ::#{klass}" if shortname.empty?
822
+ <<-END
823
+ class ::#{klass}
824
+ def reducer_method(stack)
825
+ :reduce_with_tos_#{shortname}
826
+ end if instance_methods(false).&(["reducer_method",:reducer_method]).empty?
827
+ def reducer_ident
828
+ :#{shortname}
829
+ end if instance_methods(false).&(["reducer_ident",:reducer_ident]).empty?
830
+ end
831
+ END
832
+ }.concat(exprclasses.map{|exprclass|
833
+ shec=exprclass.name[/[^:]+$/]
834
+ warn "empty reducer_ident for ::#{exprclass}" if shec.empty?
835
+ <<-END
836
+ class ::#{exprclass}
837
+ def reducer_ident
838
+ :#{shec}
839
+ end if instance_methods(false).&(["reducer_ident",:reducer_ident]).empty?
840
+ end
841
+ END
842
+ })
843
+ ruby=["#Copyright (C) #{Time.now.year} #{ENV['COPYRIGHT_OWNER']||'Caleb Clausen'}\n"+
844
+ "#Generated with ruby v#{RUBY_VERSION}\n"
845
+ ].concat list.map{|klass|
846
+ shortname=klass.to_s[/[^:]+$/]
847
+ shortnames<<[shortname,klass,nil]
848
+ code_for_reduce_with( shortname, [klass] )
849
+ }.concat(rubykeywordlist.map{|kw|
850
+ shortname="KeywordToken_#{kw}"
851
+ shortnames<<[shortname,KeywordToken,kw]
852
+ code_for_reduce_with( shortname, [KeywordToken, kw] )
853
+ }).concat({ImplicitParamListStartToken=>'(',ImplicitParamListEndToken=>')'}.map{|(k,v)|
854
+ shortnames<<[k.name,k,v]
855
+ code_for_reduce_with k.name, [k,v]
856
+ })
857
+ shortnames.delete ["OperatorToken",OperatorToken,nil]
858
+ record=shortnames.dup
859
+ ruby.concat(exprclasses.map{|exprclass|
860
+ shec=exprclass.name[/[^:]+$/]
861
+ shortnames.map{|(sn,snclass,snparam)|
862
+ warn "empty shortname for #{snclass}" if sn.empty?
863
+ record<<["#{sn}_then_#{shec}", exprclass, nil, snclass, snparam]
864
+ code_for_reduce_with "#{sn}_then_#{shec}", [exprclass, nil, snclass, snparam]
865
+ }
866
+ })
867
+ ruby.concat(exprclasses.map{|exprclass|
868
+ shec=exprclass.name[/[^:]+$/]
869
+ rubyoperatorlist.map{|op|
870
+ record<<["OperatorToken_#{op}_then_#{shec}", exprclass, nil, OperatorToken, op]
871
+ code_for_reduce_with "OperatorToken_#{op}_then_#{shec}", [exprclass, nil, OperatorToken, op]
872
+ }
873
+ }).concat([LiteralNode,VarNode].map{|k|
874
+ shec=k.name[/[^:]+$/]
875
+ record<<["#{shec}_then_#{shec}", k, nil, k, nil]
876
+ code_for_reduce_with "#{shec}_then_#{shec}", [k, nil, k, nil]
877
+ })
878
+
879
+ modname="ReduceWithsFor_#{parser_identity.join('_').tr(':.','_')}"
880
+
881
+ size_cache,rule_popularity=@size_cache.partition{|((i,action),size)| Integer===i }
882
+
883
+ ruby.unshift [<<-END,@ref_to_code.join("\n"),<<-END2]
884
+ #number of coalescences: #{size_cache.size}
885
+ #empty coalescences: #@empty_reduce_withs
886
+ #duplicate coalescences: #@reduce_with_aliases
887
+ #nonduplicate coalescences: #{@reduce_with_cache.size}
888
+ #reduce_with_defns: #@reduce_with_defns
889
+ extend RedParse::ReduceWithUtils
890
+ def self.redparse_modules_init(parser)
891
+ return if defined? @@ref_0
892
+ rules=parser.rules.map{|rule|
893
+ rule.left.subregs.map{|pat|
894
+ case pat
895
+ when String,Regexp #not needed anymore...?
896
+ RedParse::KW(pat)
897
+ when Reg::LookBack,Reg::LookAhead,Reg::Repeat #Reg::Repeat should be handled already by now
898
+ pat.subregs[0]
899
+ #subseqs handled already
900
+ else pat
901
+ end
902
+ }<<rule.right
903
+ }
904
+ END
905
+
906
+ end
907
+ def redparse_modules_init
908
+ ::RedParse::#{modname}.redparse_modules_init(self)
909
+ super
910
+ end
911
+ END2
912
+
913
+ ruby.unshift( "#15 largest coalescences:\n",
914
+ *size_cache.sort_by{|(k,size)| size}[-15..-1].map{ \
915
+ |(k,size)| "##{k.inspect}=#{size}\n"
916
+ })
917
+
918
+ ruby.unshift("#10 most popular rules:\n",
919
+ *rule_popularity.sort_by{|(rule,pop)| pop}[-10..-1].map{ \
920
+ |((i,action),pop)| "##{i} #{action.inspect}=#{pop}\n"
921
+ })
922
+
923
+ warn "15 largest coalescences:"
924
+ size_cache.sort_by{|(klass,size)| size}[-15..-1].each{ \
925
+ |(klass,size)| warn "#{klass.inspect}=#{size}"
926
+ }
927
+
928
+ warn "10 most popular rules:"
929
+ rule_popularity.sort_by{|(rule,pop)| pop}[-10..-1].each{ \
930
+ |((i,action),pop)| warn "#{i} #{action.inspect}=#{pop}"
931
+ }
932
+
933
+
934
+ @ref_to_code=nil
935
+ ruby=["module RedParse::#{modname}\n",ruby,"\nend\n",indexcode]
936
+ @@rules_compile_cache[identity]=ruby
937
+
938
+ path ||= $LOAD_PATH.find{|d| File.exist? File.join(d,"redparse.rb") }+"/redparse/"
939
+ #should use reduce_withs_directory here somehow instead...
940
+
941
+ path += modname+".rb" if path[-1]==?/
942
+ File.open(path,"wb") {|f| ruby.flatten.each{|frag| f.write frag } }
943
+
944
+ #warn "actual write_reduce_withs writing took #{Time.now-start}s"
945
+ warn "size of #{path}: #{File.size path}"
946
+
947
+ ensure
948
+ warn "write_reduce_withs took #{Time.now-start}s" if start
949
+ @reduce_with_cache=nil if @reduce_with_cache
950
+ @size_cache=nil if @size_cache
951
+ end
952
+
953
+ def old_reduce_loop
259
954
  catch(:ParserDone){ loop {
260
955
  #try all possible reductions
261
956
  next if reduce==true
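
The HASHED_REDUCER path above stops trying every rule on every step; instead each stackable class answers reducer_method with a symbol that encodes what sits just under the lookahead, so the reduce_loop added below can simply send that symbol. A toy sketch of the dispatch (class and method names here are invented, not RedParse's):

    class ToyToken
      attr_reader :ident
      def initialize(ident)
        @ident = ident
      end
      # name of the specialized reducer to call when this token is
      # just under the lookahead
      def reducer_method(_stack)
        :"reduce_with_tos_ToyToken_#{@ident}"
      end
    end

    class ToyParser
      def initialize(stack)
        @stack = stack  # last element is always the lookahead
      end
      def reduce_with_tos_ToyToken_if;  :reduced_if_rule;  end
      def reduce_with_tos_ToyToken_end; :reduced_end_rule; end
      def reduce
        send(@stack[-2].reducer_method(@stack))
      end
    end

    # ToyParser.new([ToyToken.new("if"), ToyToken.new("then")]).reduce
    #   #=> :reduced_if_rule
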
@@ -269,6 +964,55 @@ end
269
964
  #shift our token onto the @stack
270
965
  @stack.push tok
271
966
  }}
967
+ end
968
+
969
+ =begin should be
970
+ reduce_call= HASHED_REDUCER ?
971
+ 'send(@stack[-2].reducer_method(@stack))' :
972
+ 'reduce'
973
+ eval <<-END,__FILE__,__LINE__
974
+ def reduce_loop
975
+ catch(:ParserDone){ ( @stack.push(get_token||break) unless(#{reduce_call}==true) ) while true }
976
+ end
977
+ END
978
+ =end
979
+ def reduce_loop
980
+ catch(:ParserDone){ while true
981
+ #try all possible reductions
982
+ #was: next if reduce==true
983
+ next if send(@stack[-2].reducer_method(@stack))==true
984
+
985
+ #no rule can match current @stack, get another token
986
+ tok=get_token or break
987
+
988
+ #are we done yet?
989
+ #tok.nil? or EoiToken===tok && EoiToken===@stack.last and break
990
+
991
+ #shift our token onto the @stack
992
+ @stack.push tok
993
+ end }
994
+ end
995
+
996
+ if ENV['REDUCE_INTERPRETER']
997
+ alias reduce old_slow_reduce
998
+ alias reduce_loop old_reduce_loop
999
+ end
1000
+
1001
+ def parse
1002
+
1003
+ #hack, so StringToken can know what parser its called from
1004
+ #so it can use it to parse inclusions
1005
+ oldparser=Thread.current[:$RedParse_parser]
1006
+ Thread.current[:$RedParse_parser]||=self
1007
+
1008
+ return @cached_result if defined? @cached_result
1009
+
1010
+ expanded_RULES()
1011
+ # @inputs||=enumerate_exemplars
1012
+
1013
+ @stack=[StartToken.new, get_token]
1014
+ #last token on @stack is always implicitly the lookahead
1015
+ reduce_loop
272
1016
 
273
1017
  @stack.size==2 and return result=NopNode.new #handle empty parse string
274
1018
 
@@ -276,7 +1020,7 @@ end
276
1020
  #with the last an Eoi, and first a StartToken
277
1021
  #there was a parse error
278
1022
  unless @stack.size==3
279
- pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
1023
+ puts( pretty_stack( 15 ))if ENV['PRINT_STACK']
280
1024
  top=MisparsedNode.new("(toplevel)", @stack[1...-1],'')
281
1025
  raise ParseError.new(top.msg,@stack)
282
1026
  end
@@ -314,17 +1058,19 @@ end
314
1058
 
315
1059
  =begin
316
1060
  rescue Exception=>e
1061
+ if ENV['PRINT_PARSE_ERRORS']
317
1062
  input=@lexer
318
1063
  if Array===input
319
- puts "error while parsing:"
320
- pp input
1064
+ STDERR.puts "error while parsing:"
1065
+ STDERR.write input.pretty_inspect
321
1066
  input=nil
322
1067
  else
323
1068
  input=input.original_file
324
- inputname=@lexer.filename
325
- input.to_s.size>1000 and input=inputname
326
- puts "error while parsing: <<< #{input} >>>"
1069
+ # inputname=@lexer.filename
1070
+ STDERR.puts "error while parsing #@filename:#@endline: <<< #{input.inspect if input.inspect.size<=1000} >>>"
327
1071
  end
1072
+ e.backtrace.each{|l| p l }
1073
+ end
328
1074
  raise
329
1075
  else
330
1076
  =end
@@ -354,18 +1100,23 @@ end
354
1100
 
355
1101
  #inline any subsequences in RULES right into the patterns
356
1102
  #reg should do this already, but current release does not
1103
+ #also expand regexp/string to keyword matcher
357
1104
  def expanded_RULES
1105
+ return @rules if defined? @rules
358
1106
  result=RULES()
359
- return result if (-[:foo, -[:bar]]).subregs.grep(Reg::Subseq).empty?
360
- result.map!{|rule|
361
- unless rule.left.subregs.grep(Reg::Subseq)
1107
+ #return result if (-[:foo, -[:bar]]).subregs.grep(Reg::Subseq).empty?
1108
+ @rules=result.map!{|rule|
1109
+ if rule.left.subregs.grep(Reg::Subseq|String|Regexp).empty?
362
1110
  then rule
363
1111
  else
364
1112
  right=rule.right
365
1113
  rule=rule.left.subregs.dup
366
1114
  (rule.size-1).downto(0){|i|
367
- if Reg::Subseq===rule[i]
368
- rule[i,1]=rule[i].subregs
1115
+ case mtr=rule[i]
1116
+ when Reg::Subseq
1117
+ rule[i,1]=mtr.subregs
1118
+ when String,Regexp
1119
+ rule[i]=RedParse::KW(mtr)
369
1120
  end
370
1121
  }
371
1122
  -rule>>right
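
expanded_RULES now both splices Reg::Subseq patterns into the flat rule and turns bare String/Regexp patterns into keyword matchers up front, which is why the generated reducer code earlier in this diff can assert that no raw String/Regexp/Subseq patterns remain. The splicing half in isolation, as a toy where nested Arrays stand in for Reg::Subseq:

    def inline_subseqs(rule)
      (rule.size - 1).downto(0) do |i|
        # splice the nested pattern's elements in place of the nested pattern
        rule[i, 1] = rule[i] if Array === rule[i]
      end
      rule
    end

    # inline_subseqs([:a, [:b, :c], :d])  #=> [:a, :b, :c, :d]
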
@@ -482,8 +1233,8 @@ end
482
1233
  #the 'precedence' of comma is somewhat controversial. it actually has
483
1234
  #several different precedences depending on which kind of comma it is.
484
1235
  #the precedence of , is higher than :, => and the assignment operators
485
- #in certain (lhs) contexts. therefore, the precedence of lhs, should
486
- #really be above =.
1236
+ #in certain (lhs) contexts. therefore, the precedence of lhs-comma should
1237
+ #really be above "=".
487
1238
 
488
1239
  #"unary" prefix function names seen has operators have this precedence
489
1240
  #but, rubylexer handles precedence of these and outputs fake parens
@@ -514,21 +1265,6 @@ if defined? SPECIALIZED_KEYWORDS
514
1265
  end
515
1266
 
516
1267
  KW2class={}
517
-
518
- Punc2name={
519
- "("=>"lparen", ")"=>"rparen",
520
- "["=>"lbracket", "]"=>"rbracket",
521
- "{"=>"lbrace", "}"=>"rbrace",
522
- ","=>"comma",
523
- ";"=>"semicolon",
524
- "::"=>"double_colon",
525
- "."=>"dot",
526
- "?"=>"question_mark", ":"=>"colon",
527
- "="=>"equals",
528
- "|"=>"pipe",
529
- "<<"=>"leftleft", ">>"=>"rightright",
530
- "=>"=>"arrow",
531
- }
532
1268
  end
533
1269
 
534
1270
  def self.KW(ident)
@@ -665,17 +1401,24 @@ end
665
1401
  end
666
1402
 
667
1403
  def lower_op
1404
+ @lower_op||=proc{|parser,op|
1405
+ LOWEST_OP===op or (!(parser.VALUELIKE_LA() === op) and
1406
+ parser.left_op_higher(parser.stack[-3],op)
1407
+ )
1408
+ }.extend LowerOp_inspect
1409
+ =begin was
668
1410
  return @lower_op if defined? @lower_op
669
1411
  lower_op=item_that{|op| left_op_higher(@stack[-3],op) }
670
- lower_op=(LOWEST_OP|(~VALUELIKE_LA & lower_op)).la
1412
+ lower_op=(LOWEST_OP|(~VALUELIKE_LA() & lower_op)).la
671
1413
  lower_op.extend LowerOp_inspect
672
1414
  @lower_op=lower_op
1415
+ =end
673
1416
  end
674
1417
 
675
1418
  #this is a hack, should use graphcopy to search for Deferreds and replace with double-Deferred as below
676
1419
  def item_that(*a,&b)
677
1420
  if defined? @generating_parse_tables
678
- huh unless b
1421
+ fail unless b
679
1422
  #double supers, one of them in a block executed after this method returns....
680
1423
  #man that's weird
681
1424
  super(*a){|ob| @saw_item_that[[super(*a,&b),ob]]=true}
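
The rewritten lower_op above is now a plain proc that answers, for the operator in the lookahead, whether the operator already on the stack binds at least as tightly, i.e. the standard operator-precedence test for "reduce now rather than shift". The bare idea, with a made-up precedence table rather than RedParse's:

    PRECEDENCE = { "*" => 2, "+" => 1 }  # invented table, for illustration only

    # true means: reduce what is on the stack before shifting lookahead_op
    lower_op = proc do |stack_op, lookahead_op|
      PRECEDENCE.fetch(lookahead_op, 0) <= PRECEDENCE.fetch(stack_op, 0)
    end

    lower_op.call("*", "+")  #=> true:  in  a * b + c  reduce a * b first
    lower_op.call("+", "*")  #=> false: in  a + b * c  shift the * instead
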
@@ -741,21 +1484,31 @@ end
741
1484
  LHS_COMMA=Op('lhs,',true)#&-{:tag => :lhs}
742
1485
  RHS_COMMA=Op('rhs,',true)#&-{:tag => :rhs}
743
1486
  #PARAM_COMMA=Op('param,',true)#&-{:tag => :param}
744
- FUNCLIKE_KEYWORD=KeywordToken&-{:ident=>RubyLexer::FUNCLIKE_KEYWORDS}
1487
+ def FUNCLIKE_KEYWORD
1488
+ KeywordToken&-{:ident=>@funclikes}
1489
+ end
745
1490
  IGN_SEMI_BEFORE=KW(/^(#{RubyLexer::INNERBOUNDINGWORDS.gsub(/(rescue|then)\|/,'')[1...-1]}|end|[)}\]])$/)|EoiToken
746
1491
  IGN_SEMI_AFTER=KW(/^(begin|[;:({|]|then|do|else|ensure)$/)|BlockFormalsNode
747
1492
 
748
1493
  #for use in lookback patterns
749
1494
  OPERATORLIKE_LB=OperatorToken|
750
- KW(/^(not | defined\? | rescue3 | .*[@,] | [ ~ ! ; \( \[ \{ ? : ] | \.{1,3} | :: | => | ![=~])$/x)|
1495
+ KW(/^(not | defined\? | rescue3 | .*[@,] | [~!;\(\[\{?:] | \.{1,3} | :: | => | ![=~])$/x)|
751
1496
  KW(%r{^( \*\*? | << | >> | &&? | \|\|? | \^ | % | / | - | \+ )?=$}x)|
752
1497
  KW(BEGINWORDS)|KW(/^#{INNERBOUNDINGWORDS}$/)|RescueHeaderNode|StartToken|
753
1498
  GoalPostToken|BlockFormalsNode|AssignmentRhsListStartToken
754
1499
 
755
1500
  #for use in lookahead patterns
756
- VALUELIKE_LA=KW(RubyLexer::VARLIKE_KEYWORDS)|NumberToken|SymbolToken|StringToken|UNOP|DEFOP|
757
- KW(/^[({]$/x)|VarNameToken|MethNameToken|HerePlaceholderToken|
758
- KW(BEGINWORDS)|FUNCLIKE_KEYWORD|AssignmentRhsListStartToken
1501
+ def VALUELIKE_LA
1502
+ @valuelike_la ||=
1503
+
1504
+ KW(@varlikes)|NumberToken|SymbolToken|StringToken|UNOP|DEFOP|
1505
+ KW(/^[({]$/x)|VarNameToken|MethNameToken|HerePlaceholderToken|
1506
+ KW(BEGINWORDS)|FUNCLIKE_KEYWORD()|AssignmentRhsListStartToken
1507
+
1508
+ #why isn't this a sufficient implementation of this method:
1509
+ # KW('(')
1510
+ #in which case, '(' can be made the highest precedence operator instead
1511
+ end
759
1512
  LOWEST_OP=KW(/^(#{ENDWORDS})$/)|KW(/^#{INNERBOUNDINGWORDS.sub('rescue|','')}$/)|
760
1513
  EoiToken|GoalPostToken|AssignmentRhsListEndToken
761
1514
 
@@ -770,6 +1523,8 @@ end
770
1523
  def RULES
771
1524
  lower_op= lower_op()
772
1525
 
1526
+
1527
+ result=
773
1528
  [-[StartToken.lb, Expr.-, EoiToken.la]>>:accept,
774
1529
  -[EoiToken]>>:error,
775
1530
  ]+
@@ -786,12 +1541,11 @@ end
786
1541
 
787
1542
  # -[Op('=',true)|KW(/^(rescue|when|\[)$/)|Op(/,$/,true),
788
1543
  # Op(/^(?:unary|rhs)\*$/), ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
789
- # -[MethNameToken|FUNCLIKE_KEYWORD, KW('('),
1544
+ # -[MethNameToken|FUNCLIKE_KEYWORD(), KW('('),
790
1545
  # Op(/^(?:unary|rhs)\*$/), ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
791
1546
  #star should not be used in an lhs if an rhs or param list context is available to eat it.
792
1547
  #(including param lists for keywords such as return,break,next,rescue,yield,when)
793
1548
 
794
- #hmmm.... | in char classes below looks useless (predates GoalPostToken)
795
1549
  -[Op(/^(?:unary|lhs)\*$/), (GoalPostToken|Op(/,$/,true)|KW(/^(in|[=)|;])$/)).la]>>DanglingStarNode, #dangling *
796
1550
  -[Op(/,$/,true), (GoalPostToken|KW(/^(in|[=)|;])$/)).la]>> #dangling ,
797
1551
  stack_monkey("DanglingComma",1,DanglingCommaNode){|stack|
@@ -799,9 +1553,12 @@ end
799
1553
  dcomma.offset=stack.last.offset
800
1554
  stack.push dcomma, stack.pop
801
1555
  },
1556
+ #hmmm.... | in char classes above looks useless (predates GoalPostToken)
1557
+
1558
+ # -[Expr, Op|KW_Op, Expr, lower_op]>>RawOpNode, #most operators
1559
+ -[Expr, Op, Expr, lower_op]>>RawOpNode, #most operators
1560
+ -[Expr, "=>", Expr, lower_op]>>ArrowOpNode,
802
1561
 
803
- -[Expr, Op|KW_Op, Expr, lower_op]>>RawOpNode, #most operators
804
-
805
1562
  #assignment
806
1563
  -[Lvalue, MODIFYASSIGNOP, Expr, lower_op]>>AssignNode,
807
1564
  -[Lvalue, Op('=',true), AssignmentRhsNode, lower_op]>>AssignNode,
@@ -840,11 +1597,6 @@ end
840
1597
  stack[-3].after_comma=true},
841
1598
  #mebbe this should be a lexer hack?
842
1599
 
843
- -[#(OPERATORLIKE_LB&~(MethNameToken|FUNCLIKE_KEYWORD)).lb,
844
- '(', Expr, KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true})]>>ParenedNode,
845
- -[#(OPERATORLIKE_LB&~(MethNameToken|FUNCLIKE_KEYWORD)).lb,
846
- '(', KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true})]>>VarLikeNode, #(), alias for nil
847
-
848
1600
  -[#(OPERATORLIKE_LB&~Op('=',true)).lb,
849
1601
  Expr, RESCUE_OP, Expr, lower_op]>>RescueOpNode,
850
1602
 
@@ -853,12 +1605,22 @@ end
853
1605
  -[Expr, DotOp, CallNode, lower_op]>>DotCall, #binary .
854
1606
  -[Expr, DoubleColonOp, CallNode, lower_op]>>DotCall, #binary ::
855
1607
  -[Expr, DoubleColonOp, VarNode, lower_op]>>ConstantNode,#binary ::
1608
+ #lower_op constraints on lookahead are unnecessary in above 4 (unless I give openparen a precedence)
856
1609
 
857
1610
  -[Expr, "?", Expr, ":", Expr, lower_op]>>TernaryNode,
858
1611
 
859
1612
 
860
1613
  -[MethNameToken, '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>CallNode,
861
- -[FUNCLIKE_KEYWORD, '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>KWCallNode,
1614
+ -[FUNCLIKE_KEYWORD(), '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>KWCallNode,
1615
+
1616
+ -[#(OPERATORLIKE_LB&
1617
+ (MethNameToken|FUNCLIKE_KEYWORD()).~.lb,
1618
+ '(', Expr, KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true}), KW('do').~.la]>>ParenedNode,
1619
+
1620
+ -[#(OPERATORLIKE_LB&
1621
+ (MethNameToken|FUNCLIKE_KEYWORD()).~.lb,
1622
+ '(', KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true}), KW('do').~.la]>>VarLikeNode, #(), alias for nil
1623
+ #constraint on do in above 2 rules is probably overkill
862
1624
 
863
1625
  -[ValueNode, Op(/,$/,true), ValueNode, lower_op]>>CommaOpNode,
864
1626
 
@@ -996,17 +1758,26 @@ end
996
1758
  },
997
1759
 
998
1760
  #treat these keywords like (rvalue) variables.
999
- -[RubyLexer::VARLIKE_KEYWORDS]>>VarLikeNode,
1761
+ -[@varlikes]>>VarLikeNode,
1000
1762
 
1001
1763
  #here docs
1002
1764
  -[HerePlaceholderToken]>>HereDocNode,
1003
- -[HereBodyToken.la]>>delete_monkey(1,"delete_here_body"),
1004
- ##this is rediculous. this should be a lexer hack?
1765
+ #-[HereBodyToken.la]>>delete_monkey(1,"delete_here_body"), ##this is ridiculous. this should be a lexer hack?
1005
1766
 
1006
1767
  -[VarNameToken]>>VarNode,
1007
1768
 
1008
1769
 
1009
1770
  ]
1771
+
1772
+ if @rubyversion >= 1.9
1773
+ result.concat [
1774
+ # -['->', ParenedNode.-, 'do', Expr.-, 'end']>>ProcLiteralNode,
1775
+ # -['->', VarLikeNode["nil",{:@value=>nil}].reg, 'do', Expr.-, 'end']>>ProcLiteralNode,
1776
+ -[(DotOp|DoubleColonOp).lb, '(',Expr.-,')', BlockNode.-, KW('do').~.la]>>CallNode,
1777
+ ]
1778
+ end
1779
+
1780
+ return result
1010
1781
  end
1011
1782
 
1012
1783
  if defined? END_ATTACK
@@ -1014,32 +1785,62 @@ if defined? END_ATTACK
1014
1785
  include Reducer
1015
1786
  end
1016
1787
 
1017
- def initialize(input,name="(eval)",line=1,lvars=[],options={})
1018
- @rubyversion=options[:rubyversion]||1.8
1788
+ def signature
1789
+ ancs=class<<self; ancestors end.reject{|k| !k.name}
1790
+ ancs.slice!(ancs.index(RedParse)..-1)
1019
1791
 
1020
- cache=Cache.new(name,line,lvars.sort.join(" "),@rubyversion,self.class.name)
1021
- cache_mode=options[:cache_mode]||:read_write
1022
- raise ArgumentError unless /^(?:read_(?:write|only)|write_only|none)$/===cache_mode.to_s
1023
- read_cache= /read/===cache_mode.to_s
1792
+ lancs=class<<@lexer; ancestors end.reject{|k| !k.name}
1793
+ [RubyLexer,Array].each{|k|
1794
+ if i=lancs.index(k)
1795
+ lancs.slice!(i..-1)
1796
+ end
1797
+ }
1798
+
1799
+ RedParse.signature(@encoding,ancs+lancs)
1800
+ end
1801
+ def RedParse.signature(encoding,ancs=ancestors)
1802
+ @@my_sha||=Digest::SHA256.file(__FILE__)
1803
+ @@node_sha||=Digest::SHA256.file(__FILE__.sub(/\.rb\z/,"/node.rb"))
1804
+ [ancs.map{|m| m.name}, encoding, @@my_sha, @@node_sha,]
1805
+ end
1806
+
1807
+ def initialize(input,name=nil,line=nil,lvars=nil,options=nil)
1808
+ line,name=name,nil if Hash===name or Array===name or Integer===name
1809
+ lvars,line=line,nil if Hash===line or Array===line
1810
+ options,lvars=lvars,nil if Hash===lvars
1811
+ options||={}
1812
+ name||=options[:name]||"(eval)"
1813
+ line||=options[:line]||1
1814
+ lvars||=options[:lvars]||[]
1815
+ @rubyversion=options[:rubyversion]||1.8
1024
1816
  input.binmode if input.respond_to? :binmode
1025
- if read_cache and cache and result=cache.get(input)
1026
- @cached_result=result
1027
- @write_cache=nil
1028
- return
1029
- end
1030
- if /write/===cache_mode.to_s
1031
- @write_cache,@input= cache,input
1032
- else
1033
- @write_cache=nil
1034
- end
1817
+
1818
+ @encoding=options[:encoding]||:ascii
1819
+ @encoding=:binary if @rubyversion<=1.8
1035
1820
 
1036
1821
  if Array===input
1037
1822
  def input.get1token; shift end
1038
1823
  @lexer=input
1824
+ if @rubyversion>=1.9
1825
+ @funclikes=RubyLexer::RubyLexer1_9::FUNCLIKE_KEYWORDS
1826
+ @varlikes=RubyLexer::RubyLexer1_9::VARLIKE_KEYWORDS
1827
+ else
1828
+ @funclikes=RubyLexer::FUNCLIKE_KEYWORDS
1829
+ @varlikes=RubyLexer::VARLIKE_KEYWORDS
1830
+ end
1039
1831
  else
1040
- @lexer=RubyLexer.new(name,input,line,0,:rubyversion=>@rubyversion)
1832
+ if RubyLexer===input
1833
+ @lexer=input
1834
+ else
1835
+ @lexer=RubyLexer.new(name,input,line,0,:rubyversion=>@rubyversion,:encoding=>@encoding)
1836
+ end
1837
+ @funclikes=@lexer::FUNCLIKE_KEYWORDS()
1838
+ @varlikes=@lexer::VARLIKE_KEYWORDS()
1041
1839
  lvars.each{|lvar| @lexer.localvars[lvar]=true }
1840
+ @encoding=@lexer.encoding_name_normalize(@encoding.to_s).to_sym
1841
+ warn "#{@encoding} encoding won't really work right now" if RubyLexer::NONWORKING_ENCODINGS.include? @encoding
1042
1842
  end
1843
+ @funclikes=/#@funclikes|^->$/ if @rubyversion>=1.9
1043
1844
  @filename=name
1044
1845
  @min_sizes={}
1045
1846
  @compiled_rules={}
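
The new #initialize above accepts its old positional arguments, an options hash, or a mix, by shuffling values rightwards whenever a later-typed value (Integer line, Array lvars, Hash options) shows up early. That pattern extracted into a standalone sketch (the function name is made up):

    def normalize_parse_args(input, name = nil, line = nil, lvars = nil, options = nil)
      line, name     = name,  nil if Hash === name or Array === name or Integer === name
      lvars, line    = line,  nil if Hash === line or Array === line
      options, lvars = lvars, nil if Hash === lvars
      options ||= {}
      { input: input,
        name:  name  || options[:name]  || "(eval)",
        line:  line  || options[:line]  || 1,
        lvars: lvars || options[:lvars] || [] }
    end

    # normalize_parse_args("x=1", 5)                  #=> name "(eval)", line 5
    # normalize_parse_args("x=1", {:lvars => ["x"]})  #=> lvars ["x"], line 1
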
@@ -1052,26 +1853,82 @@ if defined? END_ATTACK
1052
1853
  compile
1053
1854
  end
1054
1855
  @saw_item_that=nil
1856
+ @print_filter=proc{true}
1857
+
1858
+ if modules=options[:modules]
1859
+ modules.each{|m| extend m}
1860
+ end
1861
+ if modules=options[:lexer_modules]
1862
+ modules.each{|m| @lexer.extend m}
1863
+ end
1864
+
1865
+ dir=reduce_withs_directory
1866
+ modname="ReduceWithsFor_#{parser_identity.join('_').tr(':.','_')}"
1867
+
1868
+ cache=Cache.new(
1869
+ File===input,name,
1870
+ :line,line,:encoding,@encoding,:locals,lvars.sort.join(","),
1871
+ @rubyversion, :/, *signature
1872
+ )
1873
+ cache_mode=options[:cache_mode]||ENV['REDPARSE_CACHE']||:read_write
1874
+ cache_mode=cache_mode.to_sym
1875
+ raise ArgumentError,"bad cache mode #{cache_mode}" unless /^(?:read_(?:write|only)|write_only|none)$/===cache_mode.to_s
1876
+ read_cache= /read/===cache_mode.to_s
1877
+ if read_cache and cache and result=cache.get(input)
1878
+ @cached_result=result
1879
+ @write_cache=nil
1880
+ return
1881
+ end
1882
+ if /write/===cache_mode.to_s
1883
+ @write_cache,@input= cache,input
1884
+ else
1885
+ @write_cache=nil
1886
+ end
1887
+
1888
+ #but, need to skip warning lines matching this regexp:
1889
+ # /(^|[/\\])#{modname}\.rb:\d+: warning: mismatched indentations at 'end' with 'if' at \d+$/
1890
+
1891
+ begin
1892
+ require File.join(dir,modname)
1893
+ rescue LoadError
1894
+ else
1895
+ extend self.class.const_get( modname )
1896
+ end
1897
+ redparse_modules_init
1898
+ end
1899
+
1900
+ def self.parse(*args)
1901
+ new(*args).parse
1902
+ end
1903
+
1904
+ def redparse_modules_init
1905
+
1906
+ end
1907
+
1908
+ def reduce_withs_directory
1909
+ "redparse"
1055
1910
  end
1056
1911
 
1057
- attr_accessor :lexer
1058
- attr :rubyversion
1912
+ attr_accessor :lexer, :print_filter
1913
+ attr_reader :rubyversion, :stack
1914
+
1915
+ alias rules expanded_RULES
1059
1916
 
1060
1917
  def get_token(recursing=false)
1061
1918
  unless @moretokens.empty?
1062
1919
  @last_token=@moretokens.shift
1063
- p @last_token if ENV['PRINT_TOKENS'] unless recursing
1920
+ p @last_token if ENV['PRINT_TOKENS'] && @print_filter[@last_token] and not recursing
1064
1921
  return @last_token
1065
1922
  end
1066
1923
 
1067
1924
  rpt=ENV['RAW_PRINT_TOKENS']
1068
1925
  begin
1069
1926
  result=@lexer.get1token or break
1070
- p result if rpt
1927
+ p result if rpt and @print_filter[result]
1071
1928
 
1072
1929
  #set token's line
1073
1930
  result.startline= @endline||=1
1074
- result.endline||=@endline if result.respond_to? :endline=
1931
+ #result.endline||=@endline if result.respond_to? :endline=
1075
1932
 
1076
1933
  if result.respond_to?(:as) and as=result.as
1077
1934
  #result=make_kw(as,result.offset)
@@ -1080,7 +1937,10 @@ end
1080
1937
  result=result.dup
1081
1938
  result.ident=as
1082
1939
  else
1083
- result=make_kw(as,result.offset)
1940
+ result2=make_kw(as,result.offset)
1941
+ result2.startline=result.startline
1942
+ result2.endline=result.endline
1943
+ result=result2
1084
1944
  end
1085
1945
  result.not_real! if result.respond_to? :not_real!
1086
1946
  else
@@ -1102,28 +1962,44 @@ end
1102
1962
  case name=result.ident
1103
1963
 
1104
1964
  when /^(#{BINOP_KEYWORDS.join '|'})$/o #should be like this in rubylexer
1105
- result=OperatorToken.new(name,result.offset) unless result.has_end?
1106
- when "|"; result=GoalPostToken.new(result.offset) #is this needed still?
1965
+ unless result.has_end?
1966
+ orig=result
1967
+ result=OperatorToken.new(name,result.offset)
1968
+ result.endline=orig.endline
1969
+ end
1970
+ when "|";
1971
+ orig=result
1972
+ result=GoalPostToken.new(result.offset) #is this needed still? (yes)
1973
+ result.endline=orig.endline
1107
1974
  when "__FILE__"; #I wish rubylexer would handle this
1108
- class<<result; attr_accessor :value; end
1109
- result.value=@file.dup
1975
+ #class<<result; attr_accessor :value; end
1976
+ assert result.value==@file.dup
1110
1977
  when "__LINE__"; #I wish rubylexer would handle this
1111
- class<<result; attr_accessor :value; end
1112
- result.value=@endline
1978
+ #class<<result; attr_accessor :value; end
1979
+ assert result.value==@endline
1113
1980
  else
1114
1981
  result=make_kw name,result.offset if defined? SPECIALIZED_KEYWORDS
1115
1982
  #warning, this may discard information stored in instance vars of result
1116
1983
  end
1117
1984
 
1985
+ when StringToken,HerePlaceholderToken
1986
+ @endline=result.endline
1987
+
1118
1988
  when EoiToken; break
1119
- when HereBodyToken; break
1989
+ when HereBodyToken;
1990
+ @endline=result.endline
1991
+ redo
1120
1992
  when AssignmentRhsListStartToken; break
1121
1993
  when AssignmentRhsListEndToken; break
1122
1994
  when IgnoreToken; redo
1123
1995
  end
1124
1996
  end
1125
1997
  end while false
1126
- p result if ENV['PRINT_TOKENS'] unless recursing
1998
+ p result if ENV['PRINT_TOKENS'] && @print_filter[@last_token] unless recursing
1999
+
2000
+ #ugly weak assertion
2001
+ assert result.endline==@endline unless result.ident==';' && result.endline-1==@endline or EoiToken===result
2002
+
1127
2003
  return @last_token=result
1128
2004
  end
1129
2005
 
@@ -1135,21 +2011,11 @@ end
1135
2011
  @moretokens.unshift token
1136
2012
  end
1137
2013
 
1138
- =begin
1139
- self.LOOKAHEAD_CLASSES.each_with_index{|classes,i|
1140
- case classes
1141
- when Class: huh
1142
- when Array: classes.flatten.each{huh}
1143
- else
1144
- end
1145
- }
1146
- =end
1147
-
1148
- # def fixup_multiple_assignments!; end
1149
2014
  end
1150
2015
 
1151
2016
 
1152
2017
  if __FILE__==$0
2018
+ #this code has moved to bin/redparse; really, all this should just go away
1153
2019
  require 'problemfiles'
1154
2020
  class NeverExecThis<RuntimeError; end
1155
2021
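
Putting the new public surface together: the class-level RedParse.parse helper and the options-hash form of #initialize are both added in this diff; the option values below are just examples, and :none is one of the cache modes the new code accepts.

    require 'redparse'

    # parse a snippet as ruby 1.9, skipping the on-disk parse cache
    tree = RedParse.parse("a = 1 + 2\n", "(example)", 1, [],
                          {:rubyversion => 1.9, :cache_mode => :none})
    puts tree.inspect   # prints the resulting Node tree
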