redparse 0.8.4 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -0
  2. data/COPYING.LGPL +503 -158
  3. data/History.txt +192 -0
  4. data/Makefile +9 -0
  5. data/README.txt +72 -39
  6. data/bin/redparse +108 -14
  7. data/lib/miniredparse.rb +1543 -0
  8. data/lib/redparse.rb +971 -105
  9. data/lib/redparse/ReduceWithsFor_RedParse_1_8.rb +17412 -0
  10. data/lib/redparse/ReduceWithsFor_RedParse_1_9.rb +17633 -0
  11. data/lib/redparse/babynodes.rb +17 -0
  12. data/lib/redparse/babyparser.rb +17 -0
  13. data/lib/redparse/cache.rb +290 -6
  14. data/lib/redparse/compile.rb +6 -97
  15. data/lib/redparse/decisiontree.rb +1 -1
  16. data/lib/redparse/float_accurate_to_s.rb +30 -6
  17. data/lib/redparse/generate.rb +18 -0
  18. data/lib/redparse/node.rb +415 -124
  19. data/lib/redparse/parse_tree_server.rb +20 -2
  20. data/lib/redparse/problemfiles.rb +1 -1
  21. data/lib/redparse/pthelper.rb +17 -31
  22. data/lib/redparse/reg_more_sugar.rb +1 -1
  23. data/lib/redparse/replacing/parse_tree.rb +30 -0
  24. data/lib/redparse/replacing/ripper.rb +20 -0
  25. data/lib/redparse/replacing/ruby_parser.rb +28 -0
  26. data/lib/redparse/ripper.rb +393 -0
  27. data/lib/redparse/ripper_sexp.rb +153 -0
  28. data/lib/redparse/stackableclasses.rb +113 -0
  29. data/lib/redparse/version.rb +18 -1
  30. data/redparse.gemspec +29 -9
  31. data/rplt.txt +31 -0
  32. data/test/data/hd_with_blank_string.rb +3 -0
  33. data/test/data/pt_known_output.rb +13273 -0
  34. data/test/data/wp.pp +0 -0
  35. data/test/generate_parse_tree_server_rc.rb +17 -0
  36. data/test/rp-locatetest.rb +2 -2
  37. data/test/test_1.9.rb +338 -35
  38. data/test/test_all.rb +22 -3
  39. data/test/test_part.rb +32 -0
  40. data/test/test_redparse.rb +396 -74
  41. data/test/test_xform_tree.rb +18 -0
  42. data/test/unparse_1.9_exceptions.txt +85 -0
  43. data/test/unparse_1.9_exceptions.txt.old +81 -0
  44. metadata +71 -46
  45. data/Rakefile +0 -35
@@ -1,6 +1,6 @@
1
1
  =begin
2
2
  redparse - a ruby parser written in ruby
3
- Copyright (C) 2008,2009 Caleb Clausen
3
+ Copyright (C) 2008,2009, 2012, 2016 Caleb Clausen
4
4
 
5
5
  This program is free software: you can redistribute it and/or modify
6
6
  it under the terms of the GNU Lesser General Public License as published by
@@ -20,6 +20,8 @@
20
20
 
21
21
  require 'forwardable'
22
22
 
23
+ require 'digest/sha2'
24
+
23
25
  begin
24
26
  require 'rubygems'
25
27
  rescue LoadError=>e
@@ -33,14 +35,39 @@ require 'reglookab'
33
35
  require "redparse/node"
34
36
  #require "redparse/decisiontree"
35
37
  require "redparse/reg_more_sugar"
36
- require "redparse/generate"
38
+ #require "redparse/generate"
37
39
  require "redparse/cache"
38
40
  #require "redparse/compile"
39
41
 
40
42
  class RedParse
43
+
44
+
45
+
46
+ alias :dump :inspect # preserve old inspect functionality
41
47
 
42
- ####### generic stuff for parsing any(?) language
48
+ # irb friendly #inspect/#to_s
49
+ def to_s
50
+ mods=class<<self; ancestors; end.reject{|k| !k.name }-self.class.ancestors
51
+ mods=mods.map{|mod| mod.name }.join('+')
52
+ mods="+"<<mods unless mods.empty?
53
+ input=@input||@lexer.input
54
+ "#<#{self.class.name}#{mods}: [#{input.inspect}]>"
55
+ end
56
+
57
+ alias :inspect :to_s
58
+
59
+ def pretty_stack max=nil
60
+ target=@stack
61
+ target=target[-max..-1] if max and max<target.size
62
+
63
+ target.map{|n|
64
+ res=n.inspect
65
+ res<<"\n" unless res[-1]=="\n"
66
+ res
67
+ }
68
+ end
43
69
 
70
+ ####### generic stuff for parsing any(?) language
44
71
  # include Nodes
45
72
  class StackMonkey
46
73
  def initialize(name,first_changed_index,and_expect_node,options={},&monkey_code)
@@ -119,12 +146,16 @@ end
119
146
  #index of data at which to start matching
120
147
  i=@stack.size-1 #-1 because last element of @stack is always lookahead
121
148
 
149
+ =begin was, but now done by expanded_RULES
122
150
  #I could call this a JIT compiler, but that's a bit grandiose....
123
151
  #more of a JIT pre-processor
124
152
  compiled_rule=@compiled_rules[rule]||=
125
153
  rule.map{|pattern|
126
154
  String|Regexp===pattern ? KW(pattern) : pattern
127
155
  }
156
+ =end
157
+ assert(rule.grep(String|Regexp|Reg::Subseq|Reg::LookAhead|Reg::LookBack|Proc).empty?)
158
+ compiled_rule=rule
128
159
 
129
160
  #what's the minimum @stack size this rule could match?
130
161
  rule_min_size=@min_sizes[compiled_rule]||=
@@ -198,12 +229,417 @@ end
198
229
  return true #let caller know we found a match
199
230
 
200
231
 
201
- rescue Exception=>e
232
+ rescue Exception #=>e
202
233
  #puts "error (#{e}) while executing rule: #{rule.inspect}"
203
234
  #puts e.backtrace.join("\n")
204
235
  raise
205
236
  end
206
237
 
238
+
239
+ def coalesce_loop(klass=nil,ident=nil,klass2=nil,ident2=nil)
240
+ eligible=rules.reverse.map!{|rule| can_coalesce?(rule,klass,ident,klass2,ident2)&&rule }
241
+ i=rules.size
242
+ eligible.map!{|rule|
243
+ i-=1
244
+ next unless rule
245
+ if @size_cache
246
+ @size_cache[[i,rule.right]]||=1
247
+ @size_cache[[i,rule.right]]+=1
248
+ end
249
+ coalesce rule, i, klass,ident,klass2,ident2
250
+ }
251
+ eligible.compact!
252
+ @size_cache[klass2 ? [klass,ident,klass2,ident2] : ident ? ident : klass]= eligible.size if @size_cache
253
+
254
+ @empty_reduce_withs+=1 if defined? @empty_reduce_withs and eligible.size.zero?
255
+
256
+ return eligible
257
+ end
258
+
259
+ def can_coalesce? rule,klass=nil,ident=nil,klass2=nil,ident2=nil
260
+ Reg::Transform===rule or fail
261
+ node_type= rule.right
262
+ rule=rule.left.subregs.dup
263
+ rule.pop if Proc|::Reg::LookAhead===rule.last
264
+ rule[0]=rule[0].subregs[0] if ::Reg::LookBack===rule[0]
265
+
266
+ =begin was, but now done by expanded_RULES
267
+ #I could call this a JIT compiler, but that's a bit grandiose....
268
+ #more of a JIT pre-processor
269
+ compiled_rule=@compiled_rules[rule]||=
270
+ rule.map{|pattern|
271
+ String|Regexp===pattern ? KW(pattern) : pattern
272
+ }
273
+ =end
274
+ assert(rule.grep(String|Regexp|Reg::Subseq|Reg::LookAhead|Reg::LookBack|Proc).empty?)
275
+
276
+
277
+ return false if klass && !can_combine?(rule,klass,ident)
278
+ return false if klass2 && !can_combine2?(rule,klass2,ident2,-2)
279
+ warn "plain lit matches #{node_type}" if klass==LiteralNode and klass2.nil?
280
+ return true
281
+ end
282
+
283
+ def coalesce rule,rulenum,klass=nil,ident=nil,klass2=nil,ident2=nil
284
+ #last 4 params aren't actually needed anymore
285
+
286
+ @coalesce_result||=[]
287
+ result=@coalesce_result[rulenum]
288
+ return result if result
289
+
290
+ #dissect the rule
291
+ Reg::Transform===rule or fail
292
+ node_type= rule.right
293
+ rule=rule.left.subregs.dup
294
+ lookahead_processor=(rule.pop if Proc|::Reg::LookAhead===rule.last)
295
+ lookback=rule[0]=rule[0].subregs[0] if ::Reg::LookBack===rule[0]
296
+
297
+ assert @rules[rulenum].right==node_type
298
+
299
+ if klass==VarNode and klass2==KeywordToken
300
+ #warn "can_combine2? about to fail"
301
+ end
302
+
303
+ needends=0
304
+ result=["\n##{mui node_type}\n"]
305
+
306
+ #index of data at which to start matching
307
+ result<<"i=@stack.size-1 ##{mui node_type}\n#-1 because last element of @stack is always lookahead\n"
308
+
309
+ =begin was, but now done by expanded_RULES
310
+ #I could call this a JIT compiler, but that's a bit grandiose....
311
+ #more of a JIT pre-processor
312
+ compiled_rule=@compiled_rules[rule]||=
313
+ rule.map{|pattern|
314
+ String|Regexp===pattern ? KW(pattern) : pattern
315
+ }
316
+ =end
317
+ assert(rule.grep(String|Regexp|Reg::Subseq|Reg::LookAhead|Reg::LookBack|Proc).empty?)
318
+ compiled_rule=rule
319
+
320
+ return if klass && !can_combine?(compiled_rule,klass,ident) #should never happen
321
+ return if klass2 && !can_combine2?(compiled_rule,klass2,ident2,-2) #should never happen
322
+
323
+ #what's the minimum @stack size this rule could match?
324
+ rule_min_size=@min_sizes[compiled_rule]||=
325
+ compiled_rule.inject(0){|sum,pattern|
326
+ sum + pattern.itemrange.begin
327
+ }
328
+ if rule_min_size > 1
329
+ needends+=1
330
+ result<<"if i>=#{rule_min_size}\n"
331
+ min_i=rule_min_size
332
+ end
333
+ #@@has_loop||=[]
334
+ #@@has_optional||=[]
335
+ has_loop=#@@has_loop[rulenum]||=
336
+ compiled_rule.find{|x| x.itemrange.last.to_f.infinite? }
337
+ has_optional=#@@has_optional[rulenum]||=
338
+ compiled_rule.find{|x| x.itemrange.first.zero? }
339
+
340
+ if Class===node_type and has_loop||has_optional
341
+ result<<"matching=[]\n"
342
+ need_matching=true
343
+ end
344
+
345
+ j=compiled_rule.size
346
+ #actually try to match rule elements against each @stack element in turn
347
+ first1=true
348
+ compiled_rule.reverse_each{|matcher|
349
+ j-=1
350
+ result<<"i.zero? and fail\n" unless min_i && min_i>0 or first1
351
+ first1=false
352
+ #is this matcher optional? looping?
353
+ maximum= matcher.itemrange.last
354
+ minimum= matcher.itemrange.first
355
+ loop= maximum.to_f.infinite?
356
+ optional=minimum.zero?
357
+ fail "looping matcher with finite maximum not supported" if maximum>1 and !loop
358
+ if need_matching
359
+ success="matching.unshift item"
360
+ loopsuccess="target.unshift item"
361
+ optfail="matching.unshift nil"
362
+
363
+ result<<"matching.unshift target=[]\n" if loop
364
+ end
365
+ is_lookback=matcher .equal? lookback
366
+ if loop or optional
367
+ matcher=matcher.subregs[0]
368
+ fail "lookback is not a scalar" if is_lookback
369
+ end
370
+
371
+ itemget="@stack[i-=1]"
372
+ itemget="(item=#{itemget})" if success
373
+ test="#{ref_to matcher,rulenum,j}===#{itemget} #try match of #{mui matcher}"
374
+ p [:misparse_start, matcher] if node_type===MisparsedNode and j.zero?
375
+ matcher= ~ (matcher.subregs[0]|NilClass) if Reg::Not===matcher
376
+ if matcher===nil and j.zero?
377
+ warn "rule ##{rulenum}(>>#{node_type}) can match nil at start; might match emptiness before start of stack"
378
+ end
379
+ if !loop
380
+ fail unless maximum==1
381
+ min_i-=1 if min_i
382
+ result<<<<-END
383
+ if #{test}
384
+ #{success if !is_lookback}
385
+ END
386
+ optional ? result<<<<-END : needends+=1
387
+ else
388
+ #ignore optional match fail
389
+ #but bump the data position back up, since the latest datum
390
+ #didn't actually match anything.
391
+ i+=1
392
+ #{optfail}
393
+ end
394
+ END
395
+ else
396
+ min_i=nil
397
+ if minimum<10
398
+ needends+=minimum
399
+ result<<<<-END*minimum
400
+ if #{test}
401
+ #{loopsuccess}
402
+ END
403
+ result<<<<-END
404
+ while #{test}
405
+ #{loopsuccess}
406
+ end
407
+ #but bump the data position back up, since the latest datum
408
+ #didn't actually match anything.
409
+ i+=1
410
+ END
411
+ else
412
+ needends+=1
413
+ result<<<<-END
414
+ #{"n=#{minimum}" unless need_matching}
415
+ while #{test}
416
+ #{loopsuccess || "n-=1"}
417
+ end
418
+ if #{need_matching ? "target.size>=minimum" : "n<=0"} then
419
+ #but bump the data position back up, since the latest datum
420
+ #didn't actually match anything.
421
+ i+=1
422
+ END
423
+ end
424
+
425
+ end
426
+ }
427
+
428
+ #give lookahead matcher (if any) a chance to fail the match
429
+ result<<case lookahead_processor
430
+ when ::Reg::LookAhead
431
+ action_idx=compiled_rule.size+1
432
+ needends+=1
433
+ "if #{ref_to lookahead_processor.subregs[0],rulenum,compiled_rule.size}===@stack.last ##{mui lookahead_processor.subregs[0] }\n"
434
+ when Proc
435
+ action_idx=compiled_rule.size+1
436
+ needends+=1
437
+ "if #{ref_to lookahead_processor,rulenum,compiled_rule.size}[self,@stack.last] ##{mui lookahead_processor}\n"
438
+ else ''
439
+ end
440
+
441
+ #if there was a lookback item, don't include it in the matched set
442
+ #result<<"matching.shift\n" if lookback and need_matching
443
+
444
+ need_return=true
445
+
446
+ #replace matching elements in @stack with node type found
447
+ result<<
448
+ case node_type
449
+ when Class
450
+ #if there was a lookback item, don't include it in the new node
451
+ <<-END
452
+ #{"i+=1" if lookback}
453
+ matchrange= i...-1 #what elems in @stack were matched?
454
+ #{"matching=@stack.slice! matchrange" unless need_matching}
455
+ node=#{ref_to node_type,rulenum,action_idx||rule.size}.create(*matching) ##{mui node_type}
456
+ node.startline||=#{need_matching ? "@stack[i]" : "matching.first"}.startline
457
+ node.endline=@endline
458
+ #{need_matching ? "@stack[matchrange]=[node]" : "@stack.insert i,node" }
459
+ END
460
+ when Proc,StackMonkey; ref_to(node_type,rulenum,action_idx||rule.size)+"[@stack] ##{mui node_type}\n"
461
+ when :shift; need_return=false; "return 0\n"
462
+ when :accept,:error; need_return=false; "throw :ParserDone\n"
463
+ else fail
464
+ end
465
+
466
+ result<<"return true #let caller know we found a match\n" if need_return
467
+ result<<"end;"*needends
468
+ result<<"\n"
469
+
470
+ return @coalesce_result[rulenum]=result
471
+ rescue Exception #=>e
472
+ #puts "error (#{e}) while executing rule: #{rule.inspect}"
473
+ #puts e.backtrace.join("\n")
474
+ raise
475
+ end
476
+
477
+ @@ref_to_cache={}
478
+ @@ref_to_cache_by_id={}
479
+ @@ref_to_idx=-1
480
+ def ref_to obj,i,j
481
+ assert j<=0x3FF
482
+ if Module===obj and obj.name
483
+ return obj.name
484
+ elsif ref=@@ref_to_cache_by_id[obj.__id__] || @@ref_to_cache[(i<<10)+j]
485
+ return ref
486
+ else
487
+ @@ref_to_rules||=
488
+ rules.map{|rule|
489
+ rule.left.subregs.map{|pat|
490
+ case pat
491
+ when String,Regexp #not needed anymore...?
492
+ RedParse::KW(pat)
493
+ when Reg::LookBack,Reg::LookAhead,Reg::Repeat #Reg::Repeat should be handled already by now
494
+ pat.subregs[0]
495
+ #subseqs handled already
496
+ else pat
497
+ end
498
+ }<<rule.right
499
+ }
500
+
501
+ @ref_to_code||=[]
502
+ name="@@ref_#{@@ref_to_idx+=1}"
503
+ #eval "#{name}=obj"
504
+ unless @@ref_to_rules[i][j]==obj
505
+ warn "ref_to mismatch"
506
+ end
507
+ @ref_to_code<<"#{name}=rules[#{i}][#{j}]"
508
+ @ref_to_code<<"warn_unless_equal #@@ref_to_idx,mui(#{name}),#{squote mui( obj )}"
509
+ @@ref_to_cache[(i<<10)+j]=name
510
+ @@ref_to_cache_by_id[obj.__id__]=name
511
+ end
512
+ end
513
+
514
+ module ReduceWithUtils
515
+ #a version of inspect that is especially likely to be stable;
516
+ #no embedded addresses and ivar order is always the same
517
+ def matcher_unique_inspect(m)
518
+ result=m.inspect
519
+ return result unless /\A#<[A-Z]/===result
520
+ "#<#{m.class}: "+
521
+ m.instance_variables.sort.map{|iv|
522
+ val=m.instance_variable_get(iv).inspect
523
+ val.gsub!(/#<(Proc|(?:Stack|Delete)Monkey):(?:0[xX])?[0-9a-fA-F]+/){ "#<#$1:" }
524
+ iv.to_s+"="+val
525
+ }.join(" ")+">"
526
+ end
527
+ alias mui matcher_unique_inspect
528
+
529
+ def squote(str)
530
+ "'#{str.gsub(/['\\]/){|ch| %[\\]+ch }}'"
531
+ end
532
+
533
+ @@unequal_parser_ref_vars=0
534
+ @@line_mismatch_parser_ref_vars=0
535
+ def warn_unless_equal i,ref,orig
536
+ return if ref==orig
537
+ msg="expected @ref_#{i} to == #{squote orig}, saw #{squote ref}"
538
+ ref=ref.gsub(/\.rb:\d+>/,".rb:X>")
539
+ orig=orig.gsub(/\.rb:\d+>/,".rb:X>")
540
+ count=
541
+ if ref==orig
542
+ msg="@ref_#{i} differed in line nums"
543
+ warn "more @ref_ vars differed in line nums..." if @@line_mismatch_parser_ref_vars==1
544
+ @@line_mismatch_parser_ref_vars+=1
545
+ else
546
+ @@unequal_parser_ref_vars+=1
547
+ end
548
+ warn msg if 1==count
549
+ end
550
+ end
551
+ include ReduceWithUtils
552
+
553
+ def classes_matched_by(matcher)
554
+ result=[]
555
+ worklist=[matcher]
556
+ begin
557
+ case x=worklist.shift
558
+ when Reg::And,Reg::Or; worklist.concat x.subregs
559
+ when Class; result<<x
560
+ end
561
+ end until worklist.empty?
562
+ return [Object] if result.empty?
563
+ return result
564
+ end
565
+
566
+
567
+
568
+ def can_combine? rule,klass,ident
569
+ rule.reverse_each{|matcher|
570
+ if Reg::Repeat===matcher
571
+ optional= matcher.times.first==0
572
+ matcher=matcher.subregs[0]
573
+ end
574
+ if ident
575
+ return true if matcher===klass.new(ident)
576
+ optional ? next : break
577
+ end
578
+
579
+ =begin was
580
+ orlist= Reg::Or===matcher ? matcher.subregs : [matcher]
581
+ orlist.map!{|m|
582
+ classes=(Reg::And===m ? m.subregs : [m]).grep(Class)
583
+ case classes.size
584
+ when 0; return true
585
+ when 1
586
+ else warn "multiple classes in matcher #{matcher}"
587
+ end
588
+ classes if classes.all?{|k| klass<=k }
589
+ }
590
+ return true if orlist.compact.flatten[0]
591
+ =end
592
+ return true if classes_matched_by(matcher).any?{|k| klass<=k }
593
+
594
+ break unless optional
595
+ }
596
+ return false
597
+ end
598
+
599
+ def can_combine2? rule,klass,ident,index=-1
600
+ #very similar to can_combine?, just above
601
+ #i think can_combine2? with 3 params is equiv to can_combine?
602
+ #so, the two should be merged
603
+ index=-index
604
+ rule_max_size=rule.inject(0){|sum,pattern|
605
+ sum + pattern.itemrange.end
606
+ }
607
+ return true if rule_max_size<index
608
+ min=max=0
609
+ rule.reverse_each{|matcher|
610
+ break if index<min
611
+ if Reg::Repeat===matcher
612
+ #optional= matcher.times.first==0
613
+ min+=matcher.times.first
614
+ max+=matcher.times.last
615
+ matcher=matcher.subregs[0]
616
+ else
617
+ min+=1
618
+ max+=1
619
+ end
620
+ next if index>max
621
+ if ident
622
+ return true if matcher===klass.new(ident)
623
+ next #was: optional ? next : break
624
+ end
625
+ =begin was
626
+ orlist= Reg::Or===matcher ? matcher.subregs : [matcher]
627
+ orlist.map!{|m|
628
+ classes=(Reg::And===m ? m.subregs : [m]).grep(Class)
629
+ case classes.size
630
+ when 0; return true
631
+ when 1
632
+ else warn "multiple classes in matcher #{matcher}: #{classes.inspect}"
633
+ end
634
+ classes if classes.all?{|k| klass<=k }
635
+ }
636
+ return true if orlist.compact.flatten[0]
637
+ =end
638
+ return true if classes_matched_by(matcher).any?{|k| klass<=k }
639
+ }
640
+ return false
641
+ end
642
+
207
643
  class ParseError<RuntimeError
208
644
  def initialize(msg,stack)
209
645
  super(msg)
@@ -234,7 +670,7 @@ end
234
670
  end
235
671
 
236
672
  #try all possible reductions
237
- def reduce
673
+ def old_slow_reduce
238
674
  shift=nil
239
675
  @rules.reverse_each{|rule|
240
676
  shift=evaluate(rule) and break
@@ -242,20 +678,279 @@ end
242
678
  return shift
243
679
  end
244
680
 
245
- def parse
681
+ HASHED_REDUCER=!ENV['REDUCE_INTERPRETER']
246
682
 
247
- #hack, so StringToken can know what parser its called from
248
- #so it can use it to parse inclusions
249
- oldparser=Thread.current[:$RedParse_parser]
250
- Thread.current[:$RedParse_parser]||=self
683
+ @@rules_compile_cache={}
251
684
 
252
- return @cached_result if defined? @cached_result
685
+ #try all possible reductions
686
+ def reduce
687
+ code=@@rules_compile_cache[class<<self; ancestors end.reject{|k| !k.name}<<@rubyversion]||=coalesce_loop().join
688
+ code= <<-END
689
+ class RedParse
690
+ def (Thread.current['$RedParse_instance']).reduce
691
+ #{code}
692
+ return nil
693
+ end
694
+ end
695
+ END
253
696
 
254
- @rules||=expanded_RULES()
255
- # @inputs||=enumerate_exemplars
697
+ f=Tempfile.new("reduce")
698
+ Thread.current['$RedParse_instance']=self
699
+ p [:code_hash, code.hash]
700
+ f.write code
701
+ f.flush
702
+ load f.path
256
703
 
257
- @stack=[StartToken.new, get_token]
258
- #last token on @stack is always implicitly the lookahead
704
+ reduce
705
+ ensure f.close if f
706
+ end if !HASHED_REDUCER
707
+
708
+
709
+ # include StackableClasses
710
+
711
+ Punc2name={
712
+ "("=>"lparen", ")"=>"rparen",
713
+ "["=>"lbracket", "]"=>"rbracket",
714
+ "{"=>"lbrace", "}"=>"rbrace",
715
+ ","=>"comma",
716
+ ";"=>"semicolon",
717
+ "::"=>"double_colon",
718
+ "."=>"dot",
719
+ "?"=>"question_mark", ":"=>"colon",
720
+ "="=>"equals",
721
+ "|"=>"pipe",
722
+ "<<"=>"leftleft", ">>"=>"rightright",
723
+ "=>"=>"arrow",
724
+ "->"=>"stabby",
725
+ "rhs,"=>"rhs_comma",
726
+ "lhs,"=>"lhs_comma",
727
+ "||="=>"or_equals",
728
+ "&&="=>"and_equals",
729
+ }
730
+
731
+
732
+ RUBYUNOPERATORS=::RubyLexer::RUBYUNOPERATORS
733
+ RUBYBINOPERATORS=::RubyLexer::RUBYBINOPERATORS
734
+ RUBYSYMOPERATORS=::RubyLexer::RUBYSYMOPERATORS
735
+ RUBYNONSYMOPERATORS=::RubyLexer::RUBYNONSYMOPERATORS
736
+ OPERATORS=RUBYUNOPERATORS-%w[~@ !@]+RUBYBINOPERATORS+RUBYNONSYMOPERATORS+
737
+ %w[while until if unless rescue and or not unary* unary& rescue3 lhs* rhs*]
738
+ OPERATORS.uniq!
739
+ RUBYKEYWORDLIST=(
740
+ RubyLexer::RUBYKEYWORDLIST+Punc2name.keys+
741
+ RUBYSYMOPERATORS+RUBYNONSYMOPERATORS
742
+ ).uniq
743
+
744
+ def rubyoperatorlist; OPERATORS end
745
+ def rubykeywordlist; RUBYKEYWORDLIST end
746
+
747
+ class KeywordToken
748
+ def reducer_method(stack)
749
+ :"reduce_with_tos_KeywordToken_#@ident"
750
+ end
751
+ def reducer_ident
752
+ :"KeywordToken_#@ident"
753
+ end
754
+ end
755
+
756
+ class OperatorToken
757
+ def reducer_ident
758
+ :"OperatorToken_#@ident"
759
+ end
760
+ end
761
+
762
+ class ValueNode
763
+ def reducer_method(stack)
764
+ :"reduce_with_tos_#{stack[-3].reducer_ident}_then_#{reducer_ident}"
765
+ end
766
+ end
767
+
768
+ def parser_identity
769
+ #what is the relationship between this method and #signature?
770
+ #can the two be combined?
771
+ result=class<<self; ancestors end.reject{|k| !k.name}
772
+ result.reject!{|k| !!((::RedParse<k)..false) }
773
+ result.reject!{|k| k.name[/^(?:RedParse::)?ReduceWiths/] }
774
+ result.reverse!
775
+ result.push @rubyversion
776
+ #@rubyversion in identity is a hack; should have RedParse1_9 module instead
777
+ end
778
+
779
+ def code_for_reduce_with ident, code
780
+ code=coalesce_loop(*code) if Array===code
781
+ ident.gsub!(/[\\']/){|x| "\\"+x}
782
+ code=code.join
783
+ @reduce_with_defns+=1
784
+ if name=@reduce_with_cache[code]
785
+ @reduce_with_aliases+=1
786
+ "alias :'reduce_with_tos_#{ident}' :'#{name}'\n"
787
+ else
788
+ @reduce_with_cache[code]=name="reduce_with_tos_#{ident}"
789
+ ["define_method('", name ,"') do\n", code ,"\nnil\nend\n"]
790
+ end
791
+ end
792
+
793
+ def addl_node_containers; [] end
794
+
795
+ def write_reduce_withs path=nil
796
+ return unless HASHED_REDUCER
797
+ start=Time.now
798
+ @size_cache={}
799
+ identity=parser_identity
800
+ @reduce_with_cache={}
801
+ @reduce_with_aliases=0
802
+ @empty_reduce_withs=@reduce_with_defns=0
803
+
804
+ expanded_RULES()
805
+ shortnames=[] #[[],[]]
806
+ list=[self.class,*addl_node_containers].map{|mod|
807
+ mod.constants.select{|k|
808
+ /(?:Node|Token)$/===k.to_s
809
+ }.map{|k|
810
+ mod.const_get k
811
+ }
812
+ }.flatten.grep(Class).uniq
813
+ #list=STACKABLE_CLASSES()
814
+ list -= [KeywordToken,ImplicitParamListStartToken,ImplicitParamListEndToken,
815
+ Token,WToken,NewlineToken,DecoratorToken,Node,ValueNode]
816
+ list.reject!{|x| IgnoreToken>=x and not /(^|:)AssignmentRhs/===x.name}
817
+ exprclasses,list=list.partition{|k| k<=ValueNode }
818
+ fail unless list.include? StartToken
819
+ indexcode=list.map{|klass|
820
+ shortname=klass.to_s[/[^:]+$/]
821
+ warn "empty reducer_ident for ::#{klass}" if shortname.empty?
822
+ <<-END
823
+ class ::#{klass}
824
+ def reducer_method(stack)
825
+ :reduce_with_tos_#{shortname}
826
+ end if instance_methods(false).&(["reducer_method",:reducer_method]).empty?
827
+ def reducer_ident
828
+ :#{shortname}
829
+ end if instance_methods(false).&(["reducer_ident",:reducer_ident]).empty?
830
+ end
831
+ END
832
+ }.concat(exprclasses.map{|exprclass|
833
+ shec=exprclass.name[/[^:]+$/]
834
+ warn "empty reducer_ident for ::#{exprclass}" if shec.empty?
835
+ <<-END
836
+ class ::#{exprclass}
837
+ def reducer_ident
838
+ :#{shec}
839
+ end if instance_methods(false).&(["reducer_ident",:reducer_ident]).empty?
840
+ end
841
+ END
842
+ })
843
+ ruby=["#Copyright (C) #{Time.now.year} #{ENV['COPYRIGHT_OWNER']||'Caleb Clausen'}\n"+
844
+ "#Generated with ruby v#{RUBY_VERSION}\n"
845
+ ].concat list.map{|klass|
846
+ shortname=klass.to_s[/[^:]+$/]
847
+ shortnames<<[shortname,klass,nil]
848
+ code_for_reduce_with( shortname, [klass] )
849
+ }.concat(rubykeywordlist.map{|kw|
850
+ shortname="KeywordToken_#{kw}"
851
+ shortnames<<[shortname,KeywordToken,kw]
852
+ code_for_reduce_with( shortname, [KeywordToken, kw] )
853
+ }).concat({ImplicitParamListStartToken=>'(',ImplicitParamListEndToken=>')'}.map{|(k,v)|
854
+ shortnames<<[k.name,k,v]
855
+ code_for_reduce_with k.name, [k,v]
856
+ })
857
+ shortnames.delete ["OperatorToken",OperatorToken,nil]
858
+ record=shortnames.dup
859
+ ruby.concat(exprclasses.map{|exprclass|
860
+ shec=exprclass.name[/[^:]+$/]
861
+ shortnames.map{|(sn,snclass,snparam)|
862
+ warn "empty shortname for #{snclass}" if sn.empty?
863
+ record<<["#{sn}_then_#{shec}", exprclass, nil, snclass, snparam]
864
+ code_for_reduce_with "#{sn}_then_#{shec}", [exprclass, nil, snclass, snparam]
865
+ }
866
+ })
867
+ ruby.concat(exprclasses.map{|exprclass|
868
+ shec=exprclass.name[/[^:]+$/]
869
+ rubyoperatorlist.map{|op|
870
+ record<<["OperatorToken_#{op}_then_#{shec}", exprclass, nil, OperatorToken, op]
871
+ code_for_reduce_with "OperatorToken_#{op}_then_#{shec}", [exprclass, nil, OperatorToken, op]
872
+ }
873
+ }).concat([LiteralNode,VarNode].map{|k|
874
+ shec=k.name[/[^:]+$/]
875
+ record<<["#{shec}_then_#{shec}", k, nil, k, nil]
876
+ code_for_reduce_with "#{shec}_then_#{shec}", [k, nil, k, nil]
877
+ })
878
+
879
+ modname="ReduceWithsFor_#{parser_identity.join('_').tr(':.','_')}"
880
+
881
+ size_cache,rule_popularity=@size_cache.partition{|((i,action),size)| Integer===i }
882
+
883
+ ruby.unshift [<<-END,@ref_to_code.join("\n"),<<-END2]
884
+ #number of coalescences: #{size_cache.size}
885
+ #empty coalescences: #@empty_reduce_withs
886
+ #duplicate coalescences: #@reduce_with_aliases
887
+ #nonduplicate coalescences: #{@reduce_with_cache.size}
888
+ #reduce_with_defns: #@reduce_with_defns
889
+ extend RedParse::ReduceWithUtils
890
+ def self.redparse_modules_init(parser)
891
+ return if defined? @@ref_0
892
+ rules=parser.rules.map{|rule|
893
+ rule.left.subregs.map{|pat|
894
+ case pat
895
+ when String,Regexp #not needed anymore...?
896
+ RedParse::KW(pat)
897
+ when Reg::LookBack,Reg::LookAhead,Reg::Repeat #Reg::Repeat should be handled already by now
898
+ pat.subregs[0]
899
+ #subseqs handled already
900
+ else pat
901
+ end
902
+ }<<rule.right
903
+ }
904
+ END
905
+
906
+ end
907
+ def redparse_modules_init
908
+ ::RedParse::#{modname}.redparse_modules_init(self)
909
+ super
910
+ end
911
+ END2
912
+
913
+ ruby.unshift( "#15 largest coalescences:\n",
914
+ *size_cache.sort_by{|(k,size)| size}[-15..-1].map{ \
915
+ |(k,size)| "##{k.inspect}=#{size}\n"
916
+ })
917
+
918
+ ruby.unshift("#10 most popular rules:\n",
919
+ *rule_popularity.sort_by{|(rule,pop)| pop}[-10..-1].map{ \
920
+ |((i,action),pop)| "##{i} #{action.inspect}=#{pop}\n"
921
+ })
922
+
923
+ warn "15 largest coalescences:"
924
+ size_cache.sort_by{|(klass,size)| size}[-15..-1].each{ \
925
+ |(klass,size)| warn "#{klass.inspect}=#{size}"
926
+ }
927
+
928
+ warn "10 most popular rules:"
929
+ rule_popularity.sort_by{|(rule,pop)| pop}[-10..-1].each{ \
930
+ |((i,action),pop)| warn "#{i} #{action.inspect}=#{pop}"
931
+ }
932
+
933
+
934
+ @ref_to_code=nil
935
+ ruby=["module RedParse::#{modname}\n",ruby,"\nend\n",indexcode]
936
+ @@rules_compile_cache[identity]=ruby
937
+
938
+ path ||= $LOAD_PATH.find{|d| File.exist? File.join(d,"redparse.rb") }+"/redparse/"
939
+ #should use reduce_withs_directory here somehow instead...
940
+
941
+ path += modname+".rb" if path[-1]==?/
942
+ File.open(path,"wb") {|f| ruby.flatten.each{|frag| f.write frag } }
943
+
944
+ #warn "actual write_reduce_withs writing took #{Time.now-start}s"
945
+ warn "size of #{path}: #{File.size path}"
946
+
947
+ ensure
948
+ warn "write_reduce_withs took #{Time.now-start}s" if start
949
+ @reduce_with_cache=nil if @reduce_with_cache
950
+ @size_cache=nil if @size_cache
951
+ end
952
+
953
+ def old_reduce_loop
259
954
  catch(:ParserDone){ loop {
260
955
  #try all possible reductions
261
956
  next if reduce==true
@@ -269,6 +964,55 @@ end
269
964
  #shift our token onto the @stack
270
965
  @stack.push tok
271
966
  }}
967
+ end
968
+
969
+ =begin should be
970
+ reduce_call= HASHED_REDUCER ?
971
+ 'send(@stack[-2].reducer_method(@stack))' :
972
+ 'reduce'
973
+ eval <<-END,__FILE__,__LINE__
974
+ def reduce_loop
975
+ catch(:ParserDone){ ( @stack.push(get_token||break) unless(#{reduce_call}==true) ) while true }
976
+ end
977
+ END
978
+ =end
979
+ def reduce_loop
980
+ catch(:ParserDone){ while true
981
+ #try all possible reductions
982
+ #was: next if reduce==true
983
+ next if send(@stack[-2].reducer_method(@stack))==true
984
+
985
+ #no rule can match current @stack, get another token
986
+ tok=get_token or break
987
+
988
+ #are we done yet?
989
+ #tok.nil? or EoiToken===tok && EoiToken===@stack.last and break
990
+
991
+ #shift our token onto the @stack
992
+ @stack.push tok
993
+ end }
994
+ end
995
+
996
+ if ENV['REDUCE_INTERPRETER']
997
+ alias reduce old_slow_reduce
998
+ alias reduce_loop old_reduce_loop
999
+ end
1000
+
1001
+ def parse
1002
+
1003
+ #hack, so StringToken can know what parser its called from
1004
+ #so it can use it to parse inclusions
1005
+ oldparser=Thread.current[:$RedParse_parser]
1006
+ Thread.current[:$RedParse_parser]||=self
1007
+
1008
+ return @cached_result if defined? @cached_result
1009
+
1010
+ expanded_RULES()
1011
+ # @inputs||=enumerate_exemplars
1012
+
1013
+ @stack=[StartToken.new, get_token]
1014
+ #last token on @stack is always implicitly the lookahead
1015
+ reduce_loop
272
1016
 
273
1017
  @stack.size==2 and return result=NopNode.new #handle empty parse string
274
1018
 
@@ -276,7 +1020,7 @@ end
276
1020
  #with the last an Eoi, and first a StartToken
277
1021
  #there was a parse error
278
1022
  unless @stack.size==3
279
- pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
1023
+ puts( pretty_stack( 15 ))if ENV['PRINT_STACK']
280
1024
  top=MisparsedNode.new("(toplevel)", @stack[1...-1],'')
281
1025
  raise ParseError.new(top.msg,@stack)
282
1026
  end
@@ -314,17 +1058,19 @@ end
314
1058
 
315
1059
  =begin
316
1060
  rescue Exception=>e
1061
+ if ENV['PRINT_PARSE_ERRORS']
317
1062
  input=@lexer
318
1063
  if Array===input
319
- puts "error while parsing:"
320
- pp input
1064
+ STDERR.puts "error while parsing:"
1065
+ STDERR.write input.pretty_inspect
321
1066
  input=nil
322
1067
  else
323
1068
  input=input.original_file
324
- inputname=@lexer.filename
325
- input.to_s.size>1000 and input=inputname
326
- puts "error while parsing: <<< #{input} >>>"
1069
+ # inputname=@lexer.filename
1070
+ STDERR.puts "error while parsing #@filename:#@endline: <<< #{input.inspect if input.inspect.size<=1000} >>>"
327
1071
  end
1072
+ e.backtrace.each{|l| p l }
1073
+ end
328
1074
  raise
329
1075
  else
330
1076
  =end
@@ -354,18 +1100,23 @@ end
354
1100
 
355
1101
  #inline any subsequences in RULES right into the patterns
356
1102
  #reg should do this already, but current release does not
1103
+ #also expand regexp/string to keyword matcher
357
1104
  def expanded_RULES
1105
+ return @rules if defined? @rules
358
1106
  result=RULES()
359
- return result if (-[:foo, -[:bar]]).subregs.grep(Reg::Subseq).empty?
360
- result.map!{|rule|
361
- unless rule.left.subregs.grep(Reg::Subseq)
1107
+ #return result if (-[:foo, -[:bar]]).subregs.grep(Reg::Subseq).empty?
1108
+ @rules=result.map!{|rule|
1109
+ if rule.left.subregs.grep(Reg::Subseq|String|Regexp).empty?
362
1110
  then rule
363
1111
  else
364
1112
  right=rule.right
365
1113
  rule=rule.left.subregs.dup
366
1114
  (rule.size-1).downto(0){|i|
367
- if Reg::Subseq===rule[i]
368
- rule[i,1]=rule[i].subregs
1115
+ case mtr=rule[i]
1116
+ when Reg::Subseq
1117
+ rule[i,1]=mtr.subregs
1118
+ when String,Regexp
1119
+ rule[i]=RedParse::KW(mtr)
369
1120
  end
370
1121
  }
371
1122
  -rule>>right
@@ -482,8 +1233,8 @@ end
482
1233
  #the 'precedence' of comma is somewhat controversial. it actually has
483
1234
  #several different precedences depending on which kind of comma it is.
484
1235
  #the precedence of , is higher than :, => and the assignment operators
485
- #in certain (lhs) contexts. therefore, the precedence of lhs, should
486
- #really be above =.
1236
+ #in certain (lhs) contexts. therefore, the precedence of lhs-comma should
1237
+ #really be above "=".
487
1238
 
488
1239
  #"unary" prefix function names seen has operators have this precedence
489
1240
  #but, rubylexer handles precedence of these and outputs fake parens
@@ -514,21 +1265,6 @@ if defined? SPECIALIZED_KEYWORDS
514
1265
  end
515
1266
 
516
1267
  KW2class={}
517
-
518
- Punc2name={
519
- "("=>"lparen", ")"=>"rparen",
520
- "["=>"lbracket", "]"=>"rbracket",
521
- "{"=>"lbrace", "}"=>"rbrace",
522
- ","=>"comma",
523
- ";"=>"semicolon",
524
- "::"=>"double_colon",
525
- "."=>"dot",
526
- "?"=>"question_mark", ":"=>"colon",
527
- "="=>"equals",
528
- "|"=>"pipe",
529
- "<<"=>"leftleft", ">>"=>"rightright",
530
- "=>"=>"arrow",
531
- }
532
1268
  end
533
1269
 
534
1270
  def self.KW(ident)
@@ -665,17 +1401,24 @@ end
665
1401
  end
666
1402
 
667
1403
  def lower_op
1404
+ @lower_op||=proc{|parser,op|
1405
+ LOWEST_OP===op or (!(parser.VALUELIKE_LA() === op) and
1406
+ parser.left_op_higher(parser.stack[-3],op)
1407
+ )
1408
+ }.extend LowerOp_inspect
1409
+ =begin was
668
1410
  return @lower_op if defined? @lower_op
669
1411
  lower_op=item_that{|op| left_op_higher(@stack[-3],op) }
670
- lower_op=(LOWEST_OP|(~VALUELIKE_LA & lower_op)).la
1412
+ lower_op=(LOWEST_OP|(~VALUELIKE_LA() & lower_op)).la
671
1413
  lower_op.extend LowerOp_inspect
672
1414
  @lower_op=lower_op
1415
+ =end
673
1416
  end
674
1417
 
675
1418
  #this is a hack, should use graphcopy to search for Deferreds and replace with double-Deferred as below
676
1419
  def item_that(*a,&b)
677
1420
  if defined? @generating_parse_tables
678
- huh unless b
1421
+ fail unless b
679
1422
  #double supers, one of them in a block executed after this method returns....
680
1423
  #man that's weird
681
1424
  super(*a){|ob| @saw_item_that[[super(*a,&b),ob]]=true}
@@ -741,21 +1484,31 @@ end
741
1484
  LHS_COMMA=Op('lhs,',true)#&-{:tag => :lhs}
742
1485
  RHS_COMMA=Op('rhs,',true)#&-{:tag => :rhs}
743
1486
  #PARAM_COMMA=Op('param,',true)#&-{:tag => :param}
744
- FUNCLIKE_KEYWORD=KeywordToken&-{:ident=>RubyLexer::FUNCLIKE_KEYWORDS}
1487
+ def FUNCLIKE_KEYWORD
1488
+ KeywordToken&-{:ident=>@funclikes}
1489
+ end
745
1490
  IGN_SEMI_BEFORE=KW(/^(#{RubyLexer::INNERBOUNDINGWORDS.gsub(/(rescue|then)\|/,'')[1...-1]}|end|[)}\]])$/)|EoiToken
746
1491
  IGN_SEMI_AFTER=KW(/^(begin|[;:({|]|then|do|else|ensure)$/)|BlockFormalsNode
747
1492
 
748
1493
  #for use in lookback patterns
749
1494
  OPERATORLIKE_LB=OperatorToken|
750
- KW(/^(not | defined\? | rescue3 | .*[@,] | [ ~ ! ; \( \[ \{ ? : ] | \.{1,3} | :: | => | ![=~])$/x)|
1495
+ KW(/^(not | defined\? | rescue3 | .*[@,] | [~!;\(\[\{?:] | \.{1,3} | :: | => | ![=~])$/x)|
751
1496
  KW(%r{^( \*\*? | << | >> | &&? | \|\|? | \^ | % | / | - | \+ )?=$}x)|
752
1497
  KW(BEGINWORDS)|KW(/^#{INNERBOUNDINGWORDS}$/)|RescueHeaderNode|StartToken|
753
1498
  GoalPostToken|BlockFormalsNode|AssignmentRhsListStartToken
754
1499
 
755
1500
  #for use in lookahead patterns
756
- VALUELIKE_LA=KW(RubyLexer::VARLIKE_KEYWORDS)|NumberToken|SymbolToken|StringToken|UNOP|DEFOP|
757
- KW(/^[({]$/x)|VarNameToken|MethNameToken|HerePlaceholderToken|
758
- KW(BEGINWORDS)|FUNCLIKE_KEYWORD|AssignmentRhsListStartToken
1501
+ def VALUELIKE_LA
1502
+ @valuelike_la ||=
1503
+
1504
+ KW(@varlikes)|NumberToken|SymbolToken|StringToken|UNOP|DEFOP|
1505
+ KW(/^[({]$/x)|VarNameToken|MethNameToken|HerePlaceholderToken|
1506
+ KW(BEGINWORDS)|FUNCLIKE_KEYWORD()|AssignmentRhsListStartToken
1507
+
1508
+ #why isn't this a sufficient implementation of this method:
1509
+ # KW('(')
1510
+ #in which case, '(' can be made the highest precedence operator instead
1511
+ end
759
1512
  LOWEST_OP=KW(/^(#{ENDWORDS})$/)|KW(/^#{INNERBOUNDINGWORDS.sub('rescue|','')}$/)|
760
1513
  EoiToken|GoalPostToken|AssignmentRhsListEndToken
761
1514
 
@@ -770,6 +1523,8 @@ end
770
1523
  def RULES
771
1524
  lower_op= lower_op()
772
1525
 
1526
+
1527
+ result=
773
1528
  [-[StartToken.lb, Expr.-, EoiToken.la]>>:accept,
774
1529
  -[EoiToken]>>:error,
775
1530
  ]+
@@ -786,12 +1541,11 @@ end
786
1541
 
787
1542
  # -[Op('=',true)|KW(/^(rescue|when|\[)$/)|Op(/,$/,true),
788
1543
  # Op(/^(?:unary|rhs)\*$/), ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
789
- # -[MethNameToken|FUNCLIKE_KEYWORD, KW('('),
1544
+ # -[MethNameToken|FUNCLIKE_KEYWORD(), KW('('),
790
1545
  # Op(/^(?:unary|rhs)\*$/), ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
791
1546
  #star should not be used in an lhs if an rhs or param list context is available to eat it.
792
1547
  #(including param lists for keywords such as return,break,next,rescue,yield,when)
793
1548
 
794
- #hmmm.... | in char classes below looks useless (predates GoalPostToken)
795
1549
  -[Op(/^(?:unary|lhs)\*$/), (GoalPostToken|Op(/,$/,true)|KW(/^(in|[=)|;])$/)).la]>>DanglingStarNode, #dangling *
796
1550
  -[Op(/,$/,true), (GoalPostToken|KW(/^(in|[=)|;])$/)).la]>> #dangling ,
797
1551
  stack_monkey("DanglingComma",1,DanglingCommaNode){|stack|
@@ -799,9 +1553,12 @@ end
799
1553
  dcomma.offset=stack.last.offset
800
1554
  stack.push dcomma, stack.pop
801
1555
  },
1556
+ #hmmm.... | in char classes above looks useless (predates GoalPostToken)
1557
+
1558
+ # -[Expr, Op|KW_Op, Expr, lower_op]>>RawOpNode, #most operators
1559
+ -[Expr, Op, Expr, lower_op]>>RawOpNode, #most operators
1560
+ -[Expr, "=>", Expr, lower_op]>>ArrowOpNode,
802
1561
 
803
- -[Expr, Op|KW_Op, Expr, lower_op]>>RawOpNode, #most operators
804
-
805
1562
  #assignment
806
1563
  -[Lvalue, MODIFYASSIGNOP, Expr, lower_op]>>AssignNode,
807
1564
  -[Lvalue, Op('=',true), AssignmentRhsNode, lower_op]>>AssignNode,
@@ -840,11 +1597,6 @@ end
840
1597
  stack[-3].after_comma=true},
841
1598
  #mebbe this should be a lexer hack?
842
1599
 
843
- -[#(OPERATORLIKE_LB&~(MethNameToken|FUNCLIKE_KEYWORD)).lb,
844
- '(', Expr, KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true})]>>ParenedNode,
845
- -[#(OPERATORLIKE_LB&~(MethNameToken|FUNCLIKE_KEYWORD)).lb,
846
- '(', KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true})]>>VarLikeNode, #(), alias for nil
847
-
848
1600
  -[#(OPERATORLIKE_LB&~Op('=',true)).lb,
849
1601
  Expr, RESCUE_OP, Expr, lower_op]>>RescueOpNode,
850
1602
 
@@ -853,12 +1605,22 @@ end
853
1605
  -[Expr, DotOp, CallNode, lower_op]>>DotCall, #binary .
854
1606
  -[Expr, DoubleColonOp, CallNode, lower_op]>>DotCall, #binary ::
855
1607
  -[Expr, DoubleColonOp, VarNode, lower_op]>>ConstantNode,#binary ::
1608
+ #lower_op constaints on lookahead are unnecessary in above 4 (unless I give openparen a precedence)
856
1609
 
857
1610
  -[Expr, "?", Expr, ":", Expr, lower_op]>>TernaryNode,
858
1611
 
859
1612
 
860
1613
  -[MethNameToken, '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>CallNode,
861
- -[FUNCLIKE_KEYWORD, '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>KWCallNode,
1614
+ -[FUNCLIKE_KEYWORD(), '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>KWCallNode,
1615
+
1616
+ -[#(OPERATORLIKE_LB&
1617
+ (MethNameToken|FUNCLIKE_KEYWORD()).~.lb,
1618
+ '(', Expr, KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true}), KW('do').~.la]>>ParenedNode,
1619
+
1620
+ -[#(OPERATORLIKE_LB&
1621
+ (MethNameToken|FUNCLIKE_KEYWORD()).~.lb,
1622
+ '(', KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true}), KW('do').~.la]>>VarLikeNode, #(), alias for nil
1623
+ #constraint on do in above 2 rules is probably overkill
862
1624
 
863
1625
  -[ValueNode, Op(/,$/,true), ValueNode, lower_op]>>CommaOpNode,
864
1626
 
@@ -996,17 +1758,26 @@ end
996
1758
  },
997
1759
 
998
1760
  #treat these keywords like (rvalue) variables.
999
- -[RubyLexer::VARLIKE_KEYWORDS]>>VarLikeNode,
1761
+ -[@varlikes]>>VarLikeNode,
1000
1762
 
1001
1763
  #here docs
1002
1764
  -[HerePlaceholderToken]>>HereDocNode,
1003
- -[HereBodyToken.la]>>delete_monkey(1,"delete_here_body"),
1004
- ##this is rediculous. this should be a lexer hack?
1765
+ #-[HereBodyToken.la]>>delete_monkey(1,"delete_here_body"), ##this is rediculous. this should be a lexer hack?
1005
1766
 
1006
1767
  -[VarNameToken]>>VarNode,
1007
1768
 
1008
1769
 
1009
1770
  ]
1771
+
1772
+ if @rubyversion >= 1.9
1773
+ result.concat [
1774
+ # -['->', ParenedNode.-, 'do', Expr.-, 'end']>>ProcLiteralNode,
1775
+ # -['->', VarLikeNode["nil",{:@value=>nil}].reg, 'do', Expr.-, 'end']>>ProcLiteralNode,
1776
+ -[(DotOp|DoubleColonOp).lb, '(',Expr.-,')', BlockNode.-, KW('do').~.la]>>CallNode,
1777
+ ]
1778
+ end
1779
+
1780
+ return result
1010
1781
  end
1011
1782
 
1012
1783
  if defined? END_ATTACK
@@ -1014,32 +1785,62 @@ if defined? END_ATTACK
1014
1785
  include Reducer
1015
1786
  end
1016
1787
 
1017
- def initialize(input,name="(eval)",line=1,lvars=[],options={})
1018
- @rubyversion=options[:rubyversion]||1.8
1788
+ def signature
1789
+ ancs=class<<self; ancestors end.reject{|k| !k.name}
1790
+ ancs.slice!(ancs.index(RedParse)..-1)
1019
1791
 
1020
- cache=Cache.new(name,line,lvars.sort.join(" "),@rubyversion,self.class.name)
1021
- cache_mode=options[:cache_mode]||:read_write
1022
- raise ArgumentError unless /^(?:read_(?:write|only)|write_only|none)$/===cache_mode.to_s
1023
- read_cache= /read/===cache_mode.to_s
1792
+ lancs=class<<@lexer; ancestors end.reject{|k| !k.name}
1793
+ [RubyLexer,Array].each{|k|
1794
+ if i=lancs.index(k)
1795
+ lancs.slice!(i..-1)
1796
+ end
1797
+ }
1798
+
1799
+ RedParse.signature(@encoding,ancs+lancs)
1800
+ end
1801
+ def RedParse.signature(encoding,ancs=ancestors)
1802
+ @@my_sha||=Digest::SHA256.file(__FILE__)
1803
+ @@node_sha||=Digest::SHA256.file(__FILE__.sub(/\.rb\z/,"/node.rb"))
1804
+ [ancs.map{|m| m.name}, encoding, @@my_sha, @@node_sha,]
1805
+ end
1806
+
1807
+ def initialize(input,name=nil,line=nil,lvars=nil,options=nil)
1808
+ line,name=name,nil if Hash===name or Array===name or Integer===name
1809
+ lvars,line=line,nil if Hash===line or Array===line
1810
+ options,lvars=lvars,nil if Hash===lvars
1811
+ options||={}
1812
+ name||=options[:name]||"(eval)"
1813
+ line||=options[:line]||1
1814
+ lvars||=options[:lvars]||[]
1815
+ @rubyversion=options[:rubyversion]||1.8
1024
1816
  input.binmode if input.respond_to? :binmode
1025
- if read_cache and cache and result=cache.get(input)
1026
- @cached_result=result
1027
- @write_cache=nil
1028
- return
1029
- end
1030
- if /write/===cache_mode.to_s
1031
- @write_cache,@input= cache,input
1032
- else
1033
- @write_cache=nil
1034
- end
1817
+
1818
+ @encoding=options[:encoding]||:ascii
1819
+ @encoding=:binary if @rubyversion<=1.8
1035
1820
 
1036
1821
  if Array===input
1037
1822
  def input.get1token; shift end
1038
1823
  @lexer=input
1824
+ if @rubyversion>=1.9
1825
+ @funclikes=RubyLexer::RubyLexer1_9::FUNCLIKE_KEYWORDS
1826
+ @varlikes=RubyLexer::RubyLexer1_9::VARLIKE_KEYWORDS
1827
+ else
1828
+ @funclikes=RubyLexer::FUNCLIKE_KEYWORDS
1829
+ @varlikes=RubyLexer::VARLIKE_KEYWORDS
1830
+ end
1039
1831
  else
1040
- @lexer=RubyLexer.new(name,input,line,0,:rubyversion=>@rubyversion)
1832
+ if RubyLexer===input
1833
+ @lexer=input
1834
+ else
1835
+ @lexer=RubyLexer.new(name,input,line,0,:rubyversion=>@rubyversion,:encoding=>@encoding)
1836
+ end
1837
+ @funclikes=@lexer::FUNCLIKE_KEYWORDS()
1838
+ @varlikes=@lexer::VARLIKE_KEYWORDS()
1041
1839
  lvars.each{|lvar| @lexer.localvars[lvar]=true }
1840
+ @encoding=@lexer.encoding_name_normalize(@encoding.to_s).to_sym
1841
+ warn "#{@encoding} encoding won't really work right now" if RubyLexer::NONWORKING_ENCODINGS.include? @encoding
1042
1842
  end
1843
+ @funclikes=/#@funclikes|^->$/ if @rubyversion>=1.9
1043
1844
  @filename=name
1044
1845
  @min_sizes={}
1045
1846
  @compiled_rules={}
@@ -1052,26 +1853,82 @@ if defined? END_ATTACK
1052
1853
  compile
1053
1854
  end
1054
1855
  @saw_item_that=nil
1856
+ @print_filter=proc{true}
1857
+
1858
+ if modules=options[:modules]
1859
+ modules.each{|m| extend m}
1860
+ end
1861
+ if modules=options[:lexer_modules]
1862
+ modules.each{|m| @lexer.extend m}
1863
+ end
1864
+
1865
+ dir=reduce_withs_directory
1866
+ modname="ReduceWithsFor_#{parser_identity.join('_').tr(':.','_')}"
1867
+
1868
+ cache=Cache.new(
1869
+ File===input,name,
1870
+ :line,line,:encoding,@encoding,:locals,lvars.sort.join(","),
1871
+ @rubyversion, :/, *signature
1872
+ )
1873
+ cache_mode=options[:cache_mode]||ENV['REDPARSE_CACHE']||:read_write
1874
+ cache_mode=cache_mode.to_sym
1875
+ raise ArgumentError,"bad cache mode #{cache_mode}" unless /^(?:read_(?:write|only)|write_only|none)$/===cache_mode.to_s
1876
+ read_cache= /read/===cache_mode.to_s
1877
+ if read_cache and cache and result=cache.get(input)
1878
+ @cached_result=result
1879
+ @write_cache=nil
1880
+ return
1881
+ end
1882
+ if /write/===cache_mode.to_s
1883
+ @write_cache,@input= cache,input
1884
+ else
1885
+ @write_cache=nil
1886
+ end
1887
+
1888
+ #but, need to skip warning lines matching this regexp:
1889
+ # /(^|[/\\])#{modname}\.rb:\d+: warning: mismatched indentations at 'end' with 'if' at \d+$/
1890
+
1891
+ begin
1892
+ require File.join(dir,modname)
1893
+ rescue LoadError
1894
+ else
1895
+ extend self.class.const_get( modname )
1896
+ end
1897
+ redparse_modules_init
1898
+ end
1899
+
1900
+ def self.parse(*args)
1901
+ new(*args).parse
1902
+ end
1903
+
1904
+ def redparse_modules_init
1905
+
1906
+ end
1907
+
1908
+ def reduce_withs_directory
1909
+ "redparse"
1055
1910
  end
1056
1911
 
1057
- attr_accessor :lexer
1058
- attr :rubyversion
1912
+ attr_accessor :lexer, :print_filter
1913
+ attr_reader :rubyversion, :stack
1914
+
1915
+ alias rules expanded_RULES
1059
1916
 
1060
1917
  def get_token(recursing=false)
1061
1918
  unless @moretokens.empty?
1062
1919
  @last_token=@moretokens.shift
1063
- p @last_token if ENV['PRINT_TOKENS'] unless recursing
1920
+ p @last_token if ENV['PRINT_TOKENS'] && @print_filter[@last_token] and not recursing
1064
1921
  return @last_token
1065
1922
  end
1066
1923
 
1067
1924
  rpt=ENV['RAW_PRINT_TOKENS']
1068
1925
  begin
1069
1926
  result=@lexer.get1token or break
1070
- p result if rpt
1927
+ p result if rpt and @print_filter[result]
1071
1928
 
1072
1929
  #set token's line
1073
1930
  result.startline= @endline||=1
1074
- result.endline||=@endline if result.respond_to? :endline=
1931
+ #result.endline||=@endline if result.respond_to? :endline=
1075
1932
 
1076
1933
  if result.respond_to?(:as) and as=result.as
1077
1934
  #result=make_kw(as,result.offset)
@@ -1080,7 +1937,10 @@ end
1080
1937
  result=result.dup
1081
1938
  result.ident=as
1082
1939
  else
1083
- result=make_kw(as,result.offset)
1940
+ result2=make_kw(as,result.offset)
1941
+ result2.startline=result.startline
1942
+ result2.endline=result.endline
1943
+ result=result2
1084
1944
  end
1085
1945
  result.not_real! if result.respond_to? :not_real!
1086
1946
  else
@@ -1102,28 +1962,44 @@ end
1102
1962
  case name=result.ident
1103
1963
 
1104
1964
  when /^(#{BINOP_KEYWORDS.join '|'})$/o #should be like this in rubylexer
1105
- result=OperatorToken.new(name,result.offset) unless result.has_end?
1106
- when "|"; result=GoalPostToken.new(result.offset) #is this needed still?
1965
+ unless result.has_end?
1966
+ orig=result
1967
+ result=OperatorToken.new(name,result.offset)
1968
+ result.endline=orig.endline
1969
+ end
1970
+ when "|";
1971
+ orig=result
1972
+ result=GoalPostToken.new(result.offset) #is this needed still? (yes)
1973
+ result.endline=orig.endline
1107
1974
  when "__FILE__"; #I wish rubylexer would handle this
1108
- class<<result; attr_accessor :value; end
1109
- result.value=@file.dup
1975
+ #class<<result; attr_accessor :value; end
1976
+ assert result.value==@file.dup
1110
1977
  when "__LINE__"; #I wish rubylexer would handle this
1111
- class<<result; attr_accessor :value; end
1112
- result.value=@endline
1978
+ #class<<result; attr_accessor :value; end
1979
+ assert result.value==@endline
1113
1980
  else
1114
1981
  result=make_kw name,result.offset if defined? SPECIALIZED_KEYWORDS
1115
1982
  #warning, this may discard information stored in instance vars of result
1116
1983
  end
1117
1984
 
1985
+ when StringToken,HerePlaceholderToken
1986
+ @endline=result.endline
1987
+
1118
1988
  when EoiToken; break
1119
- when HereBodyToken; break
1989
+ when HereBodyToken;
1990
+ @endline=result.endline
1991
+ redo
1120
1992
  when AssignmentRhsListStartToken; break
1121
1993
  when AssignmentRhsListEndToken; break
1122
1994
  when IgnoreToken; redo
1123
1995
  end
1124
1996
  end
1125
1997
  end while false
1126
- p result if ENV['PRINT_TOKENS'] unless recursing
1998
+ p result if ENV['PRINT_TOKENS'] && @print_filter[@last_token] unless recursing
1999
+
2000
+ #ugly weak assertion
2001
+ assert result.endline==@endline unless result.ident==';' && result.endline-1==@endline or EoiToken===result
2002
+
1127
2003
  return @last_token=result
1128
2004
  end
1129
2005
 
@@ -1135,21 +2011,11 @@ end
1135
2011
  @moretokens.unshift token
1136
2012
  end
1137
2013
 
1138
- =begin
1139
- self.LOOKAHEAD_CLASSES.each_with_index{|classes,i|
1140
- case classes
1141
- when Class: huh
1142
- when Array: classes.flatten.each{huh}
1143
- else
1144
- end
1145
- }
1146
- =end
1147
-
1148
- # def fixup_multiple_assignments!; end
1149
2014
  end
1150
2015
 
1151
2016
 
1152
2017
  if __FILE__==$0
2018
+ #this code has moved to bin/redparse; really, all this should just go away
1153
2019
  require 'problemfiles'
1154
2020
  class NeverExecThis<RuntimeError; end
1155
2021