redparse 0.8.1 → 0.8.2

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -3,7 +3,15 @@
3
3
  require 'rubygems'
4
4
  require 'hoe'
5
5
  require 'lib/redparse/version.rb'
6
-
6
+
7
+ if $*==["test"]
8
+ #hack to get 'rake test' to stay in one process
9
+ #which keeps netbeans happy
10
+ $:<<"lib"
11
+ require "test/test_redparse.rb"
12
+ Test::Unit::AutoRunner.run
13
+ exit
14
+ end
7
15
 
8
16
  readme=open("README.txt")
9
17
  readme.readline("\n== DESCRIPTION:")
@@ -13,13 +21,14 @@ require 'lib/redparse/version.rb'
13
21
  hoe=Hoe.new("redparse", RedParse::VERSION) do |_|
14
22
  _.author = "Caleb Clausen"
15
23
  _.email = "redparse-owner @at@ inforadical .dot. net"
16
- _.url = ["http://redparse.rubyforge.org/", "http://rubyforge.org/projects/redparse/"]
17
- _.extra_deps << ['rubylexer', '>= 0.7.3']
24
+ _.url = ["http://github.com/coatl/redparse/", "http://rubyforge.org/projects/redparse/"]
25
+ _.extra_deps << ['rubylexer', '>= 0.7.4']
18
26
  _.extra_deps << ['reg', '>= 0.4.7']
27
+ _.extra_deps << 'Ron'
19
28
  # _.test_globs=["test/*"]
20
29
  _.description=desc
21
30
  _.summary=desc[/\A[^.]+\./]
22
- # _.spec_extras={:bindir=>''}
31
+ _.spec_extras={:bindir=>'bin/'}
23
32
  # _.rdoc_pattern=/\A(README\.txt|lib\/.*\.rb)\Z/
24
33
  # _.remote_rdoc_dir="/"
25
34
  end
@@ -73,10 +73,11 @@ class ParseTree<RawParseTree
73
73
  return tree,warnings
74
74
  ensure
75
75
  STDERR.reopen oldSTDERR
76
-
76
+ if warnstash
77
77
  warnstash.rewind
78
78
  warnings.replace warnstash.read.split
79
79
  warnstash.close
80
+ end
80
81
  end
81
82
  end
82
83
 
@@ -185,7 +186,7 @@ inputs.each_index{|i|
185
186
  when :lisp
186
187
  puts tree.to_lisp
187
188
  when :unparse
188
- puts tree.unparse({})
189
+ puts tree.unparse
189
190
  when :parsetree
190
191
  tree=tree.to_parsetree
191
192
  hack=tree.dup
@@ -19,19 +19,1968 @@
19
19
  #warn 'hacking up LOAD_PATH to include the latest RubyLexer!'
20
20
  #$:.unshift Dir.pwd+'/../rubylexer/lib', Dir.pwd+'/../rubylexer'
21
21
 
22
- # "faster rule compiler is untested"
23
22
 
24
- require 'rubygems'
23
+ require 'forwardable'
24
+
25
+ begin
26
+ require 'rubygems'
27
+ rescue LoadError=>e
28
+ #hope we don't need it
29
+ raise unless /rubygems/===e.message
30
+ end
25
31
  require 'rubylexer'
26
32
  require 'reg'
27
33
 
28
- require "redparse/node"
29
- #require "redparse/decisiontree"
30
- require "redparse/reg_more_sugar"
31
- class RedParse
32
- # include Nodes
34
+ require "redparse/node"
35
+ #require "redparse/decisiontree"
36
+ require "redparse/reg_more_sugar"
37
+ require "redparse/generate"
38
+
39
+ class RedParse
40
+
41
+ ####### generic stuff for parsing any(?) language
42
+
43
+ # include Nodes
44
# A named wrapper around a block that manipulates the parse stack directly.
#
# name                - identifier for this monkey; also the string that
#                       Marshal stores for it (see #_dump / ._load)
# first_changed_index - index of the first stack element the block may touch;
#                       a positive value is negated, so the stored value is
#                       never positive
# and_expect_node     - stored as-is and exposed as #and_expect_node / #hint
# options             - accepted for interface compatibility; unused here
# monkey_code         - the block; invoked with the stack through #[]
class StackMonkey
  def initialize(name, first_changed_index, and_expect_node, options={}, &monkey_code)
    first_changed_index = -first_changed_index if first_changed_index > 0
    @name = name
    @first_changed_index = first_changed_index
    @and_expect_node = and_expect_node
    @monkey_code = monkey_code
  end

  attr_reader :name, :first_changed_index, :and_expect_node, :monkey_code
  alias hint and_expect_node
  attr_accessor :exemplars

  # Runs the wrapped block against +stack+ and returns whatever it returns.
  def [](stack)
    @monkey_code[stack]
  end

  # Marshal hook: a StackMonkey is serialized as just its name.
  def _dump(depth)
    @name
  end

  # Marshal hook: +str+ is the name written by #_dump. The live object is
  # looked up under that name in the +undumpables+ table of the parser held
  # in Thread.current[:$RedParse_parser].
  #
  # (This used to index the table with @name, which inside a class method is
  # an unset class-level instance variable, i.e. always nil.)
  def self._load(str)
    Thread.current[:$RedParse_parser].undumpables[str]
  end

  # Returns a C "goto" statement when @goto_label has been set, otherwise nil.
  # Nothing in this class currently sets @goto_label: the generator that
  # would do so is the disabled template preserved below.
  def action2c
    #"return the whole thing on first call, just a goto stmt after that"
    return "    goto #@goto_label;\n" if defined?(@goto_label)

    # --- disabled code generator, kept for reference ---
    # <<-E
    #   #{@goto_label=@name.gsub(/[^a-z0-9_]/,'_')}:
    #     monkey=rb_hash_get(undumpables,rb_cstr2str("#@name"));
    #     rb_funcall(monkey,rb_intern("[]"),huh_stack);
    #
    #     /*recover from stackmonkey fiddling*/
    #     for(i=0;i<#{-@first_changed_index};++i) {
    #       rb_ary_unshift(lexer_moretokens,
    #         rb_ary_pop(huh_semantic_stack));
    #       rb_ary_pop(huh_syntax_stack);
    #     }
    #
    #     goto #{Node===@and_expect_node ?
    #            postreduceaction4this_state(@and_expect_node) :
    #            shiftaction4this_state
    #     };
    # E
  end
end
93
# A StackMonkey whose action is to delete one element from the stack.
#
# index - position of the element to delete; a positive value is negated
#         before use, so the stored index is never positive
# name  - passed through to StackMonkey as the monkey's name
class DeleteMonkey < StackMonkey
  def initialize(index, name)
    offset = index > 0 ? -index : index
    @index = offset
    # No expected node is supplied (nil); the block removes the element
    # at the normalized offset.
    super(name, offset, nil) { |stack| stack.delete_at(offset) }
  end
end
100
+ def stack_monkey(*args,&block) StackMonkey.new(*args,&block) end
101
+ def self.stack_monkey(*args,&block) StackMonkey.new(*args,&block) end
102
+ def delete_monkey(index,name) DeleteMonkey.new(index,name) end
103
+
104
+ def evaluate rule
105
+ #dissect the rule
106
+ if false
107
+ rule=rule.dup
108
+ lookahead_processor=(rule.pop if Proc===rule.last)
109
+ node_type=rule.pop
110
+ else
111
+ Reg::Transform===rule or fail
112
+ node_type= rule.right
113
+ rule=rule.left.subregs.dup
114
+ lookahead_processor=(rule.pop if Proc|::Reg::LookAhead===rule.last)
115
+ lookback=rule[0]=rule[0].subregs[0] if ::Reg::LookBack===rule[0]
116
+ end
117
+
118
+ #index of data at which to start matching
119
+ i=@stack.size-1 #-1 because last element of @stack is always lookahead
120
+
121
+ #I could call this a JIT compiler, but that's a bit grandiose....
122
+ #more of a JIT pre-processor
123
+ compiled_rule=@compiled_rules[rule]||=
124
+ rule.map{|pattern|
125
+ String|Regexp===pattern ? KW(pattern) : pattern
126
+ }
127
+
128
+ #what's the minimum @stack size this rule could match?
129
+ rule_min_size=@min_sizes[compiled_rule]||=
130
+ compiled_rule.inject(0){|sum,pattern|
131
+ sum + pattern.itemrange.begin
132
+ }
133
+ i>=rule_min_size or return false
134
+
135
+ matching=[]
136
+
137
+ #actually try to match rule elements against each @stack element in turn
138
+ compiled_rule.reverse_each{|matcher|
139
+ i.zero? and fail
140
+ target=matching
141
+ #is this matcher optional? looping?
142
+ loop= matcher.itemrange.last.to_f.infinite?
143
+ minimum=matcher.itemrange.first
144
+ optional=minimum.zero?
145
+ matching.unshift target=[] if loop
146
+ if loop or optional
147
+ matcher=matcher.subregs[0]
148
+ end
149
+
150
+ begin
151
+ if matcher===@stack[i-=1] #try match
152
+ target.unshift @stack[i]
153
+ else
154
+ #if match failed, the whole rule fails
155
+ #unless this match was optional, in which case, ignore it
156
+ #or was looping and met its minimum
157
+ #but bump the data position back up, since the latest datum
158
+ #didn't actually match anything.
159
+ return false unless optional or loop&&target.size>=minimum
160
+ i+=1
161
+ matching.unshift nil unless loop
162
+ break
163
+ end
164
+ end while loop
165
+ }
166
+
167
+ matchrange= i...-1 #what elems in @stack were matched?
168
+
169
+ #give lookahead matcher (if any) a chance to fail the match
170
+ case lookahead_processor
171
+ when ::Reg::LookAhead
172
+ return false unless lookahead_processor.subregs[0]===@stack.last
173
+ when Proc
174
+ return false unless lookahead_processor[self,@stack.last]
175
+ end
176
+
177
+ #if there was a lookback item, don't include it in the new node
178
+ if lookback
179
+ matchrange= i+1...-1 #what elems in @stack were matched?
180
+ matching.shift
181
+ end
182
+
183
+
184
+ #replace matching elements in @stack with node type found
185
+ case node_type
186
+ when Class
187
+ node=node_type.new(*matching)
188
+ node.startline||=@stack[matchrange.first].startline
189
+ node.endline=@endline
190
+ @stack[matchrange]=[node]
191
+ when Proc,StackMonkey; node_type[@stack]
192
+ when :shift; return 0
193
+ when :accept,:error; throw :ParserDone
194
+ else fail
195
+ end
196
+
197
+ return true #let caller know we found a match
198
+
199
+
200
+ rescue Exception=>e
201
+ #puts "error (#{e}) while executing rule: #{rule.inspect}"
202
+ #puts e.backtrace.join("\n")
203
+ raise
204
+ end
205
+
206
# Raised when parsing fails. Carries the parser stack that was passed in
# alongside the message.
#
# msg   - the exception message
# stack - stored unchanged and exposed through #stack
class ParseError < RuntimeError
  def initialize(msg, stack)
    # The previous version called super(msg) twice and carried an
    # `if false` branch that would have appended the classes and line
    # ranges of the stack entries to the message; that branch never ran,
    # so the message has always been exactly +msg+.
    super(msg)
    @stack = stack
  end

  attr_reader :stack
end
226
+
227
+ def [](*args)
228
+ @stack.[](*args)
229
+ end
230
+
231
+ def []=(*args)
232
+ @stack.[]=(*args)
233
+ end
234
+
235
+ #try all possible reductions
236
# Tries every rule in @rules, last rule first, stopping at the first one for
# which #evaluate returns a truthy value.
#
# Returns that truthy value; otherwise the (falsy) result of the final
# evaluate call, or nil when @rules is empty.
def reduce
  outcome = nil
  @rules.reverse_each do |rule|
    outcome = evaluate(rule)
    break if outcome
  end
  outcome
end
243
+
244
+ def parse
245
+ #hack, so StringToken can know what parser its called from
246
+ #so it can use it to parse inclusions
247
+ oldparser=Thread.current[:$RedParse_parser]
248
+ Thread.current[:$RedParse_parser]||=self
249
+
250
+ @rules||=expanded_RULES()
251
+ # @inputs||=enumerate_exemplars
252
+
253
+ @stack=[StartToken.new, get_token]
254
+ #last token on @stack is always implicitly the lookahead
255
+ catch(:ParserDone){ loop {
256
+ #try all possible reductions
257
+ next if reduce==true
258
+
259
+ #no rule can match current @stack, get another token
260
+ tok=get_token or break
261
+
262
+ #are we done yet?
263
+ #tok.nil? or EoiToken===tok && EoiToken===@stack.last and break
264
+
265
+ #shift our token onto the @stack
266
+ @stack.push tok
267
+ }}
268
+
269
+ @stack.size==2 and return NopNode.new #handle empty parse string
270
+
271
+ #unless the @stack is 3 tokens,
272
+ #with the last an Eoi, and first a StartToken
273
+ #there was a parse error
274
+ unless @stack.size==3
275
+ pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
276
+ top=MisparsedNode.new("(toplevel)", @stack[1...-1],'')
277
+ raise ParseError.new(top.msg,@stack)
278
+ end
279
+ EoiToken===@stack.last or fail
280
+ StartToken===@stack.first or fail
281
+
282
+ result= @stack[1]
283
+
284
+
285
+ #multiple assignment must be resolved
286
+ #afterwards by walking the parse tree.
287
+ #(because the relative precedences of = and ,
288
+ #are reversed in multiple assignment.)
289
+ # result.respond_to? :fixup_multiple_assignments! and
290
+ # result=result.fixup_multiple_assignments!
291
+
292
+ #relative precedence of = and rescue are also inverted sometimes
293
+ # result.respond_to? :fixup_rescue_assignments! and
294
+ # result=result.fixup_rescue_assignments!
295
+
296
+ #do something with error nodes
297
+ msgs=[]
298
+ result.walk{|parent,i,subi,node|
299
+ not if node.respond_to? :error and node.error?(@rubyversion)
300
+ msgs<< @filename+":"+node.blame.msg
301
+ end
302
+ } if result.respond_to? :walk #hack hack
303
+ result.errors=msgs unless msgs.empty?
304
+ #other types of errors (lexer errors, exceptions in lexer or parser actions)
305
+ #should be handled in the same way, but currently are not
306
+ # puts msgs.join("\n")
307
+
308
+ =begin
309
+ rescue Exception=>e
310
+ input=@lexer
311
+ if Array===input
312
+ puts "error while parsing:"
313
+ pp input
314
+ input=nil
315
+ else
316
+ input=input.original_file
317
+ inputname=@lexer.filename
318
+ input.to_s.size>1000 and input=inputname
319
+ puts "error while parsing: <<< #{input} >>>"
320
+ end
321
+ raise
322
+ else
323
+ =end
324
+ unless msgs.empty?
325
+ pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
326
+ raise RedParse::ParseError.new(msgs.join("\n"),@stack)
327
+ end
328
+
329
+ # result=NopNode.new if EoiToken===result
330
+ return result
331
+ ensure
332
+ @stack=nil
333
+ Thread.current[:$RedParse_parser]=oldparser
334
+ end
335
+
336
+
337
+ #HIER=Class::FlattenedHierarchy.new *STACKABLE_CLASSES
338
+
339
+ def new_disabled_reduce
340
+ #@hier||=Class::FlattenedHierarchy.new *STACKABLE_CLASSES()
341
+ @reducer||=Reducer.new(@rules)
342
+
343
+ @reducer.reduce(@stack)
344
+ end #
345
+ #
346
+ if defined? END_ATTACK
347
+ class RuleSet
348
+ def initialize(rules)
349
+ @rules=rules.reverse
350
+ #rule order must be reversed relative to the usual RedParse rule
351
+ #order... merely so that ffs can work right.
352
+ @maxmask=(1<<@rules.size)-1
353
+ @subclasses_of=child_relations_among(*STACKABLE_CLASSES())
354
+ end
355
+
356
# Builds an integer bitmask with bit i set for every @rules[i] that is
# also contained in +rules+.
#
# rules - collection tested with #include?
#
# Returns the mask (0 when nothing matches).
def rules2mask(rules)
  bit = -1
  @rules.inject(0) do |mask, rule|
    bit += 1
    rules.include?(rule) ? mask | (1 << bit) : mask
  end
end
363
+
364
# Expands a bitmask into the list of rules whose bits are set, lowest bit
# first.
#
# This method used to be defined twice in a row. The first definition was
# shadowed by the second and was also wrong: it tested `mask&(1<<i)` for
# truthiness, and 0 is truthy in Ruby, so it selected every rule. Only the
# effective (second) definition is kept.
#
# mask - integer bitmask; bit i corresponds to @rules[i]
#
# Returns an Array of the selected rules.
def mask2rules(mask)
  result = []
  while mask.nonzero?
    # ffs is 1-based (0 means "no bit set"), hence the -1
    i = ffs(mask) - 1
    result << @rules[i]
    mask &= ~(1 << i) # clear the bit just handled
  end
  result
end
380
+
381
# Yields each rule selected by +mask+ together with its index, lowest bit
# first.
#
# This method used to be defined twice in a row. The first definition
# (default mask -1) was shadowed by the second and was also wrong: it tested
# `mask&(1<<i)` for truthiness, and 0 is truthy in Ruby, so it yielded every
# rule regardless of the mask. Only the effective (second) definition is
# kept.
#
# mask - integer bitmask; bit i corresponds to @rules[i]. Defaults to
#        @maxmask.
#
# Yields rule, index. Returns nil.
def each_rule(mask=@maxmask)
  while mask.nonzero?
    # ffs is 1-based (0 means "no bit set"), hence the -1
    i = ffs(mask) - 1
    yield @rules[i], i
    mask &= ~(1 << i) # clear the bit just handled
  end
end
393
+
394
+
395
+ @@FFS_TABLE=[nil]
396
+ 1.upto(8){|n|
397
+ @@FFS_TABLE*=2
398
+ @@FFS_TABLE[@@FFS_TABLE.size/2]=n
399
+ }
400
+ def rb_ffs(mask)
401
+ chunks=0
402
+ until mask.zero?
403
+ result=@@FFS_TABLE[mask&0xFF]
404
+ return result+(chunks<<3) if result
405
+ chunks+=1
406
+ mask>>=8
407
+ end
408
+ return 0
409
+ end
410
+
411
+ begin
412
+ require 'inline'
413
+ inline{|inline|
414
+ inline.prefix '#define _GNU_SOURCE'
415
+ inline.include '"string.h"'
416
+ inline.include '"limits.h"'
417
+ inline.c %{
418
+ unsigned c_ffs(VALUE mask){
419
+ if FIXNUM_P(mask) {
420
+ return ffsl(NUM2UINT(mask));
421
+ } else if(TYPE(mask)==T_BIGNUM) {
422
+ struct RBignum* bn=RBIGNUM(mask);
423
+ int len=bn->len;
424
+ int i;
425
+ unsigned offset=0;
426
+ unsigned result=0;
427
+ for(i=0;i<len;++i){
428
+ /*printf("least:%x\\n", ((BDIGIT*)(bn->digits))[i]);*/
429
+ /*printf("most:%x\\n", ((BDIGIT*)(bn->digits))[len]);*/
430
+ result=ffs(((BDIGIT*)(bn->digits))[i]);
431
+ if (result) break;
432
+ offset+=sizeof(int)*CHAR_BIT;
433
+ }
434
+ if (result==0) return 0;
435
+ return result+offset;
436
+ } else {
437
+ rb_fatal("bad argument to ffs");
438
+ }
439
+ }
440
+ }
441
+ }
442
+ alias ffs c_ffs
443
+ rescue Exception=>e
444
+ warn "error (#{e.class}) while defining inline c ffs()"
445
+ warn "original error: #{e}"
446
+ warn "falling back to ruby version of ffs()"
447
+ alias ffs rb_ffs
448
+
449
+ end
450
+
451
+
452
+
453
+
454
+ #just the left side (the stack/lookahead matchers)
455
+ def LEFT
456
+ @rules.map{|r| r.left.subregs }.flatten
457
+ end
458
+
459
+ #remove lookahead and lookback decoration
460
+ def LEFT_NO_LOOKING
461
+ l=LEFT()
462
+ l.map!{|m|
463
+ case m #
464
+ when Reg::LookAhead,Reg::LookBack; m.subregs[0]
465
+ when Proc; []
466
+ else m #
467
+ end #
468
+ }
469
+ l
470
+ end
471
+
472
+ #all classes mentioned in rules, on left and right sides
473
+ def STACKABLE_CLASSES #
474
+ return @sc_result unless @sc_result.nil?
475
+ @sc_result=false
476
+ l=LEFT_NO_LOOKING()
477
+ l=l.map{|lm| sc_juice lm}.flatten.compact
478
+ r= @rules.map{|rr| rr.right }.grep(Class) #classes in productions
479
+ result=l+r
480
+ @sc_result=result.grep(Class).uniq
481
+ fail if @sc_result.empty?
482
+ return @sc_result
483
+ end
484
+
485
+ def juice(m)
486
+ case m #
487
+ when Class;
488
+ return [m] unless @subclasses_of
489
+ result=[m] # and subclasses too
490
+ i=0
491
+ while item=result[i]
492
+ #p item
493
+ result.concat @subclasses_of[item]
494
+ i += 1
495
+ end
496
+ result
497
+ when String,Regexp; juice(RedParse.KW(m))
498
+ when Reg::And; m.subregs.map{|x| juice(x).flatten.compact}.inject{|sum,rr| sum&rr}
499
+ when Reg::Or; m.subregs.map( &method(:juice) )
500
+ when Reg::Not;
501
+ m=m.subregs[0]
502
+ if Class===m or (Reg::Or===m and
503
+ m.subregs.inject{|sum,x| sum && (Class===x) })
504
+ j=juice(m)
505
+ STACKABLE_CLASSES()-j.flatten.compact rescue j
506
+ else
507
+ STACKABLE_CLASSES()
508
+ end
509
+ else STACKABLE_CLASSES()
510
+ end
511
+ end
512
+
513
+ def sc_juice(m)
514
+ case m #
515
+ when Class; [m]
516
+ when String,Regexp; juice(RedParse.KW(m))
517
+ # when String,Regexp; [KeywordToken]
518
+ when Reg::And; m.subregs.map{|x| sc_juice(x)}.compact.map{|x| x.flatten.compact}.inject{|sum,rr| sum&rr }
519
+ when Reg::Or; m.subregs.map( &method(:sc_juice) )
520
+ when Reg::Not; sc_juice(m.subregs[0])
521
+ when Reg::LookAhead, Reg::LookBack; sc_juice(m.subregs[0])
522
+ else []
523
+ end
524
+ end
525
+
526
+ def LOOKAHEAD_CLASSES rule
527
+ last=rule.left.subregs.last
528
+ return STACKABLE_CLASSES() unless Reg::LookAhead===last
529
+ la= last.subregs[0]
530
+ return juice(la).flatten.compact
531
+ end
532
+ #
533
+ def TOS_CLASSES rule
534
+ i=-1
535
+ mats=rule.left.subregs
536
+ m=mats[i]
537
+ m=mats[i-=1] if Reg::LookAhead===m || Proc===m
538
+ result=[]
539
+ while Reg::Repeat===m and m.times.min.zero?
540
+ result<<juice(m.subregs[0])
541
+ m=mats[i-=1]
542
+ end
543
+ return (result+juice(m)).flatten.compact
544
+ end
545
+
546
+ def [](i)
547
+ @rules[i]
548
+ end
549
+
550
+ end #
551
+ #
552
+ module Reducer
553
+ @@rulesets={}
554
+ @@class_narrowerses={}
555
+ def compile(recompile=false)
556
+ klass=self.class
557
+
558
+ #use cached result if available
559
+ if @@rulesets[klass] and !recompile
560
+ @ruleset=@@rulesets[klass]
561
+ @class_narrowers=@@class_narrowerses[klass]
562
+ return
563
+ end
564
+
565
+ #actual rule compilation
566
+ @ruleset=RuleSet.new @rules
567
+ @class_narrowers=[tos=Hash.new(0),la=Hash.new(0)]
568
+ @ruleset.each_rule{|r,i|
569
+ @ruleset.LOOKAHEAD_CLASSES(r).each{|klass2|
570
+ la[klass2] |= 1<<i
571
+ }
572
+ @ruleset.TOS_CLASSES(r).each{|klass2|
573
+ tos[klass2] |= 1<<i
574
+ }
575
+ }
576
+
577
+ #save result to cache if not too dynamic
578
+ if !recompile
579
+ @@rulesets[klass]=@ruleset
580
+ @@class_narrowerses[klass]=@class_narrowers
581
+ end
582
+ end
583
+
584
+ def new_reduce
585
+ # mask=-1
586
+ # (-1).downto(-@class_narrowers.size){|i|
587
+ # mask &= @class_narrowers[i][@stack[i].class]
588
+ # }
589
+ mask=
590
+ @class_narrowers[-1][@stack[-1].class]&
591
+ @class_narrowers[-2][@stack[-2].class]
592
+ @ruleset.each_rule(mask){|r,i|
593
+ res=evaluate(r) and return res
594
+ }
595
+ return false
596
+ end
597
+ end
598
+ end
599
+
600
# Like map, but the block also receives the element's position.
#
# list - the collection to walk
#
# Yields element, index (starting at 0).
# Returns an Array of the block's results, in order.
def map_with_index(list)
  position = -1
  list.map { |elem| yield(elem, position += 1) }
end
605
+
606
+ def all_rules
607
+ return @all_rules if defined? @all_rules
608
+
609
+ @inputs||=enumerate_exemplars
610
+ @rules=expanded_RULES #force it to be recalculated
611
+ @all_rules = map_with_index(@rules){|r,i| Rule.new r,i}
612
+
613
+ @all_rules.each{|r|
614
+ if StackMonkey===r.action
615
+ r.action.exemplars=@inputs.grep r.action.hint
616
+ end
617
+ }
618
+
619
+ warn "error recovery rules disabled for now; creates too many states and masks errors"
620
+ @all_rules.reject!{|r| r.action==MisparsedNode }
621
+
622
+ #names have to be allocated globally to make sure they don't collide
623
+ names=@all_rules.map{|r|
624
+ if r.action.respond_to? :name
625
+ r.action.name
626
+ else
627
+ r.action.to_s
628
+ end
629
+ }.sort
630
+ dups={}
631
+ names.each_with_index{|name,i|
632
+ dups[name]=0 if name==names[i+1]
633
+ }
634
+ @all_rules.each{|r|
635
+ r.name=
636
+ if r.action.respond_to? :name
637
+ r.action.name.dup
638
+ else
639
+ r.action.to_s
640
+ end
641
+ if dups[r.name]
642
+ count=dups[r.name]+=1
643
+ r.name<<"_#{count}"
644
+ end
645
+ }
646
+ end
647
+
648
+ def all_dotted_rules
649
+ all_rules.map{|rule|
650
+ (0...rule.patterns.size).map{|i|
651
+ DottedRule.create(rule,i,self)
652
+ }
653
+ }.flatten
654
+ end
655
+
656
+ #$OLD_PAA=1
657
+
658
+ def all_initial_dotted_rules
659
+ return @all_initial_dotted_rules if defined? @all_initial_dotted_rules
660
+ @all_initial_dotted_rules=result=
661
+ all_rules.map{|rule| DottedRule.create(rule,0,nil) }
662
+
663
+ p :all_init
664
+
665
+ unless defined? $OLD_PAA
666
+ scanning=result
667
+ provisionals=nil
668
+ while true
669
+ old_provisionals=provisionals
670
+ provisionals={}
671
+ scanning.each{|dr|
672
+ dr.also_allow=dr.compute_also_allow(provisional=[false]) #fill out dr.also_allow
673
+ provisionals[dr]=provisional[0]
674
+ }
675
+ scanning=provisionals.map{|dr,val| dr if val }.compact
676
+ end until provisionals==old_provisionals
677
+ end
678
+ p :all_init_done
679
+
680
+ return result
681
+ end
682
+
683
+ class Rule #original user rules, slightly chewed on
684
+ def initialize(rawrule,priority)
685
+ @priority=priority
686
+ @action=rawrule.right
687
+ @patterns=rawrule.left.subregs.dup
688
+ #remove lookback decoration if any, just note that lb was present
689
+ if Reg::LookBack===@patterns[0]
690
+ @lookback=true
691
+ @patterns[0]=@patterns[0].subregs[0]
692
+ end
693
+
694
+ case @patterns[-1]
695
+ #Symbol is pointless here, methinks.
696
+ when Proc,Symbol; #do nothing
697
+ when Reg::LookAhead; @patterns[-1]=@patterns[-1].subregs[0]
698
+ else @patterns.push Object #add la if none was present
699
+ end
700
+
701
+ #search for looping matchers with minimum >0 and replace them
702
+ #with a number of scalars (== the minimum) followed by a loop with 0 min.
703
+ #search for bare strings or regexps and replace with KW( ) wrapper
704
+ @patterns.each_with_index{|p,i|
705
+ case p
706
+ when String,Regexp; @patterns[i]=RedParse.KW(p)
707
+ when Reg::Repeat
708
+ if p.itemrange.first>0
709
+ @patterns[i,1]=
710
+ *[p.subregs[0]]*p.itemrange.first<< #minimum # as scalars
711
+ p.subregs[0].reg.* #0-based looper
712
+ end
713
+ end
714
+ }
715
+ @drs=[]
716
+ end
717
+
718
+ attr_reader :drs
719
+
720
+ def hash; priority end
721
+ def == other; Rule===other and priority==other.priority end
722
+ alias eql? ==
723
+
724
+ def lookback?; @lookback if defined? @lookback end
725
+
726
+ attr_reader :patterns,:action,:priority
727
+ attr_accessor :name
728
+
729
+ def at(n)
730
+ result=patterns[n]
731
+ result=result.subregs[0] if Reg::Repeat===result
732
+ result
733
+ end
734
+ def optional? n
735
+ p=patterns[n]
736
+ return Reg::Repeat===p && p.itemrange.first.zero?
737
+ end
738
# Is the pattern at position +n+ an unbounded repetition?
#
# True only when patterns[n] is a Reg::Repeat whose itemrange upper bound is
# infinite. A non-Repeat pattern, an upper bound of 1, or any other finite
# upper bound gives false.
#
# The earlier version reached the same answers by calling `fail` on a
# finite bound and wrapping the whole method in `rescue Exception`, which
# also hid unrelated errors (including Interrupt and NoMemoryError). The
# check is now explicit; to_f keeps integer bounds working on Rubies where
# Integer has no #infinite?.
def looping? n
  p = patterns[n]
  return false unless Reg::Repeat === p

  upper = p.itemrange.last
  return false if upper == 1

  upper.to_f.infinite? ? true : false
end
747
+
748
+ def reduces_to
749
+ case @action
750
+ when Class; @action
751
+ when StackMonkey; @action.exemplars
752
+ when :error,:shift,:accept; nil
753
+ else fail "#@action unexpected in reduces_to"
754
+ end
755
+ end
756
+
757
+ def unruly?
758
+ return if action==:accept
759
+ action.class!=Class || lookback?
760
+ end
761
+
762
+ def final_promised_pattern
763
+ case @action
764
+ when DeleteMonkey #delete_monkey
765
+ vector_indexes=(@action.first_changed_index..-1).select{|i| Reg::Repeat===@patterns[i] }
766
+ fail unless vector_indexes.empty?
767
+ result=@patterns.dup
768
+ result.delete_at @action.first_changed_index
769
+ when StackMonkey #stack_monkey
770
+ result=@patterns.dup
771
+ result[@action.first_changed_index..-1]=[@action.hint]
772
+ when Class
773
+ result= [@action,@patterns.last]
774
+ result.unshift @patterns.first if lookback?
775
+ when :accept, :error, :shift
776
+ result=@patterns.dup
777
+ else
778
+ pp @action
779
+ fail
780
+ end
781
+ result[-1]=result[-1].la unless result.empty?
782
+ result
783
+ end
784
+
785
+ def final_promised_rule
786
+ @final_promised_rule ||=
787
+ Rule.new(-final_promised_pattern>>nil,-priority)
788
+ end
789
+ end
790
+
791
+ class DottedRule
792
+ def initialize(rule,pos,parser)
793
+ @rule,@pos=rule,pos
794
+ fail unless (0...rule.patterns.size)===@pos
795
+ # @also_allow= compute_also_allow(parser) if parser unless defined? $OLD_PAA
796
+ end
797
+ def compute_also_allow(parser,provisional=[false])
798
+ parser.all_initial_dotted_rules.map{|dr|
799
+ next if dr==self
800
+ fake_rule=dr.rule.final_promised_rule
801
+ final_more_dr=DottedRule.create(fake_rule,0,nil)
802
+ also=dr.also_allow
803
+ unless also
804
+ provisional[0]||=0
805
+ provisional[0]+=1
806
+ also=[]
807
+ end
808
+ also+[dr] if optionally_combine final_more_dr,parser
809
+ }.flatten.compact.uniq
810
+ end
811
+ attr_reader :rule,:pos
812
+ attr_accessor :also_allow
813
+
814
+ def self.create(rule,pos,parser)
815
+ result=rule.drs[pos] and return result
816
+ result=rule.drs[pos]=DottedRule.new(rule,pos,parser)
817
+ unless defined? $OLD_PAA
818
+ result.also_allow=result.compute_also_allow(parser) if parser
819
+ end
820
+ return result
821
+ end
822
+
823
+ def hash; (@rule.priority<<3)^@pos end
824
+ def == other; DottedRule===other and @pos==other.pos and @rule==other.rule end
825
+ alias eql? ==
826
+
827
+ def name; @rule.name+"@#@pos" end
828
+
829
+ def looping?
830
+ @rule.looping?(@pos)
831
+ end
832
+
833
+ #returns Conditional|Rule|DottedRule|+[DottedRule.+]|nil
834
+ def evolve input, parser, seenlist,result2
835
+ #print "["
836
+ #$stdout.flush
837
+ idname=input.identity_name
838
+ idname=parser.identity_name_alias? idname
839
+ cache=seenlist[[self,idname]]
840
+ unless cache==:dunno_yet
841
+ result2.concat Array(cache).flatten.compact.uniq.sort_by{|x| x.name}
842
+ return cache
843
+ end
844
+ i=pos
845
+ lasti=i-1
846
+ result=[]
847
+ result=loop do #might need multiple tries if optional matcher(s) here
848
+ fail unless i>lasti
849
+ lasti=i
850
+ p=@rule.at(i) #what is current pattern in this dottedrule?
851
+ fail if Proc===p #shouldnt happen anymore
852
+ if parser.pattern_matches_nodes? p
853
+
854
+ #if any dotted rules have nodes at this point,
855
+ #also include the set of rules@0 which
856
+ #can (possibly indirectly) generate that node.
857
+ #(match tokens found on left sides of productions for p)
858
+ seenlist[[self,idname]]=result
859
+ if false
860
+ result.concat recurse_match_drs(parser).uniq.map{|dr|
861
+ dr and
862
+ #begin print "{#{dr.name}"
863
+ dr.evolve input,parser,seenlist,result2
864
+ #ensure print "}" end
865
+ }.flatten.compact.uniq
866
+ end
867
+ end
868
+ @saw_item_that={}
869
+ if p===input
870
+ i+=1 unless @rule.looping?(i)
871
+ fail if i>@rule.patterns.size
872
+
873
+ if !@saw_item_that.empty?
874
+ p(:saw_item_that!)
875
+ fail unless @saw_item_that.size==1
876
+ pair=@saw_item_that.to_a.first
877
+ fail unless p.equal? pair.last
878
+ it=pair.first
879
+ action=
880
+ if i==@rule.patterns.size
881
+ @rule
882
+ else
883
+ DottedRule.create(@rule,i,parser)
884
+ end
885
+ break Conditional.new(it,action)
886
+ end
887
+ @saw_item_that=nil
888
+
889
+ if i == @rule.patterns.size
890
+ break @rule
891
+ else
892
+ break result<<DottedRule.create(@rule,i,parser)
893
+ end
894
+ elsif !@rule.optional?(i)
895
+ break result.empty? ? nil : result
896
+ elsif (i+=1) >= @rule.patterns.size
897
+ break @rule
898
+ #else next p
899
+ end
900
+ end #loop
901
+ seenlist[[self,idname]]=result
902
+ result2.concat Array(result).flatten.compact.uniq.sort_by{|x| x.name}
903
+ return result
904
+ #ensure print "]"
905
+ end
906
+
907
+ #returns +[(DottedRule|nil).*]
908
+ def recurse_match_drs parser, result=nil
909
+ unless result
910
+ table=parser.rmd_cache
911
+ if table
912
+ cache=table[self]
913
+ return cache if cache
914
+ else
915
+ parser.rmd_cache={}
916
+ end
917
+
918
+ result=[]
919
+ end
920
+ #print "("
921
+ #print @rule.name+"@#@pos"
922
+ p=@rule.at(@pos)
923
+
924
+ #find set of nodes that could match here
925
+ nodes_here=parser.exemplars_that_match(p&Node)
926
+
927
+ #find the set of rules that could generate a node in our list
928
+ rrules=parser.all_rules.select{|rule|
929
+ !rule.unruly? and !nodes_here.grep(rule.action).empty?
930
+ }.map{|rule|
931
+ DottedRule.create(rule,0,parser)
932
+ }
933
+
934
+ #if any generating rules match a node in the leftmost pattern,
935
+ #add the rules which can generate _that_ node too.
936
+ result.push self #force self to be excluded from future recursion
937
+ oldsize=result.size
938
+ unless rrules.empty?
939
+ result.concat rrules
940
+
941
+ unless result.respond_to? :index_of
942
+ class<<result
943
+ attr_accessor :index_of
944
+ end
945
+ result.index_of={}
946
+ end
947
+ rio=result.index_of
948
+ oldsize.upto(result.size){|i| rio[result[i]]||=i }
949
+ rrules.each{|rrule|
950
+ i=rio[rrule] or fail #index() inside each() == O(N**2) complexity. this is the slow line.
951
+ #but skip recursion on rules already done at a higher level
952
+ rrule.recurse_match_drs parser,result if i>=oldsize
953
+ }
954
+ end
955
+ result[oldsize-1]=nil #don't actually include self in result
956
+ #result.update_indices oldsize-1, oldsize-1
957
+
958
+ parser.rmd_cache[self]=result
959
+ return result
960
+ #ensure print ")"
961
+ end
962
+
963
# Decides whether this dotted rule can be combined with +weaker+, memoizing
# the answer in parser.oc_cache under the key [self, weaker].
#
# weaker - the other DottedRule
# parser - supplies #oc_cache / #oc_cache= and #inputs
#
# Returns true or false. Whatever +result+ holds on exit (nil if an
# exception propagates) is written to the cache by the ensure clause.
def optionally_combine weaker,parser
  #lotsa caching needed if this is ever to be performant
  if parser.oc_cache
    result=parser.oc_cache[[self,weaker]]
    return result unless result.nil?
  else
    parser.oc_cache={}
  end

  other=weaker
  mymatches,myposes= self.outcomes
  matches,  poses  = other.outcomes
  matches.each_with_index{|match,i|
    mymatches.each_with_index{|mymatch,myi|
      #sample inputs that both matchers accept
      intersect=parser.inputs.grep(match&mymatch)
      unless intersect.empty?

        #but don't allow matches that would be matched
        #by an earlier (but optional) pattern.
        disallowed=Reg::Or.new(
          *possible_matchers_til(myi)+
           other.possible_matchers_til(i)
        )
        #FIX: the filtered list used to be thrown away (reject without
        #assignment), so the emptiness check below could never be true.
        #NOTE(review): this activates logic that was previously dead;
        #confirm the generated parser tables are still as expected.
        intersect=intersect.reject{|x| disallowed===x }

        if intersect.empty?
          return result=false
        elsif poses[i]>=other.rule.patterns.size
          return result=true  #success if weaker rule is at an end
        elsif myposes[myi]>=rule.patterns.size
          return result=false #fail if stronger rule at an end
        else
          p [:**,rule.name,myposes[myi]]
          mynew=DottedRule.create(rule,myposes[myi],parser)
          new=DottedRule.create(other.rule,poses[i],parser)
          return result=mynew.optionally_combine( new,parser )
        end
      end
    }
  }
  return result=false
ensure
  parser.oc_cache[[self,weaker]]=result
end
1007
+
1008
+ def possible_matchers_til i
1009
+ (pos...i-1).map{|j|
1010
+ m=rule.at(j)
1011
+ Reg::Repeat===m ? m.subregs[0] : m
1012
+ }
1013
+ end
1014
+
1015
+ def outcomes
1016
+ til=@rule.patterns.size
1017
+ at=@pos
1018
+ result=[[],[]]
1019
+ loop do
1020
+ m=@rule.patterns[at]
1021
+ case m
1022
+ when Proc;
1023
+ result.first.push Object
1024
+ result.last.push at+1
1025
+ break
1026
+ when Reg::Repeat
1027
+ assert @rule.optional?(at)
1028
+ to=at
1029
+ to+=1 unless @rule.looping? at
1030
+ result.first.push m.subregs[0]
1031
+ result.last.push to
1032
+ else
1033
+ result.first.push m
1034
+ result.last.push at+1
1035
+ break
1036
+ end
1037
+ at+=1
1038
+ break if at>=til
1039
+ end
1040
+ return result
1041
+ end
1042
+
1043
+ end
1044
+
1045
+ attr_accessor :rmd_cache
1046
+ attr_accessor :oc_cache
1047
+ attr_accessor :sl2ms_cache
1048
+
1049
# Pairs a condition with the action to take when it holds.
#
# condition - must respond to #restore; it is called with :hash and :==
#             at construction time
# action    - must respond to #name and #priority
class Conditional
  attr_reader :condition, :action

  def initialize(condition, action)
    @condition = condition
    @action = action
    @condition.restore(:hash, :==)
  end

  # Combines the hashes of both parts so equal Conditionals hash alike.
  def hash
    @condition.hash ^ @action.hash
  end

  # Equal when the other object is a Conditional with an equal condition
  # and an equal action.
  def ==(other)
    Conditional === other and @condition == other.condition and @action == other.action
  end
  alias eql? ==

  # The condition's inspect string, a "?", then the action's name.
  def name
    @condition.inspect + "?" + @action.name
  end

  # Delegates to the action.
  def priority
    @action.priority
  end
end
1068
+
1069
+ class ParserState; end
1070
+ class MultiShift; end
1071
+ class MultiReduce; end
1072
+
1073
+ ACTION_PATTERN=ParserState|Rule|MultiShift|MultiReduce|:accept|:error
1074
# One state of the generated parser: a union of dotted rules that may all
# be in progress at the same point, plus the table of actions (one per
# input) computed for that state.
class ParserState #a union of dotted rules
  # dotteds:: non-empty list of dotted rules; nil entries are rejected
  # index::   stored as given in @index
  def initialize(dotteds,index)
    fail if dotteds.empty? #error state
    fail unless dotteds.grep(nil).empty?
    @dotteds=dotteds
    @index=index
    sort_substates!
    @actions={} #key is an input, value is ParserState|Rule|MultiShift|MultiReduce|:accept|:error
  end

  attr_reader :actions
  attr :dotteds
  attr_writer :name

  # looks up the action stored for input key k
  def [](k)
    result=@actions[k]
    assert ACTION_PATTERN===result
    result
  end

  # stores action v for input key k
  def []=(k,v)
    assert ACTION_PATTERN===v
    @actions[k]=v
  end

  # keeps @dotteds ordered by descending position, without duplicates
  def sort_substates!
    @dotteds=@dotteds.sort_by{|dotted| -dotted.pos}.uniq
  end

  # copy whose dotted-rule list can be changed without touching the original
  def dup
    result=super
    result.instance_variable_set(:@dotteds,@dotteds.dup)
    return result
  end

  def substates; [self] end

  # Turns a list of shift results into a single action.
  # Plain DottedRules become one ParserState; Conditional entries (if any)
  # wrap that state in a MultiShift. Results are memoized in
  # parser.sl2ms_cache, keyed by the shiftlist.
  # Returns :error for an empty shiftlist.
  def shiftlist2multishift? shiftlist,parser
    return :error if shiftlist.empty?
    parser.sl2ms_cache||={}
    cache=parser.sl2ms_cache[shiftlist]
    return cache if cache
    fixed,varying=shiftlist.partition{|res| DottedRule===res}
    result=ParserState.new(fixed,nil)
    result.perhaps_also_allow parser.all_rules,parser
    unless varying.empty? #MultiShift
      #MultiShift indexes its modifiers as a flat
      #[condition,action,condition,action,...] list. (the old code
      #called .flatten but threw the result away, passing pairs.)
      modifiers=[]
      varying.each{|v| modifiers.push v.condition, v.action }
      result=MultiShift.new(result,modifiers)
    end
    parser.sl2ms_cache[shiftlist]=result
    return result
  end

  #given a list of rules, see if any of them are compatible with
  #a current substate. (compatibility means the aggregate patterns
  #can be anded together and still be able to conceivably match something.)
  #if any of morerules are actually compatible, add it to current state.
  def perhaps_also_allow(morerules,parser)
    fail unless morerules==parser.all_rules
    @dotteds.concat @dotteds.map{|d| d.also_allow }.flatten.compact.uniq
    sort_substates!
  end

  # older, slower version of perhaps_also_allow; it replaces the one
  # above only when $OLD_PAA is defined (see the alias below).
  def old_perhaps_also_allow(morerules,parser)
    morerules=morerules.dup
    need_sort=false
    scan_rules=@dotteds
    added={}
    while true
      adding=[]
      morerules.each{|morerule|
        next if added[morerule]
        fake_rule=morerule.final_promised_rule
        final_more_dr=DottedRule.create(fake_rule,0,parser)
        scan_rules.each{|dotted|
          if dotted.optionally_combine final_more_dr,parser
            adding<<DottedRule.create(morerule,0,parser)
            added[morerule]=1
            break
          end
        }
      }
      break if adding.empty?
      @dotteds.concat adding
      need_sort=true
      scan_rules=adding #next round only needs to look at what was just added
    end
    sort_substates! if need_sort
  end
  alias perhaps_also_allow old_perhaps_also_allow if defined? $OLD_PAA

  # Computes the action this state takes on +input+.
  #returns ParserState|MultiShift|MultiReduce|Rule|:accept|:error
  def evolve input,parser,seenlist
    result2=[]
    @dotteds.each{|dotted|
      dotted.evolve input,parser,seenlist,result2
    }

    result=result2.uniq.compact.sort_by{|x| x.name}

    return result=:error if result.empty?

    #ok, who wants to shift and who wants to reduce?
    shiftlist,reducelist=result.partition{|res|
      DottedRule===res or
        Conditional===res && DottedRule===res.action
    }

    #if no reducers at all, just try (multi?)shift
    return result=shiftlist2multishift?( shiftlist,parser ) if reducelist.empty?

    #line up reducers by priority
    actions=reducelist.sort_by{|rule| -rule.priority }
    #actions is +[(Rule|Conditional[Rule]).*]
    action=actions.shift #this first (unless conditional)
    #action is Rule|Conditional[Rule]
    result=
    case action.action
    when :error
      #assign result before leaving, so the ensure clause checks the
      #value actually returned instead of the intermediate Array
      return result=:error
    when Class, StackMonkey
      action
    when :accept
      :accept
    when :shift #this counts as a reduce at this point, but it writes shift instructions
      shiftlist2multishift? shiftlist,parser
    when Rule #oy, vey, was a Conditional
      shiftaction=shiftlist2multishift?(shiftlist,parser)
      fail unless Rule===action.action
      case action.action.action
      when :error; huh
      when :shift, StackMonkey, :accept, Class #MultiReduce
        first_fixed_index=actions.size
        #actions is +[(Rule|Conditional[Rule]).*]
        actions.each_with_index{|act,i|
          break first_fixed_index=i unless Conditional===act
        }
        condactions=actions[0...first_fixed_index].unshift(action)
        condactions=condactions.inject([]){|sum,cond|
          act=cond.action
          act=shiftaction if act==:shift #=>shiftlist?
          sum.push cond.condition, act
        }
        #possible optimization: one or more :shift right at end could be ignored
        if actions[first_fixed_index]
          action=actions[first_fixed_index].action
        else
          action=shiftaction
        end
        MultiReduce.new condactions,action #=>shiftlist?
      else fail
      end
    else fail "#{action} not expected here"
    end
    #stack monkeys/:accept are treated like reduce here
  ensure
    assert ACTION_PATTERN===result
  end

  # explicit name if one was assigned, else the joined dotted-rule names
  def name
    @name||@dotteds.map{|dotted| dotted.name}.join(",")
  end

  # Gives the state a short name built from its most prominent members.
  # name2count tracks names already handed out; a repeated name gets a
  # "___<count>" suffix. Does nothing if a name was already assigned.
  def rename(name2count)
    return @name if defined? @name
    name=most_prominent_members.map{|dotted| dotted.name}.join(",")
    if name2count[name]
      name2count[name]+=1
      name+="___"+name2count[name].to_s
    else
      name2count[name]=1
    end

    @name=name
  end

  # Dotted rules sharing the first entry's position, plus those with the
  # most patterns still to match; rules at position 0 (or at 1 after a
  # lookback) are dropped unless nothing else would be left.
  def most_prominent_members
    result=@dotteds.select{|dr| dr.pos==@dotteds.first.pos }
    close2end=@dotteds.map{|dr| [dr,dr.rule.patterns.size-dr.pos]}.sort_by{|(o,k)| -k}
    result+=close2end.select{|(dr,k)| k==close2end.first.last}.map{|(dr,k)| dr}
    result2=result.reject{|dr| dr.pos==0 or dr.pos==1&&dr.rule.lookback?}
    result=result2 unless result2.empty?
    return result
  end

  def hash
    -@dotteds.hash
  end
  def == other
    ParserState===other and
      @dotteds==other.dotteds
  end
  alias eql? ==

  def looping?
    @dotteds.any?{|dotted| dotted.looping? }
  end

  # true if the action for +input+ leads to a different, looping state
  def transition_to_loop? input #not used
    action=@actions[input] #was @actions.input, which Hash does not respond to
    case action
    when :error; false
    when ParserState; action.looping? and action!=self
    when MultiShift,MultiReduce;
      action.transition_to_loop? input
    else fail
    end
  end

  # Splits the action table into @goto (keys whose exemplar is a Node)
  # and @sr (keys whose exemplar is a Token), makes the most frequent
  # action of each table its default, and then discards @actions.
  # inputs:: exemplar list used to classify keys. It defaults to @inputs,
  #          which nothing in this class assigns, so callers most likely
  #          need to pass the parser's inputs (NOTE(review): confirm).
  def make_sr_goto_tables(inputs=@inputs)
    name2exemplar={}
    inputs.each{|i| name2exemplar[i.name]=i }

    @goto={}; @sr={}
    goto_counts=Hash.new(0); sr_counts=Hash.new(0)
    actions.each_pair{|k,v|
      if Node===name2exemplar[k]
        @goto[k]=v
        goto_counts[v]+=1
      else
        assert(Token===name2exemplar[k])
        @sr[k]=v
        sr_counts[v]+=1
      end
    }
    #a table with no entries has no "most common" action to default to
    unless goto_counts.empty?
      dflt=goto_counts.sort_by{|v,c| c}.last[0]
      @goto.delete_if{|k,v| v==dflt }
      @goto.default=dflt
    end

    unless sr_counts.empty?
      dflt=sr_counts.sort_by{|v,c| c}.last[0]
      @sr.delete_if{|k,v| v==dflt }
      @sr.default=dflt
    end

    @actions=nil
  end

end
1317
+
1318
# An action that picks among several candidate actions at parse time.
# @list is a flat [condition, action, condition, action, ...] array;
# @default is the action used when no condition matches.
class MultiReduce
  def initialize(list,default)
    @list,@default=list,default
    #default can be any valid action (except another MultiReduce)
  end

  attr_reader :list,:default

  # returns the action paired with the first condition that matches x
  # (using ===), or the default action if none does
  def act(x)
    (0...@list.size).step(2){|i|
      return @list[i+1] if @list[i]===x
    }
    return default
  end

  # the default action's substates, or [] if it has none
  def substates
    if @default.respond_to? :substates
      @default.substates
    else
      []
    end
  end

  # every action this MultiReduce might yield: the listed ones followed
  # by the default (expanded if the default itself has actions)
  def actions
    result=[]
    (1...@list.size).step(2){|i|
      result << @list[i]
    }
    if @default.respond_to? :actions
      result.concat @default.actions
    elsif @default
      result<<@default
    end
    result
  end

  def transition_to_loop? input #not used
    @default.transition_to_loop? input
  end

  def hash
    @list.hash^~@default.hash
  end

  # only another MultiReduce can be equal; without the type check,
  # comparing against a symbol or ParserState raised NoMethodError
  def == other
    MultiReduce===other and
      @list==other.list and @default==other.default
  end
  alias eql? ==
end
1367
+
1368
# A shift whose target state depends on predicates evaluated at parse time.
# @modifiers is a flat [predicate, addition, predicate, addition, ...]
# list. @map holds one copy of the base state per combination of
# predicate outcomes: bit k of a map index is set when predicate k
# matched, and that copy then contains addition k.
class MultiShift
  def initialize(base,modifiers)
    @base,@modifiers=base,modifiers
    pair_count=modifiers.size/2
    @map=
      (0...2**pair_count).map{|i| base.dup}
    @map.each_with_index{|state,i| #for each branch to the multishift
      (0...pair_count).each{|k| #for each predicate in the multishift
        #bit k stands for predicate k, the same numbering act() uses.
        #(the old test was (i&(1<<j)).non_zero? with j stepping by 2:
        #the wrong bit, and not an Integer method)
        next unless (i&(1<<k)).nonzero? #predicate tests true in this branch?
        addition=modifiers[2*k+1]
        if state.respond_to? :append
          state.append addition #add the predicates modifier to the state
        else
          #NOTE(review): no #append is visible on ParserState in this
          #file; fall back to its dotted-rule list -- confirm
          state.dotteds << addition
        end
      }
      state.sort_substates!
    }
  end

  # returns the state for the combination of predicates that match x
  def act(x)
    result=0
    (0...@modifiers.size).step(2){|i|
      result|=(1<<(i/2)) if @modifiers[i]===x
    }
    @map[result]
  end

  attr_reader :base, :map, :modifiers

  def substates
    @map.dup
  end

  def actions
    @map.dup
  end

  def transition_to_loop? input #not used
    huh
  end

  #@map is derived entirely from base and modifiers, so those two
  #decide identity. (hash and == used to be unimplemented, which broke
  #any Hash or uniq that touched a MultiShift.)
  def hash
    @base.hash^@modifiers.hash
  end
  def == other
    MultiShift===other and
      @base==other.base and @modifiers==other.modifiers
  end
  alias eql? ==
end
1413
+
1414
+ #an action is one of:
1415
+ #a ParserState (shift)
1416
+ #a Rule (reduce)
1417
+ #nil (error)
1418
+ #:accept
1419
+ #MultiReduce
1420
+ #MultiShift
1421
+
1422
+ #just the left side (the stack/lookahead matchers)
1423
# Refreshes @rules from expanded_RULES and returns the flattened list of
# every rule's left-side matchers (the stack/lookahead matchers).
def LEFT
  @rules = expanded_RULES()
  left_sides = @rules.map { |rule| rule.left.subregs }
  left_sides.flatten
end
1429
+
1430
+ #remove lookahead and lookback decoration (not used?)
1431
# Returns the LEFT matchers with each Proc replaced by an empty array.
# Lookahead/lookback wrappers are expected to be gone already; finding
# one raises.
def LEFT_NO_LOOKING
  LEFT().map! do |matcher|
    case matcher
    when Reg::LookAhead, Reg::LookBack then fail #should be gone already now
    when Proc then []
    else matcher
    end
  end
end
1442
+
1443
# Builds a parent => [children] table restricted to the given classes
# (plus Object). Each class is listed under its nearest ancestor that is
# itself in the set; a class with no such ancestor is listed nowhere.
def child_relations_among(*classes)
  classes.unshift Object
  children = {}
  classes.each { |klass| children[klass] = [] }

  classes.each do |klass|
    lineage = klass.ancestors
    fail unless lineage.shift == klass
    nearest = lineage.find { |anc| children[anc] }
    children[nearest] << klass if nearest
  end

  children
end
1462
+
1463
+ #all classes mentioned in rules, on left and right sides
1464
#all classes mentioned in rules, on left and right sides
# The list is computed once and memoized in @sc_result; an empty result
# is treated as a fatal error.
def STACKABLE_CLASSES #
  return @sc_result if defined? @sc_result
  @sc_result = []
  @subclasses_of = child_relations_among(*vertices)

  from_left = LEFT().map { |matcher| sc_juice matcher }.flatten.compact
  assert from_left.grep(nil).empty?
  from_right = @rules.map { |rule| rule.right }.grep(Class) #classes in productions

  combined = from_left + from_right
  @subclasses_of = nil
  @sc_result.replace combined.grep(Class).uniq
  fail if @sc_result.empty?
  @sc_result
end
1479
+
1480
+ # def juice(m)
1481
+ # case m #
1482
+ # when Class
1483
+ # return [m] unless @subclasses_of
1484
+ # result=[m] # and subclasses too
1485
+ # i=0
1486
+ # while item=result[i]
1487
+ # p item
1488
+ # result.concat @subclasses_of[item] rescue nil
1489
+ # i += 1
1490
+ # end
1491
+ # result
1492
+ # when String,Regexp; juice(RedParse.KW(m))
1493
+ # when Reg::And; m.subregs.map{|x| juice(x).flatten.compact}.inject{|sum,rr| sum&rr}
1494
+ # when Reg::Or; m.subregs.map &method(:juice)
1495
+ # when Reg::Not
1496
+ # m=m.subregs[0]
1497
+ # if Class===m or (Reg::Or===m and
1498
+ # m.subregs.find{|x| Class===x })
1499
+ # juice(m)
1500
+ # else []
1501
+ # end
1502
+ # else []
1503
+ # end
1504
+ # end
1505
+
1506
# Extracts the classes a matcher can refer to.
# Classes stand for themselves, strings and regexps stand for
# KeywordToken, And takes the intersection of its parts, Or returns one
# list per alternative, and the single-child wrappers (Not, LookAhead,
# LookBack, Repeat) defer to their first subreg. Anything else yields [].
def sc_juice(m)
  case m #
  when Class
    [m]
  when String, Regexp
    [KeywordToken]
  when Reg::And
    parts = m.subregs.map { |sub| sc_juice(sub) }.compact
    parts = parts.map { |part| part.flatten.compact }
    parts.inject { |common, part| common & part }
  when Reg::Or
    m.subregs.map(&method(:sc_juice))
  when Reg::Not, Reg::LookAhead, Reg::LookBack, Reg::Repeat
    sc_juice(m.subregs[0])
  else
    []
  end
end
33
1518
 
34
- def self.has_return_hash_fix?
1519
# Returns (and memoizes in @unruly_rules) the rules that answer true to
# unruly?. The first call also prints the names of those rules.
def unruly_rules
  unless defined? @unruly_rules
    @unruly_rules = all_rules.select { |rule| rule.unruly? }

    p :unruly_rules
    pp @unruly_rules.map { |rule| rule.name }
  end
  @unruly_rules
end
1530
+
1531
# Builds one exemplar object per entry reported by the stackable classes.
# Each entry is an array whose first element is a class, followed by
# (method name, value) pairs; the exemplar is an allocated (not
# initialized) instance with a singleton method per pair returning that
# value. The list is cached in @@exemplars.
def enumerate_exemplars
  return @@exemplars if defined? @@exemplars #dunno why this is necessary

  per_class = STACKABLE_CLASSES().map { |klass| klass.enumerate_exemplars }
  result = per_class.inject { |all, some| all+some }

  result.map! do |sc|
    res = sc.shift.allocate
    # the eval'd text refers to the locals named res and sc
    eval "def res.#{sc.shift}; #{sc.shift.inspect} end" until sc.empty?
    def res.to_s; identity_name end
    res
  end

  @@exemplars = result
  return result
end
1549
+
1550
# Finds inputs that no rule pattern can tell apart.
# Every input gets a profile: for each distinct pattern, whether it
# matches (Procs are kept as themselves). An input whose profile was
# already seen is reported, recorded in @identity_name_aliases (earlier
# input => later input) and dropped. Returns the surviving inputs.
def check_for_parsealike_inputs
  all_patterns = all_rules.map { |rule|
    rule.patterns.map { |pat| Reg::Repeat===pat ? pat.subregs[0] : pat }
  }.flatten.uniq
  seen = {}
  @identity_name_aliases = {}
  warn "why are non_empty and after_equals params to BeginNode appearently ignored?"
  warn "some token identities overlap themselves?!?"
  warn "some overlaps are duplicated"
  warn ". and :: overlap => ..... surely that's not right"
  survivors = @inputs.map do |input|
    profile = all_patterns.map { |pat| Proc===pat ? pat : !!(pat===input) }
    earlier = seen[profile]
    if earlier
      puts "#{input} overlaps #{earlier}"
      @identity_name_aliases[earlier] = input
      nil
    else
      seen[profile] = input
    end
  end
  survivors.compact
end
1569
+
1570
# Breadth-first exploration of every parser state reachable from
# start_state. For each state and each input (StartToken exemplars
# excluded) the resulting action is stored in the state under the
# input's identity_name, and any state not seen before is queued.
# Progress figures are printed after each state. The parser's rmd, oc
# and sl2ms caches are cleared at the end.
# Returns the states in the order they were processed.
def enumerate_states
  inputs = check_for_parsealike_inputs
  inputs.reject! { |x| StartToken===x }

  result = []
  todo = [start_state]

  seenlist = Hash.new(:dunno_yet)

  done = 0
  start = was = Time.now
  in_result = {} #this should go away; obsoleted by @states
  state_num = -1
  todo.each { |st| in_result[st] = (state_num += 1) }
  ps = todo.first
  pp [-in_result[ps], *ps.dotteds.map { |dr| dr.name }]
  old_todo_size = todo.size
  while state = todo.shift
    result << state

    inputs.each do |input|
      newstate = state.evolve input, self, seenlist
      assert ACTION_PATTERN===newstate
      #newstate is ParserState|MultiShift|MultiReduce|Rule|:accept|:error
      state[input.identity_name] = newstate
      next unless newstate.respond_to? :substates
      #newstate.substates is just [newstate] for plain ParserStates
      fresh = newstate.substates.reject { |x| in_result[x] }
      fresh.each { |st| in_result[st] = (state_num += 1) }
      todo.concat fresh
    end

    # progress line: states done, queue size, queue growth, seconds for
    # this state, overall states/second, percent complete
    now = Time.now
    p [:*, done += 1, todo.size, todo.size-old_todo_size, now-was, done/(now-start), (100.0*done/(done+todo.size)).to_i]
    old_todo_size = todo.size
    was = now
  end
  self.rmd_cache = nil
  self.oc_cache = nil
  self.sl2ms_cache = nil
  result
end
1625
+
1626
# Renders action x in a compact, printable form.
# x::         ParserState|MultiReduce|MultiShift|Class|StackMonkey|:accept|:error
# in_result:: table mapping each ParserState to its number
# A ParserState becomes its number; a MultiReduce becomes
# [:MultiReduce, [condition, action]..., default]; a MultiShift becomes
# [:MultiShift, predicates, {branch index => state number}].
# Raises for anything that is not a valid action.
def pretty(x,in_result)
  case x
  when ParserState; in_result[x]
  when MultiReduce
    pairs=x.list.dup
    result=[]
    until pairs.empty?
      cond,act,*pairs=*pairs
      cond = cond.inspect
      result<<[cond,pretty(act.action,in_result)]
    end
    result<<pretty(x.default,in_result)
    result.unshift :MultiReduce
  when MultiShift
    h={}
    mods=x.modifiers
    its=[]
    (0...mods.size).step(2){|i| its<<mods[i] }
    #in_result has to be passed along; the one-argument call used here
    #before raised ArgumentError
    x.map.each_with_index{|xx,i| h[i]=pretty(xx,in_result) }
    [:MultiShift, its,h]
  when Class; x.name
  when StackMonkey; x.name
  when :accept,:error; x
  else fail "not a valid action: #{x}"
  end
end
1652
+
1653
+ attr_accessor :inputs
1654
+
1655
# The full state list, computed by enumerate_states on first use and
# memoized in @all_states.
def all_states
  @all_states = enumerate_states unless defined? @all_states
  @all_states
end
1659
+
1660
# Returns the inputs (exemplars) that pattern p matches via ===.
def exemplars_that_match(p)
  @inputs.select { |input| p === input }
end
1663
+
1664
# true if at least one input matches the pattern Node&p
def pattern_matches_nodes?(p)
  matching = @inputs.grep(Node&p)
  !matching.empty?
end
1667
+
1668
# true if at least one input matches the pattern Token&p
def pattern_matches_tokens?(p)
  matching = @inputs.grep(Token&p)
  !matching.empty?
end
1671
+
1672
# Returns the alias recorded for name in @identity_name_aliases, or name
# itself when no alias is recorded.
def identity_name_alias?(name)
  @identity_name_aliases[name] || name
end
1676
+
1677
# Builds the parse tables and writes the generated C code to $stdout.
#
# If cached_parse_tables.drb exists in the current directory, a
# previously dumped parser is loaded from it and this object forwards
# its methods to that copy. Otherwise every state is enumerated and the
# parser is dumped to that file. In both cases some diagnostics about
# unused dotted rules, unused actions and duplicate states are printed,
# states are renamed, and each state's action table is split into
# shift/reduce and goto tables.
#
# While running, this parser is published in
# Thread.current[:$RedParse_parser] (unless one is already there); the
# previous value is restored on exit. Returns self.
def compile
  oldparser=Thread.current[:$RedParse_parser]
  Thread.current[:$RedParse_parser]||=self
  cache_file="cached_parse_tables.drb"

  if File.exist?(cache_file)
    #NOTE(review): Marshal.load trusts the file completely; anything
    #able to write this file in the working directory controls what
    #gets loaded here.
    #block form closes the file even if loading raises
    dup=File.open(cache_file,"rb"){|f| Marshal.load(f) }
    instance_variables.each{|var| remove_instance_variable var }
    extend SingleForwardable
    def_singleton_delegators(dup,public_methods+private_methods+protected_methods)

    self.inputs=enumerate_exemplars
    #states used to stay nil on this path, so the diagnostics below
    #blew up. NOTE(review): assumes all_states now answers from the
    #loaded copy -- confirm
    states=all_states
  else
    @generating_parse_tables=true
    @inputs||=enumerate_exemplars

    states=all_states
    # @rules=expanded_RULES
    @inputs=nil #Marshal no like it

    begin
      p :dumping
      File.open(cache_file,"wb"){|f| Marshal.dump(self,f) }
      p :dump_done!
    rescue Exception
      #Exception on purpose: even an interrupted dump must not leave a
      #partial cache file behind for the next run to load
      p :dump_failed
      File.unlink cache_file if File.exist?(cache_file)
    ensure
      @inputs=enumerate_exemplars
    end
  end

  #look for unused dotted rules and actions
  #also states with drs past the end
  past_end=0
  drs=all_dotted_rules
  dr_count=Hash.new(0)
  acts=all_rules#.map{|r| r.action }.uniq
  act_count=Hash.new(0)
  states.each{|state|
    state.dotteds.each{|dr|
      dr_count[dr]+=1
      past_end+=1 if dr.pos>=dr.rule.patterns.size
    }
    sav=state.actions.values
    sav.grep(Class|StackMonkey).each{|act| act_count[act.__id__]+=1 }
    sav.grep(MultiReduce|MultiShift).each{|multi| multi.actions.each{|act| act_count[act.__id__]+=1} }
    #p state.name if state.dotteds.select{|dr| dr.rule.action==BeginNode}
  }
  puts "#{past_end} dotted rules found past the end of their rule" if past_end>0
  nevers=0
  drs.each{|dr|
    next unless dr_count[dr].zero?
    puts "never reached #{dr.name}"
    nevers+=1
  }
  puts "#{nevers} dotted rules were never reached (out of #{drs.size})"
  nevers=0
  acts.each{|act|
    next unless act_count[act.__id__].zero?
    puts "never reached #{act.name rescue act}"
    nevers+=1
  }
  puts "#{nevers} actions were never reached (out of #{acts.size})"
  p :most_popular_nontrivial_drs
  #last(15) copes with fewer than 15 entries; [-15..-1] gave nil there
  pp dr_count.reject{|(dr,n)| dr.pos.zero? or dr.pos==1 && dr.rule.lookback?} \
       .sort_by{|(dr,n)| n}.last(15).map{|(dr,n)| [dr.name,n] }

  #look for duplicate states
  actions2state={}
  dup_states=0
  states.each{|st|
    cache=actions2state[st.actions]
    if cache
      st.equivalent_to=cache
      dup_states+=1
    else
      actions2state[st.actions]=st
    end
  }
  puts "#{dup_states} duplicate states" if dup_states.nonzero?

  name2count={}
  states.each{|state| state.rename(name2count) }

  #divide each state's actions into sr and goto tables
  #also scan states for the most common sr and goto actions and make them default
  states.each{|state| state.make_sr_goto_tables }


  # pp states
  # pp states.size

  generate_c $stdout
  return self
ensure
  remove_instance_variable :@generating_parse_tables rescue nil
  Thread.current[:$RedParse_parser]=oldparser
end
1776
+
1777
# Collects the classes that a complete parse can reduce to: for every
# rule of the form StartToken, <goal>, EoiToken, the classes named by
# the <goal> matcher.
#
# The size test used to be ==0, which can never hold together with the
# first/last checks, so the result was always empty. Since the body
# reads patterns[1], with StartToken first and EoiToken last, a
# three-pattern rule is assumed to be what was meant.
# NOTE(review): confirm. The call also used to go to juice, which exists
# only as commented-out code; sc_juice is used in its place.
def ultimate_goal_nodes
  result=[]
  all_rules.each{|rule|
    patterns=rule.patterns
    if patterns.size==3 and
       patterns.first==StartToken and
       patterns.last==EoiToken
      result << sc_juice(patterns[1])
    end
  }
  result.flatten!
  return result
end
1789
+
1790
+
1791
+ # def start_state
1792
+ # goal=ultimate_goal_nodes
1793
+ # result=all_rules.select{|rule|
1794
+ # rt=rule.reduces_to and
1795
+ # !goal.select{|node| node>=rt}.empty?
1796
+ # }
1797
+ # result.map!{|rule| DottedRule.create(rule,0,parser)}
1798
+ #
1799
+ # result=ParserState.new result
1800
+ # result.name="start_state"
1801
+ # result
1802
+ # end
1803
+
1804
# Creates a ParserState for the dotted rules drs (numbered with the
# current size of @states), lets it pull in compatible rules, and
# registers it in @states unless an equal state is already registered.
# unruly_also is accepted but not used.
# NOTE(review): on a cache hit this returns the value stored in @states,
# which is the state's number, not the state itself -- confirm intent.
def new_state(drs,unruly_also=false)
  candidate = ParserState.new drs, @states.size
  candidate.perhaps_also_allow all_rules, self
  known = @states[candidate]
  if known
    known
  else
    @states[candidate] = @states.size
    candidate
  end
end
1812
+
1813
# Resets the state table and builds the state named "initial", which
# holds every rule with its dot at position 0.
def initial_state
  @states = {}
  all_initial_dotted_rules #is this still needed?
  fresh_dotteds = all_rules.map { |rule| DottedRule.create(rule, 0, self) }
  state = new_state fresh_dotteds
  state.name = "initial"
  #state.perhaps_also_allow all_rules,self #silly here
  state
end
1821
+
1822
+ attr_reader :states
1823
+
1824
# The state reached from the initial state after a StartToken, extended
# with any compatible rules and named "start".
def start_state
  seenlist = Hash.new(:dunno_yet)
  state = initial_state.evolve StartToken.new, self, seenlist
  state.perhaps_also_allow all_rules, self
  state.name = "start"
  state
end
1834
+
1835
+ #inline any subsequences in RULES right into the patterns
1836
+ #reg should do this already, but current release does not
1837
#inline any subsequences in RULES right into the patterns
#reg should do this already, but current release does not
# Returns the RULES list with every Reg::Subseq on a rule's left side
# replaced by its own members. If this version of reg already flattens
# nested sequences (probed with a tiny sample), RULES is returned
# untouched. Rules without any subsequence are kept as they are.
def expanded_RULES
  result=RULES()
  return result if (-[:foo, -[:bar]]).subregs.grep(Reg::Subseq).empty?
  result.map!{|rule|
    #grep returns an Array, which is always true; .empty? is what
    #decides whether there is anything to inline
    if rule.left.subregs.grep(Reg::Subseq).empty?
      rule
    else
      right=rule.right
      patterns=rule.left.subregs.dup
      #walk backwards so splicing does not disturb indexes still to visit
      (patterns.size-1).downto(0){|i|
        if Reg::Subseq===patterns[i]
          patterns[i,1]=patterns[i].subregs
        end
      }
      -patterns>>right
    end
  }
end
1855
+
1856
# Mixed into constant values so that they inspect as the name of the
# constant that holds them.
module NamedConstant
  attr_accessor :constant_name

  def inspect
    constant_name
  end
end
1860
# Makes every suitable constant of this class inspect as its own name by
# extending the value with NamedConstant.
# Classes, modules, numbers, symbols, true, false and nil are left
# alone, and so is any frozen value (it cannot be extended).
#
# The old test `Class|Module|...|nil===k` parsed as
# `Class|Module|...|(nil===k)`, so it never examined k at all.
def self.inspect_constant_names
  constants.each{|kn|
    k=const_get(kn)
    case k
    when Module, Numeric, Symbol, true, false, nil then next #Module covers Class
    end
    next if k.frozen?
    k.extend NamedConstant
    k.constant_name=kn.to_s #constants may hand back symbols; inspect should give a String
  }
end
1868
+
1869
# Builds (once) the table of objects that cannot be marshalled, by
# walking @rules: each StackMonkey is stored under its name, and each
# Reg::Deferred under a running number, which is also recorded on the
# object itself as undump_key.
# NOTE(review): the table (@undumpables) is returned only on later
# calls; the first call returns whatever abortable_graphwalk returns --
# confirm that is intended.
def undumpables
  return @undumpables if @undumpables
  @rules ||= expanded_RULES
  counter = -1
  @undumpables = {}
  abortable_graphwalk(@rules) do |cntr,o,i,ty|
    recorded =
      case o
      when StackMonkey
        @undumpables[o.name] = o
      when Reg::Deferred
        @undumpables[counter += 1] = o
        class<<o
          attr_accessor :undump_key
        end
        o.undump_key = counter
      end
    # the block's value is the negation of what the case produced
    !recorded
  end
end
1887
+
1888
class ::Proc #hack hack hack
  #only define hacky _dump if one isn't defined already
  #(method_defined? gives the same answer whether instance_methods
  #lists strings or symbols; the old include?("_dump") test could never
  #match on Rubies that list symbols)
  unless method_defined?(:_dump) or
         method_defined?(:marshal_dump) or
         (Marshal.dump(proc{}) rescue false)
    # A proc is dumped as just its undump_key, the number assigned by
    # RedParse#undumpables.
    def _dump depth
      undump_key.to_s
    end
    # Loading looks the real proc up again, by that key, in the
    # undumpables table of the parser published in
    # Thread.current[:$RedParse_parser].
    def self._load str
      Thread.current[:$RedParse_parser].undumpables[str.to_i]
    end
  end
end
1901
+
1902
+ =begin disabled, uses too much memory!!
1903
+ class MarshalProxy
1904
+ def initialize(key)
1905
+ @key=key
1906
+ end
1907
+ attr :key
1908
+ end
1909
+
1910
+ #convert unmarshalables, such as stackmonkeys into proxies
1911
+ def proxify
1912
+ n=-1
1913
+ seen={}
1914
+ mkproxy=proc{|cntr,o,i,ty,useit|
1915
+ case o
1916
+ when StackMonkey
1917
+ useit[0]=true
1918
+ seen[o.__id__]||=MarshalProxy.new(o.name)
1919
+ when Reg::Deferred
1920
+ useit[0]=true
1921
+ seen[o.__id__]||=MarshalProxy.new(n+=1)
1922
+ end
1923
+ }
1924
+ Ron::GraphWalk.graphmodify!(@rules,&mkproxy)
1925
+ Ron::GraphWalk.graphmodify!(self,&mkproxy)
1926
+
1927
+ end
1928
+
1929
+ def _dump depth
1930
+ fail unless @rules
1931
+ proxify
1932
+ ivs=instance_variables
1933
+ a=ivs+ivs.reverse.map{|var| instance_variable_get var }
1934
+ result=Marshal.dump(a,depth)
1935
+ unproxify
1936
+ return result
1937
+ end
1938
+
1939
+ #convert marshal proxies back to the real thing
1940
+ def unproxify
1941
+ #build a lookup table for unmarshalables by walking @rules
1942
+ @rules||=expanded_RULES
1943
+ n=-1;lookup={}
1944
+ Ron::GraphWalk.graphwalk(@rules){|cntr,o,i,ty|
1945
+ case o
1946
+ when StackMonkey
1947
+ lookup[o.name]=o
1948
+ when Reg::Deferred
1949
+ lookup[n+=1]=o
1950
+ end
1951
+ }
1952
+
1953
+ Ron::GraphWalk.graphmodify!(self){|cntr,o,i,ty,useit|
1954
+ if MarshalProxy===o
1955
+ useit[0]=true
1956
+ lookup[o.key]
1957
+ end
1958
+ }
1959
+ end
1960
+
1961
+ def self._load(str,*more)
1962
+ result=allocate
1963
+ a=Marshal.load(str,*more)
1964
+
1965
+ result.unproxify
1966
+
1967
+ (0...a.size/2).each{|i| result.instance_variable_set a[i],a[-i] }
1968
+ return result
1969
+ end
1970
+ =end
1971
+
1972
+ ###### specific to parsing ruby
1973
+
1974
+
1975
+ UCLETTER=RubyLexer::UCLETTER
1976
+
1977
+ LCLETTER=RubyLexer::LCLETTER
1978
+ LETTER=RubyLexer::LETTER
1979
+ LETTER_DIGIT=RubyLexer::LETTER_DIGIT
1980
+
1981
# Returns the entries of this class's constant list that match Node|Token.
# NOTE(review): Module#constants lists constant names, not their values;
# confirm that matching names against Node|Token is what is wanted here.
def vertices
  self.class.constants.grep(Node|Token)
end
1982
+
1983
+ def self.has_return_hash_fix? #is this needed? it's not used in this file....
35
1984
  rl=RubyLexer.new("","return {}.size")
36
1985
  return(
37
1986
  FileAndLineToken===rl.get1token and
@@ -109,11 +2058,12 @@ class RedParse
109
2058
 
110
2059
  "?"=>106, # ":"=>106, #not sure what to do with ":"
111
2060
 
112
- "*@"=>105.5, "&@"=>105.5, #unary * and & operators
2061
+ "unary*"=>105, "unary&"=>105, #unary * and & operators
2062
+ "lhs*"=>105, "rhs*"=>105, #this should remain above =, but other unary stars are below it
113
2063
 
114
- "="=>105, "%="=>105, "/="=>105, "-="=>105, "+="=>105,
115
- "|="=>105, "&="=>105, ">>="=>105, "<<="=>105, "*="=>105,
116
- "&&="=>105, "||="=>105, "**="=>105, "^="=>105,
2064
+ "="=>104, "%="=>104, "/="=>104, "-="=>104, "+="=>104,
2065
+ "|="=>104, "&="=>104, ">>="=>104, "<<="=>104, "*="=>104,
2066
+ "&&="=>104, "||="=>104, "**="=>104, "^="=>104,
117
2067
 
118
2068
  "defined?"=>103,
119
2069
  "not"=>103,
@@ -121,11 +2071,14 @@ class RedParse
121
2071
  "rescue3"=>102,
122
2072
 
123
2073
  "=>"=>101,
124
- ","=>100,
2074
+ "lhs,"=>100,
2075
+ "rhs,"=>100, #"call,"=>100, "array,"=>100, "param,"=>100,
2076
+ ","=>100,
125
2077
  #the 'precedence' of comma is somewhat controversial. it actually has
126
2078
  #several different precedences depending on which kind of comma it is.
127
2079
  #the precedence of , is higher than :, => and the assignment operators
128
- #in certain contexts.
2080
+ #in certain (lhs) contexts. therefore, the precedence of lhs, should
2081
+ #really be above =.
129
2082
 
130
2083
  #"unary" prefix function names seen has operators have this precedence
131
2084
  #but, rubylexer handles precedence of these and outputs fake parens
@@ -142,29 +2095,110 @@ class RedParse
142
2095
  end
143
2096
 
144
2097
  module BracketsCall; end
145
-
146
2098
  Value= #NumberToken|SymbolToken|
147
2099
  #HerePlaceholderToken|
148
- ((VarNameToken|ValueNode)&-{:lvalue? =>nil})
2100
+ ValueNode&-{:lvalue =>nil}
149
2101
  Expr=Value
150
2102
 
2103
+ if defined? SPECIALIZED_KEYWORDS
2104
+ class SpecializedKeywordToken<KeywordToken
2105
+ def inspect
2106
+ "#<"+self.class.name+">"
2107
+ end
2108
+ alias image inspect
2109
+ end
2110
+
2111
+ KW2class={}
2112
+
2113
+ Punc2name={
2114
+ "("=>"lparen", ")"=>"rparen",
2115
+ "["=>"lbracket", "]"=>"rbracket",
2116
+ "{"=>"lbrace", "}"=>"rbrace",
2117
+ ","=>"comma",
2118
+ ";"=>"semicolon",
2119
+ "::"=>"double_colon",
2120
+ "."=>"dot",
2121
+ "?"=>"question_mark", ":"=>"colon",
2122
+ "="=>"equals",
2123
+ "|"=>"pipe",
2124
+ "<<"=>"leftleft", ">>"=>"rightright",
2125
+ "=>"=>"arrow",
2126
+ }
2127
+ end
2128
+
151
2129
  def self.KW(ident)
2130
+ if defined? SPECIALIZED_KEYWORDS
2131
+ fail if /\\/===ident
2132
+ orig_ident=ident
2133
+ if Regexp===ident
2134
+ list=ident.to_s[/\(?-mix:\^\((.*)\)\$\)/,1]
2135
+
2136
+ #pick apart any char class in ident
2137
+ if open_bracket_idx=list.index(/([^\\]|^)\[/)
2138
+ open_bracket_idx+=1 unless list[open_bracket_idx]=="["
2139
+ close_bracket_idx=list.index(/[^\\]\]/,open_bracket_idx+1)
2140
+ close_bracket_idx+=1 unless list[close_bracket_idx]=="]"
2141
+ cclass=list.slice!(open_bracket_idx..close_bracket_idx)
2142
+ cclass=cclass[1...-1]
2143
+ cclass=cclass.scan( /[^\\]|\\./ )
2144
+ cclass.map!{|ch| ch.size==1 ? ch : ch[1..1] }
2145
+ end
2146
+
2147
+ #rest of it should be a list of words separated by |
2148
+ list=list.split(/\|/).reject{|x| x==''}
2149
+ list.concat cclass if cclass
2150
+ list.map{|w|
2151
+ w.gsub!(/\\/,'')
2152
+ KW(w)
2153
+ }.inject{|sum,kw| sum|kw}
2154
+ else
2155
+ fail unless String===ident
2156
+ ident=Punc2name[ident] unless /^(?:(?!#{LETTER_DIGIT}).)+$/o===ident
2157
+ fail "no name for #{orig_ident}" unless ident
2158
+ eval %{
2159
+ class Keyword_#{ident} < SpecializedKeywordToken
2160
+ def ident; '#{orig_ident}' end
2161
+ # def self.instance; @instance ||= allocate end
2162
+ # def self.new; instance end
2163
+ def initialize(offset)
2164
+ @offset=offset
2165
+ end
2166
+ end
2167
+ }
2168
+ KW2class[ident]||=const_get("Keyword_#{ident}")
2169
+ end
2170
+ else
152
2171
  ident=case ident
153
- when Integer: ident.chr
154
- when String,Regexp: ident
2172
+ when Integer; ident.chr
2173
+ when String,Regexp; ident
155
2174
  else ident.to_s
156
2175
  end
157
2176
 
158
2177
  return KeywordToken&-{:ident=>ident}
2178
+ end
159
2179
  end
160
2180
  def KW(ident); self.class.KW(ident) end
2181
+
2182
# make_kw(name,offset) builds the keyword token for name at offset.
# With SPECIALIZED_KEYWORDS defined it instantiates the per-keyword
# class registered in KW2class; otherwise it builds a plain KeywordToken.
if defined? SPECIALIZED_KEYWORDS
  def make_specialized_kw(name,offset)
    # NOTE(review): the name is swapped for its Punc2name entry only
    # when this regexp does NOT match -- confirm the condition is the
    # right way round.
    unless /^((?!#{LETTER_DIGIT}).)+$/o===name
      name=Punc2name[name]
    end
    KW2class[name].new(offset)
  end
  alias make_kw make_specialized_kw
else
  def make_kw(name,offset)
    KeywordToken.new(name,offset)
  end
end
2193
+
161
2194
  UNOP=
162
2195
  (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
163
- :ident=>/^[*&+-]@$/,
164
- :unary =>true,
2196
+ # :ident=>/^(?:[+-]@|unary[&*]|(?:lhs|rhs)[*])$/,
2197
+ :ident=>/^(?:[+-]@|unary[&])$/,
2198
+ #:unary =>true,
165
2199
  }|
166
2200
  (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
167
- :ident=>/^([~!]|not|defined\?)$/,
2201
+ :ident=>/^([~!]|not|defined\?)$/, #defined? should be removed from here, its handled separately
168
2202
  } #|
169
2203
  DEFOP=
170
2204
  (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
@@ -200,285 +2234,325 @@ class RedParse
200
2234
  :ident=>/^([\[({!+*?:,]|\.{1,3}|::|=>)$/
201
2235
  }
202
2236
  =end
203
- DotOp= KeywordToken & -{ :ident=>"." }
204
- DoubleColonOp= KeywordToken & -{ :ident=>"::" }
2237
+ DotOp= KW('.') #KeywordToken & -{ :ident=>"." }
2238
+ DoubleColonOp= KW('::') #KeywordToken & -{ :ident=>"::" }
205
2239
 
206
2240
  Op=Op()
207
2241
  MODIFYASSIGNOP=Op( /^(([^=])\2|[^<>=!])=$/, true )
208
2242
  NONASSIGNOP=Op( /([^=]|[<>=!]=)$/)
209
2243
  KW_Op= #some of these ought to be regular operators, fer gosh sake
210
- Op(/^((![=~])|\.\.\.?|=>)$/,true)|Op(/^(#{BINOP_KEYWORDS.join('|')})$/)
2244
+ Op(/^(![=~]|\.\.\.?|=>)$/,true)|Op(/^(#{BINOP_KEYWORDS.join('|')})$/)
211
2245
 
212
2246
  EPSILON=Float::EPSILON*10_000_000 #this should be <<1 and >0
213
2247
  fail unless 1+EPSILON>1
214
2248
  fail unless EPSILON<0.1
215
2249
 
216
2250
  def left_op_higher(op,op2)
217
- # (Op|KeywordOp|KeywordOp2|ASSIGNOP===op2) or return true
218
2251
  KeywordToken===op2 or OperatorToken===op2 or return true
219
2252
  rightprec=@precedence[op2.to_s] or return true
220
- #or fail "unrecognized right operator: #{op2.inspect}"
221
2253
  rightprec+=EPSILON if @RIGHT_ASSOCIATIVE[op2.to_s]
222
2254
  return @precedence[op.to_s]>=rightprec
223
2255
  end
224
2256
 
225
- LowerOp= proc{|parser,op2| parser.left_op_higher(parser[-3],op2) }
2257
+ # LowerOp= proc{|parser,op2| parser.left_op_higher(parser[-3],op2) }
2258
+ def lower_op
2259
+ return @lower_op if defined? @lower_op
2260
+ lower_op=item_that{|op| left_op_higher(@stack[-3],op) }
2261
+ lower_op=(LOWEST_OP|(~VALUELIKE_LA & lower_op)).la
2262
+ def lower_op.inspect; "lower_op" end
2263
+ @lower_op=lower_op
2264
+ end
226
2265
 
227
- def dont_postpone_semi
228
- @dps||=~wants_semi_context
2266
+ #this is a hack, should use graphcopy to search for Deferreds and replace with double-Deferred as below
2267
+ def item_that(*a,&b)
2268
+ if defined? @generating_parse_tables
2269
+ huh unless b
2270
+ #double supers, one of them in a block executed after this method returns....
2271
+ #man that's weird
2272
+ super(*a){|ob| @saw_item_that[[super(*a,&b),ob]]=true}
2273
+ else
2274
+ super(*a,&b) #and then here's another
2275
+ end
229
2276
  end
2277
+
230
2278
  WANTS_SEMI=%w[while until if unless
231
2279
  def case when in rescue
232
2280
  elsif class module << => . ::
233
2281
  ]
234
2282
  def wants_semi_context
235
- Op('<<')|KW(/^(#{WANTS_SEMI.map{|ws| Regexp.quote ws }.join('|')})$/)
2283
+ Op(/^(<<|=>|\.|::)$/)|KW(/^(#{WANTS_SEMI.map{|ws| Regexp.quote ws }.join('|')})$/)
2284
+ end
2285
+ def dont_postpone_semi
2286
+ @dps||=~wants_semi_context
236
2287
  end
237
2288
 
238
- NeverBlockKeyword=MethNameToken&-{:ident=>/^(return|break|next)$/}
239
- FakeBegin=KW('(')&-{:not_real? =>true}
240
- FakeEnd=KW(')')&-{:not_real? =>true}
2289
+ #NeverBlockKeyword=MethNameToken&-{:ident=>/^(return|break|next)$/}
2290
+ #FakeBegin=KW('(')&-{:not_real? =>true}
2291
+ #FakeEnd=KW(')')&-{:not_real? =>true}
241
2292
 
242
2293
  #rule format:
243
2294
  # -[syntax pattern_matchers.+, lookahead.-]>>node type
244
2295
 
245
- DotCall=proc{|stack|
2296
+ DotCall=stack_monkey("DotCall",4,CallNode){|stack|
2297
+ left,dot=*stack.slice!(-4..-3)
246
2298
  right=stack[-2]
247
- left,bogus=*stack.slice!(-4..-3)
248
2299
 
2300
+ right.startline=left.startline
249
2301
  right.set_receiver! left
250
2302
  }
251
2303
 
252
- Lvalue=(VarNameToken|CallSiteNode|BracketsGetNode|CommaOpNode|
253
- ParenedNode|ConstantNode|UnaryStarNode)&-{:lvalue? =>true}
2304
+ Lvalue=(VarNode|CallSiteNode|BracketsGetNode|CommaOpNode|
2305
+ ParenedNode|ConstantNode|UnaryStarNode)&-{:lvalue =>true}
254
2306
 
255
- BareMethod=MethNameToken|LiteralNode&-{:val=>Symbol|StringNode}
2307
+ BareMethod=MethNameToken|(LiteralNode&-{:bare_method=>true})
256
2308
 
257
2309
  BEGINWORDLIST=RubyLexer::BEGINWORDLIST + %w"( [ {"
258
2310
  ENDWORDLIST=%w"end ) ] }"
259
- BEGIN2END={"{"=>"}", "("=>")", "["=>"]", }
260
- endword="end"
261
- RubyLexer::BEGINWORDLIST.each{|bw| BEGIN2END[bw]=endword }
2311
+ ENDWORDS=ENDWORDLIST.map{|x| Regexp.quote x}.join('|')
2312
+ BEGINWORDS=RubyLexer::BEGINWORDS
2313
+ INNERBOUNDINGWORDS=RubyLexer::INNERBOUNDINGWORDS
2314
+
2315
+ BEGIN2END={"{"=>"}", "("=>")", "["=>"]", BEGINWORDS=>"end"}
262
2316
  def beginsendsmatcher
263
2317
  @bem||=
264
- /^(#{(BEGINWORDLIST+ENDWORDLIST).map{|x| Regexp.quote x}.join('|')})$/
2318
+ /^(#{BEGINWORDS}|#{ENDWORDS})$/
265
2319
  end
266
2320
 
267
- MULTIASSIGN=UnaryStarNode|CommaOpNode|(ParenedNode&-{:size=>1})
268
- WITHCOMMAS=UnaryStarNode|CommaOpNode|
269
- (CallSiteNode&-{:real_parens=>false, :args=>-{:size=>~0.reg}}) #|(ParenedNode&-{:size=>~1.reg})
2321
+ MULTIASSIGN=UnaryStarNode|CommaOpNode|ParenedNode
2322
+ WITHCOMMAS=UnaryStarNode|CommaOpNode|(CallSiteNode&-{:with_commas=>true})
2323
+ #(CallSiteNode&-{:real_parens=>false, :args=>-{:size=>~0.reg}})
270
2324
 
271
2325
  BEGINAFTEREQUALS=
272
- ParenedNode&
273
- -{:size =>~1.reg, :op? =>NilClass|FalseClass, :after_equals =>nil}&
274
- (-{:body=>item_that.size>0}|-{:rescues=>item_that.size>0}|-{:ensures=>~NilClass})
275
- # item_that{|x| x.body.size+x.rescues.size > 0 or x.ensures }
2326
+ BeginNode&
2327
+ -{:after_equals =>nil}&-{:non_empty=>true}
2328
+ BEGINAFTEREQUALS_MARKED=
2329
+ BeginNode&
2330
+ -{:after_equals =>true}&-{:non_empty=>true}
276
2331
 
277
- # ASSIGN_COMMA=Op(',',true)&-{:comma_type=>Symbol}
278
- LHS_COMMA=Op(',',true)&-{:comma_type => :lhs}
279
- RHS_COMMA=Op(',',true)&-{:comma_type => :rhs}
280
- PARAM_COMMA=Op(',',true)&-{:comma_type => :param}
2332
+ LHS_COMMA=Op('lhs,',true)#&-{:tag => :lhs}
2333
+ RHS_COMMA=Op('rhs,',true)#&-{:tag => :rhs}
2334
+ #PARAM_COMMA=Op('param,',true)#&-{:tag => :param}
281
2335
  FUNCLIKE_KEYWORD=KeywordToken&-{:ident=>RubyLexer::FUNCLIKE_KEYWORDS}
2336
+ IGN_SEMI_BEFORE=KW(/^(#{RubyLexer::INNERBOUNDINGWORDS.gsub(/(rescue|then)\|/,'')[1...-1]}|end|[)}\]])$/)|EoiToken
2337
+ IGN_SEMI_AFTER=KW(/^(begin|[;:({|]|then|do|else|ensure)$/)|BlockFormalsNode
2338
+
2339
+ #for use in lookback patterns
2340
+ OPERATORLIKE_LB=OperatorToken|
2341
+ KW(/^(not | defined\? | .*[@,] | [ ~ ! ; \( \[ \{ ? : ] | \.{1,3} | :: | => | ![=~])$/x)|
2342
+ KW(%r{^( \*\*? | << | >> | &&? | \|\|? | \^ | % | / | - | \+ )?=$}x)|
2343
+ KW(BEGINWORDS)|KW(/^#{INNERBOUNDINGWORDS}$/)|RescueHeaderNode|StartToken|
2344
+ GoalPostToken|BlockFormalsNode
2345
+
2346
+ #for use in lookahead patterns
2347
+ VALUELIKE_LA=KW(RubyLexer::VARLIKE_KEYWORDS)|NumberToken|SymbolToken|StringToken|UNOP|DEFOP|
2348
+ KW(/^( \( | \{ | )$/x)|VarNameToken|MethNameToken|HerePlaceholderToken|KW(BEGINWORDS)|FUNCLIKE_KEYWORD
2349
+ LOWEST_OP=KW(/^(#{ENDWORDS})$/)|KW(/^#{INNERBOUNDINGWORDS.sub('rescue|','')}$/)|EoiToken|GoalPostToken
2350
+
2351
+ RESCUE_BODY=-[Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,]
2352
+
2353
+ RESCUE_OP=Op('rescue')|(KW('rescue')&-{:infix=>true})
2354
+
2355
+ RESCUE_KW=KW('rescue')&-{:infix=>nil}
2356
+
2357
+ inspect_constant_names
282
2358
 
283
2359
  def RULES
2360
+ lower_op= lower_op()
2361
+
2362
+ [-[StartToken.lb, Expr.-, EoiToken.la]>>:accept,
2363
+ -[EoiToken]>>:error,
2364
+ ]+
2365
+
284
2366
  #these must be the lowest possible priority, and hence first in the rules list
285
2367
  BEGIN2END.map{|_beg,_end|
286
- -[KW(_beg), KW(beginsendsmatcher).~.*, KW(_end), KW(/^(do|\{)$/).~.la]>>MisparsedNode
2368
+ -[KW(_beg), (KW(_beg)|KW(_end)).~.*, KW(_end), KW(/^(do|\{)$/).~.la]>>MisparsedNode
287
2369
  }+
288
2370
 
289
2371
  [
290
- -[UNOP, Value, LowerOp]>>UnOpNode,
291
- -[DEFOP, ParenedNode&-{:size=>1}]>>UnOpNode,
292
- -[Op('*@'), VarNameToken|ValueNode, LowerOp]>>UnaryStarNode,
2372
+ -[UNOP, Expr, lower_op]>>UnOpNode,
2373
+ -[DEFOP, ParenedNode]>>UnOpNode,
2374
+ -[Op(/^(?:unary|lhs|rhs)\*$/), ValueNode, lower_op]>>UnaryStarNode,
293
2375
 
294
- -[Op('=',true)|KW(/^(rescue|when|\[)$/)|-{:comma_type=>:call.reg|:array|:param|:rhs},
295
- Op('*@'), VarNameToken|ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
2376
+ -[Op('=',true)|KW(/^(rescue|when|\[)$/)|Op(/,$/,true),
2377
+ Op(/^(?:unary|rhs)\*$/), ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
296
2378
  -[MethNameToken|FUNCLIKE_KEYWORD, KW('('),
297
- Op('*@'), VarNameToken|ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
298
- # -[KW('[')|-{:comma_type=>:call.reg|:array},
299
- # Op('*@'), VarNameToken|ValueNode, Op('=',true).la]>>:shift,
2379
+ Op(/^(?:unary|rhs)\*$/), ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
300
2380
  #star should not be used in an lhs if an rhs or param list context is available to eat it.
301
- #(including param lists for keywords such as return,break,next,continue,rescue,yield,when)
2381
+ #(including param lists for keywords such as return,break,next,rescue,yield,when)
302
2382
 
303
- -[Op('*@'), (GoalPostNode|KW(/^(in|[=)|,;])$/)).la]>>DanglingStarNode, #dangling *
304
- -[',', (GoalPostNode|KW(/^(in|[=)|;])$/)).la]>> #dangling ,
305
- proc{|stack|
2383
+ #hmmm.... | in char classes below looks useless (predates GoalPostToken)
2384
+ -[Op(/^(?:unary|lhs)\*$/), (GoalPostToken|Op(/,$/,true)|KW(/^(in|[=)|;])$/)).la]>>DanglingStarNode, #dangling *
2385
+ -[Op(/,$/,true), (GoalPostToken|KW(/^(in|[=)|;])$/)).la]>> #dangling ,
2386
+ stack_monkey("DanglingComma",1,DanglingCommaNode){|stack|
306
2387
  dcomma=DanglingCommaNode.new
307
2388
  dcomma.offset=stack.last.offset
308
2389
  stack.push dcomma, stack.pop
309
2390
  },
310
2391
 
311
- -[Value, Op|KW_Op, Value, LowerOp]>>RawOpNode, #most operators
2392
+ -[Expr, Op|KW_Op, Expr, lower_op]>>RawOpNode, #most operators
312
2393
 
313
2394
  #assignment
314
- -[Lvalue, MODIFYASSIGNOP, Value, LowerOp]>>AssignNode,
315
- -[Lvalue, Op('=',true), AssignmentRhsNode, LowerOp]>>AssignNode,
316
- -[Op('=',true).lb, Value, LowerOp]>>AssignmentRhsNode,
317
- #was: -[AssignmentRhsListStartToken, Value, AssignmentRhsListEndToken]>>AssignmentRhsNode,
318
- -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
319
- Op('rescue3',true), Value, LowerOp]>>AssignNode,
320
- -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
2395
+ -[Lvalue, MODIFYASSIGNOP, Expr, lower_op]>>AssignNode,
2396
+ -[Lvalue, Op('=',true), AssignmentRhsNode, lower_op]>>AssignNode,
2397
+ -[Op('=',true).lb, Expr, lower_op]>>AssignmentRhsNode,
2398
+
2399
+ # a = b rescue c acts like a ternary,,,
2400
+ #provided that both a and b are not multiple and b
2401
+ #(if it is a parenless callsite) has just 1 param
2402
+ -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
2403
+ Op('rescue3',true), Expr, lower_op]>>AssignNode,
2404
+ -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
321
2405
  Op('rescue3',true).la]>>:shift,
322
- -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
323
- Op('rescue',true).la] >>proc{|stack|
2406
+ -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
2407
+ RESCUE_OP.la] >>
2408
+ stack_monkey("rescue3",1,Op('rescue3',true)){|stack|
324
2409
  resc=stack.last.dup
325
2410
  resc.ident += '3'
326
2411
  stack[-1]=resc
327
2412
  },
328
- # a = b rescue c acts like a ternary,,,
329
- #provided that both a and b are not multiple and b
330
- #(if it is a parenless callsite) has just 1 param
331
-
332
- # -[Op('=',true), ~WITHCOMMAS, Op('rescue',true).la]>>:shift,
333
2413
  #relative precedence of = and rescue are to be inverted if rescue
334
2414
  #is to the right and assignment is not multiple.
335
2415
 
336
- -[Op('=',true).~.lb, OB, Op('=',true), Value, RHS_COMMA.la]>>:shift,
337
- -[RHS_COMMA.lb, Lvalue, Op('=',true), Value, RHS_COMMA.la ]>>AssignNode,
338
- -[ValueNode|VarNameToken, LHS_COMMA, ValueNode|VarNameToken, Op('=',true).la]>>CommaOpNode,
2416
+ #if assignment rhs contains commas, don't reduce til they've been read
2417
+ #(unless we're already on an rhs)
2418
+ -[(Op('=',true)|Expr).~.lb, Lvalue, Op('=',true), Expr, RHS_COMMA.la]>>:shift,
2419
+ -[RHS_COMMA.lb, Lvalue, Op('=',true), Expr, RHS_COMMA.la ]>>AssignNode,
2420
+ -[ValueNode, LHS_COMMA, ValueNode, Op('=',true).la]>>CommaOpNode,
339
2421
  #relative precedence of = and lhs/rhs , are to be inverted.
340
2422
 
341
- -[KW(',')&-{:comma_type=>:lhs}, (UnaryStarNode|ParenedNode)&~-{:after_comma =>true}, Op('=',true)]>>
342
- proc{|stack| stack[-3].after_comma=true}, #mebbe this should be a lexer hack
343
2423
  #mark parentheses and unary stars that come after lhs commas
2424
+ -[LHS_COMMA, (UnaryStarNode|ParenedNode)&~-{:after_comma =>true}, Op('=',true)]>>
2425
+ stack_monkey("after_comma",3,(UnaryStarNode|ParenedNode)&-{:after_comma =>true}){|stack|
2426
+ stack[-3].after_comma=true},
2427
+ #mebbe this should be a lexer hack?
344
2428
 
345
- #-[Value, DotOp|DoubleColonOp, MethNameToken,
346
- # ASSIGNOP, Value, LowerOp]>>AccessorAssignNode,
347
-
348
- -[(MethNameToken|FUNCLIKE_KEYWORD).~.lb, '(', Value, ')']>>ParenedNode,
349
- -[(MethNameToken|FUNCLIKE_KEYWORD).~.lb, '(', ')']>>VarLikeNode, #alias for nil
2429
+ -[#(OPERATORLIKE_LB&~(MethNameToken|FUNCLIKE_KEYWORD)).lb,
2430
+ '(', Expr, KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true})]>>ParenedNode,
2431
+ -[#(OPERATORLIKE_LB&~(MethNameToken|FUNCLIKE_KEYWORD)).lb,
2432
+ '(', KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true})]>>VarLikeNode, #(), alias for nil
350
2433
 
351
- # -[Value, KeywordOp, Value, LowerOp]>>KeywordOpNode,
352
- -[Op('=',true).~.lb, Value, Op('rescue',true), Value, LowerOp]>>ParenedNode,
2434
+ -[#(OPERATORLIKE_LB&~Op('=',true)).lb,
2435
+ Expr, RESCUE_OP, Expr, lower_op]>>RescueOpNode,
353
2436
 
354
2437
  #dot and double-colon
355
- -[DoubleColonOp, VarNameToken, LowerOp]>>ConstantNode,#unary ::
356
- -[Value, DotOp, CallNode, LowerOp]>>DotCall, #binary .
357
- -[Value, DoubleColonOp, CallNode, LowerOp]>>DotCall, #binary ::
358
- -[Value, DoubleColonOp, VarNameToken, LowerOp]>>ConstantNode,#binary ::
2438
+ -[DoubleColonOp, VarNode, lower_op]>>ConstantNode,#unary ::
2439
+ -[Expr, DotOp, CallNode, lower_op]>>DotCall, #binary .
2440
+ -[Expr, DoubleColonOp, CallNode, lower_op]>>DotCall, #binary ::
2441
+ -[Expr, DoubleColonOp, VarNode, lower_op]>>ConstantNode,#binary ::
359
2442
 
360
- -[Value, "?", Value, ":", Value, LowerOp]>>TernaryNode,
2443
+ -[Expr, "?", Expr, ":", Expr, lower_op]>>TernaryNode,
361
2444
 
362
- # -[Value, /^\.\.\.?$/, Value, LowerOp]>>RangeNode,
363
2445
 
364
- -[MethNameToken, '(', Value.-, ')', BlockNode.-, KW(/^(do|\{)$/).~.la]>>CallNode,
365
- -[FUNCLIKE_KEYWORD, '(', Value.-, ')',
366
- BlockNode.-, KW(/^(do|\{)$/).~.la]>>KWCallNode,
2446
+ -[MethNameToken, '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>CallNode,
2447
+ -[FUNCLIKE_KEYWORD, '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>KWCallNode,
367
2448
 
368
- -[ValueNode|VarNameToken, ',', ValueNode|VarNameToken, LowerOp]>>CommaOpNode,
2449
+ -[ValueNode, Op(/,$/,true), ValueNode, lower_op]>>CommaOpNode,
369
2450
 
370
- -[dont_postpone_semi.lb,
371
- Value, ';', Value, LowerOp]>>SequenceNode,
2451
+ -[(OPERATORLIKE_LB&dont_postpone_semi).lb,
2452
+ Expr, ';', Expr, lower_op]>>SequenceNode,
372
2453
 
373
- # -[Value, '=>', Value, LowerOp]>>ArrowOpNode,
374
2454
 
375
- -[KW(')').~.lb, '{', (CommaOpNode|ArrowOpNode).-, '}']>>HashLiteralNode,
2455
+ -[#(OPERATORLIKE_LB&~KW(')')).lb,
2456
+ '{', (CommaOpNode|ArrowOpNode).-, '}']>>HashLiteralNode, #-40
376
2457
 
377
- # -[CallSiteNode.~.lb, '{', Value, '}']>>HashLiteralNode,
378
-
379
- # -[KW(')').lb, '{', BlockFormalsNode.-, Value.-, '}']>>BlockNode,
380
- -[KW(')').lb, 'do', BlockFormalsNode.-, Value.-, 'end']>>BlockNode,
2458
+ -[KW(')').lb, 'do', BlockFormalsNode.-, Expr.-, 'end']>>BlockNode,
2459
+ #this does {} as well... converted to do...end
381
2460
  #rubylexer handles the 'low precedence' of do...end
382
2461
 
383
- -[GoalPostNode, Value.-, GoalPostNode]>>BlockFormalsNode,
2462
+ -[GoalPostToken, Expr.-, GoalPostToken]>>BlockFormalsNode,
384
2463
  #rubylexer disambiguated operator vs keyword '|'
385
2464
 
386
- -[/^(while|until)$/, Value, /^([:;]|do)$/, Value.-, 'end']>>LoopNode,
2465
+ -[/^(while|until)$/, Expr, /^([:;]|do)$/, Expr.-, 'end']>>LoopNode,
387
2466
 
388
- -[/^(if|unless)$/, Value, /^(;|then|:)$/,
389
- Value.-, ElsifNode.*, ElseNode.-, 'end'
2467
+ -[/^(if|unless)$/, Expr, /^(;|then|:)$/,
2468
+ Expr.-, ElsifNode.*, ElseNode.-, 'end'
390
2469
  ]>>IfNode,
391
2470
 
392
- -['else', Value.-, KW(/^(ensure|end)$/).la]>>ElseNode,
2471
+ -['else', Expr.-, KW(/^(ensure|end)$/).la]>>ElseNode,
393
2472
 
394
- -['elsif', Value, /^(;|then|:)$/, Value.-,
395
- KW(/^e(nd|ls(e|if))$/).la
2473
+ -['elsif', Expr, /^(;|then|:)$/, Expr.-,
2474
+ KW(/^(end|else|elsif)$/).la
396
2475
  ]>>ElsifNode,
397
2476
 
398
- -['module', ConstantNode|VarNameToken, KW(';'), Value.-, 'end']>>ModuleNode,
399
- # -['module', ConstantNode|VarNameToken, KW(/^(;|::)$/).~.la]>>
400
- # proc{|stack| #insert ; at end of module header if none was present
2477
+ # -['module', ConstantNode|VarNode, KW(/^(;|::)$/).~.la]>>
2478
+ # stack_monkey(1,KW(';')){|stack| #insert ; at end of module header if none was present
401
2479
  # stack.push KeywordToken.new(';'), stack.pop
402
2480
  # },
403
- -['class', Value, ';', Value.-, 'end']>>ClassNode,
404
- -['class', Value, Op('<'), Value, KW(';').~.la]>>:shift,
405
- -['class', Op('<<'), Value, ';', Value.-, 'end']>>MetaClassNode,
2481
+ -['module', ConstantNode|VarNode, ';', RESCUE_BODY, 'end']>>ModuleNode,
2482
+ -['class', Expr, ';', RESCUE_BODY, 'end']>>ClassNode,
2483
+ -['class', Expr, Op('<'), Expr, KW(';').~.la]>>:shift,
2484
+ -['class', Op('<<'), Expr, ';', RESCUE_BODY, 'end']>>MetaClassNode, #-30
406
2485
 
407
- -['alias', BareMethod|VarNameToken, BareMethod|VarNameToken]>>AliasNode,
2486
+ -['alias', BareMethod|VarNode, BareMethod|VarNode]>>AliasNode,
408
2487
  -['undef', BareMethod]>>UndefNode,
409
- -[UndefNode, ',', BareMethod]>>UndefNode,
2488
+ -[UndefNode, Op(',',true), BareMethod]>>UndefNode,
410
2489
 
411
- -['def', CallSiteNode, Op('=').-, KW(';'),
412
- Value.-, RescueNode.*, ElseNode.-, EnsureNode.-,
2490
+ -['def', CallSiteNode, Op('=').-, KW(';'), RESCUE_BODY,
2491
+ # Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,
413
2492
  'end'
414
2493
  ]>>MethodNode,
415
2494
 
416
- -['begin',
417
- Value.-, RescueNode.*, ElseNode.-, EnsureNode.-,
2495
+ -['begin', RESCUE_BODY,
2496
+ # Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,
418
2497
  'end'
419
- ]>>ParenedNode,
2498
+ ]>>BeginNode,
420
2499
 
421
- -[Op('=',true), BEGINAFTEREQUALS, Op('rescue',true).la]>>
422
- proc{ |stack| stack[-2].after_equals=true },
2500
+ -[Op('=',true), BEGINAFTEREQUALS, RESCUE_OP.la]>>
2501
+ stack_monkey("begin after equals",2,BEGINAFTEREQUALS_MARKED){ |stack| stack[-2].after_equals=true },
423
2502
  #this is bs. all for an extra :begin in the parsetree
424
2503
 
425
- -[(KW(/^(;|begin)$/)|ParenedNode|RescueNode).lb,
426
- 'rescue', KW('=>').-, Value.-, /^([:;]|then)$/,
2504
+ -[(KW(/^(;|begin)$/)|RescueNode).lb, #ParenedNode|RescueOpNode|BeginNode used to be here too
2505
+ RESCUE_KW, KW('=>').-, Expr.-, /^([:;]|then)$/,
427
2506
  ]>>RescueHeaderNode,
428
- -[ RescueHeaderNode, Value.-, KW(';').-, KW(/^(rescue|else|ensure|end)$/).la
2507
+ -[ RescueHeaderNode, Expr.-, KW(';').-, (KW(/^(else|ensure|end)$/)|RESCUE_KW).la
429
2508
  ]>>RescueNode,
430
2509
 
431
- -['ensure', Value.-, KW('end').la]>>EnsureNode,
2510
+ -['ensure', Expr.-, KW('end').la]>>EnsureNode,
432
2511
 
433
- -['[', Value.-, ']']>>ArrayLiteralNode,
2512
+ -['[', Expr.-, ']']>>ArrayLiteralNode, #-20
434
2513
 
435
- -[Value, '[', Value.-, ']']>>BracketsGetNode,
2514
+ -[Expr, '[', Expr.-, ']']>>BracketsGetNode,
436
2515
 
437
- -[HereDocNode, StringToken.*, StringToken, StringToken.~.la]>>StringCatNode,
438
- -[(StringToken|HereDocNode).~.lb, StringToken.*, StringToken, StringToken, StringToken.~.la]>>StringCatNode,
439
- -[(StringToken|HereDocNode).~.lb, StringToken, StringToken.~.la]>>StringNode, #includes regexp, wordlist, backquotes
2516
+ -[HereDocNode, StringToken+1, StringToken.~.la]>>StringCatNode,
2517
+ -[(OPERATORLIKE_LB&~(StringToken|HereDocNode)).lb, StringToken+2, StringToken.~.la]>>StringCatNode,
2518
+ -[(OPERATORLIKE_LB&~(StringToken|HereDocNode)).lb, StringToken, StringToken.~.la]>>StringNode, #includes regexp, wordlist, backquotes
440
2519
 
441
- -['case', Value.-, KW(/^[:;]$/).-, WhenNode.*, ElseNode.-, 'end']>>CaseNode,
2520
+ -['case', Expr.-, KW(';').-, WhenNode.*, ElseNode.-, 'end']>>CaseNode,
442
2521
 
443
- -['when', Value, /^([:;]|then)$/, Value.-,
2522
+ -['when', Expr, /^([:;]|then)$/, Expr.-,
444
2523
  KW(/^(when|else|end)$/).la
445
2524
  ]>>WhenNode,
446
2525
 
447
- -['for', Value, 'in', Value, /^([:;]|do)$/, Value.-, 'end']>>ForNode,
2526
+ -['for', Expr, 'in', Expr, /^([:;]|do)$/, Expr.-, 'end']>>ForNode,
448
2527
 
449
2528
  #semicolon cleanup....
450
- -[dont_postpone_semi.lb,
451
- Value, ';',
452
- (KW(/^(#{RubyLexer::INNERBOUNDINGWORDS.gsub(/(rescue|then)\|/,'')}|end|[)}\]])$/)|EoiToken).la
453
- ]>>proc{|stack| stack.delete_at -2 },
454
- -[Value, ';', KW('then').la
455
- ]>>proc{|stack| stack.delete_at -2 },
456
- -[dont_postpone_semi.lb, Value, ';', RescueNode
457
- ]>>proc{|stack| stack.delete_at -3 },
458
- -[(KW(/^(begin|[;:({|]|then|do|else|ensure)$/)|StartNode|RescueHeaderNode).lb, ';'
459
- ]>>proc{|stack| stack.delete_at -2 },
2529
+ -[(OPERATORLIKE_LB&dont_postpone_semi).lb,Expr, ';', IGN_SEMI_BEFORE.la] \
2530
+ >>delete_monkey(2,"semi_cleanup_before_ISB"),
2531
+ -[Expr, ';', KW('then').la] >>delete_monkey(2,"semi_cleanup_before_then"),
2532
+ -[dont_postpone_semi.lb, Expr, ';', RescueNode] >>delete_monkey(3,"semi_cleanup_before_rescue"), #-10
2533
+ -[IGN_SEMI_AFTER.lb, ';'] >>delete_monkey(2,"semi_cleanup_after_oplike"),
2534
+ -[(StartToken|RescueHeaderNode).lb, ';' ] >>delete_monkey(2,"semi_cleanup_after_rescue"),
460
2535
  #this rule is somewhat more forgiving than matz' parser...
461
2536
  #not all semicolons after :, (, and { keywords should
462
2537
  #be ignored. some should cause syntax errors.
463
2538
 
464
2539
 
465
2540
  #comma cleanup....
466
- -[',', KW(/^[}\]]$/).la]>>proc{|stack| stack.delete_at -2},
2541
+ -[Op(/,$/,true), KW(/^([}\]])$/).la] >>delete_monkey(2, "comma_cleanup"),
467
2542
  #likewise, this is somewhat too forgiving.
468
2543
  #some commas before } or ] should cause syntax errors
469
2544
 
470
- #multiple assignment.... (handled in a subsequent stage?)
471
- #(cause it requires that the relative priorities of = and , be reversed!)
472
-
473
-
474
2545
  #turn lvalues into rvalues if not followed by an assignop
475
- -[-{:lvalue? =>true}, (Op('=',true)|MODIFYASSIGNOP).~.la]>>proc{|stack| stack[-2].lvalue=nil},
2546
+ -[-{:lvalue =>true}, (Op('=',true)|MODIFYASSIGNOP|LHS_COMMA).~.la]>>
2547
+ stack_monkey("lval2rval",2,-{:lvalue =>nil}){|stack|
2548
+ stack[-2].lvalue=nil
2549
+ },
476
2550
 
477
2551
  #expand the = into a separate token in calls to setters (after . or ::).
478
2552
  #but not in method headers
479
- -[KW('def').~.lb, Value, DotOp|DoubleColonOp,
480
- (MethNameToken&-{:ident=>/^[a-z_][a-z0-9_]*=$/i}).la]>>
481
- proc{|stack|
2553
+ -[(OPERATORLIKE_LB&~KW('def')).lb, Expr, DotOp|DoubleColonOp,
2554
+ (MethNameToken&-{:has_equals=>true}).la]>>
2555
+ stack_monkey("expand_equals",1,CallNode){|stack|
482
2556
  methname=stack.pop
483
2557
  methname.ident.chomp!('=')
484
2558
  offset=methname.offset+methname.ident.size
@@ -493,48 +2567,63 @@ class RedParse
493
2567
  #lexer does the wrong thing with -22**44.5, making the - part
494
2568
  #of the first number token. it's actually lower precedence than
495
2569
  #**... this rule fixes that problem.
496
- -[NumberToken&-{:ident=>/\A-/}, Op('**').la]>>
497
- proc{|stack|
498
- neg_op=OperatorToken.new("-@",stack[-2].offset)
499
- neg_op.unary=true
500
- stack[-2,0]=neg_op
501
- stack[-2].ident.sub!(/\A-/,'')
502
- stack[-2].offset+=1
2570
+ #in theory, unary - is lower precedence than ., ::, and [] as well, but
2571
+ #that appears not to apply to unary - in numeric tokens
2572
+ -[NumberToken&-{:negative=>true}, Op('**').la]>>
2573
+ stack_monkey("fix_neg_exp",2,Op("-@",true)){|stack|
2574
+ #neg_op.unary=true
2575
+ num=stack[-2]
2576
+ op=OperatorToken.new("-@",num.offset)
2577
+ # op.startline=num.startline
2578
+ stack[-2,0]=op
2579
+ num.ident.sub!(/\A-/,'')
2580
+ num.offset+=1
503
2581
  },
504
2582
 
505
2583
  #treat these keywords like (rvalue) variables.
506
- -[/^(nil|false|true|__FILE__|__LINE__|self)$/]>>VarLikeNode,
2584
+ -[RubyLexer::VARLIKE_KEYWORDS]>>VarLikeNode,
507
2585
 
508
2586
  #here docs
509
2587
  -[HerePlaceholderToken]>>HereDocNode,
510
- -[HereBodyToken]>>proc{|stack|
511
- stack.delete_at(-2)#.instance_eval{@headtok}.node.saw_body!
512
- },
2588
+ -[HereBodyToken.la]>>delete_monkey(1,"delete_here_body"),
2589
+ ##this is ridiculous. this should be a lexer hack?
2590
+
2591
+ -[VarNameToken]>>VarNode,
2592
+
513
2593
 
514
2594
  ]
515
2595
  end
516
2596
 
2597
+ if defined? END_ATTACK
2598
+ module Reducer; end
2599
+ include Reducer
2600
+ end
517
2601
 
518
-
519
- def initialize(input,name="(eval)",line=1,lvars=[])
2602
+ def initialize(input,name="(eval)",line=1,lvars=[],options={:rubyversion=>1.8})
2603
+ @rubyversion=options[:rubyversion]
520
2604
  if Array===input
521
2605
  def input.get1token; shift end
522
2606
  @lexer=input
523
2607
  else
524
- @lexer=RubyLexer.new(name,input,line)
2608
+ @lexer=RubyLexer.new(name,input,line,0,:rubyversion=>@rubyversion)
525
2609
  lvars.each{|lvar| @lexer.localvars[lvar]=true }
526
2610
  end
527
2611
  @filename=name
528
2612
  @min_sizes={}
529
2613
  @compiled_rules={}
530
2614
  @moretokens=[]
531
- @unary_or_binary_op=/^[-+&*]$/
532
- @rules=self.RULES
2615
+ @unary_or_binary_op=/^[-+]$/
2616
+ # @rules=self.expaneded_RULES
533
2617
  @precedence=self.PRECEDENCE
534
2618
  @RIGHT_ASSOCIATIVE=self.RIGHT_ASSOCIATIVE
2619
+ if defined? END_ATTACK
2620
+ compile
2621
+ end
2622
+ @saw_item_that=nil
535
2623
  end
536
2624
 
537
2625
  attr_accessor :lexer
2626
+ attr :rubyversion
538
2627
 
539
2628
  def get_token(recursing=false)
540
2629
  unless @moretokens.empty?
@@ -543,78 +2632,60 @@ class RedParse
543
2632
  return @last_token
544
2633
  end
545
2634
 
2635
+ rpt=ENV['RAW_PRINT_TOKENS']
546
2636
  begin
547
2637
  result=@lexer.get1token or break
548
- p result if ENV['RAW_PRINT_TOKENS']
2638
+ p result if rpt
549
2639
 
550
- #set token's line if wanted
551
- result.line||=@line if result.respond_to? :line=
2640
+ #set token's line
2641
+ result.startline= @endline||=1
2642
+ result.endline||=@endline if result.respond_to? :endline=
552
2643
 
553
2644
  if result.respond_to?(:as) and as=result.as
554
- result=KeywordToken.new(as,result.offset)
555
- result.not_real!
2645
+ #result=make_kw(as,result.offset)
2646
+ #result.originally=result.ident
2647
+ if OperatorToken===result #or KeywordToken===result
2648
+ result=result.dup
2649
+ result.ident=as
2650
+ else
2651
+ result=make_kw(as,result.offset)
2652
+ end
2653
+ result.not_real! if result.respond_to? :not_real!
556
2654
  else
557
2655
 
558
2656
  case result
559
- #=begin
560
- when ImplicitParamListStartToken: #treat it like (
561
- result=KeywordToken.new('(', result.offset)
562
- result.not_real!
563
- #=end
564
- #=begin
565
- when ImplicitParamListEndToken:
566
- result=KeywordToken.new(')', result.offset)
567
- result.not_real!
568
- #=end
569
- # when AssignmentRhsListStartToken, AssignmentRhsListEndToken:
570
- #do nothing, pass it thru
571
- #=begin
572
- when NewlineToken:
573
- result=KeywordToken.new(';',result.offset)
574
- #=end
575
- when FileAndLineToken: #so __FILE__ and __LINE__ can know what their values are
2657
+ when FileAndLineToken #so __FILE__ and __LINE__ can know what their values are
576
2658
  @file=result.file
577
- @line=result.line
2659
+ @endline=result.line
578
2660
  redo
579
- when NoWsToken:
580
- #rubylexer disambiguates array literal from
581
- #call to [] or []= method with a preceding NoWsToken...
582
- #kind of a dumb interface.
583
- result=get_token(true)
584
- result.ident=='[' and result.extend BracketsCall
585
-
586
2661
 
587
- when OperatorToken:
588
- if @unary_or_binary_op===result.ident and result.unary
2662
+ when OperatorToken
2663
+ if @unary_or_binary_op===result.ident and result.unary || result.tag==:unary
589
2664
  result=result.dup
590
2665
  result.ident+="@"
591
2666
  end
592
2667
 
593
2668
  #more symbol table maintenance....
594
- when KeywordToken:
2669
+ when KeywordToken
595
2670
  case name=result.ident
596
2671
 
597
- #=begin
598
- when "do":
599
- if result.has_end?
600
- else
601
- result=KeywordToken.new(';',result.offset)
602
- end
603
- #=end
604
2672
  when /^(#{BINOP_KEYWORDS.join '|'})$/: #should be like this in rubylexer
605
2673
  result=OperatorToken.new(name,result.offset) unless result.has_end?
606
- when "|": result=GoalPostNode.new(result.offset) #is this needed still?
607
- when "__FILE__": #I wish rubylexer would handle this
2674
+ when "|"; result=GoalPostToken.new(result.offset) #is this needed still?
2675
+ when "__FILE__"; #I wish rubylexer would handle this
608
2676
  class<<result; attr_accessor :value; end
609
2677
  result.value=@file.dup
610
- when "__LINE__": #I wish rubylexer would handle this
2678
+ when "__LINE__"; #I wish rubylexer would handle this
611
2679
  class<<result; attr_accessor :value; end
612
- result.value=@line
2680
+ result.value=@endline
2681
+ else
2682
+ result=make_kw name,result.offset if defined? SPECIALIZED_KEYWORDS
2683
+ #warning, this may discard information stored in instance vars of result
613
2684
  end
614
2685
 
615
- when EoiToken: break
616
- when HereBodyToken: break
617
- when IgnoreToken: redo
2686
+ when EoiToken; break
2687
+ when HereBodyToken; break
2688
+ when IgnoreToken; redo
618
2689
  end
619
2690
  end
620
2691
  end while false
@@ -622,251 +2693,16 @@ class RedParse
622
2693
  return @last_token=result
623
2694
  end
624
2695
 
625
- def evaluate rule
626
- #dissect the rule
627
- if false
628
- rule=rule.dup
629
- lookahead_processor=(rule.pop if Proc===rule.last)
630
- node_type=rule.pop
631
- else
632
- Reg::Transform===rule or fail
633
- node_type= rule.right
634
- rule=rule.left.subregs.dup
635
- lookahead_processor=(rule.pop if Proc|::Reg::LookAhead===rule.last)
636
- lookback=rule[0]=rule[0].regs(0) if ::Reg::LookBack===rule[0]
637
- end
638
-
639
- #index of data at which to start matching
640
- i=@stack.size-1 #-1 because last element of @stack is always lookahead
641
-
642
- #I could call this a JIT compiler, but that's a bit grandiose....
643
- #more of a JIT pre-processor
644
- compiled_rule=@compiled_rules[rule]||=
645
- rule.map{|pattern|
646
- String|Regexp===pattern ? KW(pattern) : pattern
647
- }
648
-
649
- #what's the minimum @stack size this rule could match?
650
- rule_min_size=@min_sizes[compiled_rule]||=
651
- compiled_rule.inject(0){|sum,pattern|
652
- sum + pattern.itemrange.begin
653
- }
654
- i>=rule_min_size or return false
655
-
656
- matching=[]
657
-
658
- #actually try to match rule elements against each @stack element in turn
659
- compiled_rule.reverse_each{|matcher|
660
- i.zero? and fail
661
- target=matching
662
- #is this matcher optional? looping?
663
- loop= matcher.itemrange.last.to_f.infinite?
664
- optional=matcher.itemrange.first.zero?
665
- matching.unshift target=[] if loop
666
- if loop or optional
667
- matcher=matcher.regs(0)
668
- end
669
-
670
- begin
671
- if matcher===@stack[i-=1] #try match
672
- target.unshift @stack[i]
673
- else
674
- #if match failed, the whole rule fails
675
- #unless this match was optional, in which case, ignore it
676
- #but bump the data position back up, since the latest datum
677
- #didn't actually match anything.
678
- return false unless optional or loop&&!target.empty?
679
- i+=1
680
- matching.unshift nil unless loop
681
- break
682
- end
683
- end while loop
684
- }
685
-
686
- matchrange= i...-1 #what elems in @stack were matched?
687
-
688
- #give lookahead matcher (if any) a chance to fail the match
689
- case lookahead_processor
690
- when ::Reg::LookAhead:
691
- return false unless lookahead_processor.regs(0)===@stack.last
692
- when Proc:
693
- return false unless lookahead_processor[self,@stack.last]
694
- end
695
-
696
- #if there was a lookback item, don't include it in the new node
697
- if lookback
698
- matchrange= i+1...-1 #what elems in @stack were matched?
699
- matching.shift
700
- end
701
-
702
- #replace matching elements in @stack with node type found
703
- case node_type
704
- when Class
705
- node=node_type.new(*matching)
706
- node.line=@line
707
- @stack[matchrange]=[node]
708
- when Proc; node_type[@stack]
709
- when :shift; return 0
710
- else fail
711
- end
712
-
713
- return true #let caller know we found a match
714
-
715
-
716
- rescue Exception=>e
717
- puts "error (#{e}) while executing rule: #{rule.inspect}"
718
- puts e.backtrace.join("\n")
719
- raise
720
- end
721
-
722
- class ParseError<RuntimeError
723
- def initialize(msg,stack)
724
- super(msg)
725
- @stack=stack
726
- if false
727
- ranges=(1..stack.size-2).map{|i|
728
- node=stack[i]
729
- if node.respond_to? :linerange
730
- node.linerange
731
- elsif node.respond_to? :line
732
- node.line..node.line
733
- end
734
- }
735
- types=(1..stack.size-2).map{|i| stack[i].class }
736
- msg += "couldn't interpret #{types.inspect} at line ranges: #{ranges.inspect}"
737
- end
738
- super(msg)
739
- end
740
- attr :stack
741
- end
742
-
743
- def [](*args)
744
- @stack.[] *args
745
- end
746
-
747
- def []=(*args)
748
- @stack.[]= *args
2696
+ def unget_tokens(*tokens)
2697
+ @moretokens=tokens.concat @moretokens
749
2698
  end
750
2699
 
751
- def parse
752
- oldparser= Thread.current[:$RedParse_parser]
753
- Thread.current[:$RedParse_parser]||=self
754
-
755
- @stack=[StartNode.new, get_token]
756
- #last token on @stack is always implicitly the lookahead
757
- loop {
758
- #try all possible reductions
759
- shift=nil
760
- @rules.reverse_each{|rule|
761
- shift=evaluate(rule) and break
762
- }
763
- next if shift==true
764
-
765
- #no rule can match current @stack, get another token
766
- tok=get_token
767
-
768
- #are we done yet?
769
- tok.nil? or EoiToken===tok && EoiToken===@stack.last and break
770
-
771
- #shift our token onto the @stack
772
- @stack.push tok
773
- }
774
-
775
- @stack.size==2 and return NopNode.new #handle empty parse string
776
-
777
- #unless the @stack is 3 tokens,
778
- #with the last an Eoi, and first a StartNode
779
- #there was a parse error
780
- unless @stack.size==3
781
- pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
782
- top=MisparsedNode.new("(toplevel)", @stack[1...-1],'')
783
- raise ParseError.new(top.msg,@stack)
784
- end
785
- EoiToken===@stack.last or fail
786
- StartNode===@stack.first or fail
787
-
788
- result= @stack[1]
789
-
790
-
791
- #multiple assignment must be resolved
792
- #afterwards by walking the parse tree.
793
- #(because the relative precedences of = and ,
794
- #are reversed in multiple assignment.)
795
- # result.respond_to? :fixup_multiple_assignments! and
796
- # result=result.fixup_multiple_assignments!
797
-
798
- #relative precedence of = and rescue are also inverted sometimes
799
- # result.respond_to? :fixup_rescue_assignments! and
800
- # result=result.fixup_rescue_assignments!
801
-
802
- #do something with error nodes
803
- msgs=[]
804
- result.walk{|parent,i,subi,node|
805
- not if ErrorNode===node
806
- msgs<< @filename+":"+node.blame.msg
807
- end
808
- } if result.respond_to? :walk #hack hack
809
- result.errors=msgs unless msgs.empty?
810
- #other types of errors (lexer errors, exceptions in lexer or parser actions)
811
- #should be handled in the same way, but currently are not
812
- # puts msgs.join("\n")
813
-
814
- rescue Exception=>e
815
- # input=@filename
816
- # if input=="(eval)"
817
- input=@lexer
818
- if Array===input
819
- puts "error while parsing:"
820
- pp input
821
- input=nil
822
- else
823
- input=input.original_file
824
- inputname=@lexer.filename
825
- input.to_s.size>1000 and input=inputname
826
- end
827
- # end
828
- puts "error while parsing: <<< #{input} >>>"
829
- raise
830
- else
831
- unless msgs.empty?
832
- pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
833
- raise RedParse::ParseError.new(msgs.join("\n"),@stack)
834
- end
835
-
836
- return result
837
- ensure
838
- Thread.current[:$RedParse_parser]=oldparser
2700
+ def unget_token(token)
2701
+ @moretokens.unshift token
839
2702
  end
840
2703
 
841
- def LEFT_MATCHERS;self.RULES.map{|r| r.left.subregs }.flatten; end
842
- def STACKABLE_CLASSES
843
-
844
-
845
- _LEFT_MATCHERS.map!{|m|
846
- case m
847
- when Reg::LookAhead,Reg::LookBack: m.regs(0)
848
- else m
849
- end
850
- } #remove lookahead and lookback decoration
851
- rule_juicer=proc{|m|
852
- case m
853
- when Class: m
854
- when Reg::And: m.subregs.map &rule_juicer
855
- when Reg::Or: m.subregs.map &rule_juicer
856
- else #fukit
857
- end
858
- }
859
- _LEFT_CLASSES=_LEFT_MATCHERS.map{|m| rule_juicer[m] }.flatten.compact
860
- _RIGHT_CLASSES= self.RULES.map{|r| r.right }.grep(Class) #classes in productions
861
- _LEFT_CLASSES+_RIGHT_CLASSES
862
- end
863
2704
  =begin
864
- HIER=Class::FlattenedHierarchy.new *STACKABLE_CLASSES
865
-
866
- LOOKAHEAD_MATCHERS=self.RULES.map{|r| r.left.subregs.last }.map{|la| Reg::LookAhead===la and la.regs(0) }
867
-
868
- LOOKAHEAD_CLASSES=LOOKAHEAD_MATCHERS.map(&rule_juicer)
869
- LOOKAHEAD_CLASSES.each_with_index{|classes,i|
2705
+ self.LOOKAHEAD_CLASSES.each_with_index{|classes,i|
870
2706
  case classes
871
2707
  when Class: huh
872
2708
  when Array: classes.flatten.each{huh}
@@ -874,6 +2710,7 @@ end
874
2710
  end
875
2711
  }
876
2712
  =end
2713
+
877
2714
  # def fixup_multiple_assignments!; end
878
2715
  end
879
2716
 
@@ -922,16 +2759,16 @@ if __FILE__==$0
922
2759
  quiet=true
923
2760
  while /^-/===ARGV.first
924
2761
  case opt=ARGV.shift
925
- when "--": break
926
- when "--pp": output=:pp
927
- when "--lisp": output=:lisp
928
- when "--parsetree": output=:parsetree
929
- when "--vsparsetree": output=:vsparsetree
930
- when "--vsparsetree2": output=:vsparsetree2
931
- when "--update-problemfiles": problemfiles=ProblemFiles.new
932
- when "-q": quiet=true
933
- when "-v": quiet=false
934
- when "-e": inputs=[ARGV.join(" ")]; names=["-e"]; break
2762
+ when "--"; break
2763
+ when "--pp"; output=:pp
2764
+ when "--lisp"; output=:lisp
2765
+ when "--parsetree"; output=:parsetree
2766
+ when "--vsparsetree"; output=:vsparsetree
2767
+ when "--vsparsetree2"; output=:vsparsetree2
2768
+ when "--update-problemfiles"; problemfiles=ProblemFiles.new
2769
+ when "-q"; quiet=true
2770
+ when "-v"; quiet=false
2771
+ when "-e"; inputs=[ARGV.join(" ")]; names=["-e"]; break
935
2772
  else fail "unknown option: #{opt}"
936
2773
 
937
2774
  end
@@ -1060,24 +2897,121 @@ if __FILE__==$0
1060
2897
  exit result
1061
2898
  end
1062
2899
 
1063
- =begin todo:
2900
+ =begin old todo:
1064
2901
  v merge DotCallNode and CallSiteNode and CallWithBlockNode
1065
- remove actual Tokens from parse tree...
1066
- instead, each node has a corresponding range of tokens
1067
- -in an (optional) array of all tokens printed by the tokenizer.
1068
- split ParenedNode into ParenedNode + Rescue/EnsureNode
1069
- 'incomplete' subtrees such as ElseNode, ElsifNode, RescueNode
1070
- -should not appear in final output
2902
+ v remove actual Tokens from parse tree...
2903
+ v split ParenedNode into ParenedNode + Rescue/EnsureNode
2904
+ x 'incomplete' subtrees such as ElseNode, ElsifNode, RescueNode
2905
+ x -should not appear in final output
1071
2906
  v split keywordopnode into loop and if varieties?
1072
2907
  =end
1073
2908
 
1074
- =begin optimization opportunities:
1075
- top of stack slot contains mostly keywords, specific node classes, and Value
1076
- lookahead slot contains mostly LowerOp and keywords, with a few classes and inverted keywords
1077
- -(LowerOp is hard to optimize)
1078
- if top of stack matcher is Value, then the next matcher down is mostly keywords, with some operators
2909
+ =begin old optimization opportunities:, ha!
2910
+ top of stack slot contains mostly keywords, specific node classes, and Expr
2911
+ lookahead slot contains mostly lower_op and keywords, with a few classes and inverted keywords
2912
+ -(lower_op is hard to optimize)
2913
+ if top of stack matcher is Expr, then the next matcher down is mostly keywords, with some operators
1079
2914
  class membership can be optimized to test of integer within a range
1080
2915
  keywords could be stored as symbols instead of strings
1081
2916
  a few rules may need exploding (eg, ensure) to spoon feed the optimizer
1082
2917
  make all Nodes descendants of Array
1083
2918
  =end
2919
+
2920
+ #todo:
2921
+ #each node should have a corresponding range of tokens
2922
+ #-in an (optional) array of all tokens printed by the tokenizer.
2923
+ #v test stack_monkey mods
2924
+ #v break ParenedNode into 2 (3?) classes
2925
+ #x invent BEGINNode/ENDNode? (what other keywords?)
2926
+ #v at least make BEGIN/END be KWCallNode
2927
+ #v replace VarNameToken with VarNode in parser
2928
+ #x convert raw rules to lists of vertex identities?
2929
+ #v DottedRule class
2930
+ #v ParserState class (set of DottedRules)
2931
+ #v MultiReduce
2932
+ #v MultiShift
2933
+ #v ParserState#evolve(identity)
2934
+ #v DottedRule#evolve(identity)
2935
+ #v RedParse#enumerate_states
2936
+ #v RedParse#enumerate_exemplars
2937
+ #v Node/Token.enumerate_exemplars
2938
+ #v Node/Token.identity_param
2939
+ #v rename #lvalue? => #lvalue
2940
+ #x likewise get rid of other oddly named identity params
2941
+ #v BareMethod,WITHCOMMAS,BEGINAFTEREQUALS should have predicate methods defined for them
2942
+ #v do something about BEGINAFTEREQUALS... lots of predicates, ugly to identify
2943
+ #v document identity parameters in nodes and tokens
2944
+ #operator and keyword tokens have some identity_param variations remaining...maybe?
2945
+ #xx all identity readers have to have writers as well (even if fake)
2946
+ #v sort out vertex identities... call identity_param in apt classes
2947
+ #convert identities<=>small ints
2948
+ #convert ParserStates<=>small ints
2949
+ #> lower_op/proc lookahead requires special action type with shift and reduce branches
2950
+ #x stack monkeys dictate some nodes appear in s/r table... which ones?
2951
+ #x some stack monkeys pushback nodes, action table must take those as input
2952
+ #v retype GoalPostNode => GoalPostToken
2953
+ #v then, pushback* should go away
2954
+ #v build shift/reduce table
2955
+ #v build goto table
2956
+ #split tables into shift/reduce and goto....?
2957
+ #v integrate with c code generator
2958
+ #finish c code generator
2959
+ #code generator needs a way to deal with :
2960
+ #backtracking (to more than 1 node/token???)
2961
+ #actions (stack monkeys/lower_op)
2962
+ #every reduce requires b/ting thru the lookahead
2963
+ #garbage collection
2964
+ #sharing ruby objects between ruby code and generated c code
2965
+ #optimizer?
2966
+ #ruby code generator?
2967
+ #v what to do with :shift ?
2968
+ #what to do with :accept ?
2969
+ #what to do with :error ?
2970
+ #Node.create (used in generated code)
2971
+ #Node.create <= takes input directly from semantic stack
2972
+ #build Node.create param list generator
2973
+ #v names for rules, dotted rules, parser states, identities
2974
+ #x StartNode may be a problem... used by a stack monkey,
2975
+ #to remove extra ;s from the very beginning of input.
2976
+ #use a lexer hack instead?
2977
+ #v convert StartNode to StartToken?
2978
+ #convert names to numbers and numbers to names
2979
+ #for states, rules, vertex identities
2980
+ #in ruby and c (??)
2981
+ #x rule for HereBodyToken should be a lexer hack?
2982
+ #v stack monkeys should have names
2983
+ #how to handle a stack monkey whose 2nd parameter is not a single identity?
2984
+ #even reduces may not have enough info since 1 node class may have multiple identities
2985
+ #v RedParse constants should be named in inspect
2986
+ #v toplevel rule?
2987
+ #v semantic stack in generated c code should be a ruby array
2988
+ #x state stack should keep size of semantic stack at the time states are pushed,
2989
+ #so that i can restore semantic stack to former state when b-ting/reducing
2990
+ #urk, how do I know how many levels of state stack to pop when reducing?
2991
+ #in looping error rules, just scan back in semantic stack for rule start
2992
+ #in regular looping rules, transition to loop state is saved on a special stack
2993
+ #so that at reduce time, we can b/t to that point for a start
2994
+ #if rule contains only scalars, b/t is easy
2995
+ #else rule contains scalars and optionals:
2996
+ #scan for rule start vertex starting at highest node
2997
+ #on semantic stack that can contain it and working downward.
2998
+ #also, statically verify that relevant rules contain no collisions among first (how many?) matchers
2999
+
3000
+ #is lookahead in code generator even useful? my tables have built-in lookahead....
3001
+ #need hack to declare nonerror looping matchers as irrevocable (for speed, when reducing)
3002
+ #v assignmentRhsNode needs an identity_param for with_commas
3003
+ #v -** fixup and setter breakout rules need dedicated identity_params too
3004
+ # = rescue ternary is broken again now...
3005
+ #v instead of shift states and is_shift_state? to find them,
3006
+ #v i should have shift transitions. (transitions that imply a shift... in response to a token input.)
3007
+ #v all states will have 2 entry points, for shift and nonshift transitions.
3008
+ #split big table into goto(node) and sr(token) tables
3009
+ #in each state, most common sr action should be made default
3010
+ #unused entries in goto table can be ignored.
3011
+ #most common goto entries (if any) can be default.
3012
+ #is the change_index arg in stack_monkey calls really correct everywhere? what are
3013
+ #the exact semantics of that argument? what about stack_monkeys that change the stack size?
3014
+ #should there be another arg to keep track of that?
3015
+ #maybe rewrite stack_monkeys so they're a little clearer and easier to analyze (by hand)
3016
+ #MultiShift/MultiReduce are not supported actions in generate.rb
3017
+ #:accept/:error are not supported actions in generate.rb