redparse 0.8.1 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -3,7 +3,15 @@
3
3
  require 'rubygems'
4
4
  require 'hoe'
5
5
  require 'lib/redparse/version.rb'
6
-
6
+
7
+ if $*==["test"]
8
+ #hack to get 'rake test' to stay in one process
9
+ #which keeps netbeans happy
10
+ $:<<"lib"
11
+ require "test/test_redparse.rb"
12
+ Test::Unit::AutoRunner.run
13
+ exit
14
+ end
7
15
 
8
16
  readme=open("README.txt")
9
17
  readme.readline("\n== DESCRIPTION:")
@@ -13,13 +21,14 @@ require 'lib/redparse/version.rb'
13
21
  hoe=Hoe.new("redparse", RedParse::VERSION) do |_|
14
22
  _.author = "Caleb Clausen"
15
23
  _.email = "redparse-owner @at@ inforadical .dot. net"
16
- _.url = ["http://redparse.rubyforge.org/", "http://rubyforge.org/projects/redparse/"]
17
- _.extra_deps << ['rubylexer', '>= 0.7.3']
24
+ _.url = ["http://github.com/coatl/redparse/", "http://rubyforge.org/projects/redparse/"]
25
+ _.extra_deps << ['rubylexer', '>= 0.7.4']
18
26
  _.extra_deps << ['reg', '>= 0.4.7']
27
+ _.extra_deps << 'Ron'
19
28
  # _.test_globs=["test/*"]
20
29
  _.description=desc
21
30
  _.summary=desc[/\A[^.]+\./]
22
- # _.spec_extras={:bindir=>''}
31
+ _.spec_extras={:bindir=>'bin/'}
23
32
  # _.rdoc_pattern=/\A(README\.txt|lib\/.*\.rb)\Z/
24
33
  # _.remote_rdoc_dir="/"
25
34
  end
@@ -73,10 +73,11 @@ class ParseTree<RawParseTree
73
73
  return tree,warnings
74
74
  ensure
75
75
  STDERR.reopen oldSTDERR
76
-
76
+ if warnstash
77
77
  warnstash.rewind
78
78
  warnings.replace warnstash.read.split
79
79
  warnstash.close
80
+ end
80
81
  end
81
82
  end
82
83
 
@@ -185,7 +186,7 @@ inputs.each_index{|i|
185
186
  when :lisp
186
187
  puts tree.to_lisp
187
188
  when :unparse
188
- puts tree.unparse({})
189
+ puts tree.unparse
189
190
  when :parsetree
190
191
  tree=tree.to_parsetree
191
192
  hack=tree.dup
@@ -19,19 +19,1968 @@
19
19
  #warn 'hacking up LOAD_PATH to include the latest RubyLexer!'
20
20
  #$:.unshift Dir.pwd+'/../rubylexer/lib', Dir.pwd+'/../rubylexer'
21
21
 
22
- # "faster rule compiler is untested"
23
22
 
24
- require 'rubygems'
23
+ require 'forwardable'
24
+
25
+ begin
26
+ require 'rubygems'
27
+ rescue LoadError=>e
28
+ #hope we don't need it
29
+ raise unless /rubygems/===e.message
30
+ end
25
31
  require 'rubylexer'
26
32
  require 'reg'
27
33
 
28
- require "redparse/node"
29
- #require "redparse/decisiontree"
30
- require "redparse/reg_more_sugar"
31
- class RedParse
32
- # include Nodes
34
+ require "redparse/node"
35
+ #require "redparse/decisiontree"
36
+ require "redparse/reg_more_sugar"
37
+ require "redparse/generate"
38
+
39
+ class RedParse
40
+
41
+ ####### generic stuff for parsing any(?) language
42
+
43
+ # include Nodes
44
# A named rule action backed by a block. Instead of reducing matched
# stack items to a node, the block is handed the parse stack and may
# rearrange it directly.
class StackMonkey
  # name::                identifier for this monkey; also what _dump emits
  # first_changed_index:: index of the first stack item the block may alter.
  #                       Positive values are negated, so the stored value
  #                       is always <= 0.
  # and_expect_node::     hint about what is expected afterward (also
  #                       readable via #hint)
  # options::             accepted but not used here
  # monkey_code::         the block invoked by #[]
  def initialize(name,first_changed_index,and_expect_node,options={},&monkey_code)
    first_changed_index=-first_changed_index if first_changed_index>0
    @name,@first_changed_index,@and_expect_node,@monkey_code=
      name,first_changed_index,and_expect_node,monkey_code
  end

  attr_reader :name, :first_changed_index, :and_expect_node, :monkey_code
  alias hint and_expect_node
  attr_accessor :exemplars

  # Run the monkey's block against +stack+ and return the block's result.
  def [](stack)
    @monkey_code[stack]
  end

  # Marshal hook: a monkey is serialized as just its name.
  def _dump depth
    @name
  end

  # Marshal hook: look the monkey up again by the dumped name in the
  # current parser's undumpables table. The name arrives as +str+;
  # an instance variable cannot be used here because this is a class method.
  def self._load str
    Thread.current[:$RedParse_parser].undumpables[str]
  end

  # Returns a goto statement once @goto_label has been set. The code that
  # would emit the full C action (and set @goto_label) is disabled, so as
  # written this returns nil.
  def action2c
    #"return the whole thing on first call, just a goto stmt after that"
    return " goto #@goto_label;\n" if defined? @goto_label

    # disabled draft of the first-call output:
    #   <<-E
    #   #{@goto_label=@name.gsub(/[^a-z0-9_]/,'_')}:
    #   monkey=rb_hash_get(undumpables,rb_cstr2str("#@name"));
    #   rb_funcall(monkey,rb_intern("[]"),huh_stack);
    #
    #   /*recover from stackmonkey fiddling*/
    #   for(i=0;i<#{-@first_changed_index};++i) {
    #   rb_ary_unshift(lexer_moretokens,
    #   rb_ary_pop(huh_semantic_stack));
    #   rb_ary_pop(huh_syntax_stack);
    #   }
    #
    #   goto #{Node===@and_expect_node ?
    #   postreduceaction4this_state(@and_expect_node) :
    #   shiftaction4this_state
    #   };
    #   E
    nil
  end
end
93
# A StackMonkey whose only effect is to delete one item from the stack.
class DeleteMonkey < StackMonkey
  # index:: position of the item to delete; positive values are negated
  # name::  name passed on to StackMonkey
  def initialize(index, name)
    index = -index if index > 0
    @index = index
    super(name, index, nil) do |stack|
      stack.delete_at(index)
    end
  end
end
100
# Convenience constructors for rule actions.

# Build a StackMonkey; arguments and block are passed straight through.
def stack_monkey(*args, &block)
  StackMonkey.new(*args, &block)
end

# Class-level twin of #stack_monkey.
def self.stack_monkey(*args, &block)
  StackMonkey.new(*args, &block)
end

# Build a DeleteMonkey for the given stack index and name.
def delete_monkey(index, name)
  DeleteMonkey.new(index, name)
end
103
+
104
+ def evaluate rule
105
+ #dissect the rule
106
+ if false
107
+ rule=rule.dup
108
+ lookahead_processor=(rule.pop if Proc===rule.last)
109
+ node_type=rule.pop
110
+ else
111
+ Reg::Transform===rule or fail
112
+ node_type= rule.right
113
+ rule=rule.left.subregs.dup
114
+ lookahead_processor=(rule.pop if Proc|::Reg::LookAhead===rule.last)
115
+ lookback=rule[0]=rule[0].subregs[0] if ::Reg::LookBack===rule[0]
116
+ end
117
+
118
+ #index of data at which to start matching
119
+ i=@stack.size-1 #-1 because last element of @stack is always lookahead
120
+
121
+ #I could call this a JIT compiler, but that's a bit grandiose....
122
+ #more of a JIT pre-processor
123
+ compiled_rule=@compiled_rules[rule]||=
124
+ rule.map{|pattern|
125
+ String|Regexp===pattern ? KW(pattern) : pattern
126
+ }
127
+
128
+ #what's the minimum @stack size this rule could match?
129
+ rule_min_size=@min_sizes[compiled_rule]||=
130
+ compiled_rule.inject(0){|sum,pattern|
131
+ sum + pattern.itemrange.begin
132
+ }
133
+ i>=rule_min_size or return false
134
+
135
+ matching=[]
136
+
137
+ #actually try to match rule elements against each @stack element in turn
138
+ compiled_rule.reverse_each{|matcher|
139
+ i.zero? and fail
140
+ target=matching
141
+ #is this matcher optional? looping?
142
+ loop= matcher.itemrange.last.to_f.infinite?
143
+ minimum=matcher.itemrange.first
144
+ optional=minimum.zero?
145
+ matching.unshift target=[] if loop
146
+ if loop or optional
147
+ matcher=matcher.subregs[0]
148
+ end
149
+
150
+ begin
151
+ if matcher===@stack[i-=1] #try match
152
+ target.unshift @stack[i]
153
+ else
154
+ #if match failed, the whole rule fails
155
+ #unless this match was optional, in which case, ignore it
156
+ #or was looping and met its minimum
157
+ #but bump the data position back up, since the latest datum
158
+ #didn't actually match anything.
159
+ return false unless optional or loop&&target.size>=minimum
160
+ i+=1
161
+ matching.unshift nil unless loop
162
+ break
163
+ end
164
+ end while loop
165
+ }
166
+
167
+ matchrange= i...-1 #what elems in @stack were matched?
168
+
169
+ #give lookahead matcher (if any) a chance to fail the match
170
+ case lookahead_processor
171
+ when ::Reg::LookAhead
172
+ return false unless lookahead_processor.subregs[0]===@stack.last
173
+ when Proc
174
+ return false unless lookahead_processor[self,@stack.last]
175
+ end
176
+
177
+ #if there was a lookback item, don't include it in the new node
178
+ if lookback
179
+ matchrange= i+1...-1 #what elems in @stack were matched?
180
+ matching.shift
181
+ end
182
+
183
+
184
+ #replace matching elements in @stack with node type found
185
+ case node_type
186
+ when Class
187
+ node=node_type.new(*matching)
188
+ node.startline||=@stack[matchrange.first].startline
189
+ node.endline=@endline
190
+ @stack[matchrange]=[node]
191
+ when Proc,StackMonkey; node_type[@stack]
192
+ when :shift; return 0
193
+ when :accept,:error; throw :ParserDone
194
+ else fail
195
+ end
196
+
197
+ return true #let caller know we found a match
198
+
199
+
200
+ rescue Exception=>e
201
+ #puts "error (#{e}) while executing rule: #{rule.inspect}"
202
+ #puts e.backtrace.join("\n")
203
+ raise
204
+ end
205
+
206
# Raised when the input cannot be parsed. Carries the parser stack at the
# point of failure alongside the message.
class ParseError<RuntimeError
  # msg::   the error message
  # stack:: the parser stack to expose through #stack
  def initialize(msg,stack)
    super(msg) #one call is enough; the message never changes afterward
    @stack=stack
    # A disabled diagnostic used to live here. It would have appended the
    # classes and line ranges of the stack items to the message:
    #   ranges=(1..stack.size-2).map{|i|
    #     node=stack[i]
    #     if node.respond_to? :linerange
    #       node.linerange
    #     elsif node.respond_to? :endline
    #       node.endline..node.endline
    #     end
    #   }
    #   types=(1..stack.size-2).map{|i| stack[i].class }
    #   msg += "couldn't interpret #{types.inspect} at line ranges: #{ranges.inspect}"
  end
  attr_reader :stack
end
226
+
227
# Read from the parser stack; arguments are whatever @stack's [] accepts.
def [](*args)
  @stack[*args]
end

# Write to the parser stack; arguments are whatever @stack's []= accepts.
def []=(*args)
  @stack.__send__(:[]=, *args)
end
234
+
235
+ #try all possible reductions
236
# Try every rule, last one first, until one of them evaluates truthy.
# Returns that truthy result, or the (falsy) result of the final attempt,
# or nil when there are no rules.
def reduce
  outcome = nil
  @rules.reverse_each do |rule|
    outcome = evaluate(rule)
    break if outcome
  end
  outcome
end
243
+
244
+ def parse
245
+ #hack, so StringToken can know what parser its called from
246
+ #so it can use it to parse inclusions
247
+ oldparser=Thread.current[:$RedParse_parser]
248
+ Thread.current[:$RedParse_parser]||=self
249
+
250
+ @rules||=expanded_RULES()
251
+ # @inputs||=enumerate_exemplars
252
+
253
+ @stack=[StartToken.new, get_token]
254
+ #last token on @stack is always implicitly the lookahead
255
+ catch(:ParserDone){ loop {
256
+ #try all possible reductions
257
+ next if reduce==true
258
+
259
+ #no rule can match current @stack, get another token
260
+ tok=get_token or break
261
+
262
+ #are we done yet?
263
+ #tok.nil? or EoiToken===tok && EoiToken===@stack.last and break
264
+
265
+ #shift our token onto the @stack
266
+ @stack.push tok
267
+ }}
268
+
269
+ @stack.size==2 and return NopNode.new #handle empty parse string
270
+
271
+ #unless the @stack is 3 tokens,
272
+ #with the last an Eoi, and first a StartToken
273
+ #there was a parse error
274
+ unless @stack.size==3
275
+ pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
276
+ top=MisparsedNode.new("(toplevel)", @stack[1...-1],'')
277
+ raise ParseError.new(top.msg,@stack)
278
+ end
279
+ EoiToken===@stack.last or fail
280
+ StartToken===@stack.first or fail
281
+
282
+ result= @stack[1]
283
+
284
+
285
+ #multiple assignment must be resolved
286
+ #afterwards by walking the parse tree.
287
+ #(because the relative precedences of = and ,
288
+ #are reversed in multiple assignment.)
289
+ # result.respond_to? :fixup_multiple_assignments! and
290
+ # result=result.fixup_multiple_assignments!
291
+
292
+ #relative precedence of = and rescue are also inverted sometimes
293
+ # result.respond_to? :fixup_rescue_assignments! and
294
+ # result=result.fixup_rescue_assignments!
295
+
296
+ #do something with error nodes
297
+ msgs=[]
298
+ result.walk{|parent,i,subi,node|
299
+ not if node.respond_to? :error and node.error?(@rubyversion)
300
+ msgs<< @filename+":"+node.blame.msg
301
+ end
302
+ } if result.respond_to? :walk #hack hack
303
+ result.errors=msgs unless msgs.empty?
304
+ #other types of errors (lexer errors, exceptions in lexer or parser actions)
305
+ #should be handled in the same way, but currently are not
306
+ # puts msgs.join("\n")
307
+
308
+ =begin
309
+ rescue Exception=>e
310
+ input=@lexer
311
+ if Array===input
312
+ puts "error while parsing:"
313
+ pp input
314
+ input=nil
315
+ else
316
+ input=input.original_file
317
+ inputname=@lexer.filename
318
+ input.to_s.size>1000 and input=inputname
319
+ puts "error while parsing: <<< #{input} >>>"
320
+ end
321
+ raise
322
+ else
323
+ =end
324
+ unless msgs.empty?
325
+ pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
326
+ raise RedParse::ParseError.new(msgs.join("\n"),@stack)
327
+ end
328
+
329
+ # result=NopNode.new if EoiToken===result
330
+ return result
331
+ ensure
332
+ @stack=nil
333
+ Thread.current[:$RedParse_parser]=oldparser
334
+ end
335
+
336
+
337
+ #HIER=Class::FlattenedHierarchy.new *STACKABLE_CLASSES
338
+
339
+ def new_disabled_reduce
340
+ #@hier||=Class::FlattenedHierarchy.new *STACKABLE_CLASSES()
341
+ @reducer||=Reducer.new(@rules)
342
+
343
+ @reducer.reduce(@stack)
344
+ end #
345
+ #
346
+ if defined? END_ATTACK
347
+ class RuleSet
348
+ def initialize(rules)
349
+ @rules=rules.reverse
350
+ #rule order must be reversed relative to the usual RedParse rule
351
+ #order... merely so that ffs can work right.
352
+ @maxmask=(1<<@rules.size)-1
353
+ @subclasses_of=child_relations_among(*STACKABLE_CLASSES())
354
+ end
355
+
356
+ def rules2mask(rules)
357
+ mask=0
358
+ @rules.each_with_index{|r,i|
359
+ mask |= 1<<i if rules.include? r
360
+ }
361
+ return mask
362
+ end
363
+
364
# Convert a bitmask into the list of rules whose bits are set, lowest bit
# first. Bit i corresponds to @rules[i].
#
# This is the only definition of mask2rules. An earlier one in the same
# class was always overridden, and its bit test `mask&(1<<i)` could never
# be false because Ruby treats 0 as truthy.
def mask2rules(mask)
  result=[]
  while mask.nonzero?
    #ffs is 1-based, so subtract one to get the rule index
    result<< @rules[i=ffs(mask)-1]
    mask &= ~(1<<i) #clear the bit just handled
  end
  return result
end
380
+
381
# Yield each rule whose bit is set in +mask+, together with its index,
# lowest bit first. With no argument every rule is visited (@maxmask has
# one bit per rule).
#
# This is the only definition of each_rule. An earlier one in the same
# class was always overridden, and its bit test `mask&(1<<i)` could never
# be false because Ruby treats 0 as truthy.
def each_rule(mask=@maxmask)
  while mask.nonzero?
    #ffs is 1-based, so subtract one to get the rule index
    yield @rules[i=ffs(mask)-1],i
    mask &= ~(1<<i) #clear the bit just handled
  end
end
393
+
394
+
395
+ @@FFS_TABLE=[nil]
396
+ 1.upto(8){|n|
397
+ @@FFS_TABLE*=2
398
+ @@FFS_TABLE[@@FFS_TABLE.size/2]=n
399
+ }
400
+ def rb_ffs(mask)
401
+ chunks=0
402
+ until mask.zero?
403
+ result=@@FFS_TABLE[mask&0xFF]
404
+ return result+(chunks<<3) if result
405
+ chunks+=1
406
+ mask>>=8
407
+ end
408
+ return 0
409
+ end
410
+
411
+ begin
412
+ require 'inline'
413
+ inline{|inline|
414
+ inline.prefix '#define _GNU_SOURCE'
415
+ inline.include '"string.h"'
416
+ inline.include '"limits.h"'
417
+ inline.c %{
418
+ unsigned c_ffs(VALUE mask){
419
+ if FIXNUM_P(mask) {
420
+ return ffsl(NUM2UINT(mask));
421
+ } else if(TYPE(mask)==T_BIGNUM) {
422
+ struct RBignum* bn=RBIGNUM(mask);
423
+ int len=bn->len;
424
+ int i;
425
+ unsigned offset=0;
426
+ unsigned result=0;
427
+ for(i=0;i<len;++i){
428
+ /*printf("least:%x\\n", ((BDIGIT*)(bn->digits))[i]);*/
429
+ /*printf("most:%x\\n", ((BDIGIT*)(bn->digits))[len]);*/
430
+ result=ffs(((BDIGIT*)(bn->digits))[i]);
431
+ if (result) break;
432
+ offset+=sizeof(int)*CHAR_BIT;
433
+ }
434
+ if (result==0) return 0;
435
+ return result+offset;
436
+ } else {
437
+ rb_fatal("bad argument to ffs");
438
+ }
439
+ }
440
+ }
441
+ }
442
+ alias ffs c_ffs
443
+ rescue Exception=>e
444
+ warn "error (#{e.class}) while defining inline c ffs()"
445
+ warn "original error: #{e}"
446
+ warn "falling back to ruby version of ffs()"
447
+ alias ffs rb_ffs
448
+
449
+ end
450
+
451
+
452
+
453
+
454
+ #just the left side (the stack/lookahead matchers)
455
+ def LEFT
456
+ @rules.map{|r| r.left.subregs }.flatten
457
+ end
458
+
459
+ #remove lookahead and lookback decoration
460
+ def LEFT_NO_LOOKING
461
+ l=LEFT()
462
+ l.map!{|m|
463
+ case m #
464
+ when Reg::LookAhead,Reg::LookBack; m.subregs[0]
465
+ when Proc; []
466
+ else m #
467
+ end #
468
+ }
469
+ l
470
+ end
471
+
472
+ #all classes mentioned in rules, on left and right sides
473
+ def STACKABLE_CLASSES #
474
+ return @sc_result unless @sc_result.nil?
475
+ @sc_result=false
476
+ l=LEFT_NO_LOOKING()
477
+ l=l.map{|lm| sc_juice lm}.flatten.compact
478
+ r= @rules.map{|rr| rr.right }.grep(Class) #classes in productions
479
+ result=l+r
480
+ @sc_result=result.grep(Class).uniq
481
+ fail if @sc_result.empty?
482
+ return @sc_result
483
+ end
484
+
485
+ def juice(m)
486
+ case m #
487
+ when Class;
488
+ return [m] unless @subclasses_of
489
+ result=[m] # and subclasses too
490
+ i=0
491
+ while item=result[i]
492
+ #p item
493
+ result.concat @subclasses_of[item]
494
+ i += 1
495
+ end
496
+ result
497
+ when String,Regexp; juice(RedParse.KW(m))
498
+ when Reg::And; m.subregs.map{|x| juice(x).flatten.compact}.inject{|sum,rr| sum&rr}
499
+ when Reg::Or; m.subregs.map( &method(:juice) )
500
+ when Reg::Not;
501
+ m=m.subregs[0]
502
+ if Class===m or (Reg::Or===m and
503
+ m.subregs.inject{|sum,x| sum && (Class===x) })
504
+ j=juice(m)
505
+ STACKABLE_CLASSES()-j.flatten.compact rescue j
506
+ else
507
+ STACKABLE_CLASSES()
508
+ end
509
+ else STACKABLE_CLASSES()
510
+ end
511
+ end
512
+
513
+ def sc_juice(m)
514
+ case m #
515
+ when Class; [m]
516
+ when String,Regexp; juice(RedParse.KW(m))
517
+ # when String,Regexp; [KeywordToken]
518
+ when Reg::And; m.subregs.map{|x| sc_juice(x)}.compact.map{|x| x.flatten.compact}.inject{|sum,rr| sum&rr }
519
+ when Reg::Or; m.subregs.map( &method(:sc_juice) )
520
+ when Reg::Not; sc_juice(m.subregs[0])
521
+ when Reg::LookAhead, Reg::LookBack; sc_juice(m.subregs[0])
522
+ else []
523
+ end
524
+ end
525
+
526
+ def LOOKAHEAD_CLASSES rule
527
+ last=rule.left.subregs.last
528
+ return STACKABLE_CLASSES() unless Reg::LookAhead===last
529
+ la= last.subregs[0]
530
+ return juice(la).flatten.compact
531
+ end
532
+ #
533
+ def TOS_CLASSES rule
534
+ i=-1
535
+ mats=rule.left.subregs
536
+ m=mats[i]
537
+ m=mats[i-=1] if Reg::LookAhead===m || Proc===m
538
+ result=[]
539
+ while Reg::Repeat===m and m.times.min.zero?
540
+ result<<juice(m.subregs[0])
541
+ m=mats[i-=1]
542
+ end
543
+ return (result+juice(m)).flatten.compact
544
+ end
545
+
546
+ def [](i)
547
+ @rules[i]
548
+ end
549
+
550
+ end #
551
+ #
552
+ module Reducer
553
+ @@rulesets={}
554
+ @@class_narrowerses={}
555
+ def compile(recompile=false)
556
+ klass=self.class
557
+
558
+ #use cached result if available
559
+ if @@rulesets[klass] and !recompile
560
+ @ruleset=@@rulesets[klass]
561
+ @class_narrowers=@@class_narrowerses[klass]
562
+ return
563
+ end
564
+
565
+ #actual rule compilation
566
+ @ruleset=RuleSet.new @rules
567
+ @class_narrowers=[tos=Hash.new(0),la=Hash.new(0)]
568
+ @ruleset.each_rule{|r,i|
569
+ @ruleset.LOOKAHEAD_CLASSES(r).each{|klass2|
570
+ la[klass2] |= 1<<i
571
+ }
572
+ @ruleset.TOS_CLASSES(r).each{|klass2|
573
+ tos[klass2] |= 1<<i
574
+ }
575
+ }
576
+
577
+ #save result to cache if not too dynamic
578
+ if !recompile
579
+ @@rulesets[klass]=@ruleset
580
+ @@class_narrowerses[klass]=@class_narrowers
581
+ end
582
+ end
583
+
584
+ def new_reduce
585
+ # mask=-1
586
+ # (-1).downto(-@class_narrowers.size){|i|
587
+ # mask &= @class_narrowers[i][@stack[i].class]
588
+ # }
589
+ mask=
590
+ @class_narrowers[-1][@stack[-1].class]&
591
+ @class_narrowers[-2][@stack[-2].class]
592
+ @ruleset.each_rule(mask){|r,i|
593
+ res=evaluate(r) and return res
594
+ }
595
+ return false
596
+ end
597
+ end
598
+ end
599
+
600
# Like map, but the block also receives each element's index.
# Returns the array of block results.
def map_with_index(list)
  list.each_with_index.map { |elem, i| yield(elem, i) }
end
605
+
606
+ def all_rules
607
+ return @all_rules if defined? @all_rules
608
+
609
+ @inputs||=enumerate_exemplars
610
+ @rules=expanded_RULES #force it to be recalculated
611
+ @all_rules = map_with_index(@rules){|r,i| Rule.new r,i}
612
+
613
+ @all_rules.each{|r|
614
+ if StackMonkey===r.action
615
+ r.action.exemplars=@inputs.grep r.action.hint
616
+ end
617
+ }
618
+
619
+ warn "error recovery rules disabled for now; creates too many states and masks errors"
620
+ @all_rules.reject!{|r| r.action==MisparsedNode }
621
+
622
+ #names have to be allocated globally to make sure they don't collide
623
+ names=@all_rules.map{|r|
624
+ if r.action.respond_to? :name
625
+ r.action.name
626
+ else
627
+ r.action.to_s
628
+ end
629
+ }.sort
630
+ dups={}
631
+ names.each_with_index{|name,i|
632
+ dups[name]=0 if name==names[i+1]
633
+ }
634
+ @all_rules.each{|r|
635
+ r.name=
636
+ if r.action.respond_to? :name
637
+ r.action.name.dup
638
+ else
639
+ r.action.to_s
640
+ end
641
+ if dups[r.name]
642
+ count=dups[r.name]+=1
643
+ r.name<<"_#{count}"
644
+ end
645
+ }
646
+ end
647
+
648
+ def all_dotted_rules
649
+ all_rules.map{|rule|
650
+ (0...rule.patterns.size).map{|i|
651
+ DottedRule.create(rule,i,self)
652
+ }
653
+ }.flatten
654
+ end
655
+
656
+ #$OLD_PAA=1
657
+
658
# Returns (and memoizes in @all_initial_dotted_rules) one DottedRule at
# position 0 for every rule in all_rules. Unless $OLD_PAA is defined, it
# then tries to fill in each dotted rule's also_allow list, repeating
# until the set of provisional results stops changing.
# Prints :all_init and :all_init_done to stdout as progress markers.
+ def all_initial_dotted_rules
659
+ return @all_initial_dotted_rules if defined? @all_initial_dotted_rules
660
+ @all_initial_dotted_rules=result=
661
# parser is passed as nil here, so DottedRule.create skips its own
# also_allow computation for these rules
+ all_rules.map{|rule| DottedRule.create(rule,0,nil) }
662
+
663
+ p :all_init
664
+
665
+ unless defined? $OLD_PAA
666
+ scanning=result
667
+ provisionals=nil
668
# NOTE(review): `while ... end until cond` is an ordinary `until`
# modifier, not a do-while (that needs begin/end), so cond is tested
# first. Both provisionals and old_provisionals are nil at that point,
# so this loop body appears never to run. Confirm the intent.
+ while true
669
+ old_provisionals=provisionals
670
+ provisionals={}
671
+ scanning.each{|dr|
672
# NOTE(review): compute_also_allow is declared in this file as
# (parser,provisional=[false]); the single argument here lands in the
# parser slot. Confirm whether `self` was meant to be passed first.
+ dr.also_allow=dr.compute_also_allow(provisional=[false]) #fill out dr.also_allow
673
+ provisionals[dr]=provisional[0]
674
+ }
675
# keep only the rules whose result was still provisional for the next pass
+ scanning=provisionals.map{|dr,val| dr if val }.compact
676
+ end until provisionals==old_provisionals
677
+ end
678
+ p :all_init_done
679
+
680
+ return result
681
+ end
682
+
683
+ class Rule #original user rules, slightly chewed on
684
+ def initialize(rawrule,priority)
685
+ @priority=priority
686
+ @action=rawrule.right
687
+ @patterns=rawrule.left.subregs.dup
688
+ #remove lookback decoration if any, just note that lb was present
689
+ if Reg::LookBack===@patterns[0]
690
+ @lookback=true
691
+ @patterns[0]=@patterns[0].subregs[0]
692
+ end
693
+
694
+ case @patterns[-1]
695
+ #Symbol is pointless here, methinks.
696
+ when Proc,Symbol; #do nothing
697
+ when Reg::LookAhead; @patterns[-1]=@patterns[-1].subregs[0]
698
+ else @patterns.push Object #add la if none was present
699
+ end
700
+
701
+ #search for looping matchers with minimum >0 and replace them
702
+ #with a number of scalars (== the minimum) followed by a loop with 0 min.
703
+ #search for bare strings or regexps and replace with KW( ) wrapper
704
+ @patterns.each_with_index{|p,i|
705
+ case p
706
+ when String,Regexp; @patterns[i]=RedParse.KW(p)
707
+ when Reg::Repeat
708
+ if p.itemrange.first>0
709
+ @patterns[i,1]=
710
+ *[p.subregs[0]]*p.itemrange.first<< #minimum # as scalars
711
+ p.subregs[0].reg.* #0-based looper
712
+ end
713
+ end
714
+ }
715
+ @drs=[]
716
+ end
717
+
718
+ attr_reader :drs
719
+
720
+ def hash; priority end
721
+ def == other; Rule===other and priority==other.priority end
722
+ alias eql? ==
723
+
724
+ def lookback?; @lookback if defined? @lookback end
725
+
726
+ attr_reader :patterns,:action,:priority
727
+ attr_accessor :name
728
+
729
+ def at(n)
730
+ result=patterns[n]
731
+ result=result.subregs[0] if Reg::Repeat===result
732
+ result
733
+ end
734
+ def optional? n
735
+ p=patterns[n]
736
+ return Reg::Repeat===p && p.itemrange.first.zero?
737
+ end
738
# True when the pattern at index +n+ is a Reg::Repeat with no upper bound
# (its itemrange ends at infinity). Anything else yields false: scalars,
# optional (0..1) matchers and finite repeats.
#
# The upper bound is tested explicitly. Using fail plus a blanket
# `rescue Exception` for the finite case would also swallow unrelated errors.
def looping? n
  p=patterns[n]
  return false unless Reg::Repeat===p
  upper=p.itemrange.last
  #integers on old rubies have no infinite?; they are finite by definition
  upper.respond_to?(:infinite?) && upper.infinite? ? true : false
end
747
+
748
+ def reduces_to
749
+ case @action
750
+ when Class; @action
751
+ when StackMonkey; @action.exemplars
752
+ when :error,:shift,:accept; nil
753
+ else fail "#@action unexpected in reduces_to"
754
+ end
755
+ end
756
+
757
+ def unruly?
758
+ return if action==:accept
759
+ action.class!=Class || lookback?
760
+ end
761
+
762
+ def final_promised_pattern
763
+ case @action
764
+ when DeleteMonkey #delete_monkey
765
+ vector_indexes=(@action.first_changed_index..-1).select{|i| Reg::Repeat===@patterns[i] }
766
+ fail unless vector_indexes.empty?
767
+ result=@patterns.dup
768
+ result.delete_at @action.first_changed_index
769
+ when StackMonkey #stack_monkey
770
+ result=@patterns.dup
771
+ result[@action.first_changed_index..-1]=[@action.hint]
772
+ when Class
773
+ result= [@action,@patterns.last]
774
+ result.unshift @patterns.first if lookback?
775
+ when :accept, :error, :shift
776
+ result=@patterns.dup
777
+ else
778
+ pp @action
779
+ fail
780
+ end
781
+ result[-1]=result[-1].la unless result.empty?
782
+ result
783
+ end
784
+
785
+ def final_promised_rule
786
+ @final_promised_rule ||=
787
+ Rule.new(-final_promised_pattern>>nil,-priority)
788
+ end
789
+ end
790
+
791
+ class DottedRule
792
+ def initialize(rule,pos,parser)
793
+ @rule,@pos=rule,pos
794
+ fail unless (0...rule.patterns.size)===@pos
795
+ # @also_allow= compute_also_allow(parser) if parser unless defined? $OLD_PAA
796
+ end
797
+ def compute_also_allow(parser,provisional=[false])
798
+ parser.all_initial_dotted_rules.map{|dr|
799
+ next if dr==self
800
+ fake_rule=dr.rule.final_promised_rule
801
+ final_more_dr=DottedRule.create(fake_rule,0,nil)
802
+ also=dr.also_allow
803
+ unless also
804
+ provisional[0]||=0
805
+ provisional[0]+=1
806
+ also=[]
807
+ end
808
+ also+[dr] if optionally_combine final_more_dr,parser
809
+ }.flatten.compact.uniq
810
+ end
811
+ attr_reader :rule,:pos
812
+ attr_accessor :also_allow
813
+
814
+ def self.create(rule,pos,parser)
815
+ result=rule.drs[pos] and return result
816
+ result=rule.drs[pos]=DottedRule.new(rule,pos,parser)
817
+ unless defined? $OLD_PAA
818
+ result.also_allow=result.compute_also_allow(parser) if parser
819
+ end
820
+ return result
821
+ end
822
+
823
+ def hash; (@rule.priority<<3)^@pos end
824
+ def == other; DottedRule===other and @pos==other.pos and @rule==other.rule end
825
+ alias eql? ==
826
+
827
+ def name; @rule.name+"@#@pos" end
828
+
829
+ def looping?
830
+ @rule.looping?(@pos)
831
+ end
832
+
833
+ #returns Conditional|Rule|DottedRule|+[DottedRule.+]|nil
834
+ def evolve input, parser, seenlist,result2
835
+ #print "["
836
+ #$stdout.flush
837
+ idname=input.identity_name
838
+ idname=parser.identity_name_alias? idname
839
+ cache=seenlist[[self,idname]]
840
+ unless cache==:dunno_yet
841
+ result2.concat Array(cache).flatten.compact.uniq.sort_by{|x| x.name}
842
+ return cache
843
+ end
844
+ i=pos
845
+ lasti=i-1
846
+ result=[]
847
+ result=loop do #might need multiple tries if optional matcher(s) here
848
+ fail unless i>lasti
849
+ lasti=i
850
+ p=@rule.at(i) #what is current pattern in this dottedrule?
851
+ fail if Proc===p #shouldnt happen anymore
852
+ if parser.pattern_matches_nodes? p
853
+
854
+ #if any dotted rules have nodes at this point,
855
+ #also include the set of rules@0 which
856
+ #can (possibly indirectly) generate that node.
857
+ #(match tokens found on left sides of productions for p)
858
+ seenlist[[self,idname]]=result
859
+ if false
860
+ result.concat recurse_match_drs(parser).uniq.map{|dr|
861
+ dr and
862
+ #begin print "{#{dr.name}"
863
+ dr.evolve input,parser,seenlist,result2
864
+ #ensure print "}" end
865
+ }.flatten.compact.uniq
866
+ end
867
+ end
868
+ @saw_item_that={}
869
+ if p===input
870
+ i+=1 unless @rule.looping?(i)
871
+ fail if i>@rule.patterns.size
872
+
873
+ if !@saw_item_that.empty?
874
+ p(:saw_item_that!)
875
+ fail unless @saw_item_that.size==1
876
+ pair=@saw_item_that.to_a.first
877
+ fail unless p.equal? pair.last
878
+ it=pair.first
879
+ action=
880
+ if i==@rule.patterns.size
881
+ @rule
882
+ else
883
+ DottedRule.create(@rule,i,parser)
884
+ end
885
+ break Conditional.new(it,action)
886
+ end
887
+ @saw_item_that=nil
888
+
889
+ if i == @rule.patterns.size
890
+ break @rule
891
+ else
892
+ break result<<DottedRule.create(@rule,i,parser)
893
+ end
894
+ elsif !@rule.optional?(i)
895
+ break result.empty? ? nil : result
896
+ elsif (i+=1) >= @rule.patterns.size
897
+ break @rule
898
+ #else next p
899
+ end
900
+ end #loop
901
+ seenlist[[self,idname]]=result
902
+ result2.concat Array(result).flatten.compact.uniq.sort_by{|x| x.name}
903
+ return result
904
+ #ensure print "]"
905
+ end
906
+
907
+ #returns +[(DottedRule|nil).*]
908
+ def recurse_match_drs parser, result=nil
909
+ unless result
910
+ table=parser.rmd_cache
911
+ if table
912
+ cache=table[self]
913
+ return cache if cache
914
+ else
915
+ parser.rmd_cache={}
916
+ end
917
+
918
+ result=[]
919
+ end
920
+ #print "("
921
+ #print @rule.name+"@#@pos"
922
+ p=@rule.at(@pos)
923
+
924
+ #find set of nodes that could match here
925
+ nodes_here=parser.exemplars_that_match(p&Node)
926
+
927
+ #find the set of rules that could generate a node in our list
928
+ rrules=parser.all_rules.select{|rule|
929
+ !rule.unruly? and !nodes_here.grep(rule.action).empty?
930
+ }.map{|rule|
931
+ DottedRule.create(rule,0,parser)
932
+ }
933
+
934
+ #if any generating rules match a node in the leftmost pattern,
935
+ #add the rules which can generate _that_ node too.
936
+ result.push self #force self to be excluded from future recursion
937
+ oldsize=result.size
938
+ unless rrules.empty?
939
+ result.concat rrules
940
+
941
+ unless result.respond_to? :index_of
942
+ class<<result
943
+ attr_accessor :index_of
944
+ end
945
+ result.index_of={}
946
+ end
947
+ rio=result.index_of
948
+ oldsize.upto(result.size){|i| rio[result[i]]||=i }
949
+ rrules.each{|rrule|
950
+ i=rio[rrule] or fail #index() inside each() == O(N**2) complexity. this is the slow line.
951
+ #but skip recursion on rules already done at a higher level
952
+ rrule.recurse_match_drs parser,result if i>=oldsize
953
+ }
954
+ end
955
+ result[oldsize-1]=nil #don't actually include self in result
956
+ #result.update_indices oldsize-1, oldsize-1
957
+
958
+ parser.rmd_cache[self]=result
959
+ return result
960
+ #ensure print ")"
961
+ end
962
+
963
# Decide whether the dotted rule +weaker+ can be followed alongside this
# one. Results (true/false) are memoized in parser.oc_cache under the key
# [self,weaker]; the ensure clause stores whatever result was reached.
#
# For each pair of possible next matchers (one from each rule), the
# parser's inputs matching both are collected. Inputs that an earlier
# matcher of either rule would already have claimed are then removed.
# The first pair with a non-empty raw intersection determines the answer:
#   false if nothing is left after filtering,
#   true  if the weaker rule would be complete,
#   false if this rule would be complete,
#   otherwise the answer for the two advanced dotted rules (recursive).
# Returns false if no pair intersects at all.
def optionally_combine weaker,parser
  #lotsa caching needed if this is ever to be performant
  if parser.oc_cache
    result=parser.oc_cache[[self,weaker]]
    return result unless result.nil?
  else
    parser.oc_cache={}
  end

  other=weaker
  mymatches,myposes= self.outcomes
  matches, poses = other.outcomes
  matches.each_with_index{|match,i|
    mymatches.each_with_index{|mymatch,myi|
      intersect=parser.inputs.grep(match&mymatch)
      unless intersect.empty?

        #but don't allow matches that would be matched
        #by an earlier (but optional) pattern.
        disallowed=Reg::Or.new(
          *possible_matchers_til(myi)+
          other.possible_matchers_til(i)
        )
        #reject returns a new array; keep it, or the filter has no effect
        intersect=intersect.reject{|x| disallowed===x }

        if intersect.empty?
          return result=false
        elsif poses[i]>=other.rule.patterns.size
          return result=true #success if weaker rule is at an end
        elsif myposes[myi]>=rule.patterns.size
          return result=false #fail if stronger rule at an end
        else
          p [:**,rule.name,myposes[myi]]
          mynew=DottedRule.create(rule,myposes[myi],parser)
          new=DottedRule.create(other.rule,poses[i],parser)
          return result=mynew.optionally_combine( new,parser )
        end
      end
    }
  }
  return result=false
ensure
  parser.oc_cache[[self,weaker]]=result
end
1007
+
1008
+ def possible_matchers_til i
1009
+ (pos...i-1).map{|j|
1010
+ m=rule.at(j)
1011
+ Reg::Repeat===m ? m.subregs[0] : m
1012
+ }
1013
+ end
1014
+
1015
+ def outcomes
1016
+ til=@rule.patterns.size
1017
+ at=@pos
1018
+ result=[[],[]]
1019
+ loop do
1020
+ m=@rule.patterns[at]
1021
+ case m
1022
+ when Proc;
1023
+ result.first.push Object
1024
+ result.last.push at+1
1025
+ break
1026
+ when Reg::Repeat
1027
+ assert @rule.optional?(at)
1028
+ to=at
1029
+ to+=1 unless @rule.looping? at
1030
+ result.first.push m.subregs[0]
1031
+ result.last.push to
1032
+ else
1033
+ result.first.push m
1034
+ result.last.push at+1
1035
+ break
1036
+ end
1037
+ at+=1
1038
+ break if at>=til
1039
+ end
1040
+ return result
1041
+ end
1042
+
1043
+ end
1044
+
1045
+ attr_accessor :rmd_cache
1046
+ attr_accessor :oc_cache
1047
+ attr_accessor :sl2ms_cache
1048
+
1049
# Pairs a condition with the action to take when that condition holds.
# Instances compare and hash by (condition, action), so they can serve as
# hash keys.
class Conditional
  attr_reader :condition, :action

  # condition:: must respond to restore; it is asked to restore its
  #             :hash and :== methods
  # action::    must respond to name and priority
  def initialize(condition, action)
    @condition = condition
    @action = action
    @condition.restore(:hash, :==)
  end

  def hash
    condition.hash ^ action.hash
  end

  def ==(other)
    Conditional === other &&
      condition == other.condition &&
      action == other.action
  end
  alias eql? ==

  # "<condition.inspect>?<action name>"
  def name
    condition.inspect + "?" + action.name
  end

  # A conditional ranks the same as its action.
  def priority
    action.priority
  end
end
1068
+
1069
+ class ParserState; end
1070
+ class MultiShift; end
1071
+ class MultiReduce; end
1072
+
1073
+ ACTION_PATTERN=ParserState|Rule|MultiShift|MultiReduce|:accept|:error
1074
+ class ParserState #a union of dotted rules
1075
#dotteds:: the dotted rules that make up this state; must be non-empty
#          and must not contain nil
#index::   stored in @index
def initialize(dotteds,index)
  fail if dotteds.empty? #error state
  fail if dotteds.any?{|dotted| nil===dotted }
  @dotteds=dotteds
  @index=index
  sort_substates!
  #key is an input, value is ParserState|Rule|MultiShift|MultiReduce|:accept|:error
  @actions={}
end
1083
+
1084
attr_reader :actions

#looks up the action recorded for +input_key+;
#asserts that what is stored is a legal action
def [](input_key)
  action=@actions[input_key]
  assert ACTION_PATTERN===action
  action
end

#records +action+ for +input_key+, after asserting that it is a legal action
def []=(input_key,action)
  assert ACTION_PATTERN===action
  @actions[input_key]=action
end
1095
+
1096
#orders the dotted rules by descending dot position and drops duplicates
def sort_substates!
  furthest_first=@dotteds.sort_by{|dotted| -dotted.pos }
  @dotteds=furthest_first.uniq
end
attr_reader :dotteds
1100
+
1101
#copies the state; the copy gets its own array of dotted rules
def dup
  copy=super
  copy.instance_variable_set(:@dotteds,@dotteds.dup)
  copy
end

#a plain state is its own (only) substate
def substates
  [self]
end
1108
+
1109
#Turns the shift candidates found for one input into a single action.
#
#shiftlist:: DottedRules (unconditional shifts) mixed with objects that
#            respond to condition and action (conditional shifts)
#parser::    supplies all_rules and the sl2ms_cache memo table
#
#Returns :error for an empty list, a ParserState when every candidate is
#a plain DottedRule, and a MultiShift otherwise. Results are memoized in
#parser.sl2ms_cache, keyed by shiftlist.
def shiftlist2multishift? shiftlist,parser
  return :error if shiftlist.empty?
  parser.sl2ms_cache||={}
  cache=parser.sl2ms_cache[shiftlist]
  return cache if cache
  fixed,varying=shiftlist.partition{|res| DottedRule===res}
  result=ParserState.new(fixed,nil)
  result.perhaps_also_allow parser.all_rules,parser
  unless varying.empty? #MultiShift
    #MultiShift reads its modifiers as one flat list:
    #[condition,action,condition,action,...]
    #(previously the flattened copy was thrown away and MultiShift
    #was handed an array of [condition,action] pairs)
    modifiers=varying.inject([]){|flat,v| flat.push v.condition,v.action }
    result=MultiShift.new(result,modifiers)
  end
  parser.sl2ms_cache[shiftlist]=result
  return result
end
1124
+
1125
#Extends this state with whatever dotted rules its current members say
#should also be allowed (DottedRule#also_allow), then re-sorts.
#morerules must be the parser's complete rule list.
#
#(the older implementation, kept as old_perhaps_also_allow, instead tested
#each rule for compatibility with a current substate, where compatibility
#means the aggregate patterns can be anded together and still conceivably
#match something.)
def perhaps_also_allow(morerules,parser)
  fail unless morerules==parser.all_rules
  extras=@dotteds.map{|d| d.also_allow }.flatten.compact.uniq
  @dotteds.concat extras
  sort_substates!
end
1134
#Older way of growing a state: repeatedly adds the initial dotted rule of
#every rule in morerules whose final_promised_rule can be combined
#(optionally_combine) with a dotted rule added in the previous round,
#until a round adds nothing. Re-sorts only if something was added.
def old_perhaps_also_allow(morerules,parser)
  morerules=morerules.dup
  grew=false
  frontier=@dotteds
  added={}
  while true
    fresh=[]
    morerules.each do |morerule|
      next if added[morerule]
      fake_rule=morerule.final_promised_rule
      final_more_dr=DottedRule.create(fake_rule,0,parser)
      compatible=frontier.any?{|dotted|
        dotted.optionally_combine final_more_dr,parser
      }
      if compatible
        fresh<<DottedRule.create(morerule,0,parser)
        added[morerule]=1
      end
    end
    break if fresh.empty?
    @dotteds.concat fresh
    grew=true
    frontier=fresh
  end
  sort_substates! if grew
end
#the old implementation can be selected by defining $OLD_PAA
alias perhaps_also_allow old_perhaps_also_allow if defined? $OLD_PAA
1161
+
1162
+
1163
#Computes the action this state takes when it sees +input+.
#
#input::    the input being fed to the state
#parser::   the parser whose tables are being generated
#seenlist:: handed through unchanged to DottedRule#evolve
#
#Each dotted rule is given a shared list to put its outcomes in. That list
#is de-duplicated, sorted by name and split into shifts (DottedRules, bare
#or wrapped in a Conditional) and reductions (everything else).
#
#returns ParserState|MultiShift|MultiReduce|Rule|:accept|:error
#(the ensure clause asserts this about +result+, so every exit has to
#assign result before returning)
def evolve input,parser,seenlist
  outcomes=[]
  @dotteds.each{|dotted|
    dotted.evolve input,parser,seenlist,outcomes
  }

  result=outcomes.uniq.compact.sort_by{|x| x.name}

  return result=:error if result.empty?

  #ok, who wants to shift and who wants to reduce?
  shiftlist,reducelist=result.partition{|res|
    DottedRule===res or
      Conditional===res && DottedRule===res.action
  }

  #if no reducers at all, just try (multi?)shift
  return result=shiftlist2multishift?( shiftlist,parser ) if reducelist.empty?

  #line up reducers by priority
  #actions is +[(Rule|Conditional[Rule]).*]
  actions=reducelist.sort_by{|rule| -rule.priority }
  action=actions.shift #this first (unless conditional)
  #action is Rule|Conditional[Rule]
  result=
    case action.action
    when :error
      #assign result too: it still held the outcome list at this point,
      #which made the assertion in the ensure clause fail
      return result=:error
    when Class, StackMonkey
      action
    when :accept
      :accept
    when :shift #this counts as a reduce at this point, but it writes shift instructions
      shiftlist2multishift? shiftlist,parser
    when Rule #oy, vey, was a Conditional
      shiftaction=shiftlist2multishift?(shiftlist,parser)
      fail unless Rule===action.action
      case action.action.action
      when :error; huh
      when :shift, StackMonkey, :accept, Class #MultiReduce
        #find the first reducer that is not conditional
        first_fixed_index=actions.size
        actions.each_with_index{|act,i|
          break first_fixed_index=i unless Conditional===act
        }
        #flat [condition,action,...] list of the leading conditional reducers
        condactions=actions[0...first_fixed_index].unshift(action)
        condactions=condactions.inject([]){|sum,cond|
          act=cond.action
          act=shiftaction if act==:shift #=>shiftlist?
          sum.push cond.condition, act
        }
        #possible optimization: one or more :shift right at end could be ignored
        if actions[first_fixed_index]
          action=actions[first_fixed_index].action
        else
          action=shiftaction
        end
        MultiReduce.new condactions,action #=>shiftlist?
      else fail
      end
    else fail "#{action} not expected here"
    end
  #stack monkeys/:accept are treated like reduce here
ensure
  assert ACTION_PATTERN===result
end
1237
+
1238
#the explicitly assigned name if there is one, otherwise the names of
#all dotted rules joined with commas
def name
  return @name if @name
  @dotteds.map{|dotted| dotted.name }.join(",")
end
attr_writer :name
1242
+
1243
#Gives the state a name built from its most prominent members, unless it
#has been named already.
#
#name2count:: hash counting how often each base name has been handed out;
#             from the second use on, "___<count>" is appended
#
#Returns the state's name.
def rename(name2count)
  return @name if defined? @name
  base=most_prominent_members.map{|dotted| dotted.name }.join(",")
  uses=name2count[base]
  if uses
    name2count[base]=uses+1
    @name=base+"___"+name2count[base].to_s
  else
    name2count[base]=1
    @name=base
  end
end
1255
+
1256
#Picks the dotted rules used to name this state: those whose dot position
#equals that of the first dotted rule, plus those with the largest number
#of patterns left after the dot. Rules that have not progressed (dot at 0,
#or at 1 when the rule has a lookback) are dropped, unless that would
#leave nothing.
def most_prominent_members
  leaders=@dotteds.select{|dr| dr.pos==@dotteds.first.pos }
  by_remaining=@dotteds.map{|dr| [dr,dr.rule.patterns.size-dr.pos] }
  by_remaining=by_remaining.sort_by{|pair| -pair.last }
  longest=by_remaining.select{|pair| pair.last==by_remaining.first.last }
  members=leaders+longest.map{|pair| pair.first }
  progressed=members.reject{|dr| dr.pos==0 or dr.pos==1&&dr.rule.lookback? }
  progressed.empty? ? members : progressed
end
1264
+
1265
#states hash and compare by their list of dotted rules
def hash
  -@dotteds.hash
end

def ==(other)
  return false unless ParserState===other
  @dotteds==other.dotteds
end
alias eql? ==
1273
+
1274
+ def looping?
1275
+ @dotteds.any?{|dotted| dotted.looping? }
1276
+ end
1277
+
1278
+ def transition_to_loop? input #not used
1279
+ action=@actions.input
1280
+ case action
1281
+ when :error; false
1282
+ when ParserState; action.looping? and action!=self
1283
+ when MultiShift,MultiReduce;
1284
+ action.transition_to_loop? input
1285
+ else fail
1286
+ end
1287
+ end
1288
+
1289
#Splits the action table into @goto (keys whose exemplar is a Node) and
#@sr (keys whose exemplar is a Token). In each table the most frequent
#action becomes the hash default and its explicit entries are removed.
#@actions is set to nil afterwards.
#
#NOTE(review): @inputs is not assigned anywhere in the part of this class
#visible here -- confirm where it is expected to come from.
def make_sr_goto_tables
  name2exemplar={}
  @inputs.each{|i| name2exemplar[i.name]=i }

  @goto={}; @sr={}
  goto_counts=Hash.new(0); sr_counts=Hash.new(0)
  actions.each_pair{|k,v|
    if Node===name2exemplar[k]
      @goto[k]=v
      goto_counts[v]+=1
    else
      assert(Token===name2exemplar[k])
      @sr[k]=v
      sr_counts[v]+=1
    end
  }

  install_most_common_as_default @goto,goto_counts
  install_most_common_as_default @sr,sr_counts

  @actions=nil
end

#Makes the action with the highest count the default of +table+ and drops
#the entries that merely repeat it. An empty table is left alone (looking
#for its most common action used to raise).
def install_most_common_as_default(table,counts)
  return if counts.empty?
  dflt=counts.sort_by{|v,c| c}.last[0]
  table.delete_if{|k,v| v==dflt }
  table.default=dflt
end
private :install_most_common_as_default
1315
+
1316
+ end
1317
+
1318
#An action that picks among several actions by testing conditions.
#
#list is flat: [condition,action,condition,action,...]. The action that
#follows the first condition matching (===) the tested value wins; if no
#condition matches, the default action is used.
class MultiReduce
  #list::    flat list of condition,action pairs
  #default:: any valid action (except another MultiReduce)
  def initialize(list,default)
    @list,@default=list,default
  end

  attr_reader :list,:default

  #returns the action selected for x
  def act(x)
    (0...@list.size).step(2){|i|
      return @list[i+1] if @list[i]===x
    }
    return default
  end

  #the substates of the default action, if it has any
  def substates
    if @default.respond_to? :substates
      @default.substates
    else
      []
    end
  end

  #every action this object can select: the listed ones, followed by the
  #default (or by the default's own actions, if it has some)
  def actions
    result=[]
    (1...@list.size).step(2){|i|
      result << @list[i]
    }
    if @default.respond_to? :actions
      result.concat @default.actions
    elsif @default
      result<<@default
    end
    result
  end

  def transition_to_loop? input #not used
    @default.transition_to_loop? input
  end

  def hash
    @list.hash^~@default.hash
  end

  #equal to another MultiReduce with equal list and default; anything else
  #(a state, a rule, :accept, :error ...) simply compares unequal instead
  #of raising NoMethodError
  def == other
    MultiReduce===other and
      @list==other.list and @default==other.default
  end
  alias eql? ==
end
1367
+
1368
#A shift whose target state depends on which predicates the input satisfies.
#
#modifiers is flat: [predicate,modifier,predicate,modifier,...]. For every
#combination of predicate outcomes there is one state in @map, made from a
#copy of base plus the modifiers of the predicates that hold. Bit k of a
#state's index in @map corresponds to predicate number k, which is the
#encoding act uses to find the state again.
class MultiShift
  def initialize(base,modifiers)
    @base,@modifiers=base,modifiers
    @map=
      (0...2**(modifiers.size/2)).map{|i| base.dup}
    @map.each_with_index{|state,i| #for each branch to the multishift
      (0...modifiers.size).step(2){|j| #for each predicate in the multishift
        #predicate number j/2 owns bit j/2 (same encoding as in act).
        #testing bit j skipped every predicate after the first.
        if (i&(1<<(j/2)))!=0 #if the predicate tests true in this branch
          #NOTE(review): append is not defined by ParserState in the part
          #of the file visible here -- confirm it exists
          state.append modifiers[j+1] #add the predicates modifier to the state
        end
      }
      state.sort_substates!
    }
  end

  #returns the state for the combination of predicates that x satisfies
  def act(x)
    result=0
    (0...@modifiers.size).step(2){|i|
      result|=(1<<(i/2)) if @modifiers[i]===x
    }
    @map[result]
  end

  attr_reader :base, :map, :modifiers

  def substates
    @map.dup
  end

  def actions
    @map.dup
  end

  def transition_to_loop? input #not used
    huh
  end

  #@map is derived entirely from base and modifiers, so those two decide
  #equality and the hash value
  def hash
    @base.hash^~@modifiers.hash
  end
  def == other
    MultiShift===other and
      @base==other.base and @modifiers==other.modifiers
  end
  alias eql? ==
end
1413
+
1414
+ #an action is one of:
1415
+ #a ParserState (shift)
1416
+ #a Rule (reduce)
1417
+ #:error
1418
+ #:accept
1419
+ #MultiReduce
1420
+ #MultiShift
1421
+
1422
#All matchers from the left side (the stack/lookahead matchers) of every
#expanded rule, as one flat list. Also (re)assigns @rules.
def LEFT
  @rules=expanded_RULES()
  left_sides=@rules.map{|rule| rule.left.subregs }
  left_sides.flatten
end
1429
+
1430
#LEFT() with every Proc replaced by an empty array. (not used?)
#Lookahead and lookback decoration should be gone already by now, so
#meeting a Reg::LookAhead or Reg::LookBack is an error.
def LEFT_NO_LOOKING
  matchers=LEFT()
  matchers.map! do |m|
    fail if Reg::LookAhead===m or Reg::LookBack===m #should be gone already now
    Proc===m ? [] : m
  end
  matchers
end
1442
+
1443
#Builds the inheritance tree restricted to the given classes (plus Object).
#
#Returns a hash with one key per class; the value lists the classes whose
#nearest ancestor within the given set is that key.
def child_relations_among(*classes)
  classes.unshift Object
  children={}
  classes.each{|klass| children[klass]=[] }

  classes.each do |klass|
    lineage=klass.ancestors
    lineage.shift==klass or fail
    #the closest ancestor that is itself one of the given classes
    parent=lineage.find{|anc| children[anc] }
    children[parent] << klass if parent
  end

  children
end
1462
+
1463
#all classes mentioned in rules, on left and right sides
#(computed once and remembered in @sc_result; fails if there are none)
def STACKABLE_CLASSES #
  return @sc_result if defined? @sc_result
  @sc_result=[]
  @subclasses_of=child_relations_among(*vertices)
  from_left=LEFT().map{|lm| sc_juice lm }.flatten.compact
  assert from_left.grep(nil).empty?
  from_right=@rules.map{|rr| rr.right }.grep(Class) #classes in productions
  mentioned=from_left+from_right
  @subclasses_of=nil
  @sc_result.replace mentioned.grep(Class).uniq
  fail if @sc_result.empty?
  @sc_result
end
1479
+
1480
+ # def juice(m)
1481
+ # case m #
1482
+ # when Class
1483
+ # return [m] unless @subclasses_of
1484
+ # result=[m] # and subclasses too
1485
+ # i=0
1486
+ # while item=result[i]
1487
+ # p item
1488
+ # result.concat @subclasses_of[item] rescue nil
1489
+ # i += 1
1490
+ # end
1491
+ # result
1492
+ # when String,Regexp; juice(RedParse.KW(m))
1493
+ # when Reg::And; m.subregs.map{|x| juice(x).flatten.compact}.inject{|sum,rr| sum&rr}
1494
+ # when Reg::Or; m.subregs.map &method(:juice)
1495
+ # when Reg::Not
1496
+ # m=m.subregs[0]
1497
+ # if Class===m or (Reg::Or===m and
1498
+ # m.subregs.find{|x| Class===x })
1499
+ # juice(m)
1500
+ # else []
1501
+ # end
1502
+ # else []
1503
+ # end
1504
+ # end
1505
+
1506
#Extracts the classes a matcher mentions (the result may be nested):
#a Class stands for itself, a String or Regexp for KeywordToken, an And
#for what all of its branches have in common, an Or for each of its
#branches, and Not/LookAhead/LookBack/Repeat for whatever they wrap.
#Anything else yields nothing.
def sc_juice(m)
  case m #
  when Class
    [m]
  when String,Regexp
    [KeywordToken]
  when Reg::And
    branches=m.subregs.map{|x| sc_juice(x) }.compact
    branches=branches.map{|x| x.flatten.compact }
    branches.inject{|sum,rr| sum&rr }
  when Reg::Or
    m.subregs.map(&method(:sc_juice))
  when Reg::Not, Reg::LookAhead, Reg::LookBack, Reg::Repeat
    sc_juice(m.subregs[0])
  else
    []
  end
end
33
1518
 
34
- def self.has_return_hash_fix?
1519
#The rules that report themselves as unruly?. Computed once; the first
#call also prints their names.
def unruly_rules
  unless defined? @unruly_rules
    @unruly_rules=all_rules.select{|rule| rule.unruly? }

    p :unruly_rules
    pp @unruly_rules.map{|r| r.name }
  end
  @unruly_rules
end
1530
+
1531
#Builds one sample object per exemplar description supplied by the
#stackable classes. A description is [klass, name, value, name, value...]:
#the class is allocate'd (not initialized) and gets a singleton method for
#each name that returns the given value. to_s is made to answer
#identity_name. The list is remembered in @@exemplars.
def enumerate_exemplars
  return @@exemplars if defined? @@exemplars #dunno why this is necessary

  descriptions=STACKABLE_CLASSES().map{|sc| sc.enumerate_exemplars }
  result=descriptions.inject{|sum,sc| sum+sc }

  result.map! do |description|
    #the eval'd source below refers to this local by name
    res=description.shift.allocate
    until description.empty?
      method_name=description.shift
      value=description.shift
      eval "def res.#{method_name}; #{value.inspect} end"
    end
    def res.to_s; identity_name end
    res
  end

  return @@exemplars=result
end
1549
+
1550
#Finds inputs that no rule pattern can tell apart.
#
#Each input gets a profile: for every distinct pattern (Repeats unwrapped),
#whether it matches the input; Procs are put into the profile as
#themselves. An input whose profile was seen before is reported, recorded
#in @identity_name_aliases (keyed by the earlier input) and left out.
#
#Returns the inputs with distinct profiles.
def check_for_parsealike_inputs
  all_patterns=all_rules.map{|r|
    r.patterns.map{|rp| Reg::Repeat===rp ? rp.subregs[0] : rp }
  }.flatten.uniq
  seen={}
  @identity_name_aliases={}
  warn "why are non_empty and after_equals params to BeginNode appearently ignored?"
  warn "some token identities overlap themselves?!?"
  warn "some overlaps are duplicated"
  warn ". and :: overlap => ..... surely that's not right"
  @inputs.map{|input|
    profile=all_patterns.map{|pat| Proc===pat ? pat : !!(pat===input) }
    earlier=seen[profile]
    if earlier
      puts "#{input} overlaps #{earlier}"
      @identity_name_aliases[earlier]=input
      nil
    else
      seen[profile]=input
    end
  }.compact
end
1569
+
1570
#Generates every parser state reachable from start_state.
#
#Works through a queue: each state is evolved with every distinct input
#(StartToken excepted), the resulting action is stored in the state under
#the input's identity_name, and substates not met before are queued.
#A progress line is printed per state. The parser's memo caches are
#cleared at the end.
#
#Returns the states in the order they were processed.
def enumerate_states
  inputs=check_for_parsealike_inputs
  inputs.reject!{|x| StartToken===x }

  finished=[]
  todo=[start_state]

  seenlist=Hash.new(:dunno_yet)

  done_count=0
  started=previous=Time.now
  in_result={} #this should go away; obsoleted by @states
  state_num=-1
  todo.each{|st| in_result[st]=(state_num+=1) }
  first=todo.first
  pp [-in_result[first], *first.dotteds.map{|dr| dr.name }]
  old_todo_size=todo.size
  while state=todo.shift
    finished<<state

    inputs.each do |input|
      newstate=state.evolve input,self,seenlist
      assert ACTION_PATTERN===newstate
      #newstate is ParserState|MultiShift|MultiReduce|Rule|:accept|:error
      state[input.identity_name]=newstate
      next unless newstate.respond_to? :substates
      #newstate.substates is just [newstate] for plain ParserStates
      morestates=newstate.substates.reject{|x| in_result[x] }
      morestates.each{|st| in_result[st]=(state_num+=1) }
      todo.concat morestates
    end

    now=Time.now
    p [:*,done_count+=1,todo.size,todo.size-old_todo_size,now-previous,done_count/(now-started),(100.0*done_count/(done_count+todo.size)).to_i]
    old_todo_size=todo.size
    previous=now

    # if state.actions.values.uniq==[:error]
    #this can happen when the only dotted rule is for an :error
    #maybe this case can be optimized?
    # end
  end
  self.rmd_cache=nil
  self.oc_cache=nil
  self.sl2ms_cache=nil
  return finished
end
1625
+
1626
#Renders an action in a compact, printable form.
#
#x::         the action
#in_result:: hash mapping each ParserState to its number
#
#A ParserState becomes its number, a Class or StackMonkey its name;
#:accept and :error stay as they are. A MultiReduce becomes
#[:MultiReduce, [condition,action]..., default] and a MultiShift becomes
#[:MultiShift, predicates, {branch index=>state}].
#Raises on anything else.
def pretty(x,in_result)
  case x
  when ParserState; in_result[x]
  when MultiReduce
    pairs=x.list.dup
    result=[]
    until pairs.empty?
      cond,act,*pairs=*pairs
      cond = cond.inspect
      result<<[cond,pretty(act.action,in_result)]
    end
    result<<pretty(x.default,in_result)
    result.unshift :MultiReduce
  when MultiShift
    h={}
    mods=x.modifiers
    its=[]
    (0...mods.size).step(2){|i| its<<mods[i] }
    #in_result has to be handed on (leaving it out was an ArgumentError)
    x.map.each_with_index{|xx,i| h[i]=pretty(xx,in_result) }
    [:MultiShift, its,h]
  when Class; x.name
  when StackMonkey; x.name
  when :accept,:error; x
  else fail "not a valid action: #{x}"
  end
end
1652
+
1653
attr_accessor :inputs

#every parser state; enumerated on first use and remembered afterwards
def all_states
  @all_states=enumerate_states unless defined? @all_states
  @all_states
end
1659
+
1660
#the inputs that pattern p matches
def exemplars_that_match p
  @inputs.select{|input| p===input }
end

#does p match at least one input that is a Node?
def pattern_matches_nodes? p
  node_pattern=Node&p
  @inputs.any?{|input| node_pattern===input }
end

#does p match at least one input that is a Token?
def pattern_matches_tokens? p
  token_pattern=Token&p
  @inputs.any?{|input| token_pattern===input }
end
1671
+
1672
#the alias recorded for name in @identity_name_aliases, or name itself
#when there is none
def identity_name_alias? name
  @identity_name_aliases[name]||name
end
1676
+
1677
#Builds the parse tables (or loads them from cached_parse_tables.drb in the
#current directory), prints statistics about unused dotted rules, unused
#actions and duplicate states, names the states, splits their actions into
#sr and goto tables and finally writes the generated C to $stdout.
#
#While this runs the parser is registered in
#Thread.current[:$RedParse_parser], unless another parser is registered
#already; the previous value is put back at the end.
#
#Returns self.
def compile
  oldparser=Thread.current[:$RedParse_parser]
  Thread.current[:$RedParse_parser]||=self

  if File.exist?("cached_parse_tables.drb")
    #NOTE(review): Marshal.load trusts this file completely; it must
    #never come from an untrusted source
    dup=Marshal.load(f=open("cached_parse_tables.drb","rb"))
    instance_variables.each{|var| remove_instance_variable var }
    extend SingleForwardable
    def_singleton_delegators(dup,public_methods+private_methods+protected_methods)

    self.inputs=enumerate_exemplars
  else
    @generating_parse_tables=true
    @inputs||=enumerate_exemplars

    states=all_states
    # @rules=expanded_RULES
    @inputs=nil #Marshal no like it

    begin
      p :dumping
      Marshal.dump(self,f=open("cached_parse_tables.drb","wb"))
      p :dump_done!
    rescue Exception
      #caching is best effort: throw away whatever was written and go on
      p :dump_failed
      File.unlink "cached_parse_tables.drb" if File.exist? "cached_parse_tables.drb"
    ensure
      @inputs=enumerate_exemplars
    end
  end
  f.close if f #f is still nil if the cache file could not even be opened

  #when the tables came out of the cache, states has not been set yet
  states||=all_states

  #look for unused dotted rules and actions
  #also states with drs past the end
  past_end=0
  drs=all_dotted_rules
  dr_count=Hash.new(0)
  acts=all_rules#.map{|r| r.action }.uniq
  act_count=Hash.new(0)
  states.each{|state|
    state.dotteds.each{|dr|
      dr_count[dr]+=1
      past_end+=1 if dr.pos>=dr.rule.patterns.size
    }
    sav=state.actions.values
    sav.grep(Class|StackMonkey).each{|act| act_count[act.__id__]+=1 }
    sav.grep(MultiReduce|MultiShift).each{|multi| multi.actions.each{|act| act_count[act.__id__]+=1} }
  }
  puts "#{past_end} dotted rules found past the end of their rule" if past_end>0
  nevers=0
  drs.each{|dr|
    next unless dr_count[dr].zero?
    puts "never reached #{dr.name}"
    nevers+=1
  }
  puts "#{nevers} dotted rules were never reached (out of #{drs.size})"
  nevers=0
  acts.each{|act|
    next unless act_count[act.__id__].zero?
    puts "never reached #{act.name rescue act}"
    nevers+=1
  }
  puts "#{nevers} actions were never reached (out of #{acts.size})"
  p :most_popular_nontrivial_drs
  #last(15) also copes with fewer than 15 entries ([-15..-1] is nil then)
  pp dr_count.reject{|(dr,n)| dr.pos.zero? or dr.pos==1 && dr.rule.lookback?} \
     .sort_by{|(dr,n)| n}.last(15).map{|(dr,n)| [dr.name,n] }

  #look for duplicate states
  actions2state={}
  dup_states=0
  states.each{|st|
    cache=actions2state[st.actions]
    if cache
      #NOTE(review): equivalent_to= is not defined by ParserState in the
      #part of the file visible here -- confirm it exists
      st.equivalent_to=cache
      dup_states+=1
    else
      actions2state[st.actions]=st
    end
  }
  puts "#{dup_states} duplicate states" if dup_states.nonzero?

  name2count={}
  states.each{|state| state.rename(name2count) }

  #divide each state's actions into sr and goto tables
  #also scan states for the most common sr and goto actions and make them default
  states.each{|state| state.make_sr_goto_tables }

  generate_c $stdout
  return self
ensure
  remove_instance_variable :@generating_parse_tables rescue nil
  Thread.current[:$RedParse_parser]=oldparser
end
1776
+
1777
#Collects juice() of the second pattern of every rule whose patterns begin
#with StartToken and end with EoiToken, as one flat list.
#
#NOTE(review): the size test demands an empty pattern list, which cannot
#also have StartToken as its first element, so as written nothing is ever
#collected; a size of 3 may have been meant. juice itself only appears
#commented out in this part of the file. Confirm before relying on this.
def ultimate_goal_nodes
  goals=[]
  all_rules.each do |rule|
    next unless rule.patterns.size==0
    next unless rule.patterns.first==StartToken
    next unless rule.patterns.last==EoiToken
    goals << juice(rule.patterns[1])
  end
  goals.flatten!
  goals
end
1789
+
1790
+
1791
+ # def start_state
1792
+ # goal=ultimate_goal_nodes
1793
+ # result=all_rules.select{|rule|
1794
+ # rt=rule.reduces_to and
1795
+ # !goal.select{|node| node>=rt}.empty?
1796
+ # }
1797
+ # result.map!{|rule| DottedRule.create(rule,0,parser)}
1798
+ #
1799
+ # result=ParserState.new result
1800
+ # result.name="start_state"
1801
+ # result
1802
+ # end
1803
+
1804
#Makes a ParserState from the dotted rules drs, extends it with
#perhaps_also_allow and registers it in @states under the next free index.
#unruly_also is accepted but not used.
#
#NOTE(review): when an equal state is registered already, what comes back
#is the value stored in @states -- its index -- rather than a state;
#confirm that this is intended.
def new_state(drs,unruly_also=false)
  state=ParserState.new(drs,@states.size)
  state.perhaps_also_allow all_rules,self
  known=@states[state]
  if known
    known
  else
    @states[state]=@states.size
    state
  end
end
1812
+
1813
#Starts a fresh @states table and returns the state named "initial", which
#holds every rule with its dot at position 0.
def initial_state
  @states={}
  all_initial_dotted_rules #is this still needed?
  fresh_rules=all_rules.map{|r| DottedRule.create(r,0,self) }
  state=new_state(fresh_rules)
  state.name="initial"
  state
end

attr_reader :states
1823
+
1824
#The state named "start": what the initial state evolves into when given
#a StartToken, extended once more with perhaps_also_allow.
def start_state
  seenlist=Hash.new(:dunno_yet)
  state=initial_state.evolve(StartToken.new,self,seenlist)
  state.perhaps_also_allow all_rules,self
  state.name="start"
  state
end
1834
+
1835
#inline any subsequences in RULES right into the patterns
#reg should do this already, but current release does not
#
#Returns RULES() untouched when reg already flattens nested sequences
#(probed with a tiny nested sequence). Otherwise every rule whose left
#side contains a Reg::Subseq is rebuilt with the subsequence's members
#spliced in where it stood; rules without one are kept as they are.
def expanded_RULES
  result=RULES()
  return result if (-[:foo, -[:bar]]).subregs.grep(Reg::Subseq).empty?
  result.map!{|rule|
    #grep returns an array, which is always true; ask whether it is empty.
    #(without this, rules that needed no expansion were rebuilt too)
    if rule.left.subregs.grep(Reg::Subseq).empty?
      rule
    else
      right=rule.right
      rule=rule.left.subregs.dup
      #back to front, so that splicing does not shift what is still to come
      (rule.size-1).downto(0){|i|
        if Reg::Subseq===rule[i]
          rule[i,1]=rule[i].subregs
        end
      }
      -rule>>right
    end
  }
end
1855
+
1856
#mixed into constant values so that they inspect as the name of the
#constant that holds them
module NamedConstant
  attr_accessor :constant_name

  def inspect
    constant_name
  end
end
1860
#Makes the value of every constant of this class inspect as the constant's
#name. Classes, modules, numbers, symbols, true, false and nil are skipped.
def self.inspect_constant_names
  constants.each do |constant_name|
    value=const_get(constant_name)
    next if (Class|Module|Numeric|Symbol|true|false|nil)===value
    value.extend NamedConstant
    value.constant_name=constant_name
  end
end
1868
+
1869
#Table of the objects in the rules that Marshal cannot dump.
#
#StackMonkeys are filed under their name. Reg::Deferred objects are filed
#under consecutive integers starting at 0 and are each given an undump_key
#accessor holding that integer (Proc#_dump writes that key and Proc._load
#looks it up here again).
#
#The table is built on first use and returned on every call.
def undumpables
  return @undumpables if @undumpables
  @rules||=expanded_RULES
  n=-1
  @undumpables={}
  abortable_graphwalk(@rules){|cntr,o,i,ty|
    recorded=
      case o
      when StackMonkey
        @undumpables[o.name]=o
      when Reg::Deferred
        @undumpables[n+=1]=o
        class<<o
          attr_accessor :undump_key
        end
        o.undump_key=n
      end
    #the block answers false for objects that were just recorded
    !recorded
  }
  #hand back the table on the first call as well, not the value of the walk
  @undumpables
end
1887
+
1888
class ::Proc #hack hack hack
  #only define hacky _dump if one isn't defined already
  #(method_defined? gives the same answer whether instance_methods lists
  #strings, as in ruby 1.8, or symbols, as in later versions)
  unless method_defined?(:_dump) or
         method_defined?(:marshal_dump) or
         (Marshal.dump(proc{}) rescue false)
    #a proc is dumped as its key in the parser's undumpables table
    def _dump depth
      undump_key.to_s
    end
    #...and loaded by looking that key up in the table of the parser
    #registered in Thread.current[:$RedParse_parser]
    def self._load str
      Thread.current[:$RedParse_parser].undumpables[str.to_i]
    end
  end
end
1901
+
1902
+ =begin disabled, uses too much memory!!
1903
+ class MarshalProxy
1904
+ def initialize(key)
1905
+ @key=key
1906
+ end
1907
+ attr :key
1908
+ end
1909
+
1910
+ #convert unmarshalables, such as stackmonkeys into proxies
1911
+ def proxify
1912
+ n=-1
1913
+ seen={}
1914
+ mkproxy=proc{|cntr,o,i,ty,useit|
1915
+ case o
1916
+ when StackMonkey
1917
+ useit[0]=true
1918
+ seen[o.__id__]||=MarshalProxy.new(o.name)
1919
+ when Reg::Deferred
1920
+ useit[0]=true
1921
+ seen[o.__id__]||=MarshalProxy.new(n+=1)
1922
+ end
1923
+ }
1924
+ Ron::GraphWalk.graphmodify!(@rules,&mkproxy)
1925
+ Ron::GraphWalk.graphmodify!(self,&mkproxy)
1926
+
1927
+ end
1928
+
1929
+ def _dump depth
1930
+ fail unless @rules
1931
+ proxify
1932
+ ivs=instance_variables
1933
+ a=ivs+ivs.reverse.map{|var| instance_variable_get var }
1934
+ result=Marshal.dump(a,depth)
1935
+ unproxify
1936
+ return result
1937
+ end
1938
+
1939
+ #convert marshal proxies back to the real thing
1940
+ def unproxify
1941
+ #build a lookup table for unmarshalables by walking @rules
1942
+ @rules||=expanded_RULES
1943
+ n=-1;lookup={}
1944
+ Ron::GraphWalk.graphwalk(@rules){|cntr,o,i,ty|
1945
+ case o
1946
+ when StackMonkey
1947
+ lookup[o.name]=o
1948
+ when Reg::Deferred
1949
+ lookup[n+=1]=o
1950
+ end
1951
+ }
1952
+
1953
+ Ron::GraphWalk.graphmodify!(self){|cntr,o,i,ty,useit|
1954
+ if MarshalProxy===o
1955
+ useit[0]=true
1956
+ lookup[o.key]
1957
+ end
1958
+ }
1959
+ end
1960
+
1961
+ def self._load(str,*more)
1962
+ result=allocate
1963
+ a=Marshal.load(str,*more)
1964
+
1965
+ result.unproxify
1966
+
1967
+ (0...a.size/2).each{|i| result.instance_variable_set a[i],a[-i] }
1968
+ return result
1969
+ end
1970
+ =end
1971
+
1972
+ ###### specific to parsing ruby
1973
+
1974
+
1975
+ UCLETTER=RubyLexer::UCLETTER
1976
+
1977
+ LCLETTER=RubyLexer::LCLETTER
1978
+ LETTER=RubyLexer::LETTER
1979
+ LETTER_DIGIT=RubyLexer::LETTER_DIGIT
1980
+
1981
#the constants of the parser's class that match Node|Token
def vertices
  self.class.constants.grep(Node|Token)
end
1982
+
1983
+ def self.has_return_hash_fix? #is this needed? it's not used in this file....
35
1984
  rl=RubyLexer.new("","return {}.size")
36
1985
  return(
37
1986
  FileAndLineToken===rl.get1token and
@@ -109,11 +2058,12 @@ class RedParse
109
2058
 
110
2059
  "?"=>106, # ":"=>106, #not sure what to do with ":"
111
2060
 
112
- "*@"=>105.5, "&@"=>105.5, #unary * and & operators
2061
+ "unary*"=>105, "unary&"=>105, #unary * and & operators
2062
+ "lhs*"=>105, "rhs*"=>105, #this should remain above =, but other unary stars are below it
113
2063
 
114
- "="=>105, "%="=>105, "/="=>105, "-="=>105, "+="=>105,
115
- "|="=>105, "&="=>105, ">>="=>105, "<<="=>105, "*="=>105,
116
- "&&="=>105, "||="=>105, "**="=>105, "^="=>105,
2064
+ "="=>104, "%="=>104, "/="=>104, "-="=>104, "+="=>104,
2065
+ "|="=>104, "&="=>104, ">>="=>104, "<<="=>104, "*="=>104,
2066
+ "&&="=>104, "||="=>104, "**="=>104, "^="=>104,
117
2067
 
118
2068
  "defined?"=>103,
119
2069
  "not"=>103,
@@ -121,11 +2071,14 @@ class RedParse
121
2071
  "rescue3"=>102,
122
2072
 
123
2073
  "=>"=>101,
124
- ","=>100,
2074
+ "lhs,"=>100,
2075
+ "rhs,"=>100, #"call,"=>100, "array,"=>100, "param,"=>100,
2076
+ ","=>100,
125
2077
  #the 'precedence' of comma is somewhat controversial. it actually has
126
2078
  #several different precedences depending on which kind of comma it is.
127
2079
  #the precedence of , is higher than :, => and the assignment operators
128
- #in certain contexts.
2080
+ #in certain (lhs) contexts. therefore, the precedence of lhs, should
2081
+ #really be above =.
129
2082
 
130
2083
  #"unary" prefix function names seen has operators have this precedence
131
2084
  #but, rubylexer handles precedence of these and outputs fake parens
@@ -142,29 +2095,110 @@ class RedParse
142
2095
  end
143
2096
 
144
2097
  module BracketsCall; end
145
-
146
2098
  Value= #NumberToken|SymbolToken|
147
2099
  #HerePlaceholderToken|
148
- ((VarNameToken|ValueNode)&-{:lvalue? =>nil})
2100
+ ValueNode&-{:lvalue =>nil}
149
2101
  Expr=Value
150
2102
 
2103
+ if defined? SPECIALIZED_KEYWORDS
2104
+ class SpecializedKeywordToken<KeywordToken
2105
+ def inspect
2106
+ "#<"+self.class.name+">"
2107
+ end
2108
+ alias image inspect
2109
+ end
2110
+
2111
+ KW2class={}
2112
+
2113
+ Punc2name={
2114
+ "("=>"lparen", ")"=>"rparen",
2115
+ "["=>"lbracket", "]"=>"rbracket",
2116
+ "{"=>"lbrace", "}"=>"rbrace",
2117
+ ","=>"comma",
2118
+ ";"=>"semicolon",
2119
+ "::"=>"double_colon",
2120
+ "."=>"dot",
2121
+ "?"=>"question_mark", ":"=>"colon",
2122
+ "="=>"equals",
2123
+ "|"=>"pipe",
2124
+ "<<"=>"leftleft", ">>"=>"rightright",
2125
+ "=>"=>"arrow",
2126
+ }
2127
+ end
2128
+
151
2129
  def self.KW(ident)
2130
+ if defined? SPECIALIZED_KEYWORDS
2131
+ fail if /\\/===ident
2132
+ orig_ident=ident
2133
+ if Regexp===ident
2134
+ list=ident.to_s[/\(?-mix:\^\((.*)\)\$\)/,1]
2135
+
2136
+ #pick apart any char class in ident
2137
+ if open_bracket_idx=list.index(/([^\\]|^)\[/)
2138
+ open_bracket_idx+=1 unless list[open_bracket_idx]=="["
2139
+ close_bracket_idx=list.index(/[^\\]\]/,open_bracket_idx+1)
2140
+ close_bracket_idx+=1 unless list[close_bracket_idx]=="]"
2141
+ cclass=list.slice!(open_bracket_idx..close_bracket_idx)
2142
+ cclass=cclass[1...-1]
2143
+ cclass=cclass.scan( /[^\\]|\\./ )
2144
+ cclass.map!{|ch| ch.size==1 ? ch : ch[1..1] }
2145
+ end
2146
+
2147
+ #rest of it should be a list of words separated by |
2148
+ list=list.split(/\|/).reject{|x| x==''}
2149
+ list.concat cclass if cclass
2150
+ list.map{|w|
2151
+ w.gsub!(/\\/,'')
2152
+ KW(w)
2153
+ }.inject{|sum,kw| sum|kw}
2154
+ else
2155
+ fail unless String===ident
2156
+ ident=Punc2name[ident] unless /^(?:(?!#{LETTER_DIGIT}).)+$/o===ident
2157
+ fail "no name for #{orig_ident}" unless ident
2158
+ eval %{
2159
+ class Keyword_#{ident} < SpecializedKeywordToken
2160
+ def ident; '#{orig_ident}' end
2161
+ # def self.instance; @instance ||= allocate end
2162
+ # def self.new; instance end
2163
+ def initialize(offset)
2164
+ @offset=offset
2165
+ end
2166
+ end
2167
+ }
2168
+ KW2class[ident]||=const_get("Keyword_#{ident}")
2169
+ end
2170
+ else
152
2171
  ident=case ident
153
- when Integer: ident.chr
154
- when String,Regexp: ident
2172
+ when Integer; ident.chr
2173
+ when String,Regexp; ident
155
2174
  else ident.to_s
156
2175
  end
157
2176
 
158
2177
  return KeywordToken&-{:ident=>ident}
2178
+ end
159
2179
  end
160
2180
  def KW(ident); self.class.KW(ident) end
2181
+
2182
+ if defined? SPECIALIZED_KEYWORDS
2183
+ def make_specialized_kw(name,offset)
2184
+ name=Punc2name[name] unless /^((?!#{LETTER_DIGIT}).)+$/o===name
2185
+ KW2class[name].new(offset)
2186
+ end
2187
+ alias make_kw make_specialized_kw
2188
+ else
2189
+ def make_kw(name,offset)
2190
+ KeywordToken.new(name,offset)
2191
+ end
2192
+ end
2193
+
161
2194
  UNOP=
162
2195
  (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
163
- :ident=>/^[*&+-]@$/,
164
- :unary =>true,
2196
+ # :ident=>/^(?:[+-]@|unary[&*]|(?:lhs|rhs)[*])$/,
2197
+ :ident=>/^(?:[+-]@|unary[&])$/,
2198
+ #:unary =>true,
165
2199
  }|
166
2200
  (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
167
- :ident=>/^([~!]|not|defined\?)$/,
2201
+ :ident=>/^([~!]|not|defined\?)$/, #defined? should be removed from here, its handled separately
168
2202
  } #|
169
2203
  DEFOP=
170
2204
  (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
@@ -200,285 +2234,325 @@ class RedParse
200
2234
  :ident=>/^([\[({!+*?:,]|\.{1,3}|::|=>)$/
201
2235
  }
202
2236
  =end
203
- DotOp= KeywordToken & -{ :ident=>"." }
204
- DoubleColonOp= KeywordToken & -{ :ident=>"::" }
2237
+ DotOp= KW('.') #KeywordToken & -{ :ident=>"." }
2238
+ DoubleColonOp= KW('::') #KeywordToken & -{ :ident=>"::" }
205
2239
 
206
2240
  Op=Op()
207
2241
  MODIFYASSIGNOP=Op( /^(([^=])\2|[^<>=!])=$/, true )
208
2242
  NONASSIGNOP=Op( /([^=]|[<>=!]=)$/)
209
2243
  KW_Op= #some of these ought to be regular operators, fer gosh sake
210
- Op(/^((![=~])|\.\.\.?|=>)$/,true)|Op(/^(#{BINOP_KEYWORDS.join('|')})$/)
2244
+ Op(/^(![=~]|\.\.\.?|=>)$/,true)|Op(/^(#{BINOP_KEYWORDS.join('|')})$/)
211
2245
 
212
2246
  EPSILON=Float::EPSILON*10_000_000 #this should be <<1 and >0
213
2247
  fail unless 1+EPSILON>1
214
2248
  fail unless EPSILON<0.1
215
2249
 
216
2250
  def left_op_higher(op,op2)
217
- # (Op|KeywordOp|KeywordOp2|ASSIGNOP===op2) or return true
218
2251
  KeywordToken===op2 or OperatorToken===op2 or return true
219
2252
  rightprec=@precedence[op2.to_s] or return true
220
- #or fail "unrecognized right operator: #{op2.inspect}"
221
2253
  rightprec+=EPSILON if @RIGHT_ASSOCIATIVE[op2.to_s]
222
2254
  return @precedence[op.to_s]>=rightprec
223
2255
  end
224
2256
 
225
- LowerOp= proc{|parser,op2| parser.left_op_higher(parser[-3],op2) }
2257
+ # LowerOp= proc{|parser,op2| parser.left_op_higher(parser[-3],op2) }
2258
+ def lower_op
2259
+ return @lower_op if defined? @lower_op
2260
+ lower_op=item_that{|op| left_op_higher(@stack[-3],op) }
2261
+ lower_op=(LOWEST_OP|(~VALUELIKE_LA & lower_op)).la
2262
+ def lower_op.inspect; "lower_op" end
2263
+ @lower_op=lower_op
2264
+ end
226
2265
 
227
- def dont_postpone_semi
228
- @dps||=~wants_semi_context
2266
+ #this is a hack, should use graphcopy to search for Deferreds and replace with double-Deferred as below
2267
+ def item_that(*a,&b)
2268
+ if defined? @generating_parse_tables
2269
+ huh unless b
2270
+ #double supers, one of them in a block executed after this method returns....
2271
+ #man that's weird
2272
+ super(*a){|ob| @saw_item_that[[super(*a,&b),ob]]=true}
2273
+ else
2274
+ super(*a,&b) #and then here's another
2275
+ end
229
2276
  end
2277
+
230
2278
  WANTS_SEMI=%w[while until if unless
231
2279
  def case when in rescue
232
2280
  elsif class module << => . ::
233
2281
  ]
234
2282
  def wants_semi_context
235
- Op('<<')|KW(/^(#{WANTS_SEMI.map{|ws| Regexp.quote ws }.join('|')})$/)
2283
+ Op(/^(<<|=>|\.|::)$/)|KW(/^(#{WANTS_SEMI.map{|ws| Regexp.quote ws }.join('|')})$/)
2284
+ end
2285
+ def dont_postpone_semi
2286
+ @dps||=~wants_semi_context
236
2287
  end
237
2288
 
238
- NeverBlockKeyword=MethNameToken&-{:ident=>/^(return|break|next)$/}
239
- FakeBegin=KW('(')&-{:not_real? =>true}
240
- FakeEnd=KW(')')&-{:not_real? =>true}
2289
+ #NeverBlockKeyword=MethNameToken&-{:ident=>/^(return|break|next)$/}
2290
+ #FakeBegin=KW('(')&-{:not_real? =>true}
2291
+ #FakeEnd=KW(')')&-{:not_real? =>true}
241
2292
 
242
2293
  #rule format:
243
2294
  # -[syntax pattern_matchers.+, lookahead.-]>>node type
244
2295
 
245
- DotCall=proc{|stack|
2296
+ DotCall=stack_monkey("DotCall",4,CallNode){|stack|
2297
+ left,dot=*stack.slice!(-4..-3)
246
2298
  right=stack[-2]
247
- left,bogus=*stack.slice!(-4..-3)
248
2299
 
2300
+ right.startline=left.startline
249
2301
  right.set_receiver! left
250
2302
  }
251
2303
 
252
- Lvalue=(VarNameToken|CallSiteNode|BracketsGetNode|CommaOpNode|
253
- ParenedNode|ConstantNode|UnaryStarNode)&-{:lvalue? =>true}
2304
+ Lvalue=(VarNode|CallSiteNode|BracketsGetNode|CommaOpNode|
2305
+ ParenedNode|ConstantNode|UnaryStarNode)&-{:lvalue =>true}
254
2306
 
255
- BareMethod=MethNameToken|LiteralNode&-{:val=>Symbol|StringNode}
2307
+ BareMethod=MethNameToken|(LiteralNode&-{:bare_method=>true})
256
2308
 
257
2309
  BEGINWORDLIST=RubyLexer::BEGINWORDLIST + %w"( [ {"
258
2310
  ENDWORDLIST=%w"end ) ] }"
259
- BEGIN2END={"{"=>"}", "("=>")", "["=>"]", }
260
- endword="end"
261
- RubyLexer::BEGINWORDLIST.each{|bw| BEGIN2END[bw]=endword }
2311
+ ENDWORDS=ENDWORDLIST.map{|x| Regexp.quote x}.join('|')
2312
+ BEGINWORDS=RubyLexer::BEGINWORDS
2313
+ INNERBOUNDINGWORDS=RubyLexer::INNERBOUNDINGWORDS
2314
+
2315
+ BEGIN2END={"{"=>"}", "("=>")", "["=>"]", BEGINWORDS=>"end"}
262
2316
  def beginsendsmatcher
263
2317
  @bem||=
264
- /^(#{(BEGINWORDLIST+ENDWORDLIST).map{|x| Regexp.quote x}.join('|')})$/
2318
+ /^(#{BEGINWORDS}|#{ENDWORDS})$/
265
2319
  end
266
2320
 
267
- MULTIASSIGN=UnaryStarNode|CommaOpNode|(ParenedNode&-{:size=>1})
268
- WITHCOMMAS=UnaryStarNode|CommaOpNode|
269
- (CallSiteNode&-{:real_parens=>false, :args=>-{:size=>~0.reg}}) #|(ParenedNode&-{:size=>~1.reg})
2321
+ MULTIASSIGN=UnaryStarNode|CommaOpNode|ParenedNode
2322
+ WITHCOMMAS=UnaryStarNode|CommaOpNode|(CallSiteNode&-{:with_commas=>true})
2323
+ #(CallSiteNode&-{:real_parens=>false, :args=>-{:size=>~0.reg}})
270
2324
 
271
2325
  BEGINAFTEREQUALS=
272
- ParenedNode&
273
- -{:size =>~1.reg, :op? =>NilClass|FalseClass, :after_equals =>nil}&
274
- (-{:body=>item_that.size>0}|-{:rescues=>item_that.size>0}|-{:ensures=>~NilClass})
275
- # item_that{|x| x.body.size+x.rescues.size > 0 or x.ensures }
2326
+ BeginNode&
2327
+ -{:after_equals =>nil}&-{:non_empty=>true}
2328
+ BEGINAFTEREQUALS_MARKED=
2329
+ BeginNode&
2330
+ -{:after_equals =>true}&-{:non_empty=>true}
276
2331
 
277
- # ASSIGN_COMMA=Op(',',true)&-{:comma_type=>Symbol}
278
- LHS_COMMA=Op(',',true)&-{:comma_type => :lhs}
279
- RHS_COMMA=Op(',',true)&-{:comma_type => :rhs}
280
- PARAM_COMMA=Op(',',true)&-{:comma_type => :param}
2332
+ LHS_COMMA=Op('lhs,',true)#&-{:tag => :lhs}
2333
+ RHS_COMMA=Op('rhs,',true)#&-{:tag => :rhs}
2334
+ #PARAM_COMMA=Op('param,',true)#&-{:tag => :param}
281
2335
  FUNCLIKE_KEYWORD=KeywordToken&-{:ident=>RubyLexer::FUNCLIKE_KEYWORDS}
2336
+ IGN_SEMI_BEFORE=KW(/^(#{RubyLexer::INNERBOUNDINGWORDS.gsub(/(rescue|then)\|/,'')[1...-1]}|end|[)}\]])$/)|EoiToken
2337
+ IGN_SEMI_AFTER=KW(/^(begin|[;:({|]|then|do|else|ensure)$/)|BlockFormalsNode
2338
+
2339
+ #for use in lookback patterns
2340
+ OPERATORLIKE_LB=OperatorToken|
2341
+ KW(/^(not | defined\? | .*[@,] | [ ~ ! ; \( \[ \{ ? : ] | \.{1,3} | :: | => | ![=~])$/x)|
2342
+ KW(%r{^( \*\*? | << | >> | &&? | \|\|? | \^ | % | / | - | \+ )?=$}x)|
2343
+ KW(BEGINWORDS)|KW(/^#{INNERBOUNDINGWORDS}$/)|RescueHeaderNode|StartToken|
2344
+ GoalPostToken|BlockFormalsNode
2345
+
2346
+ #for use in lookahead patterns
2347
+ VALUELIKE_LA=KW(RubyLexer::VARLIKE_KEYWORDS)|NumberToken|SymbolToken|StringToken|UNOP|DEFOP|
2348
+ KW(/^( \( | \{ | )$/x)|VarNameToken|MethNameToken|HerePlaceholderToken|KW(BEGINWORDS)|FUNCLIKE_KEYWORD
2349
+ LOWEST_OP=KW(/^(#{ENDWORDS})$/)|KW(/^#{INNERBOUNDINGWORDS.sub('rescue|','')}$/)|EoiToken|GoalPostToken
2350
+
2351
+ RESCUE_BODY=-[Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,]
2352
+
2353
+ RESCUE_OP=Op('rescue')|(KW('rescue')&-{:infix=>true})
2354
+
2355
+ RESCUE_KW=KW('rescue')&-{:infix=>nil}
2356
+
2357
+ inspect_constant_names
282
2358
 
283
2359
  def RULES
2360
+ lower_op= lower_op()
2361
+
2362
+ [-[StartToken.lb, Expr.-, EoiToken.la]>>:accept,
2363
+ -[EoiToken]>>:error,
2364
+ ]+
2365
+
284
2366
  #these must be the lowest possible priority, and hence first in the rules list
285
2367
  BEGIN2END.map{|_beg,_end|
286
- -[KW(_beg), KW(beginsendsmatcher).~.*, KW(_end), KW(/^(do|\{)$/).~.la]>>MisparsedNode
2368
+ -[KW(_beg), (KW(_beg)|KW(_end)).~.*, KW(_end), KW(/^(do|\{)$/).~.la]>>MisparsedNode
287
2369
  }+
288
2370
 
289
2371
  [
290
- -[UNOP, Value, LowerOp]>>UnOpNode,
291
- -[DEFOP, ParenedNode&-{:size=>1}]>>UnOpNode,
292
- -[Op('*@'), VarNameToken|ValueNode, LowerOp]>>UnaryStarNode,
2372
+ -[UNOP, Expr, lower_op]>>UnOpNode,
2373
+ -[DEFOP, ParenedNode]>>UnOpNode,
2374
+ -[Op(/^(?:unary|lhs|rhs)\*$/), ValueNode, lower_op]>>UnaryStarNode,
293
2375
 
294
- -[Op('=',true)|KW(/^(rescue|when|\[)$/)|-{:comma_type=>:call.reg|:array|:param|:rhs},
295
- Op('*@'), VarNameToken|ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
2376
+ -[Op('=',true)|KW(/^(rescue|when|\[)$/)|Op(/,$/,true),
2377
+ Op(/^(?:unary|rhs)\*$/), ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
296
2378
  -[MethNameToken|FUNCLIKE_KEYWORD, KW('('),
297
- Op('*@'), VarNameToken|ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
298
- # -[KW('[')|-{:comma_type=>:call.reg|:array},
299
- # Op('*@'), VarNameToken|ValueNode, Op('=',true).la]>>:shift,
2379
+ Op(/^(?:unary|rhs)\*$/), ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
300
2380
  #star should not be used in an lhs if an rhs or param list context is available to eat it.
301
- #(including param lists for keywords such as return,break,next,continue,rescue,yield,when)
2381
+ #(including param lists for keywords such as return,break,next,rescue,yield,when)
302
2382
 
303
- -[Op('*@'), (GoalPostNode|KW(/^(in|[=)|,;])$/)).la]>>DanglingStarNode, #dangling *
304
- -[',', (GoalPostNode|KW(/^(in|[=)|;])$/)).la]>> #dangling ,
305
- proc{|stack|
2383
+ #hmmm.... | in char classes below looks useless (predates GoalPostToken)
2384
+ -[Op(/^(?:unary|lhs)\*$/), (GoalPostToken|Op(/,$/,true)|KW(/^(in|[=)|;])$/)).la]>>DanglingStarNode, #dangling *
2385
+ -[Op(/,$/,true), (GoalPostToken|KW(/^(in|[=)|;])$/)).la]>> #dangling ,
2386
+ stack_monkey("DanglingComma",1,DanglingCommaNode){|stack|
306
2387
  dcomma=DanglingCommaNode.new
307
2388
  dcomma.offset=stack.last.offset
308
2389
  stack.push dcomma, stack.pop
309
2390
  },
310
2391
 
311
- -[Value, Op|KW_Op, Value, LowerOp]>>RawOpNode, #most operators
2392
+ -[Expr, Op|KW_Op, Expr, lower_op]>>RawOpNode, #most operators
312
2393
 
313
2394
  #assignment
314
- -[Lvalue, MODIFYASSIGNOP, Value, LowerOp]>>AssignNode,
315
- -[Lvalue, Op('=',true), AssignmentRhsNode, LowerOp]>>AssignNode,
316
- -[Op('=',true).lb, Value, LowerOp]>>AssignmentRhsNode,
317
- #was: -[AssignmentRhsListStartToken, Value, AssignmentRhsListEndToken]>>AssignmentRhsNode,
318
- -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
319
- Op('rescue3',true), Value, LowerOp]>>AssignNode,
320
- -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
2395
+ -[Lvalue, MODIFYASSIGNOP, Expr, lower_op]>>AssignNode,
2396
+ -[Lvalue, Op('=',true), AssignmentRhsNode, lower_op]>>AssignNode,
2397
+ -[Op('=',true).lb, Expr, lower_op]>>AssignmentRhsNode,
2398
+
2399
+ # a = b rescue c acts like a ternary,,,
2400
+ #provided that both a and b are not multiple and b
2401
+ #(if it is a parenless callsite) has just 1 param
2402
+ -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
2403
+ Op('rescue3',true), Expr, lower_op]>>AssignNode,
2404
+ -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
321
2405
  Op('rescue3',true).la]>>:shift,
322
- -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
323
- Op('rescue',true).la] >>proc{|stack|
2406
+ -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
2407
+ RESCUE_OP.la] >>
2408
+ stack_monkey("rescue3",1,Op('rescue3',true)){|stack|
324
2409
  resc=stack.last.dup
325
2410
  resc.ident += '3'
326
2411
  stack[-1]=resc
327
2412
  },
328
- # a = b rescue c acts like a ternary,,,
329
- #provided that both a and b are not multiple and b
330
- #(if it is a parenless callsite) has just 1 param
331
-
332
- # -[Op('=',true), ~WITHCOMMAS, Op('rescue',true).la]>>:shift,
333
2413
  #relative precedence of = and rescue are to be inverted if rescue
334
2414
  #is to the right and assignment is not multiple.
335
2415
 
336
- -[Op('=',true).~.lb, OB, Op('=',true), Value, RHS_COMMA.la]>>:shift,
337
- -[RHS_COMMA.lb, Lvalue, Op('=',true), Value, RHS_COMMA.la ]>>AssignNode,
338
- -[ValueNode|VarNameToken, LHS_COMMA, ValueNode|VarNameToken, Op('=',true).la]>>CommaOpNode,
2416
+ #if assignment rhs contains commas, don't reduce til they've been read
2417
+ #(unless we're already on an rhs)
2418
+ -[(Op('=',true)|Expr).~.lb, Lvalue, Op('=',true), Expr, RHS_COMMA.la]>>:shift,
2419
+ -[RHS_COMMA.lb, Lvalue, Op('=',true), Expr, RHS_COMMA.la ]>>AssignNode,
2420
+ -[ValueNode, LHS_COMMA, ValueNode, Op('=',true).la]>>CommaOpNode,
339
2421
  #relative precedence of = and lhs/rhs , are to be inverted.
340
2422
 
341
- -[KW(',')&-{:comma_type=>:lhs}, (UnaryStarNode|ParenedNode)&~-{:after_comma =>true}, Op('=',true)]>>
342
- proc{|stack| stack[-3].after_comma=true}, #mebbe this should be a lexer hack
343
2423
  #mark parentheses and unary stars that come after lhs commas
2424
+ -[LHS_COMMA, (UnaryStarNode|ParenedNode)&~-{:after_comma =>true}, Op('=',true)]>>
2425
+ stack_monkey("after_comma",3,(UnaryStarNode|ParenedNode)&-{:after_comma =>true}){|stack|
2426
+ stack[-3].after_comma=true},
2427
+ #mebbe this should be a lexer hack?
344
2428
 
345
- #-[Value, DotOp|DoubleColonOp, MethNameToken,
346
- # ASSIGNOP, Value, LowerOp]>>AccessorAssignNode,
347
-
348
- -[(MethNameToken|FUNCLIKE_KEYWORD).~.lb, '(', Value, ')']>>ParenedNode,
349
- -[(MethNameToken|FUNCLIKE_KEYWORD).~.lb, '(', ')']>>VarLikeNode, #alias for nil
2429
+ -[#(OPERATORLIKE_LB&~(MethNameToken|FUNCLIKE_KEYWORD)).lb,
2430
+ '(', Expr, KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true})]>>ParenedNode,
2431
+ -[#(OPERATORLIKE_LB&~(MethNameToken|FUNCLIKE_KEYWORD)).lb,
2432
+ '(', KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true})]>>VarLikeNode, #(), alias for nil
350
2433
 
351
- # -[Value, KeywordOp, Value, LowerOp]>>KeywordOpNode,
352
- -[Op('=',true).~.lb, Value, Op('rescue',true), Value, LowerOp]>>ParenedNode,
2434
+ -[#(OPERATORLIKE_LB&~Op('=',true)).lb,
2435
+ Expr, RESCUE_OP, Expr, lower_op]>>RescueOpNode,
353
2436
 
354
2437
  #dot and double-colon
355
- -[DoubleColonOp, VarNameToken, LowerOp]>>ConstantNode,#unary ::
356
- -[Value, DotOp, CallNode, LowerOp]>>DotCall, #binary .
357
- -[Value, DoubleColonOp, CallNode, LowerOp]>>DotCall, #binary ::
358
- -[Value, DoubleColonOp, VarNameToken, LowerOp]>>ConstantNode,#binary ::
2438
+ -[DoubleColonOp, VarNode, lower_op]>>ConstantNode,#unary ::
2439
+ -[Expr, DotOp, CallNode, lower_op]>>DotCall, #binary .
2440
+ -[Expr, DoubleColonOp, CallNode, lower_op]>>DotCall, #binary ::
2441
+ -[Expr, DoubleColonOp, VarNode, lower_op]>>ConstantNode,#binary ::
359
2442
 
360
- -[Value, "?", Value, ":", Value, LowerOp]>>TernaryNode,
2443
+ -[Expr, "?", Expr, ":", Expr, lower_op]>>TernaryNode,
361
2444
 
362
- # -[Value, /^\.\.\.?$/, Value, LowerOp]>>RangeNode,
363
2445
 
364
- -[MethNameToken, '(', Value.-, ')', BlockNode.-, KW(/^(do|\{)$/).~.la]>>CallNode,
365
- -[FUNCLIKE_KEYWORD, '(', Value.-, ')',
366
- BlockNode.-, KW(/^(do|\{)$/).~.la]>>KWCallNode,
2446
+ -[MethNameToken, '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>CallNode,
2447
+ -[FUNCLIKE_KEYWORD, '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>KWCallNode,
367
2448
 
368
- -[ValueNode|VarNameToken, ',', ValueNode|VarNameToken, LowerOp]>>CommaOpNode,
2449
+ -[ValueNode, Op(/,$/,true), ValueNode, lower_op]>>CommaOpNode,
369
2450
 
370
- -[dont_postpone_semi.lb,
371
- Value, ';', Value, LowerOp]>>SequenceNode,
2451
+ -[(OPERATORLIKE_LB&dont_postpone_semi).lb,
2452
+ Expr, ';', Expr, lower_op]>>SequenceNode,
372
2453
 
373
- # -[Value, '=>', Value, LowerOp]>>ArrowOpNode,
374
2454
 
375
- -[KW(')').~.lb, '{', (CommaOpNode|ArrowOpNode).-, '}']>>HashLiteralNode,
2455
+ -[#(OPERATORLIKE_LB&~KW(')')).lb,
2456
+ '{', (CommaOpNode|ArrowOpNode).-, '}']>>HashLiteralNode, #-40
376
2457
 
377
- # -[CallSiteNode.~.lb, '{', Value, '}']>>HashLiteralNode,
378
-
379
- # -[KW(')').lb, '{', BlockFormalsNode.-, Value.-, '}']>>BlockNode,
380
- -[KW(')').lb, 'do', BlockFormalsNode.-, Value.-, 'end']>>BlockNode,
2458
+ -[KW(')').lb, 'do', BlockFormalsNode.-, Expr.-, 'end']>>BlockNode,
2459
+ #this does {} as well... converted to do...end
381
2460
  #rubylexer handles the 'low precedence' of do...end
382
2461
 
383
- -[GoalPostNode, Value.-, GoalPostNode]>>BlockFormalsNode,
2462
+ -[GoalPostToken, Expr.-, GoalPostToken]>>BlockFormalsNode,
384
2463
  #rubylexer disambiguated operator vs keyword '|'
385
2464
 
386
- -[/^(while|until)$/, Value, /^([:;]|do)$/, Value.-, 'end']>>LoopNode,
2465
+ -[/^(while|until)$/, Expr, /^([:;]|do)$/, Expr.-, 'end']>>LoopNode,
387
2466
 
388
- -[/^(if|unless)$/, Value, /^(;|then|:)$/,
389
- Value.-, ElsifNode.*, ElseNode.-, 'end'
2467
+ -[/^(if|unless)$/, Expr, /^(;|then|:)$/,
2468
+ Expr.-, ElsifNode.*, ElseNode.-, 'end'
390
2469
  ]>>IfNode,
391
2470
 
392
- -['else', Value.-, KW(/^(ensure|end)$/).la]>>ElseNode,
2471
+ -['else', Expr.-, KW(/^(ensure|end)$/).la]>>ElseNode,
393
2472
 
394
- -['elsif', Value, /^(;|then|:)$/, Value.-,
395
- KW(/^e(nd|ls(e|if))$/).la
2473
+ -['elsif', Expr, /^(;|then|:)$/, Expr.-,
2474
+ KW(/^(end|else|elsif)$/).la
396
2475
  ]>>ElsifNode,
397
2476
 
398
- -['module', ConstantNode|VarNameToken, KW(';'), Value.-, 'end']>>ModuleNode,
399
- # -['module', ConstantNode|VarNameToken, KW(/^(;|::)$/).~.la]>>
400
- # proc{|stack| #insert ; at end of module header if none was present
2477
+ # -['module', ConstantNode|VarNode, KW(/^(;|::)$/).~.la]>>
2478
+ # stack_monkey(1,KW(';')){|stack| #insert ; at end of module header if none was present
401
2479
  # stack.push KeywordToken.new(';'), stack.pop
402
2480
  # },
403
- -['class', Value, ';', Value.-, 'end']>>ClassNode,
404
- -['class', Value, Op('<'), Value, KW(';').~.la]>>:shift,
405
- -['class', Op('<<'), Value, ';', Value.-, 'end']>>MetaClassNode,
2481
+ -['module', ConstantNode|VarNode, ';', RESCUE_BODY, 'end']>>ModuleNode,
2482
+ -['class', Expr, ';', RESCUE_BODY, 'end']>>ClassNode,
2483
+ -['class', Expr, Op('<'), Expr, KW(';').~.la]>>:shift,
2484
+ -['class', Op('<<'), Expr, ';', RESCUE_BODY, 'end']>>MetaClassNode, #-30
406
2485
 
407
- -['alias', BareMethod|VarNameToken, BareMethod|VarNameToken]>>AliasNode,
2486
+ -['alias', BareMethod|VarNode, BareMethod|VarNode]>>AliasNode,
408
2487
  -['undef', BareMethod]>>UndefNode,
409
- -[UndefNode, ',', BareMethod]>>UndefNode,
2488
+ -[UndefNode, Op(',',true), BareMethod]>>UndefNode,
410
2489
 
411
- -['def', CallSiteNode, Op('=').-, KW(';'),
412
- Value.-, RescueNode.*, ElseNode.-, EnsureNode.-,
2490
+ -['def', CallSiteNode, Op('=').-, KW(';'), RESCUE_BODY,
2491
+ # Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,
413
2492
  'end'
414
2493
  ]>>MethodNode,
415
2494
 
416
- -['begin',
417
- Value.-, RescueNode.*, ElseNode.-, EnsureNode.-,
2495
+ -['begin', RESCUE_BODY,
2496
+ # Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,
418
2497
  'end'
419
- ]>>ParenedNode,
2498
+ ]>>BeginNode,
420
2499
 
421
- -[Op('=',true), BEGINAFTEREQUALS, Op('rescue',true).la]>>
422
- proc{ |stack| stack[-2].after_equals=true },
2500
+ -[Op('=',true), BEGINAFTEREQUALS, RESCUE_OP.la]>>
2501
+ stack_monkey("begin after equals",2,BEGINAFTEREQUALS_MARKED){ |stack| stack[-2].after_equals=true },
423
2502
  #this is bs. all for an extra :begin in the parsetree
424
2503
 
425
- -[(KW(/^(;|begin)$/)|ParenedNode|RescueNode).lb,
426
- 'rescue', KW('=>').-, Value.-, /^([:;]|then)$/,
2504
+ -[(KW(/^(;|begin)$/)|RescueNode).lb, #ParenedNode|RescueOpNode|BeginNode used to be here too
2505
+ RESCUE_KW, KW('=>').-, Expr.-, /^([:;]|then)$/,
427
2506
  ]>>RescueHeaderNode,
428
- -[ RescueHeaderNode, Value.-, KW(';').-, KW(/^(rescue|else|ensure|end)$/).la
2507
+ -[ RescueHeaderNode, Expr.-, KW(';').-, (KW(/^(else|ensure|end)$/)|RESCUE_KW).la
429
2508
  ]>>RescueNode,
430
2509
 
431
- -['ensure', Value.-, KW('end').la]>>EnsureNode,
2510
+ -['ensure', Expr.-, KW('end').la]>>EnsureNode,
432
2511
 
433
- -['[', Value.-, ']']>>ArrayLiteralNode,
2512
+ -['[', Expr.-, ']']>>ArrayLiteralNode, #-20
434
2513
 
435
- -[Value, '[', Value.-, ']']>>BracketsGetNode,
2514
+ -[Expr, '[', Expr.-, ']']>>BracketsGetNode,
436
2515
 
437
- -[HereDocNode, StringToken.*, StringToken, StringToken.~.la]>>StringCatNode,
438
- -[(StringToken|HereDocNode).~.lb, StringToken.*, StringToken, StringToken, StringToken.~.la]>>StringCatNode,
439
- -[(StringToken|HereDocNode).~.lb, StringToken, StringToken.~.la]>>StringNode, #includes regexp, wordlist, backquotes
2516
+ -[HereDocNode, StringToken+1, StringToken.~.la]>>StringCatNode,
2517
+ -[(OPERATORLIKE_LB&~(StringToken|HereDocNode)).lb, StringToken+2, StringToken.~.la]>>StringCatNode,
2518
+ -[(OPERATORLIKE_LB&~(StringToken|HereDocNode)).lb, StringToken, StringToken.~.la]>>StringNode, #includes regexp, wordlist, backquotes
440
2519
 
441
- -['case', Value.-, KW(/^[:;]$/).-, WhenNode.*, ElseNode.-, 'end']>>CaseNode,
2520
+ -['case', Expr.-, KW(';').-, WhenNode.*, ElseNode.-, 'end']>>CaseNode,
442
2521
 
443
- -['when', Value, /^([:;]|then)$/, Value.-,
2522
+ -['when', Expr, /^([:;]|then)$/, Expr.-,
444
2523
  KW(/^(when|else|end)$/).la
445
2524
  ]>>WhenNode,
446
2525
 
447
- -['for', Value, 'in', Value, /^([:;]|do)$/, Value.-, 'end']>>ForNode,
2526
+ -['for', Expr, 'in', Expr, /^([:;]|do)$/, Expr.-, 'end']>>ForNode,
448
2527
 
449
2528
  #semicolon cleanup....
450
- -[dont_postpone_semi.lb,
451
- Value, ';',
452
- (KW(/^(#{RubyLexer::INNERBOUNDINGWORDS.gsub(/(rescue|then)\|/,'')}|end|[)}\]])$/)|EoiToken).la
453
- ]>>proc{|stack| stack.delete_at -2 },
454
- -[Value, ';', KW('then').la
455
- ]>>proc{|stack| stack.delete_at -2 },
456
- -[dont_postpone_semi.lb, Value, ';', RescueNode
457
- ]>>proc{|stack| stack.delete_at -3 },
458
- -[(KW(/^(begin|[;:({|]|then|do|else|ensure)$/)|StartNode|RescueHeaderNode).lb, ';'
459
- ]>>proc{|stack| stack.delete_at -2 },
2529
+ -[(OPERATORLIKE_LB&dont_postpone_semi).lb,Expr, ';', IGN_SEMI_BEFORE.la] \
2530
+ >>delete_monkey(2,"semi_cleanup_before_ISB"),
2531
+ -[Expr, ';', KW('then').la] >>delete_monkey(2,"semi_cleanup_before_then"),
2532
+ -[dont_postpone_semi.lb, Expr, ';', RescueNode] >>delete_monkey(3,"semi_cleanup_before_rescue"), #-10
2533
+ -[IGN_SEMI_AFTER.lb, ';'] >>delete_monkey(2,"semi_cleanup_after_oplike"),
2534
+ -[(StartToken|RescueHeaderNode).lb, ';' ] >>delete_monkey(2,"semi_cleanup_after_rescue"),
460
2535
  #this rule is somewhat more forgiving than matz' parser...
461
2536
  #not all semicolons after :, (, and { keywords should
462
2537
  #be ignored. some should cause syntax errors.
463
2538
 
464
2539
 
465
2540
  #comma cleanup....
466
- -[',', KW(/^[}\]]$/).la]>>proc{|stack| stack.delete_at -2},
2541
+ -[Op(/,$/,true), KW(/^([}\]])$/).la] >>delete_monkey(2, "comma_cleanup"),
467
2542
  #likewise, this is somewhat too forgiving.
468
2543
  #some commas before } or ] should cause syntax errors
469
2544
 
470
- #multiple assignment.... (handled in a subsequent stage?)
471
- #(cause it requires that the relative priorities of = and , be reversed!)
472
-
473
-
474
2545
  #turn lvalues into rvalues if not followed by an assignop
475
- -[-{:lvalue? =>true}, (Op('=',true)|MODIFYASSIGNOP).~.la]>>proc{|stack| stack[-2].lvalue=nil},
2546
+ -[-{:lvalue =>true}, (Op('=',true)|MODIFYASSIGNOP|LHS_COMMA).~.la]>>
2547
+ stack_monkey("lval2rval",2,-{:lvalue =>nil}){|stack|
2548
+ stack[-2].lvalue=nil
2549
+ },
476
2550
 
477
2551
  #expand the = into a separate token in calls to settors (after . or ::).
478
2552
  #but not in method headers
479
- -[KW('def').~.lb, Value, DotOp|DoubleColonOp,
480
- (MethNameToken&-{:ident=>/^[a-z_][a-z0-9_]*=$/i}).la]>>
481
- proc{|stack|
2553
+ -[(OPERATORLIKE_LB&~KW('def')).lb, Expr, DotOp|DoubleColonOp,
2554
+ (MethNameToken&-{:has_equals=>true}).la]>>
2555
+ stack_monkey("expand_equals",1,CallNode){|stack|
482
2556
  methname=stack.pop
483
2557
  methname.ident.chomp!('=')
484
2558
  offset=methname.offset+methname.ident.size
@@ -493,48 +2567,63 @@ class RedParse
493
2567
  #lexer does the wrong thing with -22**44.5, making the - part
494
2568
  #of the first number token. it's actually lower precedence than
495
2569
  #**... this rule fixes that problem.
496
- -[NumberToken&-{:ident=>/\A-/}, Op('**').la]>>
497
- proc{|stack|
498
- neg_op=OperatorToken.new("-@",stack[-2].offset)
499
- neg_op.unary=true
500
- stack[-2,0]=neg_op
501
- stack[-2].ident.sub!(/\A-/,'')
502
- stack[-2].offset+=1
2570
+ #in theory, unary - is lower precedence than ., ::, and [] as well, but
2571
+ #that appears not to apply to unary - in numeric tokens
2572
+ -[NumberToken&-{:negative=>true}, Op('**').la]>>
2573
+ stack_monkey("fix_neg_exp",2,Op("-@",true)){|stack|
2574
+ #neg_op.unary=true
2575
+ num=stack[-2]
2576
+ op=OperatorToken.new("-@",num.offset)
2577
+ # op.startline=num.startline
2578
+ stack[-2,0]=op
2579
+ num.ident.sub!(/\A-/,'')
2580
+ num.offset+=1
503
2581
  },
504
2582
 
505
2583
  #treat these keywords like (rvalue) variables.
506
- -[/^(nil|false|true|__FILE__|__LINE__|self)$/]>>VarLikeNode,
2584
+ -[RubyLexer::VARLIKE_KEYWORDS]>>VarLikeNode,
507
2585
 
508
2586
  #here docs
509
2587
  -[HerePlaceholderToken]>>HereDocNode,
510
- -[HereBodyToken]>>proc{|stack|
511
- stack.delete_at(-2)#.instance_eval{@headtok}.node.saw_body!
512
- },
2588
+ -[HereBodyToken.la]>>delete_monkey(1,"delete_here_body"),
2589
+ ##this is rediculous. this should be a lexer hack?
2590
+
2591
+ -[VarNameToken]>>VarNode,
2592
+
513
2593
 
514
2594
  ]
515
2595
  end
516
2596
 
2597
+ if defined? END_ATTACK
2598
+ module Reducer; end
2599
+ include Reducer
2600
+ end
517
2601
 
518
-
519
- def initialize(input,name="(eval)",line=1,lvars=[])
2602
+ def initialize(input,name="(eval)",line=1,lvars=[],options={:rubyversion=>1.8})
2603
+ @rubyversion=options[:rubyversion]
520
2604
  if Array===input
521
2605
  def input.get1token; shift end
522
2606
  @lexer=input
523
2607
  else
524
- @lexer=RubyLexer.new(name,input,line)
2608
+ @lexer=RubyLexer.new(name,input,line,0,:rubyversion=>@rubyversion)
525
2609
  lvars.each{|lvar| @lexer.localvars[lvar]=true }
526
2610
  end
527
2611
  @filename=name
528
2612
  @min_sizes={}
529
2613
  @compiled_rules={}
530
2614
  @moretokens=[]
531
- @unary_or_binary_op=/^[-+&*]$/
532
- @rules=self.RULES
2615
+ @unary_or_binary_op=/^[-+]$/
2616
+ # @rules=self.expaneded_RULES
533
2617
  @precedence=self.PRECEDENCE
534
2618
  @RIGHT_ASSOCIATIVE=self.RIGHT_ASSOCIATIVE
2619
+ if defined? END_ATTACK
2620
+ compile
2621
+ end
2622
+ @saw_item_that=nil
535
2623
  end
536
2624
 
537
2625
  attr_accessor :lexer
2626
+ attr :rubyversion
538
2627
 
539
2628
  def get_token(recursing=false)
540
2629
  unless @moretokens.empty?
@@ -543,78 +2632,60 @@ class RedParse
543
2632
  return @last_token
544
2633
  end
545
2634
 
2635
+ rpt=ENV['RAW_PRINT_TOKENS']
546
2636
  begin
547
2637
  result=@lexer.get1token or break
548
- p result if ENV['RAW_PRINT_TOKENS']
2638
+ p result if rpt
549
2639
 
550
- #set token's line if wanted
551
- result.line||=@line if result.respond_to? :line=
2640
+ #set token's line
2641
+ result.startline= @endline||=1
2642
+ result.endline||=@endline if result.respond_to? :endline=
552
2643
 
553
2644
  if result.respond_to?(:as) and as=result.as
554
- result=KeywordToken.new(as,result.offset)
555
- result.not_real!
2645
+ #result=make_kw(as,result.offset)
2646
+ #result.originally=result.ident
2647
+ if OperatorToken===result #or KeywordToken===result
2648
+ result=result.dup
2649
+ result.ident=as
2650
+ else
2651
+ result=make_kw(as,result.offset)
2652
+ end
2653
+ result.not_real! if result.respond_to? :not_real!
556
2654
  else
557
2655
 
558
2656
  case result
559
- #=begin
560
- when ImplicitParamListStartToken: #treat it like (
561
- result=KeywordToken.new('(', result.offset)
562
- result.not_real!
563
- #=end
564
- #=begin
565
- when ImplicitParamListEndToken:
566
- result=KeywordToken.new(')', result.offset)
567
- result.not_real!
568
- #=end
569
- # when AssignmentRhsListStartToken, AssignmentRhsListEndToken:
570
- #do nothing, pass it thru
571
- #=begin
572
- when NewlineToken:
573
- result=KeywordToken.new(';',result.offset)
574
- #=end
575
- when FileAndLineToken: #so __FILE__ and __LINE__ can know what their values are
2657
+ when FileAndLineToken #so __FILE__ and __LINE__ can know what their values are
576
2658
  @file=result.file
577
- @line=result.line
2659
+ @endline=result.line
578
2660
  redo
579
- when NoWsToken:
580
- #rubylexer disambiguates array literal from
581
- #call to [] or []= method with a preceding NoWsToken...
582
- #kind of a dumb interface.
583
- result=get_token(true)
584
- result.ident=='[' and result.extend BracketsCall
585
-
586
2661
 
587
- when OperatorToken:
588
- if @unary_or_binary_op===result.ident and result.unary
2662
+ when OperatorToken
2663
+ if @unary_or_binary_op===result.ident and result.unary || result.tag==:unary
589
2664
  result=result.dup
590
2665
  result.ident+="@"
591
2666
  end
592
2667
 
593
2668
  #more symbol table maintenance....
594
- when KeywordToken:
2669
+ when KeywordToken
595
2670
  case name=result.ident
596
2671
 
597
- #=begin
598
- when "do":
599
- if result.has_end?
600
- else
601
- result=KeywordToken.new(';',result.offset)
602
- end
603
- #=end
604
2672
  when /^(#{BINOP_KEYWORDS.join '|'})$/: #should be like this in rubylexer
605
2673
  result=OperatorToken.new(name,result.offset) unless result.has_end?
606
- when "|": result=GoalPostNode.new(result.offset) #is this needed still?
607
- when "__FILE__": #I wish rubylexer would handle this
2674
+ when "|"; result=GoalPostToken.new(result.offset) #is this needed still?
2675
+ when "__FILE__"; #I wish rubylexer would handle this
608
2676
  class<<result; attr_accessor :value; end
609
2677
  result.value=@file.dup
610
- when "__LINE__": #I wish rubylexer would handle this
2678
+ when "__LINE__"; #I wish rubylexer would handle this
611
2679
  class<<result; attr_accessor :value; end
612
- result.value=@line
2680
+ result.value=@endline
2681
+ else
2682
+ result=make_kw name,result.offset if defined? SPECIALIZED_KEYWORDS
2683
+ #warning, this may discard information stored in instance vars of result
613
2684
  end
614
2685
 
615
- when EoiToken: break
616
- when HereBodyToken: break
617
- when IgnoreToken: redo
2686
+ when EoiToken; break
2687
+ when HereBodyToken; break
2688
+ when IgnoreToken; redo
618
2689
  end
619
2690
  end
620
2691
  end while false
@@ -622,251 +2693,16 @@ class RedParse
622
2693
  return @last_token=result
623
2694
  end
624
2695
 
625
- def evaluate rule
626
- #dissect the rule
627
- if false
628
- rule=rule.dup
629
- lookahead_processor=(rule.pop if Proc===rule.last)
630
- node_type=rule.pop
631
- else
632
- Reg::Transform===rule or fail
633
- node_type= rule.right
634
- rule=rule.left.subregs.dup
635
- lookahead_processor=(rule.pop if Proc|::Reg::LookAhead===rule.last)
636
- lookback=rule[0]=rule[0].regs(0) if ::Reg::LookBack===rule[0]
637
- end
638
-
639
- #index of data at which to start matching
640
- i=@stack.size-1 #-1 because last element of @stack is always lookahead
641
-
642
- #I could call this a JIT compiler, but that's a bit grandiose....
643
- #more of a JIT pre-processor
644
- compiled_rule=@compiled_rules[rule]||=
645
- rule.map{|pattern|
646
- String|Regexp===pattern ? KW(pattern) : pattern
647
- }
648
-
649
- #what's the minimum @stack size this rule could match?
650
- rule_min_size=@min_sizes[compiled_rule]||=
651
- compiled_rule.inject(0){|sum,pattern|
652
- sum + pattern.itemrange.begin
653
- }
654
- i>=rule_min_size or return false
655
-
656
- matching=[]
657
-
658
- #actually try to match rule elements against each @stack element in turn
659
- compiled_rule.reverse_each{|matcher|
660
- i.zero? and fail
661
- target=matching
662
- #is this matcher optional? looping?
663
- loop= matcher.itemrange.last.to_f.infinite?
664
- optional=matcher.itemrange.first.zero?
665
- matching.unshift target=[] if loop
666
- if loop or optional
667
- matcher=matcher.regs(0)
668
- end
669
-
670
- begin
671
- if matcher===@stack[i-=1] #try match
672
- target.unshift @stack[i]
673
- else
674
- #if match failed, the whole rule fails
675
- #unless this match was optional, in which case, ignore it
676
- #but bump the data position back up, since the latest datum
677
- #didn't actually match anything.
678
- return false unless optional or loop&&!target.empty?
679
- i+=1
680
- matching.unshift nil unless loop
681
- break
682
- end
683
- end while loop
684
- }
685
-
686
- matchrange= i...-1 #what elems in @stack were matched?
687
-
688
- #give lookahead matcher (if any) a chance to fail the match
689
- case lookahead_processor
690
- when ::Reg::LookAhead:
691
- return false unless lookahead_processor.regs(0)===@stack.last
692
- when Proc:
693
- return false unless lookahead_processor[self,@stack.last]
694
- end
695
-
696
- #if there was a lookback item, don't include it in the new node
697
- if lookback
698
- matchrange= i+1...-1 #what elems in @stack were matched?
699
- matching.shift
700
- end
701
-
702
- #replace matching elements in @stack with node type found
703
- case node_type
704
- when Class
705
- node=node_type.new(*matching)
706
- node.line=@line
707
- @stack[matchrange]=[node]
708
- when Proc; node_type[@stack]
709
- when :shift; return 0
710
- else fail
711
- end
712
-
713
- return true #let caller know we found a match
714
-
715
-
716
- rescue Exception=>e
717
- puts "error (#{e}) while executing rule: #{rule.inspect}"
718
- puts e.backtrace.join("\n")
719
- raise
720
- end
721
-
722
- class ParseError<RuntimeError
723
- def initialize(msg,stack)
724
- super(msg)
725
- @stack=stack
726
- if false
727
- ranges=(1..stack.size-2).map{|i|
728
- node=stack[i]
729
- if node.respond_to? :linerange
730
- node.linerange
731
- elsif node.respond_to? :line
732
- node.line..node.line
733
- end
734
- }
735
- types=(1..stack.size-2).map{|i| stack[i].class }
736
- msg += "couldn't interpret #{types.inspect} at line ranges: #{ranges.inspect}"
737
- end
738
- super(msg)
739
- end
740
- attr :stack
741
- end
742
-
743
- def [](*args)
744
- @stack.[] *args
745
- end
746
-
747
- def []=(*args)
748
- @stack.[]= *args
2696
+ def unget_tokens(*tokens)
2697
+ @moretokens=tokens.concat @moretokens
749
2698
  end
750
2699
 
751
- def parse
752
- oldparser= Thread.current[:$RedParse_parser]
753
- Thread.current[:$RedParse_parser]||=self
754
-
755
- @stack=[StartNode.new, get_token]
756
- #last token on @stack is always implicitly the lookahead
757
- loop {
758
- #try all possible reductions
759
- shift=nil
760
- @rules.reverse_each{|rule|
761
- shift=evaluate(rule) and break
762
- }
763
- next if shift==true
764
-
765
- #no rule can match current @stack, get another token
766
- tok=get_token
767
-
768
- #are we done yet?
769
- tok.nil? or EoiToken===tok && EoiToken===@stack.last and break
770
-
771
- #shift our token onto the @stack
772
- @stack.push tok
773
- }
774
-
775
- @stack.size==2 and return NopNode.new #handle empty parse string
776
-
777
- #unless the @stack is 3 tokens,
778
- #with the last an Eoi, and first a StartNode
779
- #there was a parse error
780
- unless @stack.size==3
781
- pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
782
- top=MisparsedNode.new("(toplevel)", @stack[1...-1],'')
783
- raise ParseError.new(top.msg,@stack)
784
- end
785
- EoiToken===@stack.last or fail
786
- StartNode===@stack.first or fail
787
-
788
- result= @stack[1]
789
-
790
-
791
- #multiple assignment must be resolved
792
- #afterwards by walking the parse tree.
793
- #(because the relative precedences of = and ,
794
- #are reversed in multiple assignment.)
795
- # result.respond_to? :fixup_multiple_assignments! and
796
- # result=result.fixup_multiple_assignments!
797
-
798
- #relative precedence of = and rescue are also inverted sometimes
799
- # result.respond_to? :fixup_rescue_assignments! and
800
- # result=result.fixup_rescue_assignments!
801
-
802
- #do something with error nodes
803
- msgs=[]
804
- result.walk{|parent,i,subi,node|
805
- not if ErrorNode===node
806
- msgs<< @filename+":"+node.blame.msg
807
- end
808
- } if result.respond_to? :walk #hack hack
809
- result.errors=msgs unless msgs.empty?
810
- #other types of errors (lexer errors, exceptions in lexer or parser actions)
811
- #should be handled in the same way, but currently are not
812
- # puts msgs.join("\n")
813
-
814
- rescue Exception=>e
815
- # input=@filename
816
- # if input=="(eval)"
817
- input=@lexer
818
- if Array===input
819
- puts "error while parsing:"
820
- pp input
821
- input=nil
822
- else
823
- input=input.original_file
824
- inputname=@lexer.filename
825
- input.to_s.size>1000 and input=inputname
826
- end
827
- # end
828
- puts "error while parsing: <<< #{input} >>>"
829
- raise
830
- else
831
- unless msgs.empty?
832
- pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
833
- raise RedParse::ParseError.new(msgs.join("\n"),@stack)
834
- end
835
-
836
- return result
837
- ensure
838
- Thread.current[:$RedParse_parser]=oldparser
2700
+ def unget_token(token)
2701
+ @moretokens.unshift token
839
2702
  end
840
2703
 
841
- def LEFT_MATCHERS;self.RULES.map{|r| r.left.subregs }.flatten; end
842
- def STACKABLE_CLASSES
843
-
844
-
845
- _LEFT_MATCHERS.map!{|m|
846
- case m
847
- when Reg::LookAhead,Reg::LookBack: m.regs(0)
848
- else m
849
- end
850
- } #remove lookahead and lookback decoration
851
- rule_juicer=proc{|m|
852
- case m
853
- when Class: m
854
- when Reg::And: m.subregs.map &rule_juicer
855
- when Reg::Or: m.subregs.map &rule_juicer
856
- else #fukit
857
- end
858
- }
859
- _LEFT_CLASSES=_LEFT_MATCHERS.map{|m| rule_juicer[m] }.flatten.compact
860
- _RIGHT_CLASSES= self.RULES.map{|r| r.right }.grep(Class) #classes in productions
861
- _LEFT_CLASSES+_RIGHT_CLASSES
862
- end
863
2704
  =begin
864
- HIER=Class::FlattenedHierarchy.new *STACKABLE_CLASSES
865
-
866
- LOOKAHEAD_MATCHERS=self.RULES.map{|r| r.left.subregs.last }.map{|la| Reg::LookAhead===la and la.regs(0) }
867
-
868
- LOOKAHEAD_CLASSES=LOOKAHEAD_MATCHERS.map(&rule_juicer)
869
- LOOKAHEAD_CLASSES.each_with_index{|classes,i|
2705
+ self.LOOKAHEAD_CLASSES.each_with_index{|classes,i|
870
2706
  case classes
871
2707
  when Class: huh
872
2708
  when Array: classes.flatten.each{huh}
@@ -874,6 +2710,7 @@ end
874
2710
  end
875
2711
  }
876
2712
  =end
2713
+
877
2714
  # def fixup_multiple_assignments!; end
878
2715
  end
879
2716
 
@@ -922,16 +2759,16 @@ if __FILE__==$0
922
2759
  quiet=true
923
2760
  while /^-/===ARGV.first
924
2761
  case opt=ARGV.shift
925
- when "--": break
926
- when "--pp": output=:pp
927
- when "--lisp": output=:lisp
928
- when "--parsetree": output=:parsetree
929
- when "--vsparsetree": output=:vsparsetree
930
- when "--vsparsetree2": output=:vsparsetree2
931
- when "--update-problemfiles": problemfiles=ProblemFiles.new
932
- when "-q": quiet=true
933
- when "-v": quiet=false
934
- when "-e": inputs=[ARGV.join(" ")]; names=["-e"]; break
2762
+ when "--"; break
2763
+ when "--pp"; output=:pp
2764
+ when "--lisp"; output=:lisp
2765
+ when "--parsetree"; output=:parsetree
2766
+ when "--vsparsetree"; output=:vsparsetree
2767
+ when "--vsparsetree2"; output=:vsparsetree2
2768
+ when "--update-problemfiles"; problemfiles=ProblemFiles.new
2769
+ when "-q"; quiet=true
2770
+ when "-v"; quiet=false
2771
+ when "-e"; inputs=[ARGV.join(" ")]; names=["-e"]; break
935
2772
  else fail "unknown option: #{opt}"
936
2773
 
937
2774
  end
@@ -1060,24 +2897,121 @@ if __FILE__==$0
1060
2897
  exit result
1061
2898
  end
1062
2899
 
1063
- =begin todo:
2900
+ =begin old todo:
1064
2901
  v merge DotCallNode and CallSiteNode and CallWithBlockNode
1065
- remove actual Tokens from parse tree...
1066
- instead, each node has a corresponding range of tokens
1067
- -in an (optional) array of all tokens printed by the tokenizer.
1068
- split ParenedNode into ParenedNode + Rescue/EnsureNode
1069
- 'incomplete' subtrees such as ElseNode, ElsifNode, RescueNode
1070
- -should not appear in final output
2902
+ v remove actual Tokens from parse tree...
2903
+ v split ParenedNode into ParenedNode + Rescue/EnsureNode
2904
+ x 'incomplete' subtrees such as ElseNode, ElsifNode, RescueNode
2905
+ x -should not appear in final output
1071
2906
  v split keywordopnode into loop and if varieties?
1072
2907
  =end
1073
2908
 
1074
- =begin optimization opportunities:
1075
- top of stack slot contains mostly keywords, specific node classes, and Value
1076
- lookahead slot contains mostly LowerOp and keywords, with a few classes and inverted keywords
1077
- -(LowerOp is hard to optimize)
1078
- if top of stack matcher is Value, then the next matcher down is mostly keywords, with some operators
2909
+ =begin old optimization opportunities:, ha!
2910
+ top of stack slot contains mostly keywords, specific node classes, and Expr
2911
+ lookahead slot contains mostly lower_op and keywords, with a few classes and inverted keywords
2912
+ -(lower_op is hard to optimize)
2913
+ if top of stack matcher is Expr, then the next matcher down is mostly keywords, with some operators
1079
2914
  class membership can be optimized to test of integer within a range
1080
2915
  keywords could be stored as symbols instead of strings
1081
2916
  a few rules may need exploding (eg, ensure) to spoon feed the optimizer
1082
2917
  make all Nodes descendants of Array
1083
2918
  =end
2919
+
2920
+ #todo:
2921
+ #each node should have a corresponding range of tokens
2922
+ #-in an (optional) array of all tokens printed by the tokenizer.
2923
+ #v test stack_monkey mods
2924
+ #v break ParenedNode into 2 (3?) classes
2925
+ #x invent BEGINNode/ENDNode? (what other keywords?)
2926
+ #v at least make BEGIN/END be KWCallNode
2927
+ #v replace VarNameToken with VarNode in parser
2928
+ #x convert raw rules to lists of vertex identities?
2929
+ #v DottedRule class
2930
+ #v ParserState class (set of DottedRules)
2931
+ #v MultiReduce
2932
+ #v MultiShift
2933
+ #v ParserState#evolve(identity)
2934
+ #v DottedRule#evolve(identity)
2935
+ #v RedParse#enumerate_states
2936
+ #v RedParse#enumerate_exemplars
2937
+ #v Node/Token.enumerate_exemplars
2938
+ #v Node/Token.identity_param
2939
+ #v rename #lvalue? => #lvalue
2940
+ #x likewise get rid of other oddly named identity params
2941
+ #v BareMethod,WITHCOMMAS,BEGINAFTEREQUALS should have predicate methods defined for them
2942
+ #v do something about BEGINAFTEREQUALS... lots predicates, ugly to identify
2943
+ #v document identity parameters in nodes and tokens
2944
+ #operator and keyword tokens have some identity_param variations remaining...maybe?
2945
+ #xx all identity readers have to have writers as well (even if fake)
2946
+ #v sort out vertex identities... call identity_param in apt classes
2947
+ #convert identities<=>small ints
2948
+ #convert ParserStates<=>small ints
2949
+ #> lower_op/proc lookahead requires special action type with shift and reduce branches
2950
+ #x stack monkeys dictate some nodes appear in s/r table... which ones?
2951
+ #x some stack monkeys pushback nodes, action table must take those as input
2952
+ #v retype GoalPostNode => GoalPostToken
2953
+ #v then, pushback* should go away
2954
+ #v build shift/reduce table
2955
+ #v build goto table
2956
+ #split tables into shift/reduce and goto....?
2957
+ #v integrate with c code generator
2958
+ #finish c code generator
2959
+ #code generator needs a way to deal with :
2960
+ #backtracking (to more than 1 node/token???)
2961
+ #actions (stack monkeys/lower_op)
2962
+ #every reduce requires b/ting thru the lookahead
2963
+ #garbage collection
2964
+ #sharing ruby objects between ruby code and generated c code
2965
+ #optimizer?
2966
+ #ruby code generator?
2967
+ #v what to do with :shift ?
2968
+ #what to do with :accept ?
2969
+ #what to do with :error ?
2970
+ #Node.create (used in generated code)
2971
+ #Node.create <= takes input directly from semantic stack
2972
+ #build Node.create param list generator
2973
+ #v names for rules, dotted rules, parser states, identities
2974
+ #x StartNode may be a problem... used by a stack monkey,
2975
+ #to remove extra ;s from the very beginning of input.
2976
+ #use a lexer hack instead?
2977
+ #v convert StartNode to StartToken?
2978
+ #convert names to numbers and numbers to names
2979
+ #for states, rules, vertex identities
2980
+ #in ruby and c (??)
2981
+ #x rule for HereBodyToken should be a lexer hack?
2982
+ #v stack monkeys should have names
2983
+ #how to handle a stack monkey whose 2nd parameter is not a single identity?
2984
+ #even reduces may not have enough info since 1 node class may have multiple identities
2985
+ #v RedParse constants should be named in inspect
2986
+ #v toplevel rule?
2987
+ #v semantic stack in generated c code should be a ruby array
2988
+ #x state stack should keep size of semantic stack at the time states are pushed,
2989
+ #so that i can restore semantic stack to former state when b-ting/reducing
2990
+ #urk, how do I know how many levels of state stack to pop when reducing?
2991
+ #in looping error rules, just scan back in semantic stack for rule start
2992
+ #in regular looping rules, transition to loop state is saved on a special stack
2993
+ #so that at reduce time, we can b/t to that point for a start
2994
+ #if rule contains only scalars, b/t is easy
2995
+ #else rule contains scalars and optionals:
2996
+ #scan for rule start vertex starting at highest node
2997
+ #on semantic stack that can contain it and working downward.
2998
+ #also, statically verify that relevant rules contain no collisions among first (how many?) matchers
2999
+
3000
+ #is lookahead in code generator even useful? my tables have built-in lookahead....
3001
+ #need hack to declare nonerror looping matchers as irrevocable (for speed, when reducing)
3002
+ #v assignmentRhsNode needs an identity_param for with_commas
3003
+ #v -** fixup and setter breakout rules need dedicated identity_params too
3004
+ # = rescue ternary is broken again now...
3005
+ #v instead of shift states and is_shift_state? to find them,
3006
+ #v i should have shift transitions. (transitions that imply a shift... in response to a token input.)
3007
+ #v all states will have 2 entry points, for shift and nonshift transitions.
3008
+ #split big table into goto(node) and sr(token) tables
3009
+ #in each state, most common sr action should be made default
3010
+ #unused entries in goto table can be ignored.
3011
+ #most common goto entries (if any) can be default.
3012
+ #is the change_index arg in stack_monkey calls really correct everywhere? what are
3013
+ #the exact semantics of that argument? what about stack_monkeys that change the stack size?
3014
+ #should there be another arg to keep track of that?
3015
+ #maybe rewrite stack_monkeys so they're a little clearer and easier to analyze (by hand)
3016
+ #MultiShift/MultiReduce are not supported actions in generate.rb
3017
+ #:accept/:error are not supported actions in generate.rb