redparse 0.8.4 → 1.0.0

Files changed (45)
  1. checksums.yaml +4 -0
  2. data/COPYING.LGPL +503 -158
  3. data/History.txt +192 -0
  4. data/Makefile +9 -0
  5. data/README.txt +72 -39
  6. data/bin/redparse +108 -14
  7. data/lib/miniredparse.rb +1543 -0
  8. data/lib/redparse.rb +971 -105
  9. data/lib/redparse/ReduceWithsFor_RedParse_1_8.rb +17412 -0
  10. data/lib/redparse/ReduceWithsFor_RedParse_1_9.rb +17633 -0
  11. data/lib/redparse/babynodes.rb +17 -0
  12. data/lib/redparse/babyparser.rb +17 -0
  13. data/lib/redparse/cache.rb +290 -6
  14. data/lib/redparse/compile.rb +6 -97
  15. data/lib/redparse/decisiontree.rb +1 -1
  16. data/lib/redparse/float_accurate_to_s.rb +30 -6
  17. data/lib/redparse/generate.rb +18 -0
  18. data/lib/redparse/node.rb +415 -124
  19. data/lib/redparse/parse_tree_server.rb +20 -2
  20. data/lib/redparse/problemfiles.rb +1 -1
  21. data/lib/redparse/pthelper.rb +17 -31
  22. data/lib/redparse/reg_more_sugar.rb +1 -1
  23. data/lib/redparse/replacing/parse_tree.rb +30 -0
  24. data/lib/redparse/replacing/ripper.rb +20 -0
  25. data/lib/redparse/replacing/ruby_parser.rb +28 -0
  26. data/lib/redparse/ripper.rb +393 -0
  27. data/lib/redparse/ripper_sexp.rb +153 -0
  28. data/lib/redparse/stackableclasses.rb +113 -0
  29. data/lib/redparse/version.rb +18 -1
  30. data/redparse.gemspec +29 -9
  31. data/rplt.txt +31 -0
  32. data/test/data/hd_with_blank_string.rb +3 -0
  33. data/test/data/pt_known_output.rb +13273 -0
  34. data/test/data/wp.pp +0 -0
  35. data/test/generate_parse_tree_server_rc.rb +17 -0
  36. data/test/rp-locatetest.rb +2 -2
  37. data/test/test_1.9.rb +338 -35
  38. data/test/test_all.rb +22 -3
  39. data/test/test_part.rb +32 -0
  40. data/test/test_redparse.rb +396 -74
  41. data/test/test_xform_tree.rb +18 -0
  42. data/test/unparse_1.9_exceptions.txt +85 -0
  43. data/test/unparse_1.9_exceptions.txt.old +81 -0
  44. metadata +71 -46
  45. data/Rakefile +0 -35
data/lib/miniredparse.rb
@@ -0,0 +1,1543 @@
1
+ =begin
2
+ redparse - a ruby parser written in ruby
3
+ Copyright (C) 2008,2009, 2012, 2016 Caleb Clausen
4
+
5
+ This program is free software: you can redistribute it and/or modify
6
+ it under the terms of the GNU Lesser General Public License as published by
7
+ the Free Software Foundation, either version 3 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public License
16
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ =end
18
+
19
+
20
+
21
+ require 'forwardable'
22
+
23
+ require 'digest/sha2'
24
+
25
+ begin
26
+ require 'rubygems'
27
+ rescue LoadError=>e
28
+ #hope we don't need it
29
+ raise unless /rubygems/===e.message
30
+ end
31
+ require 'rubylexer'
32
+ require 'reg'
33
+ require 'reglookab'
34
+
35
+ require "redparse/node"
36
+ #require "redparse/decisiontree"
37
+ require "redparse/reg_more_sugar"
38
+ #require "redparse/generate"
39
+ require "redparse/cache"
40
+ #require "redparse/compile"
41
+
42
+ class RedParse
43
+
44
+
45
+
46
+ alias :dump :inspect # preserve old inspect functionality
47
+
48
+ # irb friendly #inspect/#to_s
49
+ def to_s
50
+ mods=class<<self;self end.ancestors-self.class.ancestors
51
+ mods=mods.map{|mod| mod.name }.join('+')
52
+ mods="+"<<mods unless mods.empty?
53
+ "#<#{self.class.name}#{mods}: [#{@input.inspect}]>"
54
+ end
55
+
56
+ alias :inspect :to_s
57
+
58
+ ####### generic stuff for parsing any(?) language
59
+ # include Nodes
60
+ class StackMonkey
61
+ def initialize(name,first_changed_index,and_expect_node,options={},&monkey_code)
62
+ first_changed_index=-first_changed_index if first_changed_index>0
63
+ @name,@first_changed_index,@and_expect_node,@monkey_code=
64
+ name,first_changed_index,and_expect_node,monkey_code
65
+ end
66
+
67
+ attr_reader :name, :first_changed_index, :and_expect_node, :monkey_code
68
+ alias hint and_expect_node
69
+ attr_accessor :exemplars
70
+
71
+ def [](stack)
72
+ result=@monkey_code[stack]
73
+ return result
74
+ end
75
+
76
+ def _dump depth
77
+ @name
78
+ end
79
+
80
+ def self._load str
81
+ Thread.current[:$RedParse_parser].undumpables[@name]
82
+ end
83
+
84
+ def action2c
85
+ #"return the whole thing on first call, just a goto stmt after that"
86
+ return " goto #@goto_label;\n" if defined? @goto_label
87
+
88
+ =begin
89
+ <<-E
90
+ #{@goto_label=@name.gsub(/[^a-z0-9_]/,'_')}:
91
+ monkey=rb_hash_get(undumpables,rb_cstr2str("#@name"));
92
+ rb_funcall(monkey,rb_intern("[]"),huh_stack);
93
+
94
+ /*recover from stackmonkey fiddling*/
95
+ for(i=0;i<#{-@first_changed_index};++i) {
96
+ rb_ary_unshift(lexer_moretokens,
97
+ rb_ary_pop(huh_semantic_stack));
98
+ rb_ary_pop(huh_syntax_stack);
99
+ }
100
+
101
+ goto #{Node===@and_expect_node ?
102
+ postreduceaction4this_state(@and_expect_node) :
103
+ shiftaction4this_state
104
+ };
105
+ E
106
+ =end
107
+ end
108
+ end
109
+ class DeleteMonkey<StackMonkey
110
+ def initialize(index,name)
111
+ index=-index if index>0
112
+ @index=index
113
+ super(name,index,nil){|stack| stack.delete_at( index )}
114
+ end
115
+ end
116
+ def stack_monkey(*args,&block) StackMonkey.new(*args,&block) end
117
+ def self.stack_monkey(*args,&block) StackMonkey.new(*args,&block) end
118
+ def delete_monkey(index,name) DeleteMonkey.new(index,name) end
119
+
120
+ def evaluate rule
121
+ #dissect the rule
122
+ if false
123
+ rule=rule.dup
124
+ lookahead_processor=(rule.pop if Proc===rule.last)
125
+ node_type=rule.pop
126
+ else
127
+ Reg::Transform===rule or fail
128
+ node_type= rule.right
129
+ rule=rule.left.subregs.dup
130
+ lookahead_processor=(rule.pop if Proc|::Reg::LookAhead===rule.last)
131
+ lookback=rule[0]=rule[0].subregs[0] if ::Reg::LookBack===rule[0]
132
+ end
133
+
134
+ #index of data at which to start matching
135
+ i=@stack.size-1 #-1 because last element of @stack is always lookahead
136
+
137
+ #I could call this a JIT compiler, but that's a bit grandiose....
138
+ #more of a JIT pre-processor
139
+ compiled_rule=@compiled_rules[rule]||=
140
+ rule.map{|pattern|
141
+ String|Regexp===pattern ? KW(pattern) : pattern
142
+ }
143
+
144
+ #what's the minimum @stack size this rule could match?
145
+ rule_min_size=@min_sizes[compiled_rule]||=
146
+ compiled_rule.inject(0){|sum,pattern|
147
+ sum + pattern.itemrange.begin
148
+ }
149
+ i>=rule_min_size or return false
150
+
151
+ matching=[]
152
+
153
+ #actually try to match rule elements against each @stack element in turn
154
+ compiled_rule.reverse_each{|matcher|
155
+ i.zero? and fail
156
+ target=matching
157
+ #is this matcher optional? looping?
158
+ loop= matcher.itemrange.last.to_f.infinite?
159
+ minimum=matcher.itemrange.first
160
+ optional=minimum.zero?
161
+ matching.unshift target=[] if loop
162
+ if loop or optional
163
+ matcher=matcher.subregs[0]
164
+ end
165
+
166
+ begin
167
+ if matcher===@stack[i-=1] #try match
168
+ target.unshift @stack[i]
169
+ else
170
+ #if match failed, the whole rule fails
171
+ #unless this match was optional, in which case, ignore it
172
+ #or was looping and met its minimum
173
+ #but bump the data position back up, since the latest datum
174
+ #didn't actually match anything.
175
+ return false unless optional or loop&&target.size>=minimum
176
+ i+=1
177
+ matching.unshift nil unless loop
178
+ break
179
+ end
180
+ end while loop
181
+ }
182
+
183
+ matchrange= i...-1 #what elems in @stack were matched?
184
+
185
+ #give lookahead matcher (if any) a chance to fail the match
186
+ case lookahead_processor
187
+ when ::Reg::LookAhead
188
+ return false unless lookahead_processor.subregs[0]===@stack.last
189
+ when Proc
190
+ return false unless lookahead_processor[self,@stack.last]
191
+ end
192
+
193
+ #if there was a lookback item, don't include it in the new node
194
+ if lookback
195
+ matchrange= i+1...-1 #what elems in @stack were matched?
196
+ matching.shift
197
+ end
198
+
199
+
200
+ #replace matching elements in @stack with node type found
201
+ case node_type
202
+ when Class
203
+ node=node_type.create(*matching)
204
+ node.startline||=@stack[matchrange.first].startline
205
+ node.endline=@endline
206
+ @stack[matchrange]=[node]
207
+ when Proc,StackMonkey; node_type[@stack]
208
+ when :shift; return 0
209
+ when :accept,:error; throw :ParserDone
210
+ else fail
211
+ end
212
+
213
+ return true #let caller know we found a match
214
+
215
+
216
+ rescue Exception=>e
217
+ #puts "error (#{e}) while executing rule: #{rule.inspect}"
218
+ #puts e.backtrace.join("\n")
219
+ raise
220
+ end
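The loop above is the core of #evaluate: each compiled rule element is matched right-to-left against successive stack slots, with the last slot reserved for the lookahead. A minimal sketch of that idea in plain Ruby, using ordinary === matchers instead of Reg patterns (all names below are illustrative, not part of the gem):

def rule_matches?(stack, matchers)
  i = stack.size - 1                 # last slot is the lookahead; skip it
  matchers.reverse_each do |m|
    return false if i.zero?          # ran out of stack before out of rule
    return false unless m === stack[i -= 1]
  end
  true
end

rule_matches?([:start, 1, "+", 2, :lookahead], [Integer, "+", Integer]) #=> true
rule_matches?([:start, "+", 2, :lookahead],    [Integer, "+", Integer]) #=> false

The real method additionally handles optional and looping matchers, lookback/lookahead constraints, and replaces the matched slice of the stack with the rule's node type.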
221
+
222
+ class ParseError<RuntimeError
223
+ def initialize(msg,stack)
224
+ super(msg)
225
+ @stack=stack
226
+ if false
227
+ ranges=(1..stack.size-2).map{|i|
228
+ node=stack[i]
229
+ if node.respond_to? :linerange
230
+ node.linerange
231
+ elsif node.respond_to? :endline
232
+ node.endline..node.endline
233
+ end
234
+ }
235
+ types=(1..stack.size-2).map{|i| stack[i].class }
236
+ msg += "couldn't interpret #{types.inspect} at line ranges: #{ranges.inspect}"
237
+ end
238
+ super(msg)
239
+ end
240
+ attr :stack
241
+ end
242
+
243
+ def [](*args)
244
+ @stack.[](*args)
245
+ end
246
+
247
+ def []=(*args)
248
+ @stack.[]=(*args)
249
+ end
250
+
251
+ #try all possible reductions
252
+ def reduce
253
+ shift=nil
254
+ @rules.reverse_each{|rule|
255
+ shift=evaluate(rule) and break
256
+ }
257
+ return shift
258
+ end
259
+
260
+ def parse
261
+
262
+ #hack, so StringToken can know what parser it's called from
263
+ #so it can use it to parse inclusions
264
+ oldparser=Thread.current[:$RedParse_parser]
265
+ Thread.current[:$RedParse_parser]||=self
266
+
267
+ return @cached_result if defined? @cached_result
268
+
269
+ @rules||=expanded_RULES()
270
+ # @inputs||=enumerate_exemplars
271
+
272
+ @stack=[StartToken.new, get_token]
273
+ #last token on @stack is always implicitly the lookahead
274
+ catch(:ParserDone){ loop {
275
+ #try all possible reductions
276
+ next if reduce==true
277
+
278
+ #no rule can match current @stack, get another token
279
+ tok=get_token or break
280
+
281
+ #are we done yet?
282
+ #tok.nil? or EoiToken===tok && EoiToken===@stack.last and break
283
+
284
+ #shift our token onto the @stack
285
+ @stack.push tok
286
+ }}
287
+
288
+ @stack.size==2 and return result=NopNode.new #handle empty parse string
289
+
290
+ #unless the @stack is 3 tokens,
291
+ #with the last an Eoi, and first a StartToken
292
+ #there was a parse error
293
+ unless @stack.size==3
294
+ pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
295
+ top=MisparsedNode.new("(toplevel)", @stack[1...-1],'')
296
+ raise ParseError.new(top.msg,@stack)
297
+ end
298
+ EoiToken===@stack.last or fail
299
+ StartToken===@stack.first or fail
300
+
301
+ result= @stack[1]
302
+
303
+
304
+ #multiple assignment must be resolved
305
+ #afterwards by walking the parse tree.
306
+ #(because the relative precedences of = and ,
307
+ #are reversed in multiple assignment.)
308
+ # result.respond_to? :fixup_multiple_assignments! and
309
+ # result=result.fixup_multiple_assignments!
310
+
311
+ #relative precedence of = and rescue are also inverted sometimes
312
+ # result.respond_to? :fixup_rescue_assignments! and
313
+ # result=result.fixup_rescue_assignments!
314
+
315
+ #do something with error nodes
316
+ msgs=[]
317
+ result.walk{|parent,i,subi,node|
318
+ if node.respond_to? :error? and node.error?(@rubyversion)
319
+ msgs<< @filename+":"+node.blame.msg
320
+ false
321
+ else
322
+ true
323
+ end
324
+ } if result.respond_to? :walk #hack hack
325
+ result.errors=msgs unless msgs.empty?
326
+ #other types of errors (lexer errors, exceptions in lexer or parser actions)
327
+ #should be handled in the same way, but currently are not
328
+ # puts msgs.join("\n")
329
+
330
+ rescue Exception=>e
331
+ input=@lexer
332
+ if Array===input
333
+ STDERR.puts "error while parsing:"
334
+ STDERR.write input.pretty_inspect
335
+ input=nil
336
+ else
337
+ input=input.original_file
338
+ inputname=@lexer.filename
339
+ STDERR.puts "error while parsing #@filename:#@endline: <<< #{input if input.to_s.size<=1000} >>>"
340
+ end
341
+ e.backtrace.each{|l| p l }
342
+ raise
343
+ else
344
+ unless msgs.empty?
345
+ pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
346
+ raise RedParse::ParseError.new(msgs.join("\n"),@stack)
347
+ end
348
+
349
+ # result=NopNode.new if EoiToken===result
350
+ return result
351
+ ensure
352
+ @write_cache.put(@input,result) if @write_cache and result and !result.errors
353
+ @stack=nil
354
+ Thread.current[:$RedParse_parser]=oldparser
355
+ end
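Stripped of caching, error reporting, and the Ruby-specific fixups, #parse above is a pattern-driven shift/reduce loop: reduce while any rule matches, otherwise shift the next token, and expect exactly [start, tree, eoi] on the stack at the end. A bare-bones sketch with stand-in tokens and rules (nothing here is the gem's API):

def toy_parse(tokens, rules)
  stack = [:start, tokens.shift]                   # last element acts as lookahead
  loop do
    next if rules.any? { |rule| rule.call(stack) } # reduce as long as possible
    tok = tokens.shift or break                    # otherwise shift another token
    stack.push tok
  end
  stack.size == 3 or raise "parse error: #{stack.inspect}"
  stack[1]                                         # stack is [:start, tree, :eoi]
end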
356
+
357
+
358
+ #HIER=Class::FlattenedHierarchy.new *STACKABLE_CLASSES
359
+
360
+ def new_disabled_reduce
361
+ #@hier||=Class::FlattenedHierarchy.new *STACKABLE_CLASSES()
362
+ @reducer||=Reducer.new(@rules)
363
+
364
+ @reducer.reduce(@stack)
365
+ end #
366
+
367
+
368
+ #inline any subsequences in RULES right into the patterns
369
+ #reg should do this already, but current release does not
370
+ def expanded_RULES
371
+ result=RULES()
372
+ return result if (-[:foo, -[:bar]]).subregs.grep(Reg::Subseq).empty?
373
+ result.map!{|rule|
374
+ unless rule.left.subregs.grep(Reg::Subseq)
375
+ then rule
376
+ else
377
+ right=rule.right
378
+ rule=rule.left.subregs.dup
379
+ (rule.size-1).downto(0){|i|
380
+ if Reg::Subseq===rule[i]
381
+ rule[i,1]=rule[i].subregs
382
+ end
383
+ }
384
+ -rule>>right
385
+ end
386
+ }
387
+ end
388
+
389
+ ###### specific to parsing ruby
390
+
391
+
392
+ UCLETTER=RubyLexer::UCLETTER
393
+
394
+ LCLETTER=RubyLexer::LCLETTER
395
+ LETTER=RubyLexer::LETTER
396
+ LETTER_DIGIT=RubyLexer::LETTER_DIGIT
397
+
398
+ def vertices; self.class.constants.grep(Node|Token) end
399
+
400
+ def self.has_return_hash_fix? #is this needed? it's not used in this file....
401
+ rl=RubyLexer.new("","return {}.size")
402
+ return(
403
+ FileAndLineToken===rl.get1token and
404
+ MethNameToken===rl.get1token and
405
+ ImplicitParamListStartToken===rl.get1token and
406
+ WsToken===rl.get1token and
407
+ KeywordToken===rl.get1token and
408
+ KeywordToken===rl.get1token and
409
+ KeywordToken===rl.get1token and
410
+ MethNameToken===rl.get1token and
411
+ ImplicitParamListStartToken===rl.get1token and
412
+ ImplicitParamListEndToken===rl.get1token and
413
+ ImplicitParamListEndToken===rl.get1token and
414
+ EoiToken===rl.get1token
415
+ )
416
+ end
417
+
418
+ #see pickaxe, 1st ed, page 221
419
+ def RIGHT_ASSOCIATIVE
420
+ {
421
+ # "defined?"=>120.5,
422
+ "**"=>118,
423
+
424
+ "="=>105, "%="=>105, "/="=>105, "-="=>105, "+="=>105,
425
+ "|="=>105, "&="=>105, ">>="=>105, "<<="=>105, "*="=>105,
426
+ "&&="=>105, "||="=>105, "**="=>105, "^="=>105,
427
+
428
+
429
+ # "and"=>99, "or"=>99,
430
+
431
+ # "if"=>98, "unless"=>98, "while"=>98, "until"=>98, "rescue"=>98,
432
+
433
+ # "&&"=>109, "||"=>108,
434
+ }
435
+ end
436
+
437
+ def PRECEDENCE
438
+ {
439
+
440
+ # "("=>122, #method param list
441
+ # "{"=>122, "do"=>122, #blocks
442
+
443
+ "::"=>121, "."=>121,
444
+
445
+ # "defined?"=>120.5,
446
+
447
+ "["=>120, #[] []= methods
448
+
449
+ "!"=>119, "~"=>119,
450
+ "+@"=>119,
451
+
452
+ "**"=>118,
453
+
454
+ "-@"=>117,
455
+
456
+ "*"=>116, "/"=>116, "%"=>116,
457
+
458
+ "+"=>115, "-"=>115,
459
+
460
+ "<<"=>114, ">>"=>114,
461
+
462
+ "&"=>113,
463
+
464
+ "^"=>112, "|"=>112,
465
+
466
+ "<="=>111, ">="=>111, "<"=>111, ">"=>111,
467
+
468
+ "<=>"=>110, "=="=>110, "==="=>110,
469
+ "!="=>110, "=~"=>110, "!~"=>110,
470
+
471
+ "&&"=>109,
472
+
473
+ "||"=>108,
474
+
475
+ ".."=>107, "..."=>107,
476
+
477
+ "?"=>106, # ":"=>106, #not sure what to do with ":"
478
+
479
+ "unary&"=>105, #unary * and & operators
480
+ "lhs*"=>105, #this should remain above =
481
+ "lhs,"=>105,
482
+ "rescue3"=>105,
483
+
484
+ "="=>104, "%="=>104, "/="=>104, "-="=>104, "+="=>104,
485
+ "|="=>104, "&="=>104, ">>="=>104, "<<="=>104, "*="=>104,
486
+ "&&="=>104, "||="=>104, "**="=>104, "^="=>104,
487
+
488
+ "defined?"=>103,
489
+ "not"=>103,
490
+ ":"=>102, #but not when used as a substitute for 'then'
491
+
492
+ "=>"=>101,
493
+ "rhs,"=>100, #"call,"=>100, "array,"=>100, "param,"=>100,
494
+ ","=>100, "rhs*"=>100, "unary*"=>100,
495
+ #the 'precedence' of comma is somewhat controversial. it actually has
496
+ #several different precedences depending on which kind of comma it is.
497
+ #the precedence of , is higher than :, => and the assignment operators
498
+ #in certain (lhs) contexts. therefore, the precedence of lhs-comma should
499
+ #really be above "=". (see the short example after this method)
500
+
501
+ #"unary" prefix function names seen has operators have this precedence
502
+ #but, rubylexer handles precedence of these and outputs fake parens
503
+ #to tell us how it's parsed
504
+
505
+ "or"=>99, "and"=>99,
506
+
507
+ "if"=>98, "unless"=>98, "while"=>98, "until"=>98,
508
+
509
+ "rescue"=>98,
510
+
511
+ ";"=>96,
512
+ }
513
+ end
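As the comma comments above note, the comma effectively has two precedences relative to "=": an lhs comma binds tighter, an rhs comma binds looser. A few lines of plain Ruby (nothing gem-specific) show why both are needed:

a, b = 1, 2    # lhs comma groups (a, b) before "=", rhs comma groups (1, 2) after it
[a, b]         #=> [1, 2]
c = 3, 4       # with no lhs comma, the rhs comma still groups first
c              #=> [3, 4]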
514
+
515
+ module BracketsCall; end
516
+ Value= #NumberToken|SymbolToken|
517
+ #HerePlaceholderToken|
518
+ ValueNode&-{:lvalue =>nil}
519
+ Expr=Value
520
+
521
+ if defined? SPECIALIZED_KEYWORDS
522
+ class SpecializedKeywordToken<KeywordToken
523
+ def inspect
524
+ "#<"+self.class.name+">"
525
+ end
526
+ alias image inspect
527
+ end
528
+
529
+ KW2class={}
530
+
531
+ Punc2name={
532
+ "("=>"lparen", ")"=>"rparen",
533
+ "["=>"lbracket", "]"=>"rbracket",
534
+ "{"=>"lbrace", "}"=>"rbrace",
535
+ ","=>"comma",
536
+ ";"=>"semicolon",
537
+ "::"=>"double_colon",
538
+ "."=>"dot",
539
+ "?"=>"question_mark", ":"=>"colon",
540
+ "="=>"equals",
541
+ "|"=>"pipe",
542
+ "<<"=>"leftleft", ">>"=>"rightright",
543
+ "=>"=>"arrow",
544
+ }
545
+ end
546
+
547
+ def self.KW(ident)
548
+ if defined? SPECIALIZED_KEYWORDS
549
+ fail if /\\/===ident
550
+ orig_ident=ident
551
+ if Regexp===ident
552
+ list=ident.to_s[/\(?-mix:\^\((.*)\)\$\)/,1]
553
+
554
+ #pick apart any char class in ident
555
+ if open_bracket_idx=list.index(/([^\\]|^)\[/)
556
+ open_bracket_idx+=1 unless list[open_bracket_idx]=="["
557
+ close_bracket_idx=list.index(/[^\\]\]/,open_bracket_idx+1)
558
+ close_bracket_idx+=1 unless list[close_bracket_idx]=="]"
559
+ cclass=list.slice!(open_bracket_idx..close_bracket_idx)
560
+ cclass=cclass[1...-1]
561
+ cclass=cclass.scan( /[^\\]|\\./ )
562
+ cclass.map!{|ch| ch.size==1 ? ch : ch[1..1] }
563
+ end
564
+
565
+ #rest of it should be a list of words separated by |
566
+ list=list.split(/\|/).reject{|x| x==''}
567
+ list.concat cclass if cclass
568
+ list.map{|w|
569
+ w.gsub!(/\\/,'')
570
+ KW(w)
571
+ }.inject{|sum,kw| sum|kw}
572
+ else
573
+ fail unless String===ident
574
+ ident=Punc2name[ident] unless /^(?:(?!#{LETTER_DIGIT}).)+$/o===ident
575
+ fail "no name for #{orig_ident}" unless ident
576
+ eval %{
577
+ class Keyword_#{ident} < SpecializedKeywordToken
578
+ def ident; '#{orig_ident}' end
579
+ # def self.instance; @instance ||= allocate end
580
+ # def self.new; instance end
581
+ def initialize(offset)
582
+ @offset=offset
583
+ end
584
+ end
585
+ }
586
+ KW2class[ident]||=const_get("Keyword_#{ident}")
587
+ end
588
+ else
589
+ ident=case ident
590
+ when Integer; ident.chr
591
+ when String,Regexp; ident
592
+ else ident.to_s
593
+ end
594
+
595
+ return KeywordToken&-{:ident=>ident}
596
+ end
597
+ end
598
+ def KW(ident); self.class.KW(ident) end
599
+
600
+ if defined? SPECIALIZED_KEYWORDS
601
+ def make_specialized_kw(name,offset)
602
+ name=Punc2name[name] unless /^((?!#{LETTER_DIGIT}).)+$/o===name
603
+ KW2class[name].new(offset)
604
+ end
605
+ alias make_kw make_specialized_kw
606
+ else
607
+ def make_kw(name,offset)
608
+ KeywordToken.new(name,offset)
609
+ end
610
+ end
611
+
612
+ UNOP=
613
+ (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
614
+ :ident=>/^(?:[+-]@|unary[&*]|(?:lhs|rhs)[*])$/,
615
+ # :ident=>/^(?:[+-]@|unary[&])$/,
616
+ #:unary =>true,
617
+ }|
618
+ (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
619
+ :ident=>/^([~!]|not|defined\?)$/, #defined? should be removed from here, it's handled separately
620
+ } #|
621
+ DEFOP=
622
+ (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
623
+ :ident=>"defined?",
624
+ }
625
+ =begin
626
+ MethNameToken&-{ #hack, shouldn't be necessary
627
+ #rubylexer should know to generally treat "defined?" as a keyword
628
+ #or operator. (like most keywords, it can also be used as a method
629
+ # name....)
630
+ :ident=>"defined?"
631
+ }
632
+ =end
633
+
634
+ def self.Op(ident=nil, allow_keyword=false)
635
+ result=OperatorToken
636
+ result |= KeywordToken if allow_keyword
637
+ result &= -{:ident=>ident} if ident
638
+ #result[:infix?]=true
639
+ return result
640
+ end
641
+ def Op(*args); self.class.Op(*args); end
642
+ BINOP_KEYWORDS=%w[if unless while until and or && \|\|]
643
+
644
+ #HAS_PRECEDENCE=Op(/^#{PRECEDENCE.keys.map{|k| Regexp.quote k}.join('|')}$/,true)
645
+ =begin
646
+ KeywordOp=
647
+ KeywordToken & -{
648
+ :ident=>/^(#{BINOP_KEYWORDS.join('|')})$/
649
+ }
650
+ KeywordOp2=
651
+ KeywordToken & -{
652
+ :ident=>/^([\[({!+*?:,]|\.{1,3}|::|=>)$/
653
+ }
654
+ =end
655
+ DotOp= KW('.') #KeywordToken & -{ :ident=>"." }
656
+ DoubleColonOp= KW('::') #KeywordToken & -{ :ident=>"::" }
657
+
658
+ Op=Op()
659
+ MODIFYASSIGNOP=Op( /^(([^=])\2|[^<>=!])=$/, true )
660
+ NONASSIGNOP=Op( /([^=]|[<>=!]=)$/)
661
+ KW_Op= #some of these ought to be regular operators, fer gosh sake
662
+ Op(/^(![=~]|\.\.\.?|=>)$/,true)|Op(/^(#{BINOP_KEYWORDS.join('|')})$/)
663
+
664
+ EPSILON=Float::EPSILON*10_000_000 #this should be <<1 and >0
665
+ fail unless 1+EPSILON>1
666
+ fail unless EPSILON<0.1
667
+
668
+ def left_op_higher(op,op2)
669
+ KeywordToken===op2 or OperatorToken===op2 or return true
670
+ rightprec=@precedence[op2.to_s] or return true
671
+ rightprec+=EPSILON if @RIGHT_ASSOCIATIVE[op2.to_s]
672
+ return @precedence[op.to_s]>=rightprec
673
+ end
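left_op_higher decides reduce-vs-shift with a plain >= against the tables above; the EPSILON bump is what turns >= into a strict > for right-associative operators. A self-contained sketch of the same trick (local names, not the gem's):

PREC        = { "+" => 115, "**" => 118 }
RIGHT_ASSOC = { "**" => true }
EPS         = Float::EPSILON * 10_000_000

def left_higher?(left, right)
  r  = PREC[right]
  r += EPS if RIGHT_ASSOC[right]
  PREC[left] >= r
end

left_higher?("+", "+")    #=> true  : "a + b + c"  reduces the left + first
left_higher?("**", "**")  #=> false : "a ** b ** c" keeps shifting, giving a ** (b ** c)
left_higher?("**", "+")   #=> true  : "a ** b + c"  reduces the ** first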
674
+
675
+ # LowerOp= proc{|parser,op2| parser.left_op_higher(parser[-3],op2) }
676
+ module LowerOp_inspect
677
+ def inspect; "lower_op" end
678
+ end
679
+
680
+ def lower_op
681
+ return @lower_op if defined? @lower_op
682
+ lower_op=item_that{|op| left_op_higher(@stack[-3],op) }
683
+ lower_op=(LOWEST_OP|(~VALUELIKE_LA() & lower_op)).la
684
+ lower_op.extend LowerOp_inspect
685
+ @lower_op=lower_op
686
+ end
687
+
688
+ #this is a hack, should use graphcopy to search for Deferreds and replace with double-Deferred as below
689
+ def item_that(*a,&b)
690
+ if defined? @generating_parse_tables
691
+ huh unless b
692
+ #double supers, one of them in a block executed after this method returns....
693
+ #man that's weird
694
+ super(*a){|ob| @saw_item_that[[super(*a,&b),ob]]=true}
695
+ else
696
+ super(*a,&b) #and then here's another
697
+ end
698
+ end
699
+
700
+ WANTS_SEMI=%w[while until if unless
701
+ def case when in rescue
702
+ elsif class module << => . ::
703
+ ]
704
+ def wants_semi_context
705
+ Op(/^(<<|=>|\.|::)$/)|KW(/^(#{WANTS_SEMI.map{|ws| Regexp.quote ws }.join('|')})$/)
706
+ end
707
+ def dont_postpone_semi
708
+ @dps||=~wants_semi_context
709
+ end
710
+
711
+ #NeverBlockKeyword=MethNameToken&-{:ident=>/^(return|break|next)$/}
712
+ #FakeBegin=KW('(')&-{:not_real? =>true}
713
+ #FakeEnd=KW(')')&-{:not_real? =>true}
714
+
715
+ #rule format:
716
+ # -[syntax pattern_matchers.+, lookahead.-]>>node type
717
+
718
+ DotCall=stack_monkey("DotCall",4,CallNode){|stack|
719
+ left,dot=*stack.slice!(-4..-3)
720
+ right=stack[-2]
721
+
722
+ right.startline=left.startline
723
+ right.set_receiver! left
724
+ }
725
+
726
+ Lvalue=(VarNode|CallSiteNode|BracketsGetNode|CommaOpNode|
727
+ ParenedNode|ConstantNode|UnaryStarNode)&-{:lvalue =>true}
728
+
729
+ BareMethod=MethNameToken|(LiteralNode&-{:bare_method=>true})
730
+
731
+ #BEGINWORDLIST=RubyLexer::BEGINWORDLIST + %w"( [ {"
732
+ ENDWORDLIST=%w"end ) ] }"
733
+ ENDWORDS=ENDWORDLIST.map{|x| Regexp.quote x}.join('|')
734
+ BEGINWORDS=RubyLexer::BEGINWORDS
735
+ INNERBOUNDINGWORDS=RubyLexer::INNERBOUNDINGWORDS
736
+
737
+ BEGIN2END={"{"=>"}", "("=>")", "["=>"]", BEGINWORDS=>"end"}
738
+ def beginsendsmatcher
739
+ @bem||=
740
+ /^(#{BEGINWORDS}|#{ENDWORDS})$/
741
+ end
742
+
743
+ MULTIASSIGN=UnaryStarNode|CommaOpNode|ParenedNode
744
+ WITHCOMMAS=UnaryStarNode|CommaOpNode|(CallSiteNode&-{:with_commas=>true})
745
+ #(CallSiteNode&-{:real_parens=>false, :args=>-{:size=>~0.reg}})
746
+
747
+ BEGINAFTEREQUALS=
748
+ BeginNode&
749
+ -{:after_equals =>nil}&-{:non_empty=>true}
750
+ BEGINAFTEREQUALS_MARKED=
751
+ BeginNode&
752
+ -{:after_equals =>true}&-{:non_empty=>true}
753
+
754
+ LHS_COMMA=Op('lhs,',true)#&-{:tag => :lhs}
755
+ RHS_COMMA=Op('rhs,',true)#&-{:tag => :rhs}
756
+ #PARAM_COMMA=Op('param,',true)#&-{:tag => :param}
757
+ def FUNCLIKE_KEYWORD
758
+ KeywordToken&-{:ident=>@funclikes}
759
+ end
760
+ IGN_SEMI_BEFORE=KW(/^(#{RubyLexer::INNERBOUNDINGWORDS.gsub(/(rescue|then)\|/,'')[1...-1]}|end|[)}\]])$/)|EoiToken
761
+ IGN_SEMI_AFTER=KW(/^(begin|[;:({|]|then|do|else|ensure)$/)|BlockFormalsNode
762
+
763
+ #for use in lookback patterns
764
+ OPERATORLIKE_LB=OperatorToken|
765
+ KW(/^(not | defined\? | rescue3 | .*[@,] | [~!;\(\[\{?:] | \.{1,3} | :: | => | ![=~])$/x)|
766
+ KW(%r{^( \*\*? | << | >> | &&? | \|\|? | \^ | % | / | - | \+ )?=$}x)|
767
+ KW(BEGINWORDS)|KW(/^#{INNERBOUNDINGWORDS}$/)|RescueHeaderNode|StartToken|
768
+ GoalPostToken|BlockFormalsNode|AssignmentRhsListStartToken
769
+
770
+ #for use in lookahead patterns
771
+ def VALUELIKE_LA
772
+ KW(@varlikes)|NumberToken|SymbolToken|StringToken|UNOP|DEFOP|
773
+ KW(/^[({]$/x)|VarNameToken|MethNameToken|HerePlaceholderToken|
774
+ KW(BEGINWORDS)|FUNCLIKE_KEYWORD()|AssignmentRhsListStartToken
775
+
776
+ #why isn't this a sufficient implementation of this method:
777
+ # KW('(')
778
+ #in which case, '(' can be made the highest precedence operator instead
779
+ end
780
+ LOWEST_OP=KW(/^(#{ENDWORDS})$/)|KW(/^#{INNERBOUNDINGWORDS.sub('rescue|','')}$/)|
781
+ EoiToken|GoalPostToken|AssignmentRhsListEndToken
782
+
783
+ RESCUE_BODY=-[Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,]
784
+
785
+ RESCUE_OP=Op('rescue') #|(KW('rescue')&-{:infix=>true})
786
+
787
+ RESCUE_KW=KW('rescue')&-{:infix=>nil}
788
+
789
+ inspect_constant_names if respond_to? :inspect_constant_names
790
+
791
+ (constants-%w[RawOpNode ParenedNode SequenceNode LiteralNode Node MisparsedNode]).each{|k|
792
+ if /Node$/===k.to_s
793
+ remove_const k
794
+ end
795
+ }
796
+
797
+ def RULES
798
+ lower_op= lower_op()
799
+
800
+
801
+ result=
802
+ [-[StartToken.lb, Expr.-, EoiToken.la]>>:accept,
803
+ -[EoiToken]>>:error,
804
+ ]+
805
+
806
+ #these must be the lowest possible priority, and hence first in the rules list
807
+ # BEGIN2END.map{|_beg,_end|
808
+ # -[KW(_beg), (KW(_beg)|KW(_end)).~.*, KW(_end), KW(/^(do|\{)$/).~.la]>>MisparsedNode
809
+ # }+
810
+
811
+ [
812
+ # -[UNOP, Expr, lower_op]>>UnOpNode,
813
+ # -[DEFOP, ParenedNode]>>UnOpNode,
814
+ # -[Op(/^(?:unary|lhs|rhs)\*$/), ValueNode, lower_op]>>UnaryStarNode,
815
+
816
+ # -[Op('=',true)|KW(/^(rescue|when|\[)$/)|Op(/,$/,true),
817
+ # Op(/^(?:unary|rhs)\*$/), ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
818
+ # -[MethNameToken|FUNCLIKE_KEYWORD(), KW('('),
819
+ # Op(/^(?:unary|rhs)\*$/), ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
820
+ #star should not be used in an lhs if an rhs or param list context is available to eat it.
821
+ #(including param lists for keywords such as return,break,next,rescue,yield,when)
822
+
823
+ # -[Op(/^(?:unary|lhs)\*$/), (GoalPostToken|Op(/,$/,true)|KW(/^(in|[=)|;])$/)).la]>>DanglingStarNode, #dangling *
824
+ # -[Op(/,$/,true), (GoalPostToken|KW(/^(in|[=)|;])$/)).la]>> #dangling ,
825
+ # stack_monkey("DanglingComma",1,DanglingCommaNode){|stack|
826
+ # dcomma=DanglingCommaNode.new
827
+ # dcomma.offset=stack.last.offset
828
+ # stack.push dcomma, stack.pop
829
+ # },
830
+ #hmmm.... | in char classes above looks useless (predates GoalPostToken)
831
+
832
+ -[Expr, Op|KW_Op, Expr, lower_op]>>RawOpNode, #most operators
833
+
834
+ #assignment
835
+ # -[Lvalue, MODIFYASSIGNOP, Expr, lower_op]>>AssignNode,
836
+ # -[Lvalue, Op('=',true), AssignmentRhsNode, lower_op]>>AssignNode,
837
+ # -[AssignmentRhsListStartToken, Expr, AssignmentRhsListEndToken]>>AssignmentRhsNode,
838
+
839
+ # a = b rescue c acts like a ternary,,,
840
+ #provided that both a and b are not multiple and b
841
+ #(if it is a parenless callsite) has just 1 param
842
+ # -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
843
+ # Op('rescue3'), Expr, lower_op]>>AssignNode,
844
+ # -[Lvalue, Op('=',true), AssignmentRhsNode, Op('rescue3'), Expr, lower_op]>>AssignNode,
845
+
846
+ # -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
847
+ # Op('rescue3',true).la]>>:shift,
848
+
849
+ # -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
850
+ # RESCUE_OP.la] >>
851
+ # stack_monkey("rescue3",1,Op('rescue3',true)){|stack|
852
+ # resc=stack.last.dup
853
+ # resc.ident += '3'
854
+ # stack[-1]=resc
855
+ # },
856
+ #relative precedence of = and rescue are to be inverted if rescue
857
+ #is to the right and assignment is not multiple.
858
+
859
+ #if assignment rhs contains commas, don't reduce til they've been read
860
+ #(unless we're already on an rhs)
861
+ # -[(Op('=',true)|Expr).~.lb, Lvalue, Op('=',true), Expr, RHS_COMMA.la]>>:shift,
862
+ # -[RHS_COMMA.lb, Lvalue, Op('=',true), Expr, RHS_COMMA.la ]>>AssignNode,
863
+ # -[ValueNode, LHS_COMMA, ValueNode, Op('=',true).la]>>CommaOpNode,
864
+ #relative precedence of = and lhs/rhs , are to be inverted.
865
+
866
+ #mark parentheses and unary stars that come after lhs commas
867
+ # -[LHS_COMMA, (UnaryStarNode|ParenedNode)&~-{:after_comma =>true}, Op('=',true)]>>
868
+ # stack_monkey("after_comma",3,(UnaryStarNode|ParenedNode)&-{:after_comma =>true}){|stack|
869
+ # stack[-3].after_comma=true},
870
+ #mebbe this should be a lexer hack?
871
+
872
+ # -[#(OPERATORLIKE_LB&~Op('=',true)).lb,
873
+ # Expr, RESCUE_OP, Expr, lower_op]>>RescueOpNode,
874
+
875
+ #dot and double-colon
876
+ # -[DoubleColonOp, VarNode, lower_op]>>ConstantNode,#unary ::
877
+ # -[Expr, DotOp, CallNode, lower_op]>>DotCall, #binary .
878
+ # -[Expr, DoubleColonOp, CallNode, lower_op]>>DotCall, #binary ::
879
+ # -[Expr, DoubleColonOp, VarNode, lower_op]>>ConstantNode,#binary ::
880
+ #lower_op constraints on lookahead are unnecessary in above 4 (unless I give openparen a precedence)
881
+
882
+ # -[Expr, "?", Expr, ":", Expr, lower_op]>>TernaryNode,
883
+
884
+
885
+ # -[MethNameToken, '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>CallNode,
886
+ # -[FUNCLIKE_KEYWORD, '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>KWCallNode,
887
+
888
+ -[#(OPERATORLIKE_LB&
889
+ (MethNameToken|FUNCLIKE_KEYWORD()).~.lb,
890
+ '(', Expr, KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true}), KW('do').~.la]>>ParenedNode,
891
+
892
+ # -[#(OPERATORLIKE_LB&
893
+ # (MethNameToken|FUNCLIKE_KEYWORD()).~.lb,
894
+ # '(', KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true}), KW('do').~.la]>>VarLikeNode, #(), alias for nil
895
+ #constraint on do in above 2 rules is probably overkill
896
+
897
+ # -[ValueNode, Op(/,$/,true), ValueNode, lower_op]>>CommaOpNode,
898
+
899
+ -[(OPERATORLIKE_LB&dont_postpone_semi).lb,
900
+ Expr, ';', Expr, lower_op]>>SequenceNode,
901
+
902
+
903
+ # -[#(OPERATORLIKE_LB&~KW(')')).lb,
904
+ # '{', (CommaOpNode|ArrowOpNode).-, '}']>>HashLiteralNode, #-40
905
+
906
+ # -[KW(')').lb, 'do', BlockFormalsNode.-, Expr.-, 'end']>>BlockNode,
907
+ #this does {} as well... converted to do...end
908
+ #rubylexer handles the 'low precedence' of do...end
909
+
910
+ # -[GoalPostToken, Expr.-, GoalPostToken]>>BlockFormalsNode,
911
+ #rubylexer disambiguated operator vs keyword '|'
912
+
913
+ # -[/^(while|until)$/, Expr, /^([:;]|do)$/, Expr.-, 'end']>>LoopNode,
914
+
915
+ # -[/^(if|unless)$/, Expr, /^(;|then|:)$/,
916
+ # Expr.-, ElsifNode.*, ElseNode.-, 'end'
917
+ # ]>>IfNode,
918
+
919
+ # -['else', Expr.-, KW(/^(ensure|end)$/).la]>>ElseNode,
920
+
921
+ # -['elsif', Expr, /^(;|then|:)$/, Expr.-,
922
+ # KW(/^(end|else|elsif)$/).la
923
+ # ]>>ElsifNode,
924
+
925
+ # -['module', ConstantNode|VarNode, KW(/^(;|::)$/).~.la]>>
926
+ # stack_monkey(1,KW(';')){|stack| #insert ; at end of module header if none was present
927
+ # stack.push KeywordToken.new(';'), stack.pop
928
+ # },
929
+ # -['module', ConstantNode|VarNode, ';', RESCUE_BODY, 'end']>>ModuleNode,
930
+ # -['class', Expr, ';', RESCUE_BODY, 'end']>>ClassNode,
931
+ # -['class', Expr, Op('<'), Expr, KW(';').~.la]>>:shift,
932
+ # -['class', Op('<<'), Expr, ';', RESCUE_BODY, 'end']>>MetaClassNode, #-30
933
+
934
+ # -['alias', BareMethod|VarNode, BareMethod|VarNode]>>AliasNode,
935
+ # -['undef', BareMethod]>>UndefNode,
936
+ # -[UndefNode, Op(',',true), BareMethod]>>UndefNode,
937
+
938
+ # -['def', CallSiteNode, Op('=').-, KW(';'), RESCUE_BODY,
939
+ # Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,
940
+ # 'end'
941
+ # ]>>MethodNode,
942
+
943
+ # -['begin', RESCUE_BODY,
944
+ # Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,
945
+ # 'end'
946
+ # ]>>BeginNode,
947
+
948
+ # -[Op('=',true), BEGINAFTEREQUALS, RESCUE_OP.la]>>
949
+ # stack_monkey("begin after equals",2,BEGINAFTEREQUALS_MARKED){ |stack| stack[-2].after_equals=true },
950
+ #this is bs. all for an extra :begin in the parsetree
951
+
952
+ # -[(KW(/^(;|begin)$/)|RescueNode).lb, #ParenedNode|RescueOpNode|BeginNode used to be here too
953
+ # RESCUE_KW, KW('=>').-, Expr.-, /^([:;]|then)$/,
954
+ # ]>>RescueHeaderNode,
955
+ # -[ RescueHeaderNode, Expr.-, KW(';').-, (KW(/^(else|ensure|end)$/)|RESCUE_KW).la
956
+ # ]>>RescueNode,
957
+
958
+ # -['ensure', Expr.-, KW('end').la]>>EnsureNode,
959
+
960
+ # -['[', Expr.-, ']']>>ArrayLiteralNode, #-20
961
+
962
+ # -[Expr, '[', Expr.-, ']']>>BracketsGetNode,
963
+
964
+ # -[HereDocNode, StringToken+1, StringToken.~.la]>>StringCatNode,
965
+ # -[(OPERATORLIKE_LB&~(StringToken|HereDocNode)).lb, StringToken+2, StringToken.~.la]>>StringCatNode,
966
+ # -[(OPERATORLIKE_LB&~(StringToken|HereDocNode)).lb, StringToken, StringToken.~.la]>>StringNode,
967
+ #includes regexp, wordlist, backquotes
968
+
969
+ # -['case', Expr.-, KW(';').-, WhenNode.*, ElseNode.-, 'end']>>CaseNode,
970
+
971
+ # -['when', Expr, /^([:;]|then)$/, Expr.-,
972
+ # KW(/^(when|else|end)$/).la
973
+ # ]>>WhenNode,
974
+
975
+ # -['for', Expr, 'in', Expr, /^([:;]|do)$/, Expr.-, 'end']>>ForNode,
976
+
977
+ #semicolon cleanup....
978
+ # -[(OPERATORLIKE_LB&dont_postpone_semi).lb,Expr, ';', IGN_SEMI_BEFORE.la] \
979
+ # >>delete_monkey(2,"semi_cleanup_before_ISB"),
980
+ # -[Expr, ';', KW('then').la] >>delete_monkey(2,"semi_cleanup_before_then"),
981
+ # -[dont_postpone_semi.lb, Expr, ';', RescueNode] >>delete_monkey(3,"semi_cleanup_before_rescue"), #-10
982
+ # -[IGN_SEMI_AFTER.lb, ';'] >>delete_monkey(2,"semi_cleanup_after_oplike"),
983
+ # -[(StartToken|RescueHeaderNode).lb, ';' ] >>delete_monkey(2,"semi_cleanup_after_rescue"),
984
+ #this rule is somewhat more forgiving than matz' parser...
985
+ #not all semicolons after :, (, and { keywords should
986
+ #be ignored. some should cause syntax errors.
987
+
988
+
989
+ #comma cleanup....
990
+ # -[Op(/,$/,true), KW(/^([}\]])$/).la] >>delete_monkey(2, "comma_cleanup"),
991
+ #likewise, this is somewhat too forgiving.
992
+ #some commas before } or ] should cause syntax errors
993
+
994
+ #turn lvalues into rvalues if not followed by an assignop
995
+ # -[-{:lvalue =>true}, (Op('=',true)|MODIFYASSIGNOP|LHS_COMMA).~.la]>>
996
+ # stack_monkey("lval2rval",2,-{:lvalue =>nil}){|stack|
997
+ # stack[-2].lvalue=nil
998
+ # },
999
+
1000
+ #expand the = into a separate token in calls to settors (after . or ::).
1001
+ #but not in method headers
1002
+ # -[(OPERATORLIKE_LB&~KW('def')).lb, Expr, DotOp|DoubleColonOp,
1003
+ # (MethNameToken&-{:has_equals=>true}).la]>>
1004
+ # stack_monkey("expand_equals",1,CallNode){|stack|
1005
+ # methname=stack.pop
1006
+ # methname.ident.chomp!('=')
1007
+ # offset=methname.offset+methname.ident.size
1008
+ # stack.push(
1009
+ # CallNode.new(methname,nil,nil,nil,nil),
1010
+ # OperatorToken.new('=',offset)
1011
+ # )
1012
+ # },
1013
+
1014
+ -[NumberToken|SymbolToken]>>LiteralNode,
1015
+
1016
+ #lexer does the wrong thing with -22**44.5, making the - part
1017
+ #of the first number token. it's actually lower precedence than
1018
+ #**... this rule fixes that problem.
1019
+ #in theory, unary - is lower precedence than ., ::, and [] as well, but
1020
+ #that appears not to apply to unary - in numeric tokens
1021
+ # -[NumberToken&-{:negative=>true}, Op('**').la]>>
1022
+ # stack_monkey("fix_neg_exp",2,Op("-@",true)){|stack|
1023
+ # #neg_op.unary=true
1024
+ # num=stack[-2]
1025
+ # op=OperatorToken.new("-@",num.offset)
1026
+ # op.startline=num.startline
1027
+ # stack[-2,0]=op
1028
+ # num.ident.sub!(/\A-/,'')
1029
+ # num.offset+=1
1030
+ # },
1031
+
1032
+ #treat these keywords like (rvalue) variables.
1033
+ # -[@varlikes]>>VarLikeNode,
1034
+
1035
+ #here docs
1036
+ # -[HerePlaceholderToken]>>HereDocNode,
1037
+ # -[HereBodyToken.la]>>delete_monkey(1,"delete_here_body"), ##this is ridiculous. this should be a lexer hack?
1038
+
1039
+ # -[VarNameToken]>>VarNode,
1040
+
1041
+
1042
+ ]
1043
+
1044
+ if @rubyversion >= 1.9
1045
+ result.concat [
1046
+ # -['->', ParenedNode.-, 'do', Expr.-, 'end']>>ProcLiteralNode,
1047
+ # -['->', VarLikeNode["nil",{:@value=>nil}].reg, 'do', Expr.-, 'end']>>ProcLiteralNode,
1048
+ -[(DotOp|DoubleColonOp).lb, '(',Expr.-,')', BlockNode.-, KW('do').~.la]>>CallNode,
1049
+ ]
1050
+ end
1051
+
1052
+ return result
1053
+ end
1054
+
1055
+ if defined? END_ATTACK
1056
+ module Reducer; end
1057
+ include Reducer
1058
+ end
1059
+
1060
+ def signature
1061
+ RedParse.signature(class<<self; ancestors end)
1062
+ end
1063
+ def RedParse.signature(ancs=ancestors)
1064
+ [ancs.map{|m| m.name},
1065
+ Digest::SHA256.file(__FILE__),
1066
+ Digest::SHA256.file(__FILE__.sub(/\.rb\z/,"/node.rb")),
1067
+ ]
1068
+ end
1069
+
1070
+ def initialize(input,name="(eval)",line=1,lvars=[],options={})
1071
+ @rubyversion=options[:rubyversion]||1.8
1072
+
1073
+ encoding=options[:encoding]||:ascii
1074
+ encoding=:binary if @rubyversion<=1.8
1075
+ cache=Cache.new(
1076
+ File===input,name,
1077
+ :line,line,:encoding,encoding,:locals,lvars.sort.join(","),
1078
+ @rubyversion, :/, *signature
1079
+ )
1080
+ cache_mode=options[:cache_mode]||:read_write
1081
+ raise ArgumentError unless /^(?:read_(?:write|only)|write_only|none)$/===cache_mode.to_s
1082
+ read_cache= /read/===cache_mode.to_s
1083
+ input.binmode if input.respond_to? :binmode
1084
+ if read_cache and cache and result=cache.get(input)
1085
+ @cached_result=result
1086
+ @write_cache=nil
1087
+ return
1088
+ end
1089
+ if /write/===cache_mode.to_s
1090
+ @write_cache,@input= cache,input
1091
+ else
1092
+ @write_cache=nil
1093
+ end
1094
+
1095
+ if Array===input
1096
+ def input.get1token; shift end
1097
+ @lexer=input
1098
+ if @rubyversion>=1.9
1099
+ @funclikes=RubyLexer::RubyLexer1_9::FUNCLIKE_KEYWORDS
1100
+ @varlikes=RubyLexer::RubyLexer1_9::VARLIKE_KEYWORDS
1101
+ else
1102
+ @funclikes=RubyLexer::FUNCLIKE_KEYWORDS
1103
+ @varlikes=RubyLexer::VARLIKE_KEYWORDS
1104
+ end
1105
+ else
1106
+ @lexer=RubyLexer.new(name,input,line,0,:rubyversion=>@rubyversion,:encoding=>encoding)
1107
+ @funclikes=@lexer::FUNCLIKE_KEYWORDS()
1108
+ @varlikes=@lexer::VARLIKE_KEYWORDS()
1109
+ lvars.each{|lvar| @lexer.localvars[lvar]=true }
1110
+ encoding=@lexer.encoding_name_normalize(encoding.to_s).to_sym
1111
+ warn "#{encoding} encoding won't really work right now" if RubyLexer::NONWORKING_ENCODINGS.include? encoding
1112
+ end
1113
+ @funclikes=/#@funclikes|^->$/ if @rubyversion>=1.9
1114
+ @filename=name
1115
+ @min_sizes={}
1116
+ @compiled_rules={}
1117
+ @moretokens=[]
1118
+ @unary_or_binary_op=/^[-+]$/
1119
+ # @rules=self.expanded_RULES
1120
+ @precedence=self.PRECEDENCE
1121
+ @RIGHT_ASSOCIATIVE=self.RIGHT_ASSOCIATIVE
1122
+ if defined? END_ATTACK
1123
+ compile
1124
+ end
1125
+ @saw_item_that=nil
1126
+ @print_filter=proc{true}
1127
+ end
1128
+
1129
+ attr_accessor :lexer, :print_filter
1130
+ attr :rubyversion
1131
+
1132
+ def get_token(recursing=false)
1133
+ unless @moretokens.empty?
1134
+ @last_token=@moretokens.shift
1135
+ p @last_token if ENV['PRINT_TOKENS'] && @print_filter[@last_token] and not recursing
1136
+ return @last_token
1137
+ end
1138
+
1139
+ rpt=ENV['RAW_PRINT_TOKENS']
1140
+ begin
1141
+ result=@lexer.get1token or break
1142
+ p result if rpt and @print_filter[result]
1143
+
1144
+ #set token's line
1145
+ result.startline= @endline||=1
1146
+ #result.endline||=@endline if result.respond_to? :endline=
1147
+
1148
+ if result.respond_to?(:as) and as=result.as
1149
+ #result=make_kw(as,result.offset)
1150
+ #result.originally=result.ident
1151
+ if OperatorToken===result #or KeywordToken===result
1152
+ result=result.dup
1153
+ result.ident=as
1154
+ else
1155
+ result2=make_kw(as,result.offset)
1156
+ result2.startline=result.startline
1157
+ result2.endline=result.endline
1158
+ result=result2
1159
+ end
1160
+ result.not_real! if result.respond_to? :not_real!
1161
+ else
1162
+
1163
+ case result
1164
+ when FileAndLineToken #so __FILE__ and __LINE__ can know what their values are
1165
+ @file=result.file
1166
+ @endline=result.line
1167
+ redo
1168
+
1169
+ when OperatorToken
1170
+ if @unary_or_binary_op===result.ident and result.unary || result.tag==:unary
1171
+ result=result.dup
1172
+ result.ident+="@"
1173
+ end
1174
+
1175
+ #more symbol table maintenance....
1176
+ when KeywordToken
1177
+ case name=result.ident
1178
+
1179
+ when /^(#{BINOP_KEYWORDS.join '|'})$/o #should be like this in rubylexer
1180
+ unless result.has_end?
1181
+ orig=result
1182
+ result=OperatorToken.new(name,result.offset)
1183
+ result.endline=orig.endline
1184
+ end
1185
+ when "|";
1186
+ orig=result
1187
+ result=GoalPostToken.new(result.offset) #is this needed still? (yes)
1188
+ result.endline=orig.endline
1189
+ when "__FILE__"; #I wish rubylexer would handle this
1190
+ #class<<result; attr_accessor :value; end
1191
+ assert result.value==@file.dup
1192
+ when "__LINE__"; #I wish rubylexer would handle this
1193
+ #class<<result; attr_accessor :value; end
1194
+ assert result.value==@endline
1195
+ else
1196
+ result=make_kw name,result.offset if defined? SPECIALIZED_KEYWORDS
1197
+ #warning, this may discard information stored in instance vars of result
1198
+ end
1199
+
1200
+ when StringToken,HerePlaceholderToken
1201
+ @endline=result.endline
1202
+
1203
+ when EoiToken; break
1204
+ when HereBodyToken;
1205
+ @endline=result.endline
1206
+ break
1207
+ when AssignmentRhsListStartToken; break
1208
+ when AssignmentRhsListEndToken; break
1209
+ when IgnoreToken; redo
1210
+ end
1211
+ end
1212
+ end while false
1213
+ p result if ENV['PRINT_TOKENS'] && @print_filter[@last_token] unless recursing
1214
+
1215
+ #ugly weak assertion
1216
+ assert result.endline==@endline unless result.ident==';' && result.endline-1==@endline or EoiToken===result
1217
+
1218
+ return @last_token=result
1219
+ end
1220
+
1221
+ def unget_tokens(*tokens)
1222
+ @moretokens=tokens.concat @moretokens
1223
+ end
1224
+
1225
+ def unget_token(token)
1226
+ @moretokens.unshift token
1227
+ end
1228
+
1229
+ =begin
1230
+ self.LOOKAHEAD_CLASSES.each_with_index{|classes,i|
1231
+ case classes
1232
+ when Class: huh
1233
+ when Array: classes.flatten.each{huh}
1234
+ else
1235
+ end
1236
+ }
1237
+ =end
1238
+
1239
+ # def fixup_multiple_assignments!; end
1240
+ end
1241
+
1242
+
1243
+ if __FILE__==$0
1244
+ #this code has moved to bin/redparse; really, all this should just go away
1245
+ require 'problemfiles'
1246
+ class NeverExecThis<RuntimeError; end
1247
+
1248
+ def arraydiff(a,b)
1249
+ a==b and return [a,false]
1250
+ (Array===a or a=[a])
1251
+ result= a.dup
1252
+ diff=false
1253
+ size= a.size >= b.size ? a.size : b.size
1254
+ size.times{|i|
1255
+ ai=a[i]
1256
+ bi=b[i]
1257
+ if Array===ai and Array===bi
1258
+ result_i,diff_i= arraydiff(ai,bi)
1259
+ diff||=diff_i
1260
+ result[i]=result_i
1261
+ elsif ai!=bi
1262
+ next if Regexp===ai and ai.to_s==bi.to_s and
1263
+ ai.options==bi.options
1264
+ diff=true
1265
+ result[i]={ai=>bi}
1266
+ elsif ai.nil?
1267
+ result[i]={'size mismatch'=>"#{a.size} for #{b.size}"} if a.size!=b.size
1268
+ diff=true
1269
+ end
1270
+ if i.nonzero? and Hash===result[i] and Hash===result[i-1]
1271
+ old=result[i-1]
1272
+ oldkeys=old.keys
1273
+ oldvals=old.values
1274
+ if Reg::Subseq===oldkeys.first
1275
+ oldkeys=oldkeys.children
1276
+ oldval=oldvals.children
1277
+ end
1278
+ result[i-1..i]=[ {-[*oldkeys+result[i].keys]=>-[*oldvals+result[i].values]} ]
1279
+ end
1280
+ }
1281
+ return result,diff
1282
+ end
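For reference, arraydiff keeps matching elements as-is and replaces each mismatched leaf with an {old=>new} hash, returning that structure plus a changed? flag. Hand-checked examples (the exact shape may differ for the Regexp and size-mismatch edge cases):

p arraydiff([1, [2, 3]], [1, [2, 4]])  #=> [[1, [2, {3=>4}]], true]
p arraydiff([:a, :b], [:a, :b])        #=> [[:a, :b], false]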
1283
+
1284
+ output=:pp
1285
+ quiet=true
1286
+ while /^-/===ARGV.first
1287
+ case opt=ARGV.shift
1288
+ when "--"; break
1289
+ when "--pp"; output=:pp
1290
+ when "--lisp"; output=:lisp
1291
+ when "--parsetree"; output=:parsetree
1292
+ when "--vsparsetree"; output=:vsparsetree
1293
+ when "--vsparsetree2"; output=:vsparsetree2
1294
+ when "--update-problemfiles"; problemfiles=ProblemFiles.new
1295
+ when "-q"; quiet=true
1296
+ when "-v"; quiet=false
1297
+ when "-e"; inputs=[ARGV.join(" ")]; names=["-e"]; break
1298
+ else fail "unknown option: #{opt}"
1299
+
1300
+ end
1301
+ end
1302
+
1303
+ unless inputs
1304
+ if ARGV.empty?
1305
+ inputs=[STDIN.read]
1306
+ names=["-"]
1307
+ elsif ARGV.size==1 and (Dir.entries(ARGV.first) rescue false)
1308
+ names=Dir[ARGV.first+"/**/*.rb"]
1309
+ else
1310
+ names=ARGV.dup
1311
+ end
1312
+ inputs||=names.map{|name| File.open(name).read rescue nil}
1313
+ end
1314
+
1315
+ result=0
1316
+
1317
+ safety="BEGIN{raise NeverExecThis};BEGIN{throw :never_exec_this,1};\n"
1318
+ nullsafety="\n"
1319
+ safe_inputs=inputs.map{|input| safety+input}
1320
+
1321
+ inputs.each_index{|i|
1322
+ begin
1323
+
1324
+ input=inputs[i] or next
1325
+ name=names[i]
1326
+
1327
+ input=nullsafety+input
1328
+ #print name+"... "; STDOUT.flush
1329
+
1330
+ begin
1331
+ tree=nil
1332
+ if catch(:never_exec_this){
1333
+ tree=RedParse.new(input,name).parse; nil
1334
+ } #raise NeverExecThis
1335
+ # rescue RedParse::ParseError=>e
1336
+ # require 'pp'
1337
+ # pp e.stack[-[15,e.stack.size].min..-1]
1338
+ # raise
1339
+ # rescue NeverExecThis
1340
+ puts "RedParse attempted to execute parse data in #{name}"
1341
+ next
1342
+ end
1343
+ rescue Interrupt; exit 2
1344
+ rescue Exception=>e
1345
+ # puts e.backtrace.join("\n")
1346
+ e.message << " during parse of #{name}"
1347
+ # err=e.class.new(e.message+" during parse of #{name}")
1348
+ # err.set_backtrace e.backtrace
1349
+ problemfiles.push name if problemfiles
1350
+ raise e
1351
+ end
1352
+ tree or fail "parsetree was nil for #{name}"
1353
+
1354
+ case output
1355
+ when :pp
1356
+ require 'pp'
1357
+ pp tree
1358
+ when :lisp
1359
+ puts tree.to_lisp
1360
+ when :parsetree
1361
+ pp tree.to_parsetree
1362
+ when :vsparsetree,:vsparsetree2
1363
+ begin
1364
+ require 'rubygems'
1365
+ rescue Exception
1366
+ end
1367
+ require 'parse_tree'
1368
+ #require 'algorithm/diff'
1369
+ begin
1370
+ mine=tree.to_parsetree(:quirks)
1371
+ if IO===input
1372
+ input.rewind
1373
+ input=input.read
1374
+ end
1375
+ ryans=nil
1376
+ catch(:never_exec_this){
1377
+ ryans=ParseTree.new.parse_tree_for_string(safe_inputs[i],name); nil
1378
+ } and raise NeverExecThis
1379
+ delta,is_diff=arraydiff(mine,ryans)
1380
+ rescue NeverExecThis
1381
+ puts "ParseTree attempted to execute parse data in #{name}"
1382
+ next
1383
+ rescue Interrupt; exit 2
1384
+ rescue Exception=>e
1385
+ #raise( RuntimeError.new( "#{e} during to_parsetree of #{name}" ) )
1386
+ puts "error during to_parsetree of #{name}"
1387
+ problemfiles.push name if problemfiles
1388
+ raise
1389
+ end
1390
+ if output==:vsparsetree2
1391
+ if !quiet or is_diff
1392
+ puts "mine:"
1393
+ pp mine
1394
+ puts "ryans:" if is_diff
1395
+ pp ryans if is_diff
1396
+ end
1397
+ elsif !quiet or is_diff
1398
+ puts 'differences in '+name if is_diff
1399
+ pp delta
1400
+ end
1401
+ if is_diff
1402
+ result=1
1403
+ problemfiles.push name if problemfiles
1404
+ else
1405
+ puts "no differences in "+name
1406
+ problemfiles.delete name if problemfiles
1407
+ end
1408
+ end
1409
+
1410
+ rescue NeverExecThis
1411
+ puts "mysterious attempt to execute parse data in #{name}"
1412
+ next
1413
+ rescue Interrupt,SystemExit; exit 2
1414
+ rescue Exception=>e
1415
+ puts "#{e}:#{e.class}"
1416
+ puts e.backtrace.join("\n")
1417
+ #problemfiles.push name if problemfiles
1418
+ #raise
1419
+ ensure
1420
+ STDOUT.flush
1421
+ end
1422
+ }
1423
+ exit result
1424
+ end
1425
+
1426
+ =begin old todo:
1427
+ v merge DotCallNode and CallSiteNode and CallWithBlockNode
1428
+ v remove actual Tokens from parse tree...
1429
+ v split ParenedNode into ParenedNode + Rescue/EnsureNode
1430
+ x 'incomplete' subtrees such as ElseNode, ElsifNode, RescueNode
1431
+ x -should not appear in final output
1432
+ v split keywordopnode into loop and if varieties?
1433
+ =end
1434
+
1435
+ =begin old optimization opportunities:, ha!
1436
+ top of stack slot contains mostly keywords, specific node classes, and Expr
1437
+ lookahead slot contains mostly lower_op and keywords, with a few classes and inverted keywords
1438
+ -(lower_op is hard to optimize)
1439
+ if top of stack matcher is Expr, then the next matcher down is mostly keywords, with some operators
1440
+ class membership can be optimized to test of integer within a range
1441
+ keywords could be stored as symbols instead of strings
1442
+ a few rules may need exploding (eg, ensure) to spoon feed the optimizer
1443
+ make all Nodes descendants of Array
1444
+ =end
1445
+
1446
+ #todo:
1447
+ #each node should have a corresponding range of tokens
1448
+ #-in an (optional) array of all tokens printed by the tokenizer.
1449
+ #v test stack_monkey mods
1450
+ #v break ParenedNode into 2 (3?) classes
1451
+ #x invent BEGINNode/ENDNode? (what other keywords?)
1452
+ #v at least make BEGIN/END be KWCallNode
1453
+ #v replace VarNameToken with VarNode in parser
1454
+ #x convert raw rules to lists of vertex identities?
1455
+ #v DottedRule class
1456
+ #v ParserState class (set of DottedRules)
1457
+ #v MultiReduce
1458
+ #v MultiShift
1459
+ #v ParserState#evolve(identity)
1460
+ #v DottedRule#evolve(identity)
1461
+ #v RedParse#enumerate_states
1462
+ #v RedParse#enumerate_exemplars
1463
+ #v Node/Token.enumerate_exemplars
1464
+ #v Node/Token.identity_param
1465
+ #v rename #lvalue? => #lvalue
1466
+ #x likewise get rid of other oddly named identity params
1467
+ #v BareMethod,WITHCOMMAS,BEGINAFTEREQUALS should have predicate methods defined for them
1468
+ #v do something about BEGINAFTEREQUALS... lots of predicates, ugly to identify
1469
+ #v document identity parameters in nodes and tokens
1470
+ #operator and keyword tokens have some identity_param variations remaining...maybe?
1471
+ #xx all identity readers have to have writers as well (even if fake)
1472
+ #v sort out vertex identities... call identity_param in apt classes
1473
+ #convert identities<=>small ints
1474
+ #convert ParserStates<=>small ints
1475
+ #> lower_op/proc lookahead requires special action type with shift and reduce branches
1476
+ #x stack monkeys dictate some nodes appear in s/r table... which ones?
1477
+ #x some stack monkeys pushback nodes, action table must take take those as input
1478
+ #v retype GoalPostNode => GoalPostToken
1479
+ #v then, pushback* should go away
1480
+ #v build shift/reduce table
1481
+ #v build goto table
1482
+ #split tables into shift/reduce and goto....?
1483
+ #v integrate with c code generator
1484
+ #finish c code generator
1485
+ #code generator needs a way to deal with :
1486
+ #backtracking (to more than 1 node/token???)
1487
+ #actions (stack monkeys/lower_op)
1488
+ #every reduce requires b/ting thru the lookahead
1489
+ #garbage collection
1490
+ #sharing ruby objects between ruby code and generated c code
1491
+ #optimizer?
1492
+ #ruby code generator?
1493
+ #v what to do with :shift ?
1494
+ #what to do with :accept ?
1495
+ #what to do with :error ?
1496
+ #Node.create (used in generated code)
1497
+ #Node.create <= takes input directly from semantic stack
1498
+ #build Node.create param list generator
1499
+ #v names for rules, dotted rules, parser states, identities
1500
+ #x StartNode may be a problem... used by a stack monkey,
1501
+ #to remove extra ;s from the very beginning of input.
1502
+ #use a lexer hack instead?
1503
+ #v convert StartNode to StartToken?
1504
+ #convert names to numbers and numbers to names
1505
+ #for states, rules, vertex identities
1506
+ #in ruby and c (??)
1507
+ #x rule for HereBodyToken should be a lexer hack?
1508
+ #v stack monkeys should have names
1509
+ #how to handle a stack monkey whose 2nd parameter is not a single identity?
1510
+ #even reduces may not have enough info since 1 node class may have multiple identities
1511
+ #v RedParse constants should be named in inspect
1512
+ #v toplevel rule?
1513
+ #v semantic stack in generated c code should be a ruby array
1514
+ #x state stack should keep size of semantic stack at the time states are pushed,
1515
+ #so that i can restore semantic stack to former state when b-ting/reducing
1516
+ #urk, how do I know how many levels of state stack to pop when reducing?
1517
+ #in looping error rules, just scan back in semantic stack for rule start
1518
+ #in regular looping rules, transition to loop state is saved on a special stack
1519
+ #so that at reduce time, we can b/t to that point for a start
1520
+ #if rule contains only scalars, b/t is easy
1521
+ #else rule contains scalars and optionals:
1522
+ #scan for rule start vertex starting at highest node
1523
+ #on semantic stack that can contain it and working downward.
1524
+ #also, statically verify that relevant rules contain no collisions among first (how many?) matchers
1525
+
1526
+ #is lookahead in code generator even useful? my tables have built-in lookahead....
1527
+ #need hack to declare nonerror looping matchers as irrevocable (for speed, when reducing)
1528
+ #v assignmentRhsNode needs an identity_param for with_commas
1529
+ #v -** fixup and setter breakout rules need dedicated identity_params too
1530
+ # = rescue ternary is broken again now...
1531
+ #v instead of shift states and is_shift_state? to find them,
1532
+ #v i should have shift transitions. (transitions that imply a shift... in response to a token input.)
1533
+ #v all states will have 2 entry points, for shift and nonshift transitions.
1534
+ #split big table into goto(node) and sr(token) tables
1535
+ #in each state, most common sr action should be made default
1536
+ #unused entries in goto table can be ignored.
1537
+ #most common goto entries (if any) can be default.
1538
+ #is the change_index arg in stack_monkey calls really correct everywhere? what are
1539
+ #the exact semantics of that argument? what about stack_monkeys that change the stack size?
1540
+ #should there be another arg to keep track of that?
1541
+ #maybe rewrite stack_monkeys so they're a little clearer and easier to analyze (by hand)
1542
+ #MultiShift/MultiReduce are not supported actions in generate.rb
1543
+ #:accept/:error are not supported actions in generate.rb