redparse 0.8.4 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -0
  2. data/COPYING.LGPL +503 -158
  3. data/History.txt +192 -0
  4. data/Makefile +9 -0
  5. data/README.txt +72 -39
  6. data/bin/redparse +108 -14
  7. data/lib/miniredparse.rb +1543 -0
  8. data/lib/redparse.rb +971 -105
  9. data/lib/redparse/ReduceWithsFor_RedParse_1_8.rb +17412 -0
  10. data/lib/redparse/ReduceWithsFor_RedParse_1_9.rb +17633 -0
  11. data/lib/redparse/babynodes.rb +17 -0
  12. data/lib/redparse/babyparser.rb +17 -0
  13. data/lib/redparse/cache.rb +290 -6
  14. data/lib/redparse/compile.rb +6 -97
  15. data/lib/redparse/decisiontree.rb +1 -1
  16. data/lib/redparse/float_accurate_to_s.rb +30 -6
  17. data/lib/redparse/generate.rb +18 -0
  18. data/lib/redparse/node.rb +415 -124
  19. data/lib/redparse/parse_tree_server.rb +20 -2
  20. data/lib/redparse/problemfiles.rb +1 -1
  21. data/lib/redparse/pthelper.rb +17 -31
  22. data/lib/redparse/reg_more_sugar.rb +1 -1
  23. data/lib/redparse/replacing/parse_tree.rb +30 -0
  24. data/lib/redparse/replacing/ripper.rb +20 -0
  25. data/lib/redparse/replacing/ruby_parser.rb +28 -0
  26. data/lib/redparse/ripper.rb +393 -0
  27. data/lib/redparse/ripper_sexp.rb +153 -0
  28. data/lib/redparse/stackableclasses.rb +113 -0
  29. data/lib/redparse/version.rb +18 -1
  30. data/redparse.gemspec +29 -9
  31. data/rplt.txt +31 -0
  32. data/test/data/hd_with_blank_string.rb +3 -0
  33. data/test/data/pt_known_output.rb +13273 -0
  34. data/test/data/wp.pp +0 -0
  35. data/test/generate_parse_tree_server_rc.rb +17 -0
  36. data/test/rp-locatetest.rb +2 -2
  37. data/test/test_1.9.rb +338 -35
  38. data/test/test_all.rb +22 -3
  39. data/test/test_part.rb +32 -0
  40. data/test/test_redparse.rb +396 -74
  41. data/test/test_xform_tree.rb +18 -0
  42. data/test/unparse_1.9_exceptions.txt +85 -0
  43. data/test/unparse_1.9_exceptions.txt.old +81 -0
  44. metadata +71 -46
  45. data/Rakefile +0 -35
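The largest new source file is data/lib/miniredparse.rb; the +1,1543 hunk below shows its full contents. Based on the RedParse#initialize signature and #parse entry point visible in that hunk, a minimal usage sketch (argument order and option names are taken from the diff; actual 1.0.0 behavior is not verified here):

    require 'redparse'

    # input, name, line, local variables, options -- per the constructor shown below
    parser = RedParse.new("a = b + 1", "example.rb", 1, [],
                          :rubyversion => 1.8, :cache_mode => :none)
    tree = parser.parse          # a Node tree; NopNode for empty input
    puts tree.inspect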
@@ -0,0 +1,1543 @@
1
+ =begin
2
+ redparse - a ruby parser written in ruby
3
+ Copyright (C) 2008,2009, 2012, 2016 Caleb Clausen
4
+
5
+ This program is free software: you can redistribute it and/or modify
6
+ it under the terms of the GNU Lesser General Public License as published by
7
+ the Free Software Foundation, either version 3 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public License
16
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ =end
18
+
19
+
20
+
21
+ require 'forwardable'
22
+
23
+ require 'digest/sha2'
24
+
25
+ begin
26
+ require 'rubygems'
27
+ rescue LoadError=>e
28
+ #hope we don't need it
29
+ raise unless /rubygems/===e.message
30
+ end
31
+ require 'rubylexer'
32
+ require 'reg'
33
+ require 'reglookab'
34
+
35
+ require "redparse/node"
36
+ #require "redparse/decisiontree"
37
+ require "redparse/reg_more_sugar"
38
+ #require "redparse/generate"
39
+ require "redparse/cache"
40
+ #require "redparse/compile"
41
+
42
+ class RedParse
43
+
44
+
45
+
46
+ alias :dump :inspect # preserve old inspect functionality
47
+
48
+ # irb friendly #inspect/#to_s
49
+ def to_s
50
+ mods=class<<self;self end.ancestors-self.class.ancestors
51
+ mods=mods.map{|mod| mod.name }.join('+')
52
+ mods="+"<<mods unless mods.empty?
53
+ "#<#{self.class.name}#{mods}: [#{@input.inspect}]>"
54
+ end
55
+
56
+ alias :inspect :to_s
57
+
58
+ ####### generic stuff for parsing any(?) language
59
+ # include Nodes
60
+ class StackMonkey
61
+ def initialize(name,first_changed_index,and_expect_node,options={},&monkey_code)
62
+ first_changed_index=-first_changed_index if first_changed_index>0
63
+ @name,@first_changed_index,@and_expect_node,@monkey_code=
64
+ name,first_changed_index,and_expect_node,monkey_code
65
+ end
66
+
67
+ attr_reader :name, :first_changed_index, :and_expect_node, :monkey_code
68
+ alias hint and_expect_node
69
+ attr_accessor :exemplars
70
+
71
+ def [](stack)
72
+ result=@monkey_code[stack]
73
+ return result
74
+ end
75
+
76
+ def _dump depth
77
+ @name
78
+ end
79
+
80
+ def self._load str
81
+ Thread.current[:$RedParse_parser].undumpables[@name]
82
+ end
83
+
84
+ def action2c
85
+ #"return the whole thing on first call, just a goto stmt after that"
86
+ return " goto #@goto_label;\n" if defined? @goto_label
87
+
88
+ =begin
89
+ <<-E
90
+ #{@goto_label=@name.gsub(/[^a-z0-9_]/,'_')}:
91
+ monkey=rb_hash_get(undumpables,rb_cstr2str("#@name"));
92
+ rb_funcall(monkey,rb_intern("[]"),huh_stack);
93
+
94
+ /*recover from stackmonkey fiddling*/
95
+ for(i=0;i<#{-@first_changed_index};++i) {
96
+ rb_ary_unshift(lexer_moretokens,
97
+ rb_ary_pop(huh_semantic_stack));
98
+ rb_ary_pop(huh_syntax_stack);
99
+ }
100
+
101
+ goto #{Node===@and_expect_node ?
102
+ postreduceaction4this_state(@and_expect_node) :
103
+ shiftaction4this_state
104
+ };
105
+ E
106
+ =end
107
+ end
108
+ end
109
+ class DeleteMonkey<StackMonkey
110
+ def initialize(index,name)
111
+ index=-index if index>0
112
+ @index=index
113
+ super(name,index,nil){|stack| stack.delete_at( index )}
114
+ end
115
+ end
116
+ def stack_monkey(*args,&block) StackMonkey.new(*args,&block) end
117
+ def self.stack_monkey(*args,&block) StackMonkey.new(*args,&block) end
118
+ def delete_monkey(index,name) DeleteMonkey.new(index,name) end
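The StackMonkey and DeleteMonkey helpers above wrap a block that rewrites the parser stack in place; DeleteMonkey simply deletes one slot. A small sketch of that behavior using a plain Array in place of the real parser stack (illustrative only; the name string is made up):

    dm = RedParse::DeleteMonkey.new(2, "drop_second_from_top")
    stack = [:a, :b, :c, :d]
    dm[stack]      # runs the monkey block: stack.delete_at(-2)
    p stack        # => [:a, :b, :d]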
119
+
120
+ def evaluate rule
121
+ #dissect the rule
122
+ if false
123
+ rule=rule.dup
124
+ lookahead_processor=(rule.pop if Proc===rule.last)
125
+ node_type=rule.pop
126
+ else
127
+ Reg::Transform===rule or fail
128
+ node_type= rule.right
129
+ rule=rule.left.subregs.dup
130
+ lookahead_processor=(rule.pop if Proc|::Reg::LookAhead===rule.last)
131
+ lookback=rule[0]=rule[0].subregs[0] if ::Reg::LookBack===rule[0]
132
+ end
133
+
134
+ #index of data at which to start matching
135
+ i=@stack.size-1 #-1 because last element of @stack is always lookahead
136
+
137
+ #I could call this a JIT compiler, but that's a bit grandiose....
138
+ #more of a JIT pre-processor
139
+ compiled_rule=@compiled_rules[rule]||=
140
+ rule.map{|pattern|
141
+ String|Regexp===pattern ? KW(pattern) : pattern
142
+ }
143
+
144
+ #what's the minimum @stack size this rule could match?
145
+ rule_min_size=@min_sizes[compiled_rule]||=
146
+ compiled_rule.inject(0){|sum,pattern|
147
+ sum + pattern.itemrange.begin
148
+ }
149
+ i>=rule_min_size or return false
150
+
151
+ matching=[]
152
+
153
+ #actually try to match rule elements against each @stack element in turn
154
+ compiled_rule.reverse_each{|matcher|
155
+ i.zero? and fail
156
+ target=matching
157
+ #is this matcher optional? looping?
158
+ loop= matcher.itemrange.last.to_f.infinite?
159
+ minimum=matcher.itemrange.first
160
+ optional=minimum.zero?
161
+ matching.unshift target=[] if loop
162
+ if loop or optional
163
+ matcher=matcher.subregs[0]
164
+ end
165
+
166
+ begin
167
+ if matcher===@stack[i-=1] #try match
168
+ target.unshift @stack[i]
169
+ else
170
+ #if match failed, the whole rule fails
171
+ #unless this match was optional, in which case, ignore it
172
+ #or was looping and met its minimum
173
+ #but bump the data position back up, since the latest datum
174
+ #didn't actually match anything.
175
+ return false unless optional or loop&&target.size>=minimum
176
+ i+=1
177
+ matching.unshift nil unless loop
178
+ break
179
+ end
180
+ end while loop
181
+ }
182
+
183
+ matchrange= i...-1 #what elems in @stack were matched?
184
+
185
+ #give lookahead matcher (if any) a chance to fail the match
186
+ case lookahead_processor
187
+ when ::Reg::LookAhead
188
+ return false unless lookahead_processor.subregs[0]===@stack.last
189
+ when Proc
190
+ return false unless lookahead_processor[self,@stack.last]
191
+ end
192
+
193
+ #if there was a lookback item, don't include it in the new node
194
+ if lookback
195
+ matchrange= i+1...-1 #what elems in @stack were matched?
196
+ matching.shift
197
+ end
198
+
199
+
200
+ #replace matching elements in @stack with node type found
201
+ case node_type
202
+ when Class
203
+ node=node_type.create(*matching)
204
+ node.startline||=@stack[matchrange.first].startline
205
+ node.endline=@endline
206
+ @stack[matchrange]=[node]
207
+ when Proc,StackMonkey; node_type[@stack]
208
+ when :shift; return 0
209
+ when :accept,:error; throw :ParserDone
210
+ else fail
211
+ end
212
+
213
+ return true #let caller know we found a match
214
+
215
+
216
+ rescue Exception=>e
217
+ #puts "error (#{e}) while executing rule: #{rule.inspect}"
218
+ #puts e.backtrace.join("\n")
219
+ raise
220
+ end
221
+
222
+ class ParseError<RuntimeError
223
+ def initialize(msg,stack)
224
+ super(msg)
225
+ @stack=stack
226
+ if false
227
+ ranges=(1..stack.size-2).map{|i|
228
+ node=stack[i]
229
+ if node.respond_to? :linerange
230
+ node.linerange
231
+ elsif node.respond_to? :endline
232
+ node.endline..node.endline
233
+ end
234
+ }
235
+ types=(1..stack.size-2).map{|i| stack[i].class }
236
+ msg += "couldn't interpret #{types.inspect} at line ranges: #{ranges.inspect}"
237
+ end
238
+ super(msg)
239
+ end
240
+ attr :stack
241
+ end
242
+
243
+ def [](*args)
244
+ @stack.[](*args)
245
+ end
246
+
247
+ def []=(*args)
248
+ @stack.[]=(*args)
249
+ end
250
+
251
+ #try all possible reductions
252
+ def reduce
253
+ shift=nil
254
+ @rules.reverse_each{|rule|
255
+ shift=evaluate(rule) and break
256
+ }
257
+ return shift
258
+ end
259
+
260
+ def parse
261
+
262
+ #hack, so StringToken can know what parser it's called from
263
+ #so it can use it to parse inclusions
264
+ oldparser=Thread.current[:$RedParse_parser]
265
+ Thread.current[:$RedParse_parser]||=self
266
+
267
+ return @cached_result if defined? @cached_result
268
+
269
+ @rules||=expanded_RULES()
270
+ # @inputs||=enumerate_exemplars
271
+
272
+ @stack=[StartToken.new, get_token]
273
+ #last token on @stack is always implicitly the lookahead
274
+ catch(:ParserDone){ loop {
275
+ #try all possible reductions
276
+ next if reduce==true
277
+
278
+ #no rule can match current @stack, get another token
279
+ tok=get_token or break
280
+
281
+ #are we done yet?
282
+ #tok.nil? or EoiToken===tok && EoiToken===@stack.last and break
283
+
284
+ #shift our token onto the @stack
285
+ @stack.push tok
286
+ }}
287
+
288
+ @stack.size==2 and return result=NopNode.new #handle empty parse string
289
+
290
+ #unless the @stack is 3 tokens,
291
+ #with the last an Eoi, and first a StartToken
292
+ #there was a parse error
293
+ unless @stack.size==3
294
+ pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
295
+ top=MisparsedNode.new("(toplevel)", @stack[1...-1],'')
296
+ raise ParseError.new(top.msg,@stack)
297
+ end
298
+ EoiToken===@stack.last or fail
299
+ StartToken===@stack.first or fail
300
+
301
+ result= @stack[1]
302
+
303
+
304
+ #multiple assignment must be resolved
305
+ #afterwards by walking the parse tree.
306
+ #(because the relative precedences of = and ,
307
+ #are reversed in multiple assignment.)
308
+ # result.respond_to? :fixup_multiple_assignments! and
309
+ # result=result.fixup_multiple_assignments!
310
+
311
+ #relative precedence of = and rescue are also inverted sometimes
312
+ # result.respond_to? :fixup_rescue_assignments! and
313
+ # result=result.fixup_rescue_assignments!
314
+
315
+ #do something with error nodes
316
+ msgs=[]
317
+ result.walk{|parent,i,subi,node|
318
+ if node.respond_to? :error? and node.error?(@rubyversion)
319
+ msgs<< @filename+":"+node.blame.msg
320
+ false
321
+ else
322
+ true
323
+ end
324
+ } if result.respond_to? :walk #hack hack
325
+ result.errors=msgs unless msgs.empty?
326
+ #other types of errors (lexer errors, exceptions in lexer or parser actions)
327
+ #should be handled in the same way, but currently are not
328
+ # puts msgs.join("\n")
329
+
330
+ rescue Exception=>e
331
+ input=@lexer
332
+ if Array===input
333
+ STDERR.puts "error while parsing:"
334
+ STDERR.write input.pretty_inspect
335
+ input=nil
336
+ else
337
+ input=input.original_file
338
+ inputname=@lexer.filename
339
+ STDERR.puts "error while parsing #@filename:#@endline: <<< #{input if input.to_s.size<=1000} >>>"
340
+ end
341
+ e.backtrace.each{|l| p l }
342
+ raise
343
+ else
344
+ unless msgs.empty?
345
+ pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
346
+ raise RedParse::ParseError.new(msgs.join("\n"),@stack)
347
+ end
348
+
349
+ # result=NopNode.new if EoiToken===result
350
+ return result
351
+ ensure
352
+ @write_cache.put(@input,result) if @write_cache and result and !result.errors
353
+ @stack=nil
354
+ Thread.current[:$RedParse_parser]=oldparser
355
+ end
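#parse either returns the tree (attaching any node-level errors it finds) or raises the ParseError defined earlier when the stack cannot be reduced to a single expression. A hedged error-handling sketch; whether a given malformed snippet raises here or merely surfaces node-level errors depends on the grammar and lexer:

    begin
      tree = RedParse.new("def broken(", "(example)").parse
    rescue RedParse::ParseError => e
      warn e.message
      warn "items left on parser stack: #{e.stack.size}" if e.stack
    end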
356
+
357
+
358
+ #HIER=Class::FlattenedHierarchy.new *STACKABLE_CLASSES
359
+
360
+ def new_disabled_reduce
361
+ #@hier||=Class::FlattenedHierarchy.new *STACKABLE_CLASSES()
362
+ @reducer||=Reducer.new(@rules)
363
+
364
+ @reducer.reduce(@stack)
365
+ end #
366
+
367
+
368
+ #inline any subsequences in RULES right into the patterns
369
+ #reg should do this already, but current release does not
370
+ def expanded_RULES
371
+ result=RULES()
372
+ return result if (-[:foo, -[:bar]]).subregs.grep(Reg::Subseq).empty?
373
+ result.map!{|rule|
374
+ unless rule.left.subregs.grep(Reg::Subseq)
375
+ then rule
376
+ else
377
+ right=rule.right
378
+ rule=rule.left.subregs.dup
379
+ (rule.size-1).downto(0){|i|
380
+ if Reg::Subseq===rule[i]
381
+ rule[i,1]=rule[i].subregs
382
+ end
383
+ }
384
+ -rule>>right
385
+ end
386
+ }
387
+ end
388
+
389
+ ###### specific to parsing ruby
390
+
391
+
392
+ UCLETTER=RubyLexer::UCLETTER
393
+
394
+ LCLETTER=RubyLexer::LCLETTER
395
+ LETTER=RubyLexer::LETTER
396
+ LETTER_DIGIT=RubyLexer::LETTER_DIGIT
397
+
398
+ def vertices; self.class.constants.grep(Node|Token) end
399
+
400
+ def self.has_return_hash_fix? #is this needed? it's not used in this file....
401
+ rl=RubyLexer.new("","return {}.size")
402
+ return(
403
+ FileAndLineToken===rl.get1token and
404
+ MethNameToken===rl.get1token and
405
+ ImplicitParamListStartToken===rl.get1token and
406
+ WsToken===rl.get1token and
407
+ KeywordToken===rl.get1token and
408
+ KeywordToken===rl.get1token and
409
+ KeywordToken===rl.get1token and
410
+ MethNameToken===rl.get1token and
411
+ ImplicitParamListStartToken===rl.get1token and
412
+ ImplicitParamListEndToken===rl.get1token and
413
+ ImplicitParamListEndToken===rl.get1token and
414
+ EoiToken===rl.get1token
415
+ )
416
+ end
417
+
418
+ #see pickaxe, 1st ed, page 221
419
+ def RIGHT_ASSOCIATIVE
420
+ {
421
+ # "defined?"=>120.5,
422
+ "**"=>118,
423
+
424
+ "="=>105, "%="=>105, "/="=>105, "-="=>105, "+="=>105,
425
+ "|="=>105, "&="=>105, ">>="=>105, "<<="=>105, "*="=>105,
426
+ "&&="=>105, "||="=>105, "**="=>105, "^="=>105,
427
+
428
+
429
+ # "and"=>99, "or"=>99,
430
+
431
+ # "if"=>98, "unless"=>98, "while"=>98, "until"=>98, "rescue"=>98,
432
+
433
+ # "&&"=>109, "||"=>108,
434
+ }
435
+ end
436
+
437
+ def PRECEDENCE
438
+ {
439
+
440
+ # "("=>122, #method param list
441
+ # "{"=>122, "do"=>122, #blocks
442
+
443
+ "::"=>121, "."=>121,
444
+
445
+ # "defined?"=>120.5,
446
+
447
+ "["=>120, #[] []= methods
448
+
449
+ "!"=>119, "~"=>119,
450
+ "+@"=>119,
451
+
452
+ "**"=>118,
453
+
454
+ "-@"=>117,
455
+
456
+ "*"=>116, "/"=>116, "%"=>116,
457
+
458
+ "+"=>115, "-"=>115,
459
+
460
+ "<<"=>114, ">>"=>114,
461
+
462
+ "&"=>113,
463
+
464
+ "^"=>112, "|"=>112,
465
+
466
+ "<="=>111, ">="=>111, "<"=>111, ">"=>111,
467
+
468
+ "<=>"=>110, "=="=>110, "==="=>110,
469
+ "!="=>110, "=~"=>110, "!~"=>110,
470
+
471
+ "&&"=>109,
472
+
473
+ "||"=>108,
474
+
475
+ ".."=>107, "..."=>107,
476
+
477
+ "?"=>106, # ":"=>106, #not sure what to do with ":"
478
+
479
+ "unary&"=>105, #unary * and & operators
480
+ "lhs*"=>105, #this should remain above =
481
+ "lhs,"=>105,
482
+ "rescue3"=>105,
483
+
484
+ "="=>104, "%="=>104, "/="=>104, "-="=>104, "+="=>104,
485
+ "|="=>104, "&="=>104, ">>="=>104, "<<="=>104, "*="=>104,
486
+ "&&="=>104, "||="=>104, "**="=>104, "^="=>104,
487
+
488
+ "defined?"=>103,
489
+ "not"=>103,
490
+ ":"=>102, #but not when used as a substitute for 'then'
491
+
492
+ "=>"=>101,
493
+ "rhs,"=>100, #"call,"=>100, "array,"=>100, "param,"=>100,
494
+ ","=>100, "rhs*"=>100, "unary*"=>100,
495
+ #the 'precedence' of comma is somewhat controversial. it actually has
496
+ #several different precedences depending on which kind of comma it is.
497
+ #the precedence of , is higher than :, => and the assignment operators
498
+ #in certain (lhs) contexts. therefore, the precedence of lhs-comma should
499
+ #really be above "=".
500
+
501
+ #"unary" prefix function names seen has operators have this precedence
502
+ #but, rubylexer handles precedence of these and outputs fake parens
503
+ #to tell us how it's parsed
504
+
505
+ "or"=>99, "and"=>99,
506
+
507
+ "if"=>98, "unless"=>98, "while"=>98, "until"=>98,
508
+
509
+ "rescue"=>98,
510
+
511
+ ";"=>96,
512
+ }
513
+ end
514
+
515
+ module BracketsCall; end
516
+ Value= #NumberToken|SymbolToken|
517
+ #HerePlaceholderToken|
518
+ ValueNode&-{:lvalue =>nil}
519
+ Expr=Value
520
+
521
+ if defined? SPECIALIZED_KEYWORDS
522
+ class SpecializedKeywordToken<KeywordToken
523
+ def inspect
524
+ "#<"+self.class.name+">"
525
+ end
526
+ alias image inspect
527
+ end
528
+
529
+ KW2class={}
530
+
531
+ Punc2name={
532
+ "("=>"lparen", ")"=>"rparen",
533
+ "["=>"lbracket", "]"=>"rbracket",
534
+ "{"=>"lbrace", "}"=>"rbrace",
535
+ ","=>"comma",
536
+ ";"=>"semicolon",
537
+ "::"=>"double_colon",
538
+ "."=>"dot",
539
+ "?"=>"question_mark", ":"=>"colon",
540
+ "="=>"equals",
541
+ "|"=>"pipe",
542
+ "<<"=>"leftleft", ">>"=>"rightright",
543
+ "=>"=>"arrow",
544
+ }
545
+ end
546
+
547
+ def self.KW(ident)
548
+ if defined? SPECIALIZED_KEYWORDS
549
+ fail if /\\/===ident
550
+ orig_ident=ident
551
+ if Regexp===ident
552
+ list=ident.to_s[/\(?-mix:\^\((.*)\)\$\)/,1]
553
+
554
+ #pick apart any char class in ident
555
+ if open_bracket_idx=list.index(/([^\\]|^)\[/)
556
+ open_bracket_idx+=1 unless list[open_bracket_idx]=="["
557
+ close_bracket_idx=list.index(/[^\\]\]/,open_bracket_idx+1)
558
+ close_bracket_idx+=1 unless list[close_bracket_idx]=="]"
559
+ cclass=list.slice!(open_bracket_idx..close_bracket_idx)
560
+ cclass=cclass[1...-1]
561
+ cclass=cclass.scan( /[^\\]|\\./ )
562
+ cclass.map!{|ch| ch.size==1 ? ch : ch[1..1] }
563
+ end
564
+
565
+ #rest of it should be a list of words separated by |
566
+ list=list.split(/\|/).reject{|x| x==''}
567
+ list.concat cclass if cclass
568
+ list.map{|w|
569
+ w.gsub!(/\\/,'')
570
+ KW(w)
571
+ }.inject{|sum,kw| sum|kw}
572
+ else
573
+ fail unless String===ident
574
+ ident=Punc2name[ident] unless /^(?:(?!#{LETTER_DIGIT}).)+$/o===ident
575
+ fail "no name for #{orig_ident}" unless ident
576
+ eval %{
577
+ class Keyword_#{ident} < SpecializedKeywordToken
578
+ def ident; '#{orig_ident}' end
579
+ # def self.instance; @instance ||= allocate end
580
+ # def self.new; instance end
581
+ def initialize(offset)
582
+ @offset=offset
583
+ end
584
+ end
585
+ }
586
+ KW2class[ident]||=const_get("Keyword_#{ident}")
587
+ end
588
+ else
589
+ ident=case ident
590
+ when Integer; ident.chr
591
+ when String,Regexp; ident
592
+ else ident.to_s
593
+ end
594
+
595
+ return KeywordToken&-{:ident=>ident}
596
+ end
597
+ end
598
+ def KW(ident); self.class.KW(ident) end
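When SPECIALIZED_KEYWORDS is not defined, KW() builds a reg-gem pattern (KeywordToken & -{:ident=>...}) that is later applied with === inside evaluate. A rough illustration, assuming the token classes resolve as they do inside this file:

    dot = RedParse.KW('.')
    dot === KeywordToken.new('.', 0)    # => true   (matches on :ident)
    dot === KeywordToken.new('::', 0)   # => false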
599
+
600
+ if defined? SPECIALIZED_KEYWORDS
601
+ def make_specialized_kw(name,offset)
602
+ name=Punc2name[name] unless /^((?!#{LETTER_DIGIT}).)+$/o===name
603
+ KW2class[name].new(offset)
604
+ end
605
+ alias make_kw make_specialized_kw
606
+ else
607
+ def make_kw(name,offset)
608
+ KeywordToken.new(name,offset)
609
+ end
610
+ end
611
+
612
+ UNOP=
613
+ (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
614
+ :ident=>/^(?:[+-]@|unary[&*]|(?:lhs|rhs)[*])$/,
615
+ # :ident=>/^(?:[+-]@|unary[&])$/,
616
+ #:unary =>true,
617
+ }|
618
+ (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
619
+ :ident=>/^([~!]|not|defined\?)$/, #defined? should be removed from here, its handled separately
620
+ } #|
621
+ DEFOP=
622
+ (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
623
+ :ident=>"defined?",
624
+ }
625
+ =begin
626
+ MethNameToken&-{ #hack, shouldn't be necessary
627
+ #rubylexer should know to generally treat "defined?" as a keyword
628
+ #or operator. (like most keywords, it can also be used as a method
629
+ # name....)
630
+ :ident=>"defined?"
631
+ }
632
+ =end
633
+
634
+ def self.Op(ident=nil, allow_keyword=false)
635
+ result=OperatorToken
636
+ result |= KeywordToken if allow_keyword
637
+ result &= -{:ident=>ident} if ident
638
+ #result[:infix?]=true
639
+ return result
640
+ end
641
+ def Op(*args); self.class.Op(*args); end
642
+ BINOP_KEYWORDS=%w[if unless while until and or && \|\|]
643
+
644
+ #HAS_PRECEDENCE=Op(/^#{PRECEDENCE.keys.map{|k| Regexp.quote k}.join('|')}$/,true)
645
+ =begin
646
+ KeywordOp=
647
+ KeywordToken & -{
648
+ :ident=>/^(#{BINOP_KEYWORDS.join('|')})$/
649
+ }
650
+ KeywordOp2=
651
+ KeywordToken & -{
652
+ :ident=>/^([\[({!+*?:,]|\.{1,3}|::|=>)$/
653
+ }
654
+ =end
655
+ DotOp= KW('.') #KeywordToken & -{ :ident=>"." }
656
+ DoubleColonOp= KW('::') #KeywordToken & -{ :ident=>"::" }
657
+
658
+ Op=Op()
659
+ MODIFYASSIGNOP=Op( /^(([^=])\2|[^<>=!])=$/, true )
660
+ NONASSIGNOP=Op( /([^=]|[<>=!]=)$/)
661
+ KW_Op= #some of these ought to be regular operators, fer gosh sake
662
+ Op(/^(![=~]|\.\.\.?|=>)$/,true)|Op(/^(#{BINOP_KEYWORDS.join('|')})$/)
663
+
664
+ EPSILON=Float::EPSILON*10_000_000 #this should be <<1 and >0
665
+ fail unless 1+EPSILON>1
666
+ fail unless EPSILON<0.1
667
+
668
+ def left_op_higher(op,op2)
669
+ KeywordToken===op2 or OperatorToken===op2 or return true
670
+ rightprec=@precedence[op2.to_s] or return true
671
+ rightprec+=EPSILON if @RIGHT_ASSOCIATIVE[op2.to_s]
672
+ return @precedence[op.to_s]>=rightprec
673
+ end
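left_op_higher compares PRECEDENCE entries for the operator on the stack and the lookahead, bumping the right-hand side by EPSILON when it is RIGHT_ASSOCIATIVE, so equal-precedence right-associative operators shift instead of reducing. A standalone sketch of that comparison, with the tables abbreviated from the hashes above:

    prec  = { "**" => 118, "+" => 115 }
    right = { "**" => true }                  # right-associative ops
    eps   = Float::EPSILON * 10_000_000

    left_wins = lambda do |left_op, right_op|
      rp  = prec[right_op]
      rp += eps if right[right_op]
      prec[left_op] >= rp
    end

    left_wins.call("+", "+")     # => true  : 1+2+3 reduces left first, (1+2)+3
    left_wins.call("**", "**")   # => false : 2**3**4 shifts, giving 2**(3**4)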
674
+
675
+ # LowerOp= proc{|parser,op2| parser.left_op_higher(parser[-3],op2) }
676
+ module LowerOp_inspect
677
+ def inspect; "lower_op" end
678
+ end
679
+
680
+ def lower_op
681
+ return @lower_op if defined? @lower_op
682
+ lower_op=item_that{|op| left_op_higher(@stack[-3],op) }
683
+ lower_op=(LOWEST_OP|(~VALUELIKE_LA() & lower_op)).la
684
+ lower_op.extend LowerOp_inspect
685
+ @lower_op=lower_op
686
+ end
687
+
688
+ #this is a hack, should use graphcopy to search for Deferreds and replace with double-Deferred as below
689
+ def item_that(*a,&b)
690
+ if defined? @generating_parse_tables
691
+ huh unless b
692
+ #double supers, one of them in a block executed after this method returns....
693
+ #man that's weird
694
+ super(*a){|ob| @saw_item_that[[super(*a,&b),ob]]=true}
695
+ else
696
+ super(*a,&b) #and then here's another
697
+ end
698
+ end
699
+
700
+ WANTS_SEMI=%w[while until if unless
701
+ def case when in rescue
702
+ elsif class module << => . ::
703
+ ]
704
+ def wants_semi_context
705
+ Op(/^(<<|=>|\.|::)$/)|KW(/^(#{WANTS_SEMI.map{|ws| Regexp.quote ws }.join('|')})$/)
706
+ end
707
+ def dont_postpone_semi
708
+ @dps||=~wants_semi_context
709
+ end
710
+
711
+ #NeverBlockKeyword=MethNameToken&-{:ident=>/^(return|break|next)$/}
712
+ #FakeBegin=KW('(')&-{:not_real? =>true}
713
+ #FakeEnd=KW(')')&-{:not_real? =>true}
714
+
715
+ #rule format:
716
+ # -[syntax pattern_matchers.+, lookahead.-]>>node type
717
+
718
+ DotCall=stack_monkey("DotCall",4,CallNode){|stack|
719
+ left,dot=*stack.slice!(-4..-3)
720
+ right=stack[-2]
721
+
722
+ right.startline=left.startline
723
+ right.set_receiver! left
724
+ }
725
+
726
+ Lvalue=(VarNode|CallSiteNode|BracketsGetNode|CommaOpNode|
727
+ ParenedNode|ConstantNode|UnaryStarNode)&-{:lvalue =>true}
728
+
729
+ BareMethod=MethNameToken|(LiteralNode&-{:bare_method=>true})
730
+
731
+ #BEGINWORDLIST=RubyLexer::BEGINWORDLIST + %w"( [ {"
732
+ ENDWORDLIST=%w"end ) ] }"
733
+ ENDWORDS=ENDWORDLIST.map{|x| Regexp.quote x}.join('|')
734
+ BEGINWORDS=RubyLexer::BEGINWORDS
735
+ INNERBOUNDINGWORDS=RubyLexer::INNERBOUNDINGWORDS
736
+
737
+ BEGIN2END={"{"=>"}", "("=>")", "["=>"]", BEGINWORDS=>"end"}
738
+ def beginsendsmatcher
739
+ @bem||=
740
+ /^(#{BEGINWORDS}|#{ENDWORDS})$/
741
+ end
742
+
743
+ MULTIASSIGN=UnaryStarNode|CommaOpNode|ParenedNode
744
+ WITHCOMMAS=UnaryStarNode|CommaOpNode|(CallSiteNode&-{:with_commas=>true})
745
+ #(CallSiteNode&-{:real_parens=>false, :args=>-{:size=>~0.reg}})
746
+
747
+ BEGINAFTEREQUALS=
748
+ BeginNode&
749
+ -{:after_equals =>nil}&-{:non_empty=>true}
750
+ BEGINAFTEREQUALS_MARKED=
751
+ BeginNode&
752
+ -{:after_equals =>true}&-{:non_empty=>true}
753
+
754
+ LHS_COMMA=Op('lhs,',true)#&-{:tag => :lhs}
755
+ RHS_COMMA=Op('rhs,',true)#&-{:tag => :rhs}
756
+ #PARAM_COMMA=Op('param,',true)#&-{:tag => :param}
757
+ def FUNCLIKE_KEYWORD
758
+ KeywordToken&-{:ident=>@funclikes}
759
+ end
760
+ IGN_SEMI_BEFORE=KW(/^(#{RubyLexer::INNERBOUNDINGWORDS.gsub(/(rescue|then)\|/,'')[1...-1]}|end|[)}\]])$/)|EoiToken
761
+ IGN_SEMI_AFTER=KW(/^(begin|[;:({|]|then|do|else|ensure)$/)|BlockFormalsNode
762
+
763
+ #for use in lookback patterns
764
+ OPERATORLIKE_LB=OperatorToken|
765
+ KW(/^(not | defined\? | rescue3 | .*[@,] | [~!;\(\[\{?:] | \.{1,3} | :: | => | ![=~])$/x)|
766
+ KW(%r{^( \*\*? | << | >> | &&? | \|\|? | \^ | % | / | - | \+ )?=$}x)|
767
+ KW(BEGINWORDS)|KW(/^#{INNERBOUNDINGWORDS}$/)|RescueHeaderNode|StartToken|
768
+ GoalPostToken|BlockFormalsNode|AssignmentRhsListStartToken
769
+
770
+ #for use in lookahead patterns
771
+ def VALUELIKE_LA
772
+ KW(@varlikes)|NumberToken|SymbolToken|StringToken|UNOP|DEFOP|
773
+ KW(/^[({]$/x)|VarNameToken|MethNameToken|HerePlaceholderToken|
774
+ KW(BEGINWORDS)|FUNCLIKE_KEYWORD()|AssignmentRhsListStartToken
775
+
776
+ #why isn't this a sufficient implementation of this method:
777
+ # KW('(')
778
+ #in which case, '(' can be made the highest precedence operator instead
779
+ end
780
+ LOWEST_OP=KW(/^(#{ENDWORDS})$/)|KW(/^#{INNERBOUNDINGWORDS.sub('rescue|','')}$/)|
781
+ EoiToken|GoalPostToken|AssignmentRhsListEndToken
782
+
783
+ RESCUE_BODY=-[Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,]
784
+
785
+ RESCUE_OP=Op('rescue') #|(KW('rescue')&-{:infix=>true})
786
+
787
+ RESCUE_KW=KW('rescue')&-{:infix=>nil}
788
+
789
+ inspect_constant_names if respond_to? :inspect_constant_names
790
+
791
+ (constants-%w[RawOpNode ParenedNode SequenceNode LiteralNode Node MisparsedNode]).each{|k|
792
+ if /Node$/===k.to_s
793
+ remove_const k
794
+ end
795
+ }
796
+
797
+ def RULES
798
+ lower_op= lower_op()
799
+
800
+
801
+ result=
802
+ [-[StartToken.lb, Expr.-, EoiToken.la]>>:accept,
803
+ -[EoiToken]>>:error,
804
+ ]+
805
+
806
+ #these must be the lowest possible priority, and hence first in the rules list
807
+ # BEGIN2END.map{|_beg,_end|
808
+ # -[KW(_beg), (KW(_beg)|KW(_end)).~.*, KW(_end), KW(/^(do|\{)$/).~.la]>>MisparsedNode
809
+ # }+
810
+
811
+ [
812
+ # -[UNOP, Expr, lower_op]>>UnOpNode,
813
+ # -[DEFOP, ParenedNode]>>UnOpNode,
814
+ # -[Op(/^(?:unary|lhs|rhs)\*$/), ValueNode, lower_op]>>UnaryStarNode,
815
+
816
+ # -[Op('=',true)|KW(/^(rescue|when|\[)$/)|Op(/,$/,true),
817
+ # Op(/^(?:unary|rhs)\*$/), ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
818
+ # -[MethNameToken|FUNCLIKE_KEYWORD(), KW('('),
819
+ # Op(/^(?:unary|rhs)\*$/), ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
820
+ #star should not be used in an lhs if an rhs or param list context is available to eat it.
821
+ #(including param lists for keywords such as return,break,next,rescue,yield,when)
822
+
823
+ # -[Op(/^(?:unary|lhs)\*$/), (GoalPostToken|Op(/,$/,true)|KW(/^(in|[=)|;])$/)).la]>>DanglingStarNode, #dangling *
824
+ # -[Op(/,$/,true), (GoalPostToken|KW(/^(in|[=)|;])$/)).la]>> #dangling ,
825
+ # stack_monkey("DanglingComma",1,DanglingCommaNode){|stack|
826
+ # dcomma=DanglingCommaNode.new
827
+ # dcomma.offset=stack.last.offset
828
+ # stack.push dcomma, stack.pop
829
+ # },
830
+ #hmmm.... | in char classes above looks useless (predates GoalPostToken)
831
+
832
+ -[Expr, Op|KW_Op, Expr, lower_op]>>RawOpNode, #most operators
833
+
834
+ #assignment
835
+ # -[Lvalue, MODIFYASSIGNOP, Expr, lower_op]>>AssignNode,
836
+ # -[Lvalue, Op('=',true), AssignmentRhsNode, lower_op]>>AssignNode,
837
+ # -[AssignmentRhsListStartToken, Expr, AssignmentRhsListEndToken]>>AssignmentRhsNode,
838
+
839
+ # a = b rescue c acts like a ternary,,,
840
+ #provided that both a and b are not multiple and b
841
+ #(if it is a parenless callsite) has just 1 param
842
+ # -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
843
+ # Op('rescue3'), Expr, lower_op]>>AssignNode,
844
+ # -[Lvalue, Op('=',true), AssignmentRhsNode, Op('rescue3'), Expr, lower_op]>>AssignNode,
845
+
846
+ # -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
847
+ # Op('rescue3',true).la]>>:shift,
848
+
849
+ # -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:is_list=>true},
850
+ # RESCUE_OP.la] >>
851
+ # stack_monkey("rescue3",1,Op('rescue3',true)){|stack|
852
+ # resc=stack.last.dup
853
+ # resc.ident += '3'
854
+ # stack[-1]=resc
855
+ # },
856
+ #relative precedence of = and rescue are to be inverted if rescue
857
+ #is to the right and assignment is not multiple.
858
+
859
+ #if assignment rhs contains commas, don't reduce til they've been read
860
+ #(unless we're already on an rhs)
861
+ # -[(Op('=',true)|Expr).~.lb, Lvalue, Op('=',true), Expr, RHS_COMMA.la]>>:shift,
862
+ # -[RHS_COMMA.lb, Lvalue, Op('=',true), Expr, RHS_COMMA.la ]>>AssignNode,
863
+ # -[ValueNode, LHS_COMMA, ValueNode, Op('=',true).la]>>CommaOpNode,
864
+ #relative precedence of = and lhs/rhs , are to be inverted.
865
+
866
+ #mark parentheses and unary stars that come after lhs commas
867
+ # -[LHS_COMMA, (UnaryStarNode|ParenedNode)&~-{:after_comma =>true}, Op('=',true)]>>
868
+ # stack_monkey("after_comma",3,(UnaryStarNode|ParenedNode)&-{:after_comma =>true}){|stack|
869
+ # stack[-3].after_comma=true},
870
+ #mebbe this should be a lexer hack?
871
+
872
+ # -[#(OPERATORLIKE_LB&~Op('=',true)).lb,
873
+ # Expr, RESCUE_OP, Expr, lower_op]>>RescueOpNode,
874
+
875
+ #dot and double-colon
876
+ # -[DoubleColonOp, VarNode, lower_op]>>ConstantNode,#unary ::
877
+ # -[Expr, DotOp, CallNode, lower_op]>>DotCall, #binary .
878
+ # -[Expr, DoubleColonOp, CallNode, lower_op]>>DotCall, #binary ::
879
+ # -[Expr, DoubleColonOp, VarNode, lower_op]>>ConstantNode,#binary ::
880
+ #lower_op constraints on lookahead are unnecessary in the above 4 (unless I give openparen a precedence)
881
+
882
+ # -[Expr, "?", Expr, ":", Expr, lower_op]>>TernaryNode,
883
+
884
+
885
+ # -[MethNameToken, '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>CallNode,
886
+ # -[FUNCLIKE_KEYWORD, '(', Expr.-, ')', BlockNode.-, KW('do').~.la]>>KWCallNode,
887
+
888
+ -[#(OPERATORLIKE_LB&
889
+ (MethNameToken|FUNCLIKE_KEYWORD()).~.lb,
890
+ '(', Expr, KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true}), KW('do').~.la]>>ParenedNode,
891
+
892
+ # -[#(OPERATORLIKE_LB&
893
+ # (MethNameToken|FUNCLIKE_KEYWORD()).~.lb,
894
+ # '(', KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true}), KW('do').~.la]>>VarLikeNode, #(), alias for nil
895
+ #constraint on do in above 2 rules is probably overkill
896
+
897
+ # -[ValueNode, Op(/,$/,true), ValueNode, lower_op]>>CommaOpNode,
898
+
899
+ -[(OPERATORLIKE_LB&dont_postpone_semi).lb,
900
+ Expr, ';', Expr, lower_op]>>SequenceNode,
901
+
902
+
903
+ # -[#(OPERATORLIKE_LB&~KW(')')).lb,
904
+ # '{', (CommaOpNode|ArrowOpNode).-, '}']>>HashLiteralNode, #-40
905
+
906
+ # -[KW(')').lb, 'do', BlockFormalsNode.-, Expr.-, 'end']>>BlockNode,
907
+ #this does {} as well... converted to do...end
908
+ #rubylexer handles the 'low precedence' of do...end
909
+
910
+ # -[GoalPostToken, Expr.-, GoalPostToken]>>BlockFormalsNode,
911
+ #rubylexer disambiguated operator vs keyword '|'
912
+
913
+ # -[/^(while|until)$/, Expr, /^([:;]|do)$/, Expr.-, 'end']>>LoopNode,
914
+
915
+ # -[/^(if|unless)$/, Expr, /^(;|then|:)$/,
916
+ # Expr.-, ElsifNode.*, ElseNode.-, 'end'
917
+ # ]>>IfNode,
918
+
919
+ # -['else', Expr.-, KW(/^(ensure|end)$/).la]>>ElseNode,
920
+
921
+ # -['elsif', Expr, /^(;|then|:)$/, Expr.-,
922
+ # KW(/^(end|else|elsif)$/).la
923
+ # ]>>ElsifNode,
924
+
925
+ # -['module', ConstantNode|VarNode, KW(/^(;|::)$/).~.la]>>
926
+ # stack_monkey(1,KW(';')){|stack| #insert ; at end of module header if none was present
927
+ # stack.push KeywordToken.new(';'), stack.pop
928
+ # },
929
+ # -['module', ConstantNode|VarNode, ';', RESCUE_BODY, 'end']>>ModuleNode,
930
+ # -['class', Expr, ';', RESCUE_BODY, 'end']>>ClassNode,
931
+ # -['class', Expr, Op('<'), Expr, KW(';').~.la]>>:shift,
932
+ # -['class', Op('<<'), Expr, ';', RESCUE_BODY, 'end']>>MetaClassNode, #-30
933
+
934
+ # -['alias', BareMethod|VarNode, BareMethod|VarNode]>>AliasNode,
935
+ # -['undef', BareMethod]>>UndefNode,
936
+ # -[UndefNode, Op(',',true), BareMethod]>>UndefNode,
937
+
938
+ # -['def', CallSiteNode, Op('=').-, KW(';'), RESCUE_BODY,
939
+ # Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,
940
+ # 'end'
941
+ # ]>>MethodNode,
942
+
943
+ # -['begin', RESCUE_BODY,
944
+ # Expr.-, RescueNode.*, ElseNode.-, EnsureNode.-,
945
+ # 'end'
946
+ # ]>>BeginNode,
947
+
948
+ # -[Op('=',true), BEGINAFTEREQUALS, RESCUE_OP.la]>>
949
+ # stack_monkey("begin after equals",2,BEGINAFTEREQUALS_MARKED){ |stack| stack[-2].after_equals=true },
950
+ #this is bs. all for an extra :begin in the parsetree
951
+
952
+ # -[(KW(/^(;|begin)$/)|RescueNode).lb, #ParenedNode|RescueOpNode|BeginNode used to be here too
953
+ # RESCUE_KW, KW('=>').-, Expr.-, /^([:;]|then)$/,
954
+ # ]>>RescueHeaderNode,
955
+ # -[ RescueHeaderNode, Expr.-, KW(';').-, (KW(/^(else|ensure|end)$/)|RESCUE_KW).la
956
+ # ]>>RescueNode,
957
+
958
+ # -['ensure', Expr.-, KW('end').la]>>EnsureNode,
959
+
960
+ # -['[', Expr.-, ']']>>ArrayLiteralNode, #-20
961
+
962
+ # -[Expr, '[', Expr.-, ']']>>BracketsGetNode,
963
+
964
+ # -[HereDocNode, StringToken+1, StringToken.~.la]>>StringCatNode,
965
+ # -[(OPERATORLIKE_LB&~(StringToken|HereDocNode)).lb, StringToken+2, StringToken.~.la]>>StringCatNode,
966
+ # -[(OPERATORLIKE_LB&~(StringToken|HereDocNode)).lb, StringToken, StringToken.~.la]>>StringNode,
967
+ #includes regexp, wordlist, backquotes
968
+
969
+ # -['case', Expr.-, KW(';').-, WhenNode.*, ElseNode.-, 'end']>>CaseNode,
970
+
971
+ # -['when', Expr, /^([:;]|then)$/, Expr.-,
972
+ # KW(/^(when|else|end)$/).la
973
+ # ]>>WhenNode,
974
+
975
+ # -['for', Expr, 'in', Expr, /^([:;]|do)$/, Expr.-, 'end']>>ForNode,
976
+
977
+ #semicolon cleanup....
978
+ # -[(OPERATORLIKE_LB&dont_postpone_semi).lb,Expr, ';', IGN_SEMI_BEFORE.la] \
979
+ # >>delete_monkey(2,"semi_cleanup_before_ISB"),
980
+ # -[Expr, ';', KW('then').la] >>delete_monkey(2,"semi_cleanup_before_then"),
981
+ # -[dont_postpone_semi.lb, Expr, ';', RescueNode] >>delete_monkey(3,"semi_cleanup_before_rescue"), #-10
982
+ # -[IGN_SEMI_AFTER.lb, ';'] >>delete_monkey(2,"semi_cleanup_after_oplike"),
983
+ # -[(StartToken|RescueHeaderNode).lb, ';' ] >>delete_monkey(2,"semi_cleanup_after_rescue"),
984
+ #this rule is somewhat more forgiving than matz' parser...
985
+ #not all semicolons after :, (, and { keywords should
986
+ #be ignored. some should cause syntax errors.
987
+
988
+
989
+ #comma cleanup....
990
+ # -[Op(/,$/,true), KW(/^([}\]])$/).la] >>delete_monkey(2, "comma_cleanup"),
991
+ #likewise, this is somewhat too forgiving.
992
+ #some commas before } or ] should cause syntax errors
993
+
994
+ #turn lvalues into rvalues if not followed by an assignop
995
+ # -[-{:lvalue =>true}, (Op('=',true)|MODIFYASSIGNOP|LHS_COMMA).~.la]>>
996
+ # stack_monkey("lval2rval",2,-{:lvalue =>nil}){|stack|
997
+ # stack[-2].lvalue=nil
998
+ # },
999
+
1000
+ #expand the = into a separate token in calls to setters (after . or ::).
1001
+ #but not in method headers
1002
+ # -[(OPERATORLIKE_LB&~KW('def')).lb, Expr, DotOp|DoubleColonOp,
1003
+ # (MethNameToken&-{:has_equals=>true}).la]>>
1004
+ # stack_monkey("expand_equals",1,CallNode){|stack|
1005
+ # methname=stack.pop
1006
+ # methname.ident.chomp!('=')
1007
+ # offset=methname.offset+methname.ident.size
1008
+ # stack.push(
1009
+ # CallNode.new(methname,nil,nil,nil,nil),
1010
+ # OperatorToken.new('=',offset)
1011
+ # )
1012
+ # },
1013
+
1014
+ -[NumberToken|SymbolToken]>>LiteralNode,
1015
+
1016
+ #lexer does the wrong thing with -22**44.5, making the - part
1017
+ #of the first number token. it's actually lower precedence than
1018
+ #**... this rule fixes that problem.
1019
+ #in theory, unary - is lower precedence than ., ::, and [] as well, but
1020
+ #that appears not to apply to unary - in numeric tokens
1021
+ # -[NumberToken&-{:negative=>true}, Op('**').la]>>
1022
+ # stack_monkey("fix_neg_exp",2,Op("-@",true)){|stack|
1023
+ # #neg_op.unary=true
1024
+ # num=stack[-2]
1025
+ # op=OperatorToken.new("-@",num.offset)
1026
+ # op.startline=num.startline
1027
+ # stack[-2,0]=op
1028
+ # num.ident.sub!(/\A-/,'')
1029
+ # num.offset+=1
1030
+ # },
1031
+
1032
+ #treat these keywords like (rvalue) variables.
1033
+ # -[@varlikes]>>VarLikeNode,
1034
+
1035
+ #here docs
1036
+ # -[HerePlaceholderToken]>>HereDocNode,
1037
+ # -[HereBodyToken.la]>>delete_monkey(1,"delete_here_body"), ##this is ridiculous. this should be a lexer hack?
1038
+
1039
+ # -[VarNameToken]>>VarNode,
1040
+
1041
+
1042
+ ]
1043
+
1044
+ if @rubyversion >= 1.9
1045
+ result.concat [
1046
+ # -['->', ParenedNode.-, 'do', Expr.-, 'end']>>ProcLiteralNode,
1047
+ # -['->', VarLikeNode["nil",{:@value=>nil}].reg, 'do', Expr.-, 'end']>>ProcLiteralNode,
1048
+ -[(DotOp|DoubleColonOp).lb, '(',Expr.-,')', BlockNode.-, KW('do').~.la]>>CallNode,
1049
+ ]
1050
+ end
1051
+
1052
+ return result
1053
+ end
1054
+
1055
+ if defined? END_ATTACK
1056
+ module Reducer; end
1057
+ include Reducer
1058
+ end
1059
+
1060
+ def signature
1061
+ RedParse.signature(class<<self; ancestors end)
1062
+ end
1063
+ def RedParse.signature(ancs=ancestors)
1064
+ [ancs.map{|m| m.name},
1065
+ Digest::SHA256.file(__FILE__),
1066
+ Digest::SHA256.file(__FILE__.sub(/\.rb\z/,"/node.rb")),
1067
+ ]
1068
+ end
1069
+
1070
+ def initialize(input,name="(eval)",line=1,lvars=[],options={})
1071
+ @rubyversion=options[:rubyversion]||1.8
1072
+
1073
+ encoding=options[:encoding]||:ascii
1074
+ encoding=:binary if @rubyversion<=1.8
1075
+ cache=Cache.new(
1076
+ File===input,name,
1077
+ :line,line,:encoding,encoding,:locals,lvars.sort.join(","),
1078
+ @rubyversion, :/, *signature
1079
+ )
1080
+ cache_mode=options[:cache_mode]||:read_write
1081
+ raise ArgumentError unless /^(?:read_(?:write|only)|write_only|none)$/===cache_mode.to_s
1082
+ read_cache= /read/===cache_mode.to_s
1083
+ input.binmode if input.respond_to? :binmode
1084
+ if read_cache and cache and result=cache.get(input)
1085
+ @cached_result=result
1086
+ @write_cache=nil
1087
+ return
1088
+ end
1089
+ if /write/===cache_mode.to_s
1090
+ @write_cache,@input= cache,input
1091
+ else
1092
+ @write_cache=nil
1093
+ end
1094
+
1095
+ if Array===input
1096
+ def input.get1token; shift end
1097
+ @lexer=input
1098
+ if @rubyversion>=1.9
1099
+ @funclikes=RubyLexer::RubyLexer1_9::FUNCLIKE_KEYWORDS
1100
+ @varlikes=RubyLexer::RubyLexer1_9::VARLIKE_KEYWORDS
1101
+ else
1102
+ @funclikes=RubyLexer::FUNCLIKE_KEYWORDS
1103
+ @varlikes=RubyLexer::VARLIKE_KEYWORDS
1104
+ end
1105
+ else
1106
+ @lexer=RubyLexer.new(name,input,line,0,:rubyversion=>@rubyversion,:encoding=>encoding)
1107
+ @funclikes=@lexer::FUNCLIKE_KEYWORDS()
1108
+ @varlikes=@lexer::VARLIKE_KEYWORDS()
1109
+ lvars.each{|lvar| @lexer.localvars[lvar]=true }
1110
+ encoding=@lexer.encoding_name_normalize(encoding.to_s).to_sym
1111
+ warn "#{encoding} encoding won't really work right now" if RubyLexer::NONWORKING_ENCODINGS.include? encoding
1112
+ end
1113
+ @funclikes=/#@funclikes|^->$/ if @rubyversion>=1.9
1114
+ @filename=name
1115
+ @min_sizes={}
1116
+ @compiled_rules={}
1117
+ @moretokens=[]
1118
+ @unary_or_binary_op=/^[-+]$/
1119
+ # @rules=self.expanded_RULES
1120
+ @precedence=self.PRECEDENCE
1121
+ @RIGHT_ASSOCIATIVE=self.RIGHT_ASSOCIATIVE
1122
+ if defined? END_ATTACK
1123
+ compile
1124
+ end
1125
+ @saw_item_that=nil
1126
+ @print_filter=proc{true}
1127
+ end
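#initialize also wires up the parse cache: the cache_mode check above accepts :read_write (the default), :read_only, :write_only and :none, and a cache hit short-circuits #parse via @cached_result. A sketch of passing those options (file name is a placeholder):

    src = File.read("some_file.rb")
    RedParse.new(src, "some_file.rb", 1, [], :cache_mode => :read_only).parse
    RedParse.new(src, "some_file.rb", 1, [], :cache_mode => :none,
                 :rubyversion => 1.9).parse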
1128
+
1129
+ attr_accessor :lexer, :print_filter
1130
+ attr :rubyversion
1131
+
1132
+ def get_token(recursing=false)
1133
+ unless @moretokens.empty?
1134
+ @last_token=@moretokens.shift
1135
+ p @last_token if ENV['PRINT_TOKENS'] && @print_filter[@last_token] and not recursing
1136
+ return @last_token
1137
+ end
1138
+
1139
+ rpt=ENV['RAW_PRINT_TOKENS']
1140
+ begin
1141
+ result=@lexer.get1token or break
1142
+ p result if rpt and @print_filter[result]
1143
+
1144
+ #set token's line
1145
+ result.startline= @endline||=1
1146
+ #result.endline||=@endline if result.respond_to? :endline=
1147
+
1148
+ if result.respond_to?(:as) and as=result.as
1149
+ #result=make_kw(as,result.offset)
1150
+ #result.originally=result.ident
1151
+ if OperatorToken===result #or KeywordToken===result
1152
+ result=result.dup
1153
+ result.ident=as
1154
+ else
1155
+ result2=make_kw(as,result.offset)
1156
+ result2.startline=result.startline
1157
+ result2.endline=result.endline
1158
+ result=result2
1159
+ end
1160
+ result.not_real! if result.respond_to? :not_real!
1161
+ else
1162
+
1163
+ case result
1164
+ when FileAndLineToken #so __FILE__ and __LINE__ can know what their values are
1165
+ @file=result.file
1166
+ @endline=result.line
1167
+ redo
1168
+
1169
+ when OperatorToken
1170
+ if @unary_or_binary_op===result.ident and result.unary || result.tag==:unary
1171
+ result=result.dup
1172
+ result.ident+="@"
1173
+ end
1174
+
1175
+ #more symbol table maintenance....
1176
+ when KeywordToken
1177
+ case name=result.ident
1178
+
1179
+ when /^(#{BINOP_KEYWORDS.join '|'})$/o #should be like this in rubylexer
1180
+ unless result.has_end?
1181
+ orig=result
1182
+ result=OperatorToken.new(name,result.offset)
1183
+ result.endline=orig.endline
1184
+ end
1185
+ when "|";
1186
+ orig=result
1187
+ result=GoalPostToken.new(result.offset) #is this needed still? (yes)
1188
+ result.endline=orig.endline
1189
+ when "__FILE__"; #I wish rubylexer would handle this
1190
+ #class<<result; attr_accessor :value; end
1191
+ assert result.value==@file.dup
1192
+ when "__LINE__"; #I wish rubylexer would handle this
1193
+ #class<<result; attr_accessor :value; end
1194
+ assert result.value==@endline
1195
+ else
1196
+ result=make_kw name,result.offset if defined? SPECIALIZED_KEYWORDS
1197
+ #warning, this may discard information stored in instance vars of result
1198
+ end
1199
+
1200
+ when StringToken,HerePlaceholderToken
1201
+ @endline=result.endline
1202
+
1203
+ when EoiToken; break
1204
+ when HereBodyToken;
1205
+ @endline=result.endline
1206
+ break
1207
+ when AssignmentRhsListStartToken; break
1208
+ when AssignmentRhsListEndToken; break
1209
+ when IgnoreToken; redo
1210
+ end
1211
+ end
1212
+ end while false
1213
+ p result if ENV['PRINT_TOKENS'] && @print_filter[@last_token] unless recursing
1214
+
1215
+ #ugly weak assertion
1216
+ assert result.endline==@endline unless result.ident==';' && result.endline-1==@endline or EoiToken===result
1217
+
1218
+ return @last_token=result
1219
+ end
1220
+
1221
+ def unget_tokens(*tokens)
1222
+ @moretokens=tokens.concat @moretokens
1223
+ end
1224
+
1225
+ def unget_token(token)
1226
+ @moretokens.unshift token
1227
+ end
1228
+
1229
+ =begin
1230
+ self.LOOKAHEAD_CLASSES.each_with_index{|classes,i|
1231
+ case classes
1232
+ when Class: huh
1233
+ when Array: classes.flatten.each{huh}
1234
+ else
1235
+ end
1236
+ }
1237
+ =end
1238
+
1239
+ # def fixup_multiple_assignments!; end
1240
+ end
1241
+
1242
+
1243
+ if __FILE__==$0
1244
+ #this code has moved to bin/redparse; really, all this should just go away
1245
+ require 'problemfiles'
1246
+ class NeverExecThis<RuntimeError; end
1247
+
1248
+ def arraydiff(a,b)
1249
+ a==b and return [a,false]
1250
+ (Array===a or a=[a])
1251
+ result= a.dup
1252
+ diff=false
1253
+ size= a.size >= b.size ? a.size : b.size
1254
+ size.times{|i|
1255
+ ai=a[i]
1256
+ bi=b[i]
1257
+ if Array===ai and Array===bi
1258
+ result_i,diff_i= arraydiff(ai,bi)
1259
+ diff||=diff_i
1260
+ result[i]=result_i
1261
+ elsif ai!=bi
1262
+ next if Regexp===ai and ai.to_s==bi.to_s and
1263
+ ai.options==bi.options
1264
+ diff=true
1265
+ result[i]={ai=>bi}
1266
+ elsif ai.nil?
1267
+ result[i]={'size mismatch'=>"#{a.size} for #{b.size}"} if a.size!=b.size
1268
+ diff=true
1269
+ end
1270
+ if i.nonzero? and Hash===result[i] and Hash===result[i-1]
1271
+ old=result[i-1]
1272
+ oldkeys=old.keys
1273
+ oldvals=old.values
1274
+ if Reg::Subseq===oldkeys.first
1275
+ oldkeys=oldkeys.children
1276
+ oldval=oldvals.children
1277
+ end
1278
+ result[i-1..i]=[ {-[*oldkeys+result[i].keys]=>-[*oldvals+result[i].values]} ]
1279
+ end
1280
+ }
1281
+ return result,diff
1282
+ end
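arraydiff recursively compares two (possibly nested) s-expression arrays, returning a copy of the first with mismatching slots replaced by {mine=>theirs} hashes plus a flag saying whether anything differed; it is used below to compare RedParse output against ParseTree. A quick trace (only available when this file is run as a script, since arraydiff lives inside the __FILE__==$0 block):

    delta, is_diff = arraydiff([:call, :foo, [:lit, 1]],
                               [:call, :foo, [:lit, 2]])
    is_diff   # => true
    delta     # => [:call, :foo, [:lit, {1=>2}]]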
1283
+
1284
+ output=:pp
1285
+ quiet=true
1286
+ while /^-/===ARGV.first
1287
+ case opt=ARGV.shift
1288
+ when "--"; break
1289
+ when "--pp"; output=:pp
1290
+ when "--lisp"; output=:lisp
1291
+ when "--parsetree"; output=:parsetree
1292
+ when "--vsparsetree"; output=:vsparsetree
1293
+ when "--vsparsetree2"; output=:vsparsetree2
1294
+ when "--update-problemfiles"; problemfiles=ProblemFiles.new
1295
+ when "-q"; quiet=true
1296
+ when "-v"; quiet=false
1297
+ when "-e"; inputs=[ARGV.join(" ")]; names=["-e"]; break
1298
+ else fail "unknown option: #{opt}"
1299
+
1300
+ end
1301
+ end
1302
+
1303
+ unless inputs
1304
+ if ARGV.empty?
1305
+ inputs=[STDIN.read]
1306
+ names=["-"]
1307
+ elsif ARGV.size==1 and (Dir.entries(ARGV.first) rescue false)
1308
+ names=Dir[ARGV.first+"/**/*.rb"]
1309
+ else
1310
+ names=ARGV.dup
1311
+ end
1312
+ inputs||=names.map{|name| File.open(name).read rescue nil}
1313
+ end
1314
+
1315
+ result=0
1316
+
1317
+ safety="BEGIN{raise NeverExecThis};BEGIN{throw :never_exec_this,1};\n"
1318
+ nullsafety="\n"
1319
+ safe_inputs=inputs.map{|input| safety+input}
1320
+
1321
+ inputs.each_index{|i|
1322
+ begin
1323
+
1324
+ input=inputs[i] or next
1325
+ name=names[i]
1326
+
1327
+ input=nullsafety+input
1328
+ #print name+"... "; STDOUT.flush
1329
+
1330
+ begin
1331
+ tree=nil
1332
+ if catch(:never_exec_this){
1333
+ tree=RedParse.new(input,name).parse; nil
1334
+ } #raise NeverExecThis
1335
+ # rescue RedParse::ParseError=>e
1336
+ # require 'pp'
1337
+ # pp e.stack[-[15,e.stack.size].min..-1]
1338
+ # raise
1339
+ # rescue NeverExecThis
1340
+ puts "RedParse attempted to execute parse data in #{name}"
1341
+ next
1342
+ end
1343
+ rescue Interrupt; exit 2
1344
+ rescue Exception=>e
1345
+ # puts e.backtrace.join("\n")
1346
+ e.message << " during parse of #{name}"
1347
+ # err=e.class.new(e.message+" during parse of #{name}")
1348
+ # err.set_backtrace e.backtrace
1349
+ problemfiles.push name if problemfiles
1350
+ raise e
1351
+ end
1352
+ tree or fail "parsetree was nil for #{name}"
1353
+
1354
+ case output
1355
+ when :pp
1356
+ require 'pp'
1357
+ pp tree
1358
+ when :lisp
1359
+ puts tree.to_lisp
1360
+ when :parsetree
1361
+ pp tree.to_parsetree
1362
+ when :vsparsetree,:vsparsetree2
1363
+ begin
1364
+ require 'rubygems'
1365
+ rescue Exception
1366
+ end
1367
+ require 'parse_tree'
1368
+ #require 'algorithm/diff'
1369
+ begin
1370
+ mine=tree.to_parsetree(:quirks)
1371
+ if IO===input
1372
+ input.rewind
1373
+ input=input.read
1374
+ end
1375
+ ryans=nil
1376
+ catch(:never_exec_this){
1377
+ ryans=ParseTree.new.parse_tree_for_string(safe_inputs[i],name); nil
1378
+ } and raise NeverExecThis
1379
+ delta,is_diff=arraydiff(mine,ryans)
1380
+ rescue NeverExecThis
1381
+ puts "ParseTree attempted to execute parse data in #{name}"
1382
+ next
1383
+ rescue Interrupt; exit 2
1384
+ rescue Exception=>e
1385
+ #raise( RuntimeError.new( "#{e} during to_parsetree of #{name}" ) )
1386
+ puts "error during to_parsetree of #{name}"
1387
+ problemfiles.push name if problemfiles
1388
+ raise
1389
+ end
1390
+ if output==:vsparsetree2
1391
+ if !quiet or is_diff
1392
+ puts "mine:"
1393
+ pp mine
1394
+ puts "ryans:" if is_diff
1395
+ pp ryans if is_diff
1396
+ end
1397
+ elsif !quiet or is_diff
1398
+ puts 'differences in '+name if is_diff
1399
+ pp delta
1400
+ end
1401
+ if is_diff
1402
+ result=1
1403
+ problemfiles.push name if problemfiles
1404
+ else
1405
+ puts "no differences in "+name
1406
+ problemfiles.delete name if problemfiles
1407
+ end
1408
+ end
1409
+
1410
+ rescue NeverExecThis
1411
+ puts "mysterious attempt to execute parse data in #{name}"
1412
+ next
1413
+ rescue Interrupt,SystemExit; exit 2
1414
+ rescue Exception=>e
1415
+ puts "#{e}:#{e.class}"
1416
+ puts e.backtrace.join("\n")
1417
+ #problemfiles.push name if problemfiles
1418
+ #raise
1419
+ ensure
1420
+ STDOUT.flush
1421
+ end
1422
+ }
1423
+ exit result
1424
+ end
1425
+
1426
+ =begin old todo:
1427
+ v merge DotCallNode and CallSiteNode and CallWithBlockNode
1428
+ v remove actual Tokens from parse tree...
1429
+ v split ParenedNode into ParenedNode + Rescue/EnsureNode
1430
+ x 'incomplete' subtrees such as ElseNode, ElsifNode, RescueNode
1431
+ x -should not appear in final output
1432
+ v split keywordopnode into loop and if varieties?
1433
+ =end
1434
+
1435
+ =begin old optimization opportunities, ha!
1436
+ top of stack slot contains mostly keywords, specific node classes, and Expr
1437
+ lookahead slot contains mostly lower_op and keywords, with a few classes and inverted keywords
1438
+ -(lower_op is hard to optimize)
1439
+ if top of stack matcher is Expr, then the next matcher down is mostly keywords, with some operators
1440
+ class membership can be optimized to test of integer within a range
1441
+ keywords could be stored as symbols instead of strings
1442
+ a few rules may need exploding (eg, ensure) to spoon feed the optimizer
1443
+ make all Nodes descendants of Array
1444
+ =end
1445
+
1446
+ #todo:
1447
+ #each node should have a corresponding range of tokens
1448
+ #-in an (optional) array of all tokens printed by the tokenizer.
1449
+ #v test stack_monkey mods
1450
+ #v break ParenedNode into 2 (3?) classes
1451
+ #x invent BEGINNode/ENDNode? (what other keywords?)
1452
+ #v at least make BEGIN/END be KWCallNode
1453
+ #v replace VarNameToken with VarNode in parser
1454
+ #x convert raw rules to lists of vertex identities?
1455
+ #v DottedRule class
1456
+ #v ParserState class (set of DottedRules)
1457
+ #v MultiReduce
1458
+ #v MultiShift
1459
+ #v ParserState#evolve(identity)
1460
+ #v DottedRule#evolve(identity)
1461
+ #v RedParse#enumerate_states
1462
+ #v RedParse#enumerate_exemplars
1463
+ #v Node/Token.enumerate_exemplars
1464
+ #v Node/Token.identity_param
1465
+ #v rename #lvalue? => #lvalue
1466
+ #x likewise get rid of other oddly named identity params
1467
+ #v BareMethod,WITHCOMMAS,BEGINAFTEREQUALS should have predicate methods defined for them
1468
+ #v do something about BEGINAFTEREQUALS... lots predicates, ugly to identify
1469
+ #v document identity parameters in nodes and tokens
1470
+ #operator and keyword tokens have some identity_param variations remaining...maybe?
1471
+ #xx all identity readers have to have writers as well (even if fake)
1472
+ #v sort out vertex identities... call identity_param in apt classes
1473
+ #convert identities<=>small ints
1474
+ #convert ParserStates<=>small ints
1475
+ #> lower_op/proc lookahead requires special action type with shift and reduce branches
1476
+ #x stack monkeys dictate some nodes appear in s/r table... which ones?
1477
+ #x some stack monkeys pushback nodes, action table must take take those as input
1478
+ #v retype GoalPostNode => GoalPostToken
1479
+ #v then, pushback* should go away
1480
+ #v build shift/reduce table
1481
+ #v build goto table
1482
+ #split tables into shift/reduce and goto....?
1483
+ #v integrate with c code generator
1484
+ #finish c code generator
1485
+ #code generator needs a way to deal with :
1486
+ #backtracking (to more than 1 node/token???)
1487
+ #actions (stack monkeys/lower_op)
1488
+ #every reduce requires b/ting thru the lookahead
1489
+ #garbage collection
1490
+ #sharing ruby objects between ruby code and generated c code
1491
+ #optimizer?
1492
+ #ruby code generator?
1493
+ #v what to do with :shift ?
1494
+ #what to do with :accept ?
1495
+ #what to do with :error ?
1496
+ #Node.create (used in generated code)
1497
+ #Node.create <= takes input directly from semantic stack
1498
+ #build Node.create param list generator
1499
+ #v names for rules, dotted rules, parser states, identities
1500
+ #x StartNode may be a problem... used by a stack monkey,
1501
+ #to remove extra ;s from the very beginning of input.
1502
+ #use a lexer hack instead?
1503
+ #v convert StartNode to StartToken?
1504
+ #convert names to numbers and numbers to names
1505
+ #for states, rules, vertex identities
1506
+ #in ruby and c (??)
1507
+ #x rule for HereBodyToken should be a lexer hack?
1508
+ #v stack monkeys should have names
1509
+ #how to handle a stack monkey whose 2nd parameter is not a single identity?
1510
+ #even reduces may not have enough info since 1 node class may have multiple identities
1511
+ #v RedParse constants should be named in inspect
1512
+ #v toplevel rule?
1513
+ #v semantic stack in generated c code should be a ruby array
1514
+ #x state stack should keep size of semantic stack at the time states are pushed,
1515
+ #so that i can restore semantic stack to former state when b-ting/reducing
1516
+ #urk, how do I know how many levels of state stack to pop when reducing?
1517
+ #in looping error rules, just scan back in semantic stack for rule start
1518
+ #in regular looping rules, transition to loop state is saved on a special stack
1519
+ #so that at reduce time, we can b/t to that point for a start
1520
+ #if rule contains only scalars, b/t is easy
1521
+ #else rule contains scalars and optionals:
1522
+ #scan for rule start vertex starting at highest node
1523
+ #on semantic stack that can contain it and working downward.
1524
+ #also, statically verify that relevant rules contain no collisions among first (how many?) matchers
1525
+
1526
+ #is lookahead in code generator even useful? my tables have built-in lookahead....
1527
+ #need hack to declare nonerror looping matchers as irrevocable (for speed, when reducing)
1528
+ #v assignmentRhsNode needs an identity_param for with_commas
1529
+ #v -** fixup and setter breakout rules need dedicated identity_params too
1530
+ # = rescue ternary is broken again now...
1531
+ #v instead of shift states and is_shift_state? to find them,
1532
+ #v i should have shift transitions. (transitions that imply a shift... in response to a token input.)
1533
+ #v all states will have 2 entry points, for shift and nonshift transitions.
1534
+ #split big table into goto(node) and sr(token) tables
1535
+ #in each state, most common sr action should be made default
1536
+ #unused entries in goto table can be ignored.
1537
+ #most common goto entries (if any) can be default.
1538
+ #is the change_index arg in stack_monkey calls really correct everywhere? what are
1539
+ #the exact semantics of that argument? what about stack_monkeys that change the stack size?
1540
+ #should there be another arg to keep track of that?
1541
+ #maybe rewrite stack_monkeys so they're a little clearer and easier to analyze (by hand)
1542
+ #MultiShift/MultiReduce are not supported actions in generate.rb
1543
+ #:accept/:error are not supported actions in generate.rb