redparse 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,26 @@
1
+ # Copyright (C) 2008 Caleb Clausen
2
+ # Distributed under the terms of Ruby's license.
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ require 'lib/redparse/version.rb'
6
+
7
+
8
# Pull the gem description out of README.txt: skip everything up to and
# including the "== DESCRIPTION:" heading, skip the rest of that paragraph,
# then take the next blank-line-terminated paragraph.
# The block form of File.open closes the file as soon as the text is read
# (the handle was previously left open for the life of the rake process).
desc = File.open("README.txt") do |readme|
  readme.readline("\n== DESCRIPTION:")
  readme.readline("\n\n")
  readme.readline("\n\n")
end

# Gem specification, built through Hoe.
hoe = Hoe.new("redparse", RedParse::VERSION) do |_|
  _.author = "Caleb Clausen"
  _.email = "redparse-owner @at@ inforadical .dot. net"
  _.url = ["http://redparse.rubyforge.org/", "http://rubyforge.org/projects/redparse/"]
  _.extra_deps << ['rubylexer', '>= 0.7.2']
  # _.test_globs=["test/*"]
  _.description = desc
  # summary is the description's text up to and including its first period
  _.summary = desc[/\A[^.]+\./]
  _.spec_extras = {:bindir=>''}
  _.rdoc_pattern = /\A(README\.txt|lib\/.*\.rb)\Z/
  _.remote_rdoc_dir = "/"
end
25
+
26
+
@@ -0,0 +1,1083 @@
1
+ =begin
2
+ redparse - a ruby parser written in ruby
3
+ Copyright (C) 2008 Caleb Clausen
4
+
5
+ This program is free software: you can redistribute it and/or modify
6
+ it under the terms of the GNU Lesser General Public License as published by
7
+ the Free Software Foundation, either version 3 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public License
16
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ =end
18
+
19
+ #warn 'hacking up LOAD_PATH to include the latest RubyLexer!'
20
+ #$:.unshift Dir.pwd+'/../rubylexer/lib', Dir.pwd+'/../rubylexer'
21
+
22
+ # "faster rule compiler is untested"
23
+
24
+ require 'rubygems'
25
+ require 'rubylexer'
26
+ require 'reg'
27
+
28
+ require "redparse/node"
29
+ #require "redparse/decisiontree"
30
+ require "redparse/reg_more_sugar"
31
+ class RedParse
32
+ # include Nodes
33
+
34
# Lexes the snippet "return {}.size" with RubyLexer and reports whether the
# tokens come back as instances of the expected classes, in the expected
# order.  Returns true only when all twelve tokens match.
def self.has_return_hash_fix?
  lexer = RubyLexer.new("", "return {}.size")
  expected_classes = [
    FileAndLineToken,
    MethNameToken,
    ImplicitParamListStartToken,
    WsToken,
    KeywordToken,
    KeywordToken,
    KeywordToken,
    MethNameToken,
    ImplicitParamListStartToken,
    ImplicitParamListEndToken,
    ImplicitParamListEndToken,
    EoiToken,
  ]
  # all? stops at the first mismatch, so no further tokens are pulled
  # from the lexer once the sequence has diverged.
  expected_classes.all? { |token_class| token_class === lexer.get1token }
end
51
+
52
+ #see pickaxe, 1st ed, page 221
53
# Table of right-associative operators, mapping operator text to its
# precedence level.  A fresh Hash is built on every call.
def RIGHT_ASSOCIATIVE
  table = {
    # "defined?"=>120.5,
    "**" => 118,
  }

  # plain and modifying assignment operators all share one level
  [
    "=", "%=", "/=", "-=", "+=",
    "|=", "&=", ">>=", "<<=", "*=",
    "&&=", "||=", "**=", "^=",
  ].each { |assign_op| table[assign_op] = 105 }

  # entries that are disabled in this table:
  # "and"=>99, "or"=>99,
  # "if"=>98, "unless"=>98, "while"=>98, "until"=>98, "rescue"=>98,
  # "&&"=>109, "||"=>108,
  table
end
69
+
70
# Operator precedence table: maps operator/keyword text to a numeric level,
# where a larger number binds more tightly.  A fresh Hash is built on every
# call.
def PRECEDENCE
  levels = [
    # "("=>122,               #method param list
    # "{"=>122, "do"=>122,    #blocks
    [121,   ["::", "."]],
    # "defined?"=>120.5,
    [120,   ["["]],                      #[] []= methods
    [119,   ["!", "~", "+@"]],
    [118,   ["**"]],
    [117,   ["-@"]],
    [116,   ["*", "/", "%"]],
    [115,   ["+", "-"]],
    [114,   ["<<", ">>"]],
    [113,   ["&"]],
    [112,   ["^", "|"]],
    [111,   ["<=", ">=", "<", ">"]],
    [110,   ["<=>", "==", "===", "!=", "=~", "!~"]],
    [109,   ["&&"]],
    [108,   ["||"]],
    [107,   ["..", "..."]],
    [106,   ["?"]],                      # ":"=>106, #not sure what to do with ":"
    [105.5, ["*@", "&@"]],               #unary * and & operators
    [105,   ["=", "%=", "/=", "-=", "+=",
             "|=", "&=", ">>=", "<<=", "*=",
             "&&=", "||=", "**=", "^="]],
    [103,   ["defined?", "not"]],
    [102,   [":",                        #but not when used as a substitute for 'then'
             "rescue3"]],
    [101,   ["=>"]],
    [100,   [","]],
    #the 'precedence' of comma is somewhat controversial. it actually has
    #several different precedences depending on which kind of comma it is.
    #the precedence of , is higher than :, => and the assignment operators
    #in certain contexts.

    #"unary" prefix function names seen as operators have this precedence
    #but, rubylexer handles precedence of these and outputs fake parens
    #to tell us how it's parsed

    [99,    ["or", "and"]],
    [98,    ["if", "unless", "while", "until", "rescue"]],
    [96,    [";"]],
  ]

  table = {}
  levels.each do |level, operators|
    operators.each { |operator| table[operator] = level }
  end
  table
end
143
+
144
+ module BracketsCall; end
145
+
146
+ Value= #NumberToken|SymbolToken|
147
+ #HerePlaceholderToken|
148
+ ((VarNameToken|ValueNode)&-{:lvalue? =>nil})
149
+ Expr=Value
150
+
151
# Build a matcher for KeywordTokens whose +ident+ matches the given value.
#
# ident:: an Integer is converted with #chr; a String or Regexp is used
#         as-is; anything else is converted with #to_s.
#
# Returns KeywordToken combined (via &) with an {:ident=>...} hash matcher.
def self.KW(ident)
  # `when X then` replaces the colon-terminated `when X:` form, which
  # Ruby 1.9 and later reject as a syntax error.
  ident = case ident
          when Integer then ident.chr
          when String, Regexp then ident
          else ident.to_s
          end

  return KeywordToken & -{:ident=>ident}
end
160
# Instance-level shortcut that forwards to the class-level KW.
def KW(ident)
  self.class.KW(ident)
end
161
+ UNOP=
162
+ (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
163
+ :ident=>/^[*&+-]@$/,
164
+ :unary =>true,
165
+ }|
166
+ (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
167
+ :ident=>/^([~!]|not|defined\?)$/,
168
+ } #|
169
+ DEFOP=
170
+ (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
171
+ :ident=>"defined?",
172
+ }
173
+ =begin
174
+ MethNameToken&-{ #hack, shouldn't be necessary
175
+ #rubylexer should know to generally treat "defined?" as a keyword
176
+ #or operator. (like most keywords, it can also be used as a method
177
+ # name....)
178
+ :ident=>"defined?"
179
+ }
180
+ =end
181
+
182
# Build an operator matcher.
#
# ident::         when given, the matcher additionally requires the token's
#                 ident to match it
# allow_keyword:: when true, KeywordToken is accepted as well as
#                 OperatorToken
def self.Op(ident=nil, allow_keyword=false)
  matcher = allow_keyword ? (OperatorToken | KeywordToken) : OperatorToken
  matcher = matcher & -{:ident=>ident} if ident
  #matcher[:infix?]=true
  matcher
end
189
# Instance-level shortcut that forwards all arguments to the class-level Op.
def Op(*args)
  self.class.Op(*args)
end
190
+ BINOP_KEYWORDS=%w[if unless while until and or && \|\|]
191
+
192
+ #HAS_PRECEDENCE=Op(/^#{PRECEDENCE.keys.map{|k| Regexp.quote k}.join('|')}$/,true)
193
+ =begin
194
+ KeywordOp=
195
+ KeywordToken & -{
196
+ :ident=>/^(#{BINOP_KEYWORDS.join('|')})$/
197
+ }
198
+ KeywordOp2=
199
+ KeywordToken & -{
200
+ :ident=>/^([\[({!+*?:,]|\.{1,3}|::|=>)$/
201
+ }
202
+ =end
203
+ DotOp= KeywordToken & -{ :ident=>"." }
204
+ DoubleColonOp= KeywordToken & -{ :ident=>"::" }
205
+
206
+ Op=Op()
207
+ MODIFYASSIGNOP=Op( /^(([^=])\2|[^<>=!])=$/, true )
208
+ NONASSIGNOP=Op( /([^=]|[<>=!]=)$/)
209
+ KW_Op= #some of these ought to be regular operators, fer gosh sake
210
+ Op(/^((![=~])|\.\.\.?|=>)$/,true)|Op(/^(#{BINOP_KEYWORDS.join('|')})$/)
211
+
212
+ EPSILON=Float::EPSILON*10_000_000 #this should be <<1 and >0
213
+ fail unless 1+EPSILON>1
214
+ fail unless EPSILON<0.1
215
+
216
# Decide whether the operator on the left (+op+) binds at least as tightly
# as the upcoming token (+op2+).
#
# Returns true when +op2+ is neither a KeywordToken nor an OperatorToken, or
# has no entry in the precedence table.  Otherwise compares the two
# precedences; a right-associative +op2+ has EPSILON added to its level
# before the comparison.
def left_op_higher(op, op2)
  # (Op|KeywordOp|KeywordOp2|ASSIGNOP===op2) or return true
  return true unless KeywordToken === op2 || OperatorToken === op2

  right_name = op2.to_s
  threshold = @precedence[right_name]
  #or fail "unrecognized right operator: #{op2.inspect}"
  return true unless threshold

  threshold += EPSILON if @RIGHT_ASSOCIATIVE[right_name]
  @precedence[op.to_s] >= threshold
end
224
+
225
+ LowerOp= proc{|parser,op2| parser.left_op_higher(parser[-3],op2) }
226
+
227
# Memoized negation (~) of the matcher returned by wants_semi_context.
def dont_postpone_semi
  return @dps if @dps
  @dps = ~wants_semi_context
end
230
+ WANTS_SEMI=%w[while until if unless
231
+ def case when in rescue
232
+ elsif class module << => . ::
233
+ ]
234
# Matcher for the `<<` operator or for any keyword listed in WANTS_SEMI
# (each word is regexp-quoted and the whole ident must match).
def wants_semi_context
  alternation = WANTS_SEMI.map { |word| Regexp.quote(word) }.join('|')
  Op('<<') | KW(/^(#{alternation})$/)
end
237
+
238
+ NeverBlockKeyword=MethNameToken&-{:ident=>/^(return|break|next)$/}
239
+ FakeBegin=KW('(')&-{:not_real? =>true}
240
+ FakeEnd=KW(')')&-{:not_real? =>true}
241
+
242
+ #rule format:
243
+ # -[syntax pattern_matchers.+, lookahead.-]>>node type
244
+
245
+ DotCall=proc{|stack|
246
+ right=stack[-2]
247
+ left,bogus=*stack.slice!(-4..-3)
248
+
249
+ right.set_receiver! left
250
+ }
251
+
252
+ Lvalue=(VarNameToken|CallSiteNode|BracketsGetNode|CommaOpNode|
253
+ ParenedNode|ConstantNode|UnaryStarNode)&-{:lvalue? =>true}
254
+
255
+ BareMethod=MethNameToken|LiteralNode&-{:val=>Symbol|StringNode}
256
+
257
+ BEGINWORDLIST=RubyLexer::BEGINWORDLIST + %w"( [ {"
258
+ ENDWORDLIST=%w"end ) ] }"
259
+ BEGIN2END={"{"=>"}", "("=>")", "["=>"]", }
260
+ endword="end"
261
+ RubyLexer::BEGINWORDLIST.each{|bw| BEGIN2END[bw]=endword }
262
# Memoized Regexp matching any single word from BEGINWORDLIST or
# ENDWORDLIST (each word is regexp-quoted).
def beginsendsmatcher
  @bem ||= begin
    quoted_words = (BEGINWORDLIST + ENDWORDLIST).map { |word| Regexp.quote(word) }
    /^(#{quoted_words.join('|')})$/
  end
end
266
+
267
+ MULTIASSIGN=UnaryStarNode|CommaOpNode|(ParenedNode&-{:size=>1})
268
+ WITHCOMMAS=UnaryStarNode|CommaOpNode|
269
+ (CallSiteNode&-{:real_parens=>false, :args=>-{:size=>~0.reg}}) #|(ParenedNode&-{:size=>~1.reg})
270
+
271
+ BEGINAFTEREQUALS=
272
+ ParenedNode&
273
+ -{:size =>~1.reg, :op? =>NilClass|FalseClass, :after_equals =>nil}&
274
+ (-{:body=>item_that.size>0}|-{:rescues=>item_that.size>0}|-{:ensures=>~NilClass})
275
+ # item_that{|x| x.body.size+x.rescues.size > 0 or x.ensures }
276
+
277
+ # ASSIGN_COMMA=Op(',',true)&-{:comma_type=>Symbol}
278
+ LHS_COMMA=Op(',',true)&-{:comma_type => :lhs}
279
+ RHS_COMMA=Op(',',true)&-{:comma_type => :rhs}
280
+ PARAM_COMMA=Op(',',true)&-{:comma_type => :param}
281
+ FUNCLIKE_KEYWORD=KeywordToken&-{:ident=>RubyLexer::FUNCLIKE_KEYWORDS}
282
+
283
+ def RULES
284
+ #these must be the lowest possible priority, and hence first in the rules list
285
+ BEGIN2END.map{|_beg,_end|
286
+ -[KW(_beg), KW(beginsendsmatcher).~.*, KW(_end), KW(/^(do|\{)$/).~.la]>>MisparsedNode
287
+ }+
288
+
289
+ [
290
+ -[UNOP, Value, LowerOp]>>UnOpNode,
291
+ -[DEFOP, ParenedNode&-{:size=>1}]>>UnOpNode,
292
+ -[Op('*@'), VarNameToken|ValueNode, LowerOp]>>UnaryStarNode,
293
+
294
+ -[Op('=',true)|KW(/^(rescue|when|\[)$/)|-{:comma_type=>:call.reg|:array|:param|:rhs},
295
+ Op('*@'), VarNameToken|ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
296
+ -[MethNameToken|FUNCLIKE_KEYWORD, KW('('),
297
+ Op('*@'), VarNameToken|ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
298
+ # -[KW('[')|-{:comma_type=>:call.reg|:array},
299
+ # Op('*@'), VarNameToken|ValueNode, Op('=',true).la]>>:shift,
300
+ #star should not be used in an lhs if an rhs or param list context is available to eat it.
301
+ #(including param lists for keywords such as return,break,next,continue,rescue,yield,when)
302
+
303
+ -[Op('*@'), (GoalPostNode|KW(/^(in|[=)|,;])$/)).la]>>DanglingStarNode, #dangling *
304
+ -[',', (GoalPostNode|KW(/^(in|[=)|;])$/)).la]>> #dangling ,
305
+ proc{|stack|
306
+ dcomma=DanglingCommaNode.new
307
+ dcomma.offset=stack.last.offset
308
+ stack.push dcomma, stack.pop
309
+ },
310
+
311
+ -[Value, Op|KW_Op, Value, LowerOp]>>RawOpNode, #most operators
312
+
313
+ #assignment
314
+ -[Lvalue, MODIFYASSIGNOP, Value, LowerOp]>>AssignNode,
315
+ -[Lvalue, Op('=',true), AssignmentRhsNode, LowerOp]>>AssignNode,
316
+ -[Op('=',true).lb, Value, LowerOp]>>AssignmentRhsNode,
317
+ #was: -[AssignmentRhsListStartToken, Value, AssignmentRhsListEndToken]>>AssignmentRhsNode,
318
+ -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
319
+ Op('rescue3',true), Value, LowerOp]>>AssignNode,
320
+ -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
321
+ Op('rescue3',true).la]>>:shift,
322
+ -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
323
+ Op('rescue',true).la] >>proc{|stack|
324
+ resc=stack.last.dup
325
+ resc.ident += '3'
326
+ stack[-1]=resc
327
+ },
328
+ # a = b rescue c acts like a ternary,,,
329
+ #provided that both a and b are not multiple and b
330
+ #(if it is a parenless callsite) has just 1 param
331
+
332
+ # -[Op('=',true), ~WITHCOMMAS, Op('rescue',true).la]>>:shift,
333
+ #relative precedence of = and rescue are to be inverted if rescue
334
+ #is to the right and assignment is not multiple.
335
+
336
+ -[Op('=',true).~.lb, OB, Op('=',true), Value, RHS_COMMA.la]>>:shift,
337
+ -[RHS_COMMA.lb, Lvalue, Op('=',true), Value, RHS_COMMA.la ]>>AssignNode,
338
+ -[ValueNode|VarNameToken, LHS_COMMA, ValueNode|VarNameToken, Op('=',true).la]>>CommaOpNode,
339
+ #relative precedence of = and lhs/rhs , are to be inverted.
340
+
341
+ -[KW(',')&-{:comma_type=>:lhs}, (UnaryStarNode|ParenedNode)&~-{:after_comma =>true}, Op('=',true)]>>
342
+ proc{|stack| stack[-3].after_comma=true}, #mebbe this should be a lexer hack
343
+ #mark parentheses and unary stars that come after lhs commas
344
+
345
+ #-[Value, DotOp|DoubleColonOp, MethNameToken,
346
+ # ASSIGNOP, Value, LowerOp]>>AccessorAssignNode,
347
+
348
+ -[MethNameToken.~.lb, '(', Value, ')']>>ParenedNode,
349
+ -[MethNameToken.~.lb, '(', ')']>>VarLikeNode, #alias for nil
350
+
351
+ # -[Value, KeywordOp, Value, LowerOp]>>KeywordOpNode,
352
+ -[Op('=',true).~.lb, Value, Op('rescue',true), Value, LowerOp]>>ParenedNode,
353
+
354
+ #dot and double-colon
355
+ -[DoubleColonOp, VarNameToken, LowerOp]>>ConstantNode,#unary ::
356
+ -[Value, DotOp, CallNode, LowerOp]>>DotCall, #binary .
357
+ -[Value, DoubleColonOp, CallNode, LowerOp]>>DotCall, #binary ::
358
+ -[Value, DoubleColonOp, VarNameToken, LowerOp]>>ConstantNode,#binary ::
359
+
360
+ -[Value, "?", Value, ":", Value, LowerOp]>>TernaryNode,
361
+
362
+ # -[Value, /^\.\.\.?$/, Value, LowerOp]>>RangeNode,
363
+
364
+ -[MethNameToken, '(', Value.-, ')', BlockNode.-, KW(/^(do|\{)$/).~.la]>>CallNode,
365
+ -[FUNCLIKE_KEYWORD, '(', Value.-, ')',
366
+ BlockNode.-, KW(/^(do|\{)$/).~.la]>>KWCallNode,
367
+
368
+ -[ValueNode|VarNameToken, ',', ValueNode|VarNameToken, LowerOp]>>CommaOpNode,
369
+
370
+ -[dont_postpone_semi.lb,
371
+ Value, ';', Value, LowerOp]>>SequenceNode,
372
+
373
+ # -[Value, '=>', Value, LowerOp]>>ArrowOpNode,
374
+
375
+ -[KW(')').~.lb, '{', (CommaOpNode|ArrowOpNode).-, '}']>>HashLiteralNode,
376
+
377
+ # -[CallSiteNode.~.lb, '{', Value, '}']>>HashLiteralNode,
378
+
379
+ # -[KW(')').lb, '{', BlockFormalsNode.-, Value.-, '}']>>BlockNode,
380
+ -[KW(')').lb, 'do', BlockFormalsNode.-, Value.-, 'end']>>BlockNode,
381
+ #rubylexer handles the 'low precedence' of do...end
382
+
383
+ -[GoalPostNode, Value.-, GoalPostNode]>>BlockFormalsNode,
384
+ #rubylexer disambiguated operator vs keyword '|'
385
+
386
+ -[/^(while|until)$/, Value, /^([:;]|do)$/, Value.-, 'end']>>LoopNode,
387
+
388
+ -[/^(if|unless)$/, Value, /^(;|then|:)$/,
389
+ Value.-, ElsifNode.*, ElseNode.-, 'end'
390
+ ]>>IfNode,
391
+
392
+ -['else', Value.-, KW(/^(ensure|end)$/).la]>>ElseNode,
393
+
394
+ -['elsif', Value, /^(;|then|:)$/, Value.-,
395
+ KW(/^e(nd|ls(e|if))$/).la
396
+ ]>>ElsifNode,
397
+
398
+ -['module', ConstantNode|VarNameToken, KW(';'), Value.-, 'end']>>ModuleNode,
399
+ # -['module', ConstantNode|VarNameToken, KW(/^(;|::)$/).~.la]>>
400
+ # proc{|stack| #insert ; at end of module header if none was present
401
+ # stack.push KeywordToken.new(';'), stack.pop
402
+ # },
403
+ -['class', Value, ';', Value.-, 'end']>>ClassNode,
404
+ -['class', Value, Op('<'), Value, KW(';').~.la]>>:shift,
405
+ -['class', Op('<<'), Value, ';', Value.-, 'end']>>MetaClassNode,
406
+
407
+ -['alias', BareMethod|VarNameToken, BareMethod|VarNameToken]>>AliasNode,
408
+ -['undef', BareMethod]>>UndefNode,
409
+ -[UndefNode, ',', BareMethod]>>UndefNode,
410
+
411
+ -['def', CallSiteNode, Op('=').-, KW(';'),
412
+ Value.-, RescueNode.*, ElseNode.-, EnsureNode.-,
413
+ 'end'
414
+ ]>>MethodNode,
415
+
416
+ -['begin',
417
+ Value.-, RescueNode.*, ElseNode.-, EnsureNode.-,
418
+ 'end'
419
+ ]>>ParenedNode,
420
+
421
+ -[Op('=',true), BEGINAFTEREQUALS, Op('rescue',true).la]>>
422
+ proc{ |stack| stack[-2].after_equals=true },
423
+ #this is bs. all for an extra :begin in the parsetree
424
+
425
+ -[(KW(/^(;|begin)$/)|ParenedNode|RescueNode).lb,
426
+ 'rescue', KW('=>').-, Value.-, /^([:;]|then)$/,
427
+ ]>>RescueHeaderNode,
428
+ -[ RescueHeaderNode, Value.-, KW(';').-, KW(/^(rescue|else|ensure|end)$/).la
429
+ ]>>RescueNode,
430
+
431
+ -['ensure', Value.-, KW('end').la]>>EnsureNode,
432
+
433
+ -['[', Value.-, ']']>>ArrayLiteralNode,
434
+
435
+ -[Value, '[', Value.-, ']']>>BracketsGetNode,
436
+
437
+ -[HereDocNode, StringToken.*, StringToken, StringToken.~.la]>>StringCatNode,
438
+ -[(StringToken|HereDocNode).~.lb, StringToken.*, StringToken, StringToken, StringToken.~.la]>>StringCatNode,
439
+ -[(StringToken|HereDocNode).~.lb, StringToken, StringToken.~.la]>>StringNode, #includes regexp, wordlist, backquotes
440
+
441
+ -['case', Value.-, KW(/^[:;]$/).-, WhenNode.*, ElseNode.-, 'end']>>CaseNode,
442
+
443
+ -['when', Value, /^([:;]|then)$/, Value.-,
444
+ KW(/^(when|else|end)$/).la
445
+ ]>>WhenNode,
446
+
447
+ -['for', Value, 'in', Value, /^([:;]|do)$/, Value.-, 'end']>>ForNode,
448
+
449
+ #semicolon cleanup....
450
+ -[dont_postpone_semi.lb,
451
+ Value, ';',
452
+ (KW(/^(#{RubyLexer::INNERBOUNDINGWORDS.gsub(/(rescue|then)\|/,'')}|end|[)}\]])$/)|EoiToken).la
453
+ ]>>proc{|stack| stack.delete_at -2 },
454
+ -[Value, ';', KW('then').la
455
+ ]>>proc{|stack| stack.delete_at -2 },
456
+ -[dont_postpone_semi.lb, Value, ';', RescueNode
457
+ ]>>proc{|stack| stack.delete_at -3 },
458
+ -[(KW(/^(begin|[;:({|]|then|do|else|ensure)$/)|StartNode|RescueHeaderNode).lb, ';'
459
+ ]>>proc{|stack| stack.delete_at -2 },
460
+ #this rule is somewhat more forgiving than matz' parser...
461
+ #not all semicolons after :, (, and { keywords should
462
+ #be ignored. some should cause syntax errors.
463
+
464
+
465
+ #comma cleanup....
466
+ -[',', KW(/^[}\]]$/).la]>>proc{|stack| stack.delete_at -2},
467
+ #likewise, this is somewhat too forgiving.
468
+ #some commas before } or ] should cause syntax errors
469
+
470
+ #multiple assignment.... (handled in a subsequent stage?)
471
+ #(cause it requires that the relative priorities of = and , be reversed!)
472
+
473
+
474
+ #turn lvalues into rvalues if not followed by an assignop
475
+ -[-{:lvalue? =>true}, (Op('=',true)|MODIFYASSIGNOP).~.la]>>proc{|stack| stack[-2].lvalue=nil},
476
+
477
+ #expand the = into a separate token in calls to settors (after . or ::).
478
+ #but not in method headers
479
+ -[KW('def').~.lb, Value, DotOp|DoubleColonOp,
480
+ (MethNameToken&-{:ident=>/^[a-z_][a-z0-9_]*=$/i}).la]>>
481
+ proc{|stack|
482
+ methname=stack.pop
483
+ methname.ident.chomp!('=')
484
+ offset=methname.offset+methname.ident.size
485
+ stack.push(
486
+ CallNode.new(methname,nil,nil,nil,nil),
487
+ OperatorToken.new('=',offset)
488
+ )
489
+ },
490
+
491
+ -[NumberToken|SymbolToken]>>LiteralNode,
492
+
493
+ #lexer does the wrong thing with -22**44.5, making the - part
494
+ #of the first number token. it's actually lower precedence than
495
+ #**... this rule fixes that problem.
496
+ -[NumberToken&-{:ident=>/\A-/}, Op('**').la]>>
497
+ proc{|stack|
498
+ neg_op=OperatorToken.new("-@",stack[-2].offset)
499
+ neg_op.unary=true
500
+ stack[-2,0]=neg_op
501
+ stack[-2].ident.sub!(/\A-/,'')
502
+ stack[-2].offset+=1
503
+ },
504
+
505
+ #treat these keywords like (rvalue) variables.
506
+ -[/^(nil|false|true|__FILE__|__LINE__|self)$/]>>VarLikeNode,
507
+
508
+ #here docs
509
+ -[HerePlaceholderToken]>>HereDocNode,
510
+ -[HereBodyToken]>>proc{|stack|
511
+ stack.delete_at(-2)#.instance_eval{@headtok}.node.saw_body!
512
+ },
513
+
514
+ ]
515
+ end
516
+
517
+
518
+
519
# Set up a parser.
#
# input:: either an Array, which is used directly as the token source (it is
#         given a singleton get1token that shifts off its first element), or
#         a value handed to RubyLexer.new as the source
# name::  file name passed to the lexer and kept in @filename
# line::  starting line number passed to the lexer
# lvars:: names to mark as known local variables in the lexer
#         (ignored when input is an Array)
def initialize(input, name="(eval)", line=1, lvars=[])
  case input
  when Array
    class << input
      def get1token
        shift
      end
    end
    @lexer = input
  else
    @lexer = RubyLexer.new(name, input, line)
    lvars.each do |lvar|
      @lexer.localvars[lvar] = true
    end
  end

  @filename = name
  @min_sizes = {}
  @compiled_rules = {}
  @moretokens = []
  @unary_or_binary_op = /^[-+&*]$/
  @rules = self.RULES
  @precedence = self.PRECEDENCE
  @RIGHT_ASSOCIATIVE = self.RIGHT_ASSOCIATIVE
end
536
+
537
+ attr_accessor :lexer
538
+
539
# Fetch the next token for the parser, translating raw lexer tokens into the
# forms the grammar rules expect.
#
# Tokens queued in @moretokens are returned first.  Otherwise a token is
# pulled from @lexer and rewritten according to its class (see the case
# statement below).  The returned token is also stored in @last_token.
#
# recursing:: true when called from within get_token itself; suppresses the
#             PRINT_TOKENS debug output for the inner call.
#
# Debugging: set ENV['PRINT_TOKENS'] or ENV['RAW_PRINT_TOKENS'] to print the
# processed / raw tokens.
def get_token(recursing=false)
  unless @moretokens.empty?
    @last_token=@moretokens.shift
    p @last_token if ENV['PRINT_TOKENS'] unless recursing
    return @last_token
  end

  # One-pass loop: `redo` restarts the body to fetch another token,
  # `break` leaves it with the current result.
  # The `when X` clauses below no longer end in a colon; that form is a
  # syntax error on Ruby 1.9 and later.
  begin
    result=@lexer.get1token or break
    p result if ENV['RAW_PRINT_TOKENS']

    #set token's line if wanted
    result.line||=@line if result.respond_to? :line=

    if result.respond_to?(:as) and as=result.as
      result=KeywordToken.new(as,result.offset)
      result.not_real!
    else

      case result
      when ImplicitParamListStartToken #treat it like (
        result=KeywordToken.new('(', result.offset)
        result.not_real!

      when ImplicitParamListEndToken #treat it like )
        result=KeywordToken.new(')', result.offset)
        result.not_real!

      # when AssignmentRhsListStartToken, AssignmentRhsListEndToken
      #   do nothing, pass it thru

      when NewlineToken
        result=KeywordToken.new(';',result.offset)

      when FileAndLineToken #so __FILE__ and __LINE__ can know what their values are
        @file=result.file
        @line=result.line
        redo

      when NoWsToken
        #rubylexer disambiguates array literal from
        #call to [] or []= method with a preceding NoWsToken...
        #kind of a dumb interface.
        result=get_token(true)
        result.ident=='[' and result.extend BracketsCall

      when OperatorToken
        if @unary_or_binary_op===result.ident and result.unary
          result=result.dup
          result.ident+="@"
        end

      #more symbol table maintenance....
      when KeywordToken
        case name=result.ident
        when "do"
          # a `do` without a matching end is turned into a semicolon
          result=KeywordToken.new(';',result.offset) unless result.has_end?
        when /^(#{BINOP_KEYWORDS.join '|'})$/ #should be like this in rubylexer
          result=OperatorToken.new(name,result.offset) unless result.has_end?
        when "|"
          result=GoalPostNode.new(result.offset) #is this needed still?
        when "__FILE__" #I wish rubylexer would handle this
          class<<result; attr_accessor :value; end
          result.value=@file.dup
        when "__LINE__" #I wish rubylexer would handle this
          class<<result; attr_accessor :value; end
          result.value=@line
        end

      when EoiToken then break
      when HereBodyToken then break
      when IgnoreToken then redo
      end
    end
  end while false
  p result if ENV['PRINT_TOKENS'] unless recursing
  return @last_token=result
end
624
+
625
# Try to apply one grammar rule to the top of the parse stack.
#
# rule:: a Reg::Transform.  Its left side holds the pattern matchers
#        (optionally starting with a lookback matcher and ending with a
#        lookahead matcher or Proc); its right side is the action: a node
#        Class to instantiate, a Proc called with the stack, or :shift.
#
# Returns false when the rule does not match, true after a Class or Proc
# action has run, and 0 when a matching rule's action is :shift.
# Fails (raises) if +rule+ is not a Reg::Transform or the action is of an
# unknown kind.  Any exception is reported on stdout along with the rule,
# then re-raised.
def evaluate rule
  #dissect the rule
  Reg::Transform===rule or fail
  node_type=rule.right
  rule=rule.left.subregs.dup
  # `|` binds tighter than `===`; the parentheses only make that explicit
  lookahead_processor=(rule.pop if (Proc|::Reg::LookAhead)===rule.last)
  lookback=rule[0]=rule[0].regs(0) if ::Reg::LookBack===rule[0]

  #index of data at which to start matching
  i=@stack.size-1 #-1 because last element of @stack is always lookahead

  #I could call this a JIT compiler, but that's a bit grandiose....
  #more of a JIT pre-processor
  compiled_rule=@compiled_rules[rule]||=
    rule.map{|pattern|
      (String|Regexp)===pattern ? KW(pattern) : pattern
    }

  #what's the minimum @stack size this rule could match?
  rule_min_size=@min_sizes[compiled_rule]||=
    compiled_rule.inject(0){|sum,pattern|
      sum + pattern.itemrange.begin
    }
  i>=rule_min_size or return false

  matching=[]

  #actually try to match rule elements against each @stack element in turn
  compiled_rule.reverse_each{|matcher|
    i.zero? and fail
    target=matching
    #is this matcher optional? looping?
    looping=matcher.itemrange.last.to_f.infinite?
    optional=matcher.itemrange.first.zero?
    matching.unshift target=[] if looping
    matcher=matcher.regs(0) if looping or optional

    begin
      if matcher===@stack[i-=1] #try match
        target.unshift @stack[i]
      else
        #if match failed, the whole rule fails
        #unless this match was optional, in which case, ignore it
        #but bump the data position back up, since the latest datum
        #didn't actually match anything.
        return false unless optional or looping&&!target.empty?
        i+=1
        matching.unshift nil unless looping
        break
      end
    end while looping
  }

  matchrange= i...-1 #what elems in @stack were matched?

  #give lookahead matcher (if any) a chance to fail the match
  case lookahead_processor
  when ::Reg::LookAhead
    return false unless lookahead_processor.regs(0)===@stack.last
  when Proc
    return false unless lookahead_processor[self,@stack.last]
  end

  #if there was a lookback item, don't include it in the new node
  if lookback
    matchrange= i+1...-1 #what elems in @stack were matched?
    matching.shift
  end

  #replace matching elements in @stack with node type found
  case node_type
  when Class
    node=node_type.new(*matching)
    node.line=@line
    @stack[matchrange]=[node]
  when Proc then node_type[@stack]
  when :shift then return 0
  else fail
  end

  return true #let caller know we found a match

# Exception (not just StandardError) is caught on purpose: this handler only
# prints diagnostics and always re-raises.
rescue Exception=>e
  puts "error (#{e}) while executing rule: #{rule.inspect}"
  puts e.backtrace.join("\n")
  raise
end
721
+
722
# Raised when the input cannot be parsed.  Carries the parse stack as it
# stood when the error was detected.
class ParseError < RuntimeError
  # the parse stack handed to the constructor (read-only)
  attr_reader :stack

  # msg::   error message
  # stack:: parse stack at the time of the failure
  def initialize(msg, stack)
    # The message is passed to RuntimeError exactly once.  The earlier
    # version called super twice around a permanently disabled (`if false`)
    # block that would have appended node classes and line ranges to it.
    super(msg)
    @stack = stack
  end
end
742
+
743
# Index into the parse stack; arguments are passed straight to @stack's [].
def [](*args)
  @stack[*args]
end
746
+
747
# Assign into the parse stack; arguments are passed straight to @stack's []=.
def []=(*args)
  @stack.__send__(:[]=, *args)
end
750
+
751
# Run the parser over the token stream and return the resulting tree.
#
# Repeatedly tries every rule (last rule first) against the stack; when no
# rule reduces, the next token is shifted onto the stack.  Parsing stops
# when the lexer runs dry or a second end-of-input token arrives.
#
# Returns a NopNode for empty input, otherwise the single node left between
# the StartNode and the end-of-input token.
# Raises ParseError when the stack does not collapse to exactly three
# entries, or when the tree contains ErrorNodes (their messages are joined
# into the exception message).
#
# While parsing, Thread.current[:$RedParse_parser] refers to a parser
# (this one, unless one was already registered); the previous value is
# restored on exit.  Set ENV['PRINT_STACK'] to dump the top of the stack on
# errors.
def parse
  oldparser=Thread.current[:$RedParse_parser]
  Thread.current[:$RedParse_parser]||=self

  @stack=[StartNode.new, get_token]
  #last token on @stack is always implicitly the lookahead
  loop {
    #try all possible reductions
    shift=nil
    @rules.reverse_each{|rule|
      shift=evaluate(rule) and break
    }
    next if shift==true

    #no rule can match current @stack, get another token
    tok=get_token

    #are we done yet?
    break if tok.nil? || (EoiToken===tok && EoiToken===@stack.last)

    #shift our token onto the @stack
    @stack.push tok
  }

  @stack.size==2 and return NopNode.new #handle empty parse string

  #unless the @stack is 3 tokens,
  #with the last an Eoi, and first a StartNode
  #there was a parse error
  unless @stack.size==3
    if ENV['PRINT_STACK']
      require 'pp' #pp is not loaded by default on older rubies
      pp @stack[-[15,@stack.size].min..-1]
    end
    top=MisparsedNode.new("(toplevel)", @stack[1...-1],'')
    raise ParseError.new(top.msg,@stack)
  end
  EoiToken===@stack.last or fail
  StartNode===@stack.first or fail

  result=@stack[1]

  #multiple assignment must be resolved
  #afterwards by walking the parse tree.
  #(because the relative precedences of = and ,
  #are reversed in multiple assignment.)
  # result.respond_to? :fixup_multiple_assignments! and
  #   result=result.fixup_multiple_assignments!

  #relative precedence of = and rescue are also inverted sometimes
  # result.respond_to? :fixup_rescue_assignments! and
  #   result=result.fixup_rescue_assignments!

  #do something with error nodes
  msgs=[]
  result.walk{|parent,i,subi,node|
    # the block yields false for an ErrorNode (after recording its
    # message) and true for every other node
    if ErrorNode===node
      msgs<< @filename+":"+node.blame.msg
      false
    else
      true
    end
  } if result.respond_to? :walk #hack hack
  result.errors=msgs unless msgs.empty?
  #other types of errors (lexer errors, exceptions in lexer or parser actions)
  #should be handled in the same way, but currently are not
  # puts msgs.join("\n")

# Exception (not just StandardError) is caught on purpose: this handler only
# prints what was being parsed and always re-raises.
rescue Exception
  input=@lexer
  if Array===input
    puts "error while parsing:"
    require 'pp' #pp is not loaded by default on older rubies
    pp input
    input=nil
  else
    input=input.original_file
    inputname=@lexer.filename
    #for long inputs, print just the file name instead of the source
    input.to_s.size>1000 and input=inputname
  end
  puts "error while parsing: <<< #{input} >>>"
  raise
else
  unless msgs.empty?
    if ENV['PRINT_STACK']
      require 'pp' #pp is not loaded by default on older rubies
      pp @stack[-[15,@stack.size].min..-1]
    end
    raise RedParse::ParseError.new(msgs.join("\n"),@stack)
  end

  return result
ensure
  Thread.current[:$RedParse_parser]=oldparser
end
840
+
841
# Flat list of every matcher found on the left-hand side of any rule.
def LEFT_MATCHERS
  per_rule = self.RULES.map { |rule| rule.left.subregs }
  per_rule.flatten
end
842
# Collect the classes mentioned by the grammar rules: every Class reachable
# in a left-hand-side matcher through Reg::And / Reg::Or combinations
# (after stripping lookahead/lookback wrappers), followed by every Class
# used as a rule's right-hand side.  Duplicates are not removed.
def STACKABLE_CLASSES
  # The matchers come from LEFT_MATCHERS; the earlier version read a
  # variable (_LEFT_MATCHERS) that was never assigned and so raised a
  # NameError as soon as this method was called.
  left_matchers=self.LEFT_MATCHERS.map{|m|
    case m
    when Reg::LookAhead,Reg::LookBack then m.regs(0)
    else m
    end
  } #remove lookahead and lookback decoration

  # Reduce a matcher to the class(es) it names; yields nil for matchers
  # of any other kind (those are dropped by compact below).
  rule_juicer=proc{|m|
    case m
    when Class then m
    when Reg::And then m.subregs.map(&rule_juicer)
    when Reg::Or then m.subregs.map(&rule_juicer)
    end
  }

  left_classes=left_matchers.map{|m| rule_juicer[m] }.flatten.compact
  right_classes=self.RULES.map{|r| r.right }.grep(Class) #classes in productions
  left_classes+right_classes
end
863
+ =begin
864
+ HIER=Class::FlattenedHierarchy.new *STACKABLE_CLASSES
865
+
866
+ LOOKAHEAD_MATCHERS=self.RULES.map{|r| r.left.subregs.last }.map{|la| Reg::LookAhead===la and la.regs(0) }
867
+
868
+ LOOKAHEAD_CLASSES=LOOKAHEAD_MATCHERS.map(&rule_juicer)
869
+ LOOKAHEAD_CLASSES.each_with_index{|classes,i|
870
+ case classes
871
+ when Class: huh
872
+ when Array: classes.flatten.each{huh}
873
+ else
874
+ end
875
+ }
876
+ =end
877
+ # def fixup_multiple_assignments!; end
878
+ end
879
+
880
+
881
+ if __FILE__==$0
882
+ require 'problemfiles'
883
+ class NeverExecThis<RuntimeError; end
884
+
885
# Recursively compare two (possibly nested) arrays.
#
# Returns a two-element array [result, diff]:
# * result is a copy of +a+ in which each differing element has been
#   replaced by a one-entry Hash {a_element=>b_element}; nested arrays are
#   compared recursively
# * diff is true when any difference was found, false otherwise
#
# If +a+ equals +b+ the pair [a, false] is returned immediately.  A
# non-Array +a+ is wrapped in a one-element array first.  Two Regexps whose
# #to_s and #options agree are treated as equal.  Adjacent difference
# hashes are merged into a single hash whose key and value are built with
# the unary minus on Array.
# NOTE(review): that unary minus is assumed to come from the reg library --
# confirm.
def arraydiff(a,b)
  a==b and return [a,false]
  (Array===a or a=[a])
  result=a.dup
  diff=false
  size=a.size >= b.size ? a.size : b.size
  size.times{|i|
    ai=a[i]
    bi=b[i]
    if Array===ai and Array===bi
      result_i,diff_i=arraydiff(ai,bi)
      diff||=diff_i
      result[i]=result_i
    elsif ai!=bi
      next if Regexp===ai and ai.to_s==bi.to_s and
              ai.options==bi.options
      diff=true
      result[i]={ai=>bi}
    elsif ai.nil?
      result[i]={'size mismatch'=>"#{a.size} for #{b.size}"} if a.size!=b.size
      diff=true
    end

    #merge this difference with an immediately preceding one
    if i.nonzero? and Hash===result[i] and Hash===result[i-1]
      old=result[i-1]
      oldkeys=old.keys
      oldvals=old.values
      if Reg::Subseq===oldkeys.first
        # NOTE(review): #children is called on plain Arrays here; whether
        # anything defines Array#children is not visible in this file --
        # confirm this branch works as intended.
        oldkeys=oldkeys.children
        # was `oldval=...`, a typo that threw the unpacked values away
        oldvals=oldvals.children
      end
      result[i-1..i]=[ {-[*oldkeys+result[i].keys]=>-[*oldvals+result[i].values]} ]
    end
  }
  return result,diff
end
920
+
921
# Command-line option handling for running this file as a script.
# Defaults: pretty-print the tree, and stay quiet.
output=:pp
quiet=true

# Consume leading arguments that start with "-". What is left in ARGV
# afterwards is used as the list of inputs by the code that follows.
# "when ... then" is used rather than the old "when ...:" form, which
# Ruby 1.9 and later reject.
while /^-/===ARGV.first
  case opt=ARGV.shift
  when "--" then break   #explicit end of options
  when "--pp" then output=:pp
  when "--lisp" then output=:lisp
  when "--parsetree" then output=:parsetree
  when "--vsparsetree" then output=:vsparsetree
  when "--vsparsetree2" then output=:vsparsetree2
  when "--update-problemfiles" then problemfiles=ProblemFiles.new
  when "-q" then quiet=true
  when "-v" then quiet=false
  when "-e"
    #the rest of the command line is the program text itself
    inputs=[ARGV.join(" ")]
    names=["-e"]
    break
  else fail "unknown option: #{opt}"
  end
end
939
+
940
# Work out what to parse, unless -e already supplied the program text:
# * no arguments: read the program from standard input, named "-";
# * a single argument that can be listed as a directory: every *.rb file
#   found beneath it;
# * otherwise: each remaining argument is taken as a file name.
unless inputs
  if ARGV.empty?
    inputs=[STDIN.read]
    names=["-"]
  elsif ARGV.size==1 and (Dir.entries(ARGV.first) rescue false)
    names=Dir[ARGV.first+"/**/*.rb"]
  else
    names=ARGV.dup
  end
  #File.read closes the file once it has been read (File.open(name).read
  #left the handle open). A file that cannot be read yields nil.
  inputs||=names.map{|name| File.read(name) rescue nil}
end
951
+
952
# Parse every input and print it in the selected output format.
# result becomes the exit status: it stays 0 unless a vsparsetree comparison
# finds a difference.
result=0

# Prefix for the text handed to ParseTree: if that text is ever executed
# rather than merely parsed, these BEGIN blocks raise/throw so the attempt
# can be reported.
safety="BEGIN{raise NeverExecThis};BEGIN{throw :never_exec_this,1};\n"
# One-line prefix for the copy given to RedParse; presumably keeps its line
# numbers in step with the safety-prefixed copy -- TODO confirm.
nullsafety="\n"
# Unreadable files are nil in inputs. Keep them nil here instead of raising
# TypeError on safety+nil; the loop below skips them.
safe_inputs=inputs.map{|input| input && safety+input}

inputs.each_index{|i|
  begin

    input=inputs[i] or next
    name=names[i]

    input=nullsafety+input
    #print name+"... "; STDOUT.flush

    begin
      tree=nil
      #catch returns nil when the parse finishes normally (the block's last
      #value), and 1 if something threw :never_exec_this.
      if catch(:never_exec_this){
           tree=RedParse.new(input,name).parse; nil
         } #raise NeverExecThis
#      rescue RedParse::ParseError=>e
#        require 'pp'
#        pp e.stack[-[15,e.stack.size].min..-1]
#        raise
#      rescue NeverExecThis
        puts "RedParse attempted to execute parse data in #{name}"
        next
      end
    rescue Interrupt
      exit 2
    rescue Exception=>e
#      puts e.backtrace.join("\n")
      e.message << " during parse of #{name}"
#      err=e.class.new(e.message+" during parse of #{name}")
#      err.set_backtrace e.backtrace
      problemfiles.push name if problemfiles
      raise e
    end
    tree or fail "parsetree was nil for #{name}"

    case output
    when :pp
      require 'pp'
      pp tree
    when :lisp
      puts tree.to_lisp
    when :parsetree
      require 'pp'  #pp is used below; do not rely on another branch having loaded it
      pp tree.to_parsetree
    when :vsparsetree,:vsparsetree2
      begin
        require 'rubygems'
      rescue Exception
      end
      require 'parse_tree'
      require 'pp'  #needed for the pp calls when reporting differences
      #require 'algorithm/diff'
      begin
        mine=tree.to_parsetree(:quirks)
        if IO===input
          input.rewind
          input=input.read
        end
        ryans=nil
        catch(:never_exec_this){
          ryans=ParseTree.new.parse_tree_for_string(safe_inputs[i],name); nil
        } and raise NeverExecThis
        delta,is_diff=arraydiff(mine,ryans)
      rescue NeverExecThis
        puts "ParseTree attempted to execute parse data in #{name}"
        next
      rescue Interrupt
        exit 2
      rescue Exception=>e
        #raise( RuntimeError.new( "#{e} during to_parsetree of #{name}" ) )
        puts "error during to_parsetree of #{name}"
        problemfiles.push name if problemfiles
        raise
      end
      if output==:vsparsetree2
        #print both trees in full rather than the element-wise delta
        if !quiet or is_diff
          puts "mine:"
          pp mine
          puts "ryans:" if is_diff
          pp ryans if is_diff
        end
      elsif !quiet or is_diff
        puts 'differences in '+name if is_diff
        pp delta
      end
      if is_diff
        result=1
        problemfiles.push name if problemfiles
      else
        puts "no differences in "+name
        problemfiles.delete name if problemfiles
      end
    end

  rescue NeverExecThis
    puts "mysterious attempt to execute parse data in #{name}"
    next
  rescue Interrupt,SystemExit
    exit 2
  rescue Exception=>e
    #report the failure and carry on with the next input
    puts "#{e}:#{e.class}"
    puts e.backtrace.join("\n")
    #problemfiles.push name if problemfiles
    #raise
  ensure
    STDOUT.flush
  end
}
exit result
1061
+ end
1062
+
1063
+ =begin todo:
1064
+ v merge DotCallNode and CallSiteNode and CallWithBlockNode
1065
+ remove actual Tokens from parse tree...
1066
+ instead, each node has a corresponding range of tokens
1067
+ -in an (optional) array of all tokens printed by the tokenizer.
1068
+ split ParenedNode into ParenedNode + Rescue/EnsureNode
1069
+ 'incomplete' subtrees such as ElseNode, ElsifNode, RescueNode
1070
+ -should not appear in final output
1071
+ v split keywordopnode into loop and if varieties?
1072
+ =end
1073
+
1074
+ =begin optimization opportunities:
1075
+ top of stack slot contains mostly keywords, specific node classes, and Value
1076
+ lookahead slot contains mostly LowerOp and keywords, with a few classes and inverted keywords
1077
+ -(LowerOp is hard to optimize)
1078
+ if top of stack matcher is Value, then the next matcher down is mostly keywords, with some operators
1079
+ class membership can be optimized to test of integer within a range
1080
+ keywords could be stored as symbols instead of strings
1081
+ a few rules may need exploding (eg, ensure) to spoon feed the optimizer
1082
+ make all Nodes descendants of Array
1083
+ =end