redparse 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,26 @@
1
+ # Copyright (C) 2008 Caleb Clausen
2
+ # Distributed under the terms of Ruby's license.
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ require 'lib/redparse/version.rb'
6
+
7
+
8
# Extract the gem description from README.txt: read past the
# "== DESCRIPTION:" heading, skip the rest of that heading up to the next
# blank line, then take the paragraph that follows.
# The block form of File.open closes the handle as soon as the text is read
# (the handle was previously left open for the life of the process).
desc = File.open("README.txt") do |readme|
  readme.readline("\n== DESCRIPTION:")
  readme.readline("\n\n")
  readme.readline("\n\n")
end

# Gem specification, built through Hoe's block-style configuration.
hoe = Hoe.new("redparse", RedParse::VERSION) do |_|
  _.author = "Caleb Clausen"
  _.email = "redparse-owner @at@ inforadical .dot. net"
  _.url = ["http://redparse.rubyforge.org/", "http://rubyforge.org/projects/redparse/"]
  _.extra_deps << ['rubylexer', '>= 0.7.2']
  # _.test_globs=["test/*"]
  _.description = desc
  _.summary = desc[/\A[^.]+\./]  # text of the description up to its first period
  _.spec_extras = {:bindir => ''}
  _.rdoc_pattern = /\A(README\.txt|lib\/.*\.rb)\Z/
  _.remote_rdoc_dir = "/"
end
25
+
26
+
@@ -0,0 +1,1083 @@
1
+ =begin
2
+ redparse - a ruby parser written in ruby
3
+ Copyright (C) 2008 Caleb Clausen
4
+
5
+ This program is free software: you can redistribute it and/or modify
6
+ it under the terms of the GNU Lesser General Public License as published by
7
+ the Free Software Foundation, either version 3 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public License
16
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ =end
18
+
19
+ #warn 'hacking up LOAD_PATH to include the latest RubyLexer!'
20
+ #$:.unshift Dir.pwd+'/../rubylexer/lib', Dir.pwd+'/../rubylexer'
21
+
22
+ # "faster rule compiler is untested"
23
+
24
+ require 'rubygems'
25
+ require 'rubylexer'
26
+ require 'reg'
27
+
28
+ require "redparse/node"
29
+ #require "redparse/decisiontree"
30
+ require "redparse/reg_more_sugar"
31
+ class RedParse
32
+ # include Nodes
33
+
34
# Reports whether RubyLexer tokenizes the snippet "return {}.size" as exactly
# the sequence of token classes listed below. Tokens are pulled from the
# lexer one at a time and checking stops at the first mismatch.
# NOTE(review): presumably this detects a RubyLexer release containing a
# particular fix for `return {}` -- confirm against RubyLexer's history.
def self.has_return_hash_fix?
  lexer = RubyLexer.new("", "return {}.size")
  expected_classes = [
    FileAndLineToken,
    MethNameToken,
    ImplicitParamListStartToken,
    WsToken,
    KeywordToken,
    KeywordToken,
    KeywordToken,
    MethNameToken,
    ImplicitParamListStartToken,
    ImplicitParamListEndToken,
    ImplicitParamListEndToken,
    EoiToken,
  ]
  expected_classes.all? { |token_class| token_class === lexer.get1token }
end
51
+
52
+ #see pickaxe, 1st ed, page 221
53
# Table of the right-associative operators, keyed by operator text; the
# values repeat each operator's entry in PRECEDENCE. A new Hash is built on
# every call.
def RIGHT_ASSOCIATIVE
  # "defined?"=>120.5,
  table = {"**" => 118}

  assignment_ops = %w[= %= /= -= += |= &= >>= <<= *= &&= ||= **= ^=]
  assignment_ops.each { |op| table[op] = 105 }

  # "and"=>99, "or"=>99,
  # "if"=>98, "unless"=>98, "while"=>98, "until"=>98, "rescue"=>98,
  # "&&"=>109, "||"=>108,
  table
end
69
+
70
# Operator precedence table: maps the text of each operator (or
# operator-like keyword) to a number, where a larger number binds tighter.
# The table is listed one precedence level per row and expanded into a new
# Hash on every call.
def PRECEDENCE
  levels = [
    # "("=>122,              #method param list
    # "{"=>122, "do"=>122,   #blocks
    [121,   %w[:: .]],
    # "defined?"=>120.5,
    [120,   ["["]],                  #[] []= methods
    [119,   %w[! ~ +@]],
    [118,   %w[**]],
    [117,   %w[-@]],
    [116,   %w[* / %]],
    [115,   %w[+ -]],
    [114,   %w[<< >>]],
    [113,   %w[&]],
    [112,   %w[^ |]],
    [111,   %w[<= >= < >]],
    [110,   %w[<=> == === != =~ !~]],
    [109,   %w[&&]],
    [108,   %w[||]],
    [107,   %w[.. ...]],
    [106,   %w[?]],                  # ":"=>106, #not sure what to do with ":"
    [105.5, %w[*@ &@]],              #unary * and & operators
    [105,   %w[= %= /= -= += |= &= >>= <<= *= &&= ||= **= ^=]],
    [103,   %w[defined? not]],
    [102,   %w[: rescue3]],          #":" but not when used as a substitute for 'then'
    [101,   %w[=>]],
    [100,   %w[,]],
    #the 'precedence' of comma is somewhat controversial. it actually has
    #several different precedences depending on which kind of comma it is.
    #the precedence of , is higher than :, => and the assignment operators
    #in certain contexts.

    #"unary" prefix function names seen as operators have this precedence
    #but, rubylexer handles precedence of these and outputs fake parens
    #to tell us how it's parsed
    [99,    %w[or and]],
    [98,    %w[if unless while until rescue]],
    [96,    %w[;]],
  ]

  table = {}
  levels.each do |strength, operators|
    operators.each { |operator| table[operator] = strength }
  end
  table
end
143
+
144
+ module BracketsCall; end
145
+
146
+ Value= #NumberToken|SymbolToken|
147
+ #HerePlaceholderToken|
148
+ ((VarNameToken|ValueNode)&-{:lvalue? =>nil})
149
+ Expr=Value
150
+
151
# Build a matcher for a KeywordToken whose ident matches +ident+.
#
# ident:: an Integer (converted to its one-character string with #chr),
#         a String or Regexp (used as given), or anything else
#         (converted with #to_s).
#
# Returns KeywordToken combined (&) with a -{:ident=>...} pattern.
#
# The `when X then` form is used because the old `when X:` spelling is a
# syntax error on Ruby 1.9 and later.
def self.KW(ident)
  ident = case ident
          when Integer then ident.chr
          when String, Regexp then ident
          else ident.to_s
          end

  return KeywordToken & -{:ident => ident}
end

# Instance-level convenience wrapper around RedParse.KW.
def KW(ident); self.class.KW(ident) end
161
+ UNOP=
162
+ (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
163
+ :ident=>/^[*&+-]@$/,
164
+ :unary =>true,
165
+ }|
166
+ (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
167
+ :ident=>/^([~!]|not|defined\?)$/,
168
+ } #|
169
+ DEFOP=
170
+ (OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
171
+ :ident=>"defined?",
172
+ }
173
+ =begin
174
+ MethNameToken&-{ #hack, shouldn't be necessary
175
+ #rubylexer should know to generally treat "defined?" as a keyword
176
+ #or operator. (like most keywords, it can also be used as a method
177
+ # name....)
178
+ :ident=>"defined?"
179
+ }
180
+ =end
181
+
182
# Build a matcher for operator tokens.
#
# ident::         when given (non-nil, non-false), the matcher is further
#                 restricted with a -{:ident=>ident} pattern
# allow_keyword:: when true, KeywordToken is accepted as well as
#                 OperatorToken
def self.Op(ident = nil, allow_keyword = false)
  matcher = allow_keyword ? (OperatorToken | KeywordToken) : OperatorToken
  #matcher[:infix?]=true
  ident ? (matcher & -{:ident => ident}) : matcher
end

# Instance-level convenience wrapper around RedParse.Op.
def Op(*args)
  self.class.Op(*args)
end
190
+ BINOP_KEYWORDS=%w[if unless while until and or && \|\|]
191
+
192
+ #HAS_PRECEDENCE=Op(/^#{PRECEDENCE.keys.map{|k| Regexp.quote k}.join('|')}$/,true)
193
+ =begin
194
+ KeywordOp=
195
+ KeywordToken & -{
196
+ :ident=>/^(#{BINOP_KEYWORDS.join('|')})$/
197
+ }
198
+ KeywordOp2=
199
+ KeywordToken & -{
200
+ :ident=>/^([\[({!+*?:,]|\.{1,3}|::|=>)$/
201
+ }
202
+ =end
203
+ DotOp= KeywordToken & -{ :ident=>"." }
204
+ DoubleColonOp= KeywordToken & -{ :ident=>"::" }
205
+
206
+ Op=Op()
207
+ MODIFYASSIGNOP=Op( /^(([^=])\2|[^<>=!])=$/, true )
208
+ NONASSIGNOP=Op( /([^=]|[<>=!]=)$/)
209
+ KW_Op= #some of these ought to be regular operators, fer gosh sake
210
+ Op(/^((![=~])|\.\.\.?|=>)$/,true)|Op(/^(#{BINOP_KEYWORDS.join('|')})$/)
211
+
212
+ EPSILON=Float::EPSILON*10_000_000 #this should be <<1 and >0
213
+ fail unless 1+EPSILON>1
214
+ fail unless EPSILON<0.1
215
+
216
# Decide whether the operator on the left (op) binds at least as tightly as
# the upcoming one (op2).
#
# Returns true when op2 is not a keyword/operator token or has no entry in
# the precedence table. Otherwise compares precedences; a right-associative
# op2 has EPSILON added to its precedence first, so that it wins a tie.
def left_op_higher(op, op2)
  # (Op|KeywordOp|KeywordOp2|ASSIGNOP===op2) or return true
  return true unless KeywordToken === op2 or OperatorToken === op2

  right_name = op2.to_s
  right_prec = @precedence[right_name]
  return true unless right_prec
  #or fail "unrecognized right operator: #{op2.inspect}"

  right_prec += EPSILON if @RIGHT_ASSOCIATIVE[right_name]
  @precedence[op.to_s] >= right_prec
end
224
+
225
+ LowerOp= proc{|parser,op2| parser.left_op_higher(parser[-3],op2) }
226
+
227
# Complement (~) of the wants_semi_context matcher. The result is computed on
# first use and cached in @dps.
def dont_postpone_semi
  return @dps if @dps
  @dps = ~wants_semi_context
end
230
+ WANTS_SEMI=%w[while until if unless
231
+ def case when in rescue
232
+ elsif class module << => . ::
233
+ ]
234
# Matcher for the tokens listed in WANTS_SEMI: the '<<' operator, or any
# keyword whose ident is exactly one of the WANTS_SEMI words.
def wants_semi_context
  alternatives = WANTS_SEMI.map { |word| Regexp.quote(word) }.join('|')
  Op('<<') | KW(/^(#{alternatives})$/)
end
237
+
238
+ NeverBlockKeyword=MethNameToken&-{:ident=>/^(return|break|next)$/}
239
+ FakeBegin=KW('(')&-{:not_real? =>true}
240
+ FakeEnd=KW(')')&-{:not_real? =>true}
241
+
242
+ #rule format:
243
+ # -[syntax pattern_matchers.+, lookahead.-]>>node type
244
+
245
+ DotCall=proc{|stack|
246
+ right=stack[-2]
247
+ left,bogus=*stack.slice!(-4..-3)
248
+
249
+ right.set_receiver! left
250
+ }
251
+
252
+ Lvalue=(VarNameToken|CallSiteNode|BracketsGetNode|CommaOpNode|
253
+ ParenedNode|ConstantNode|UnaryStarNode)&-{:lvalue? =>true}
254
+
255
+ BareMethod=MethNameToken|LiteralNode&-{:val=>Symbol|StringNode}
256
+
257
+ BEGINWORDLIST=RubyLexer::BEGINWORDLIST + %w"( [ {"
258
+ ENDWORDLIST=%w"end ) ] }"
259
+ BEGIN2END={"{"=>"}", "("=>")", "["=>"]", }
260
+ endword="end"
261
+ RubyLexer::BEGINWORDLIST.each{|bw| BEGIN2END[bw]=endword }
262
# Regexp matching exactly one word from BEGINWORDLIST or ENDWORDLIST.
# Built once and cached in @bem.
def beginsendsmatcher
  @bem ||= begin
    quoted_words = (BEGINWORDLIST + ENDWORDLIST).map { |word| Regexp.quote(word) }
    /^(#{quoted_words.join('|')})$/
  end
end
266
+
267
+ MULTIASSIGN=UnaryStarNode|CommaOpNode|(ParenedNode&-{:size=>1})
268
+ WITHCOMMAS=UnaryStarNode|CommaOpNode|
269
+ (CallSiteNode&-{:real_parens=>false, :args=>-{:size=>~0.reg}}) #|(ParenedNode&-{:size=>~1.reg})
270
+
271
+ BEGINAFTEREQUALS=
272
+ ParenedNode&
273
+ -{:size =>~1.reg, :op? =>NilClass|FalseClass, :after_equals =>nil}&
274
+ (-{:body=>item_that.size>0}|-{:rescues=>item_that.size>0}|-{:ensures=>~NilClass})
275
+ # item_that{|x| x.body.size+x.rescues.size > 0 or x.ensures }
276
+
277
+ # ASSIGN_COMMA=Op(',',true)&-{:comma_type=>Symbol}
278
+ LHS_COMMA=Op(',',true)&-{:comma_type => :lhs}
279
+ RHS_COMMA=Op(',',true)&-{:comma_type => :rhs}
280
+ PARAM_COMMA=Op(',',true)&-{:comma_type => :param}
281
+ FUNCLIKE_KEYWORD=KeywordToken&-{:ident=>RubyLexer::FUNCLIKE_KEYWORDS}
282
+
283
+ def RULES
284
+ #these must be the lowest possible priority, and hence first in the rules list
285
+ BEGIN2END.map{|_beg,_end|
286
+ -[KW(_beg), KW(beginsendsmatcher).~.*, KW(_end), KW(/^(do|\{)$/).~.la]>>MisparsedNode
287
+ }+
288
+
289
+ [
290
+ -[UNOP, Value, LowerOp]>>UnOpNode,
291
+ -[DEFOP, ParenedNode&-{:size=>1}]>>UnOpNode,
292
+ -[Op('*@'), VarNameToken|ValueNode, LowerOp]>>UnaryStarNode,
293
+
294
+ -[Op('=',true)|KW(/^(rescue|when|\[)$/)|-{:comma_type=>:call.reg|:array|:param|:rhs},
295
+ Op('*@'), VarNameToken|ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
296
+ -[MethNameToken|FUNCLIKE_KEYWORD, KW('('),
297
+ Op('*@'), VarNameToken|ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
298
+ # -[KW('[')|-{:comma_type=>:call.reg|:array},
299
+ # Op('*@'), VarNameToken|ValueNode, Op('=',true).la]>>:shift,
300
+ #star should not be used in an lhs if an rhs or param list context is available to eat it.
301
+ #(including param lists for keywords such as return,break,next,continue,rescue,yield,when)
302
+
303
+ -[Op('*@'), (GoalPostNode|KW(/^(in|[=)|,;])$/)).la]>>DanglingStarNode, #dangling *
304
+ -[',', (GoalPostNode|KW(/^(in|[=)|;])$/)).la]>> #dangling ,
305
+ proc{|stack|
306
+ dcomma=DanglingCommaNode.new
307
+ dcomma.offset=stack.last.offset
308
+ stack.push dcomma, stack.pop
309
+ },
310
+
311
+ -[Value, Op|KW_Op, Value, LowerOp]>>RawOpNode, #most operators
312
+
313
+ #assignment
314
+ -[Lvalue, MODIFYASSIGNOP, Value, LowerOp]>>AssignNode,
315
+ -[Lvalue, Op('=',true), AssignmentRhsNode, LowerOp]>>AssignNode,
316
+ -[Op('=',true).lb, Value, LowerOp]>>AssignmentRhsNode,
317
+ #was: -[AssignmentRhsListStartToken, Value, AssignmentRhsListEndToken]>>AssignmentRhsNode,
318
+ -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
319
+ Op('rescue3',true), Value, LowerOp]>>AssignNode,
320
+ -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
321
+ Op('rescue3',true).la]>>:shift,
322
+ -[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
323
+ Op('rescue',true).la] >>proc{|stack|
324
+ resc=stack.last.dup
325
+ resc.ident += '3'
326
+ stack[-1]=resc
327
+ },
328
+ # a = b rescue c acts like a ternary...
329
+ #provided that both a and b are not multiple and b
330
+ #(if it is a parenless callsite) has just 1 param
331
+
332
+ # -[Op('=',true), ~WITHCOMMAS, Op('rescue',true).la]>>:shift,
333
+ #relative precedence of = and rescue are to be inverted if rescue
334
+ #is to the right and assignment is not multiple.
335
+
336
+ -[Op('=',true).~.lb, OB, Op('=',true), Value, RHS_COMMA.la]>>:shift,
337
+ -[RHS_COMMA.lb, Lvalue, Op('=',true), Value, RHS_COMMA.la ]>>AssignNode,
338
+ -[ValueNode|VarNameToken, LHS_COMMA, ValueNode|VarNameToken, Op('=',true).la]>>CommaOpNode,
339
+ #relative precedence of = and lhs/rhs , are to be inverted.
340
+
341
+ -[KW(',')&-{:comma_type=>:lhs}, (UnaryStarNode|ParenedNode)&~-{:after_comma =>true}, Op('=',true)]>>
342
+ proc{|stack| stack[-3].after_comma=true}, #mebbe this should be a lexer hack
343
+ #mark parentheses and unary stars that come after lhs commas
344
+
345
+ #-[Value, DotOp|DoubleColonOp, MethNameToken,
346
+ # ASSIGNOP, Value, LowerOp]>>AccessorAssignNode,
347
+
348
+ -[MethNameToken.~.lb, '(', Value, ')']>>ParenedNode,
349
+ -[MethNameToken.~.lb, '(', ')']>>VarLikeNode, #alias for nil
350
+
351
+ # -[Value, KeywordOp, Value, LowerOp]>>KeywordOpNode,
352
+ -[Op('=',true).~.lb, Value, Op('rescue',true), Value, LowerOp]>>ParenedNode,
353
+
354
+ #dot and double-colon
355
+ -[DoubleColonOp, VarNameToken, LowerOp]>>ConstantNode,#unary ::
356
+ -[Value, DotOp, CallNode, LowerOp]>>DotCall, #binary .
357
+ -[Value, DoubleColonOp, CallNode, LowerOp]>>DotCall, #binary ::
358
+ -[Value, DoubleColonOp, VarNameToken, LowerOp]>>ConstantNode,#binary ::
359
+
360
+ -[Value, "?", Value, ":", Value, LowerOp]>>TernaryNode,
361
+
362
+ # -[Value, /^\.\.\.?$/, Value, LowerOp]>>RangeNode,
363
+
364
+ -[MethNameToken, '(', Value.-, ')', BlockNode.-, KW(/^(do|\{)$/).~.la]>>CallNode,
365
+ -[FUNCLIKE_KEYWORD, '(', Value.-, ')',
366
+ BlockNode.-, KW(/^(do|\{)$/).~.la]>>KWCallNode,
367
+
368
+ -[ValueNode|VarNameToken, ',', ValueNode|VarNameToken, LowerOp]>>CommaOpNode,
369
+
370
+ -[dont_postpone_semi.lb,
371
+ Value, ';', Value, LowerOp]>>SequenceNode,
372
+
373
+ # -[Value, '=>', Value, LowerOp]>>ArrowOpNode,
374
+
375
+ -[KW(')').~.lb, '{', (CommaOpNode|ArrowOpNode).-, '}']>>HashLiteralNode,
376
+
377
+ # -[CallSiteNode.~.lb, '{', Value, '}']>>HashLiteralNode,
378
+
379
+ # -[KW(')').lb, '{', BlockFormalsNode.-, Value.-, '}']>>BlockNode,
380
+ -[KW(')').lb, 'do', BlockFormalsNode.-, Value.-, 'end']>>BlockNode,
381
+ #rubylexer handles the 'low precedence' of do...end
382
+
383
+ -[GoalPostNode, Value.-, GoalPostNode]>>BlockFormalsNode,
384
+ #rubylexer disambiguated operator vs keyword '|'
385
+
386
+ -[/^(while|until)$/, Value, /^([:;]|do)$/, Value.-, 'end']>>LoopNode,
387
+
388
+ -[/^(if|unless)$/, Value, /^(;|then|:)$/,
389
+ Value.-, ElsifNode.*, ElseNode.-, 'end'
390
+ ]>>IfNode,
391
+
392
+ -['else', Value.-, KW(/^(ensure|end)$/).la]>>ElseNode,
393
+
394
+ -['elsif', Value, /^(;|then|:)$/, Value.-,
395
+ KW(/^e(nd|ls(e|if))$/).la
396
+ ]>>ElsifNode,
397
+
398
+ -['module', ConstantNode|VarNameToken, KW(';'), Value.-, 'end']>>ModuleNode,
399
+ # -['module', ConstantNode|VarNameToken, KW(/^(;|::)$/).~.la]>>
400
+ # proc{|stack| #insert ; at end of module header if none was present
401
+ # stack.push KeywordToken.new(';'), stack.pop
402
+ # },
403
+ -['class', Value, ';', Value.-, 'end']>>ClassNode,
404
+ -['class', Value, Op('<'), Value, KW(';').~.la]>>:shift,
405
+ -['class', Op('<<'), Value, ';', Value.-, 'end']>>MetaClassNode,
406
+
407
+ -['alias', BareMethod|VarNameToken, BareMethod|VarNameToken]>>AliasNode,
408
+ -['undef', BareMethod]>>UndefNode,
409
+ -[UndefNode, ',', BareMethod]>>UndefNode,
410
+
411
+ -['def', CallSiteNode, Op('=').-, KW(';'),
412
+ Value.-, RescueNode.*, ElseNode.-, EnsureNode.-,
413
+ 'end'
414
+ ]>>MethodNode,
415
+
416
+ -['begin',
417
+ Value.-, RescueNode.*, ElseNode.-, EnsureNode.-,
418
+ 'end'
419
+ ]>>ParenedNode,
420
+
421
+ -[Op('=',true), BEGINAFTEREQUALS, Op('rescue',true).la]>>
422
+ proc{ |stack| stack[-2].after_equals=true },
423
+ #this is bs. all for an extra :begin in the parsetree
424
+
425
+ -[(KW(/^(;|begin)$/)|ParenedNode|RescueNode).lb,
426
+ 'rescue', KW('=>').-, Value.-, /^([:;]|then)$/,
427
+ ]>>RescueHeaderNode,
428
+ -[ RescueHeaderNode, Value.-, KW(';').-, KW(/^(rescue|else|ensure|end)$/).la
429
+ ]>>RescueNode,
430
+
431
+ -['ensure', Value.-, KW('end').la]>>EnsureNode,
432
+
433
+ -['[', Value.-, ']']>>ArrayLiteralNode,
434
+
435
+ -[Value, '[', Value.-, ']']>>BracketsGetNode,
436
+
437
+ -[HereDocNode, StringToken.*, StringToken, StringToken.~.la]>>StringCatNode,
438
+ -[(StringToken|HereDocNode).~.lb, StringToken.*, StringToken, StringToken, StringToken.~.la]>>StringCatNode,
439
+ -[(StringToken|HereDocNode).~.lb, StringToken, StringToken.~.la]>>StringNode, #includes regexp, wordlist, backquotes
440
+
441
+ -['case', Value.-, KW(/^[:;]$/).-, WhenNode.*, ElseNode.-, 'end']>>CaseNode,
442
+
443
+ -['when', Value, /^([:;]|then)$/, Value.-,
444
+ KW(/^(when|else|end)$/).la
445
+ ]>>WhenNode,
446
+
447
+ -['for', Value, 'in', Value, /^([:;]|do)$/, Value.-, 'end']>>ForNode,
448
+
449
+ #semicolon cleanup....
450
+ -[dont_postpone_semi.lb,
451
+ Value, ';',
452
+ (KW(/^(#{RubyLexer::INNERBOUNDINGWORDS.gsub(/(rescue|then)\|/,'')}|end|[)}\]])$/)|EoiToken).la
453
+ ]>>proc{|stack| stack.delete_at -2 },
454
+ -[Value, ';', KW('then').la
455
+ ]>>proc{|stack| stack.delete_at -2 },
456
+ -[dont_postpone_semi.lb, Value, ';', RescueNode
457
+ ]>>proc{|stack| stack.delete_at -3 },
458
+ -[(KW(/^(begin|[;:({|]|then|do|else|ensure)$/)|StartNode|RescueHeaderNode).lb, ';'
459
+ ]>>proc{|stack| stack.delete_at -2 },
460
+ #this rule is somewhat more forgiving than matz' parser...
461
+ #not all semicolons after :, (, and { keywords should
462
+ #be ignored. some should cause syntax errors.
463
+
464
+
465
+ #comma cleanup....
466
+ -[',', KW(/^[}\]]$/).la]>>proc{|stack| stack.delete_at -2},
467
+ #likewise, this is somewhat too forgiving.
468
+ #some commas before } or ] should cause syntax errors
469
+
470
+ #multiple assignment.... (handled in a subsequent stage?)
471
+ #(cause it requires that the relative priorities of = and , be reversed!)
472
+
473
+
474
+ #turn lvalues into rvalues if not followed by an assignop
475
+ -[-{:lvalue? =>true}, (Op('=',true)|MODIFYASSIGNOP).~.la]>>proc{|stack| stack[-2].lvalue=nil},
476
+
477
+ #expand the = into a separate token in calls to setters (after . or ::).
478
+ #but not in method headers
479
+ -[KW('def').~.lb, Value, DotOp|DoubleColonOp,
480
+ (MethNameToken&-{:ident=>/^[a-z_][a-z0-9_]*=$/i}).la]>>
481
+ proc{|stack|
482
+ methname=stack.pop
483
+ methname.ident.chomp!('=')
484
+ offset=methname.offset+methname.ident.size
485
+ stack.push(
486
+ CallNode.new(methname,nil,nil,nil,nil),
487
+ OperatorToken.new('=',offset)
488
+ )
489
+ },
490
+
491
+ -[NumberToken|SymbolToken]>>LiteralNode,
492
+
493
+ #lexer does the wrong thing with -22**44.5, making the - part
494
+ #of the first number token. it's actually lower precedence than
495
+ #**... this rule fixes that problem.
496
+ -[NumberToken&-{:ident=>/\A-/}, Op('**').la]>>
497
+ proc{|stack|
498
+ neg_op=OperatorToken.new("-@",stack[-2].offset)
499
+ neg_op.unary=true
500
+ stack[-2,0]=neg_op
501
+ stack[-2].ident.sub!(/\A-/,'')
502
+ stack[-2].offset+=1
503
+ },
504
+
505
+ #treat these keywords like (rvalue) variables.
506
+ -[/^(nil|false|true|__FILE__|__LINE__|self)$/]>>VarLikeNode,
507
+
508
+ #here docs
509
+ -[HerePlaceholderToken]>>HereDocNode,
510
+ -[HereBodyToken]>>proc{|stack|
511
+ stack.delete_at(-2)#.instance_eval{@headtok}.node.saw_body!
512
+ },
513
+
514
+ ]
515
+ end
516
+
517
+
518
+
519
# Set up a parser.
#
# input:: either source text handed to RubyLexer.new, or an Array of tokens.
#         An Array is used directly as the token source: it is given a
#         singleton get1token method that shifts tokens off its front, so
#         the caller's array is consumed during parsing.
# name::  name passed to RubyLexer and stored as @filename
# line::  line number passed to RubyLexer
# lvars:: names marked as known local variables in the lexer
#         (only used when a RubyLexer is created)
def initialize(input,name="(eval)",line=1,lvars=[])
  case input
  when Array
    class << input
      def get1token; shift end
    end
    @lexer = input
  else
    @lexer = RubyLexer.new(name, input, line)
    lvars.each { |lvar| @lexer.localvars[lvar] = true }
  end

  @filename = name
  @min_sizes = {}        # cache used by evaluate
  @compiled_rules = {}   # cache used by evaluate
  @moretokens = []       # tokens queued ahead of the lexer (see get_token)
  @unary_or_binary_op = /^[-+&*]$/

  @rules = self.RULES
  @precedence = self.PRECEDENCE
  @RIGHT_ASSOCIATIVE = self.RIGHT_ASSOCIATIVE
end
536
+
537
+ attr_accessor :lexer
538
+
539
# Fetch the next token for the parser, normalizing what the lexer produces.
#
# Tokens queued in @moretokens are returned first. Otherwise a token is read
# from @lexer and adjusted:
# * a token that responds to #as with a non-nil value, and the implicit
#   param list start/end tokens, become KeywordTokens marked not_real!
# * newlines become ';' keyword tokens
# * FileAndLineToken updates @file/@line and is skipped
# * IgnoreToken is skipped
# * NoWsToken is dropped and the following token is returned instead; if
#   that token is '[' it is extended with BracketsCall
# * unary + - & * operators get "@" appended to their ident
# * selected keywords are rewritten (see the inner case)
#
# recursing:: true when called from within get_token itself; suppresses the
#             PRINT_TOKENS debug output for the inner call.
#
# Returns the token (also stored in @last_token), or nil when the lexer
# yields nothing more.
#
# The `when X` / `when X then` forms are used because the old `when X:`
# spelling is a syntax error on Ruby 1.9 and later.
def get_token(recursing=false)
  unless @moretokens.empty?
    @last_token=@moretokens.shift
    p @last_token if ENV['PRINT_TOKENS'] unless recursing
    return @last_token
  end

  # begin...end while false runs the body once; `redo` restarts it to fetch
  # another token and `break` leaves it with the current result.
  begin
    result=@lexer.get1token or break
    p result if ENV['RAW_PRINT_TOKENS']

    #set token's line if wanted
    result.line||=@line if result.respond_to? :line=

    if result.respond_to?(:as) and as=result.as
      result=KeywordToken.new(as,result.offset)
      result.not_real!
    else

      case result
      when ImplicitParamListStartToken #treat it like (
        result=KeywordToken.new('(', result.offset)
        result.not_real!
      when ImplicitParamListEndToken
        result=KeywordToken.new(')', result.offset)
        result.not_real!
      # when AssignmentRhsListStartToken, AssignmentRhsListEndToken:
      #do nothing, pass it thru
      when NewlineToken
        result=KeywordToken.new(';',result.offset)
      when FileAndLineToken #so __FILE__ and __LINE__ can know what their values are
        @file=result.file
        @line=result.line
        redo
      when NoWsToken
        #rubylexer disambiguates array literal from
        #call to [] or []= method with a preceding NoWsToken...
        #kind of a dumb interface.
        result=get_token(true)
        result.ident=='[' and result.extend BracketsCall

      when OperatorToken
        if @unary_or_binary_op===result.ident and result.unary
          result=result.dup
          result.ident+="@"
        end

      #more symbol table maintenance....
      when KeywordToken
        case name=result.ident

        when "do"
          # a 'do' that has no matching 'end' is turned into ';'
          unless result.has_end?
            result=KeywordToken.new(';',result.offset)
          end
        when /^(#{BINOP_KEYWORDS.join '|'})$/ #should be like this in rubylexer
          result=OperatorToken.new(name,result.offset) unless result.has_end?
        when "|" then result=GoalPostNode.new(result.offset) #is this needed still?
        when "__FILE__" #I wish rubylexer would handle this
          class<<result; attr_accessor :value; end
          result.value=@file.dup
        when "__LINE__" #I wish rubylexer would handle this
          class<<result; attr_accessor :value; end
          result.value=@line
        end

      when EoiToken then break
      when HereBodyToken then break
      when IgnoreToken then redo
      end
    end
  end while false
  p result if ENV['PRINT_TOKENS'] unless recursing
  return @last_token=result
end
624
+
625
# Try to apply one grammar rule to the top of @stack.
#
# rule:: a Reg::Transform. Its left side holds the sequence of matchers
#        (optionally beginning with a lookback and ending with a lookahead
#        matcher or Proc); its right side says what to do on a match: a node
#        Class to instantiate, a Proc to call with the stack, or :shift.
#
# Returns false when the rule does not match, 0 when a :shift rule matches,
# and true when the rule matched and its action was carried out.
# Any exception is reported on stdout together with the rule, then re-raised.
#
# The lookahead `case` uses plain `when X` because the old `when X:`
# spelling is a syntax error on Ruby 1.9 and later.
def evaluate rule
  #dissect the rule
  Reg::Transform===rule or fail
  node_type= rule.right
  rule=rule.left.subregs.dup
  lookahead_processor=(rule.pop if Proc|::Reg::LookAhead===rule.last)
  lookback=rule[0]=rule[0].regs(0) if ::Reg::LookBack===rule[0]

  #index of data at which to start matching
  i=@stack.size-1   #-1 because last element of @stack is always lookahead

  #I could call this a JIT compiler, but that's a bit grandiose....
  #more of a JIT pre-processor
  #(bare strings and regexps in a rule are shorthand for keyword matchers)
  compiled_rule=@compiled_rules[rule]||=
    rule.map{|pattern|
      String|Regexp===pattern ? KW(pattern) : pattern
    }

  #what's the minimum @stack size this rule could match?
  rule_min_size=@min_sizes[compiled_rule]||=
    compiled_rule.inject(0){|sum,pattern|
      sum + pattern.itemrange.begin
    }
  i>=rule_min_size or return false

  matching=[]

  #actually try to match rule elements against each @stack element in turn
  #(right to left, starting just below the lookahead)
  compiled_rule.reverse_each{|matcher|
    i.zero? and fail
    target=matching
    #is this matcher optional? looping?
    loop= matcher.itemrange.last.to_f.infinite?
    optional=matcher.itemrange.first.zero?
    #a looping matcher collects its matches in a sub-array of its own
    matching.unshift target=[]  if loop
    if loop or optional
      matcher=matcher.regs(0)
    end

    begin
      if matcher===@stack[i-=1] #try match
        target.unshift @stack[i]
      else
        #if match failed, the whole rule fails
        #unless this match was optional, in which case, ignore it
        #but bump the data position back up, since the latest datum
        #didn't actually match anything.
        return false unless optional or loop&&!target.empty?
        i+=1
        matching.unshift nil unless loop
        break
      end
    end while loop
  }

  matchrange= i...-1  #what elems in @stack were matched?

  #give lookahead matcher (if any) a chance to fail the match
  case lookahead_processor
  when ::Reg::LookAhead
    return false unless lookahead_processor.regs(0)===@stack.last
  when Proc
    return false unless lookahead_processor[self,@stack.last]
  end

  #if there was a lookback item, don't include it in the new node
  if lookback
    matchrange= i+1...-1  #what elems in @stack were matched?
    matching.shift
  end

  #replace matching elements in @stack with node type found
  case node_type
  when Class
    node=node_type.new(*matching)
    node.line=@line
    @stack[matchrange]=[node]
  when Proc then node_type[@stack]
  when :shift then return 0
  else fail
  end

  return true #let caller know we found a match

rescue Exception=>e
  puts "error (#{e}) while executing rule: #{rule.inspect}"
  puts e.backtrace.join("\n")
  raise
end
721
+
722
# Error raised by RedParse#parse when the input cannot be reduced to a single
# tree, or when the resulting tree contains error nodes. It carries the
# parser stack that was current when the error was raised.
class ParseError<RuntimeError
  # msg::   description of the failure
  # stack:: the parser stack at the time of the failure
  def initialize(msg,stack)
    # A disabled (`if false`) draft that appended node classes and line
    # ranges to the message used to live here, followed by a second,
    # redundant super(msg) call; both were dead weight and are gone.
    super(msg)
    @stack=stack
  end

  attr_reader :stack
end
742
+
743
# Read from the parser stack; arguments are passed straight to @stack's []
# (so parser[-3] is the third element from the top).
def [](*args)
  @stack[*args]
end

# Write to the parser stack; arguments are passed straight to @stack's []=.
def []=(*args)
  @stack.send(:[]=, *args)
end
750
+
751
+ def parse
752
+ oldparser= Thread.current[:$RedParse_parser]
753
+ Thread.current[:$RedParse_parser]||=self
754
+
755
+ @stack=[StartNode.new, get_token]
756
+ #last token on @stack is always implicitly the lookahead
757
+ loop {
758
+ #try all possible reductions
759
+ shift=nil
760
+ @rules.reverse_each{|rule|
761
+ shift=evaluate(rule) and break
762
+ }
763
+ next if shift==true
764
+
765
+ #no rule can match current @stack, get another token
766
+ tok=get_token
767
+
768
+ #are we done yet?
769
+ tok.nil? or EoiToken===tok && EoiToken===@stack.last and break
770
+
771
+ #shift our token onto the @stack
772
+ @stack.push tok
773
+ }
774
+
775
+ @stack.size==2 and return NopNode.new #handle empty parse string
776
+
777
+ #unless the @stack is 3 tokens,
778
+ #with the last an Eoi, and first a StartNode
779
+ #there was a parse error
780
+ unless @stack.size==3
781
+ pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
782
+ top=MisparsedNode.new("(toplevel)", @stack[1...-1],'')
783
+ raise ParseError.new(top.msg,@stack)
784
+ end
785
+ EoiToken===@stack.last or fail
786
+ StartNode===@stack.first or fail
787
+
788
+ result= @stack[1]
789
+
790
+
791
+ #multiple assignment must be resolved
792
+ #afterwards by walking the parse tree.
793
+ #(because the relative precedences of = and ,
794
+ #are reversed in multiple assignment.)
795
+ # result.respond_to? :fixup_multiple_assignments! and
796
+ # result=result.fixup_multiple_assignments!
797
+
798
+ #relative precedence of = and rescue are also inverted sometimes
799
+ # result.respond_to? :fixup_rescue_assignments! and
800
+ # result=result.fixup_rescue_assignments!
801
+
802
+ #do something with error nodes
803
+ msgs=[]
804
+ result.walk{|parent,i,subi,node|
805
+ not if ErrorNode===node
806
+ msgs<< @filename+":"+node.blame.msg
807
+ end
808
+ } if result.respond_to? :walk #hack hack
809
+ result.errors=msgs unless msgs.empty?
810
+ #other types of errors (lexer errors, exceptions in lexer or parser actions)
811
+ #should be handled in the same way, but currently are not
812
+ # puts msgs.join("\n")
813
+
814
+ rescue Exception=>e
815
+ # input=@filename
816
+ # if input=="(eval)"
817
+ input=@lexer
818
+ if Array===input
819
+ puts "error while parsing:"
820
+ pp input
821
+ input=nil
822
+ else
823
+ input=input.original_file
824
+ inputname=@lexer.filename
825
+ input.to_s.size>1000 and input=inputname
826
+ end
827
+ # end
828
+ puts "error while parsing: <<< #{input} >>>"
829
+ raise
830
+ else
831
+ unless msgs.empty?
832
+ pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
833
+ raise RedParse::ParseError.new(msgs.join("\n"),@stack)
834
+ end
835
+
836
+ return result
837
+ ensure
838
+ Thread.current[:$RedParse_parser]=oldparser
839
+ end
840
+
841
# Every matcher appearing on the left side of any rule, as one flat Array.
def LEFT_MATCHERS;self.RULES.map{|r| r.left.subregs }.flatten; end

# Classes that can appear on the parse stack according to the rules: the
# classes named in rule patterns (directly, or nested inside Reg::And /
# Reg::Or combinations) followed by the classes that rules produce.
#
# This method used to call map! on `_LEFT_MATCHERS`, a name that was never
# defined, and so raised NameError when invoked; it now starts from
# LEFT_MATCHERS. The `when X then` form replaces `when X:`, which is a
# syntax error on Ruby 1.9 and later.
def STACKABLE_CLASSES
  #remove lookahead and lookback decoration
  left_matchers=self.LEFT_MATCHERS.map{|m|
    case m
    when Reg::LookAhead,Reg::LookBack then m.regs(0)
    else m
    end
  }

  #pull classes out of a matcher, descending into And/Or combinations;
  #any other kind of matcher contributes nothing (nil)
  rule_juicer=proc{|m|
    case m
    when Class then m
    when Reg::And then m.subregs.map(&rule_juicer)
    when Reg::Or then m.subregs.map(&rule_juicer)
    else nil
    end
  }

  left_classes=left_matchers.map{|m| rule_juicer[m] }.flatten.compact
  right_classes= self.RULES.map{|r| r.right }.grep(Class) #classes in productions
  left_classes+right_classes
end
863
+ =begin
864
+ HIER=Class::FlattenedHierarchy.new *STACKABLE_CLASSES
865
+
866
+ LOOKAHEAD_MATCHERS=self.RULES.map{|r| r.left.subregs.last }.map{|la| Reg::LookAhead===la and la.regs(0) }
867
+
868
+ LOOKAHEAD_CLASSES=LOOKAHEAD_MATCHERS.map(&rule_juicer)
869
+ LOOKAHEAD_CLASSES.each_with_index{|classes,i|
870
+ case classes
871
+ when Class: huh
872
+ when Array: classes.flatten.each{huh}
873
+ else
874
+ end
875
+ }
876
+ =end
877
+ # def fixup_multiple_assignments!; end
878
+ end
879
+
880
+
881
+ if __FILE__==$0
882
+ require 'problemfiles'
883
+ class NeverExecThis<RuntimeError; end
884
+
885
+ def arraydiff(a,b)
886
+ a==b and return [a,false]
887
+ (Array===a or a=[a])
888
+ result= a.dup
889
+ diff=false
890
+ size= a.size >= b.size ? a.size : b.size
891
+ size.times{|i|
892
+ ai=a[i]
893
+ bi=b[i]
894
+ if Array===ai and Array===bi
895
+ result_i,diff_i= arraydiff(ai,bi)
896
+ diff||=diff_i
897
+ result[i]=result_i
898
+ elsif ai!=bi
899
+ next if Regexp===ai and ai.to_s==bi.to_s and
900
+ ai.options==bi.options
901
+ diff=true
902
+ result[i]={ai=>bi}
903
+ elsif ai.nil?
904
+ result[i]={'size mismatch'=>"#{a.size} for #{b.size}"} if a.size!=b.size
905
+ diff=true
906
+ end
907
+ if i.nonzero? and Hash===result[i] and Hash===result[i-1]
908
+ old=result[i-1]
909
+ oldkeys=old.keys
910
+ oldvals=old.values
911
+ if Reg::Subseq===oldkeys.first
912
+ oldkeys=oldkeys.children
913
+ oldval=oldvals.children
914
+ end
915
+ result[i-1..i]=[ {-[*oldkeys+result[i].keys]=>-[*oldvals+result[i].values]} ]
916
+ end
917
+ }
918
+ return result,diff
919
+ end
920
+
921
# Command-line driver: parses each input with RedParse and prints the tree,
# or compares it against ParseTree's output.
#
# Options (processed while the first argument starts with "-"):
#   --pp                  pretty-print the RedParse tree (default)
#   --lisp                print tree.to_lisp
#   --parsetree           pretty-print tree.to_parsetree
#   --vsparsetree         diff to_parsetree(:quirks) against ParseTree
#   --vsparsetree2        like --vsparsetree, but print both trees, not a delta
#   --update-problemfiles record failing/differing files in a ProblemFiles list
#   -q / -v               quiet (default) / verbose
#   -e CODE...            parse the remaining arguments, joined with spaces
#   --                    stop option processing
#
# Inputs: stdin when no arguments remain; every **/*.rb file below a single
# directory argument; otherwise the named files. Unreadable files are skipped.
#
# Exit status: 0 normally, 1 if a --vsparsetree* comparison found differences,
# 2 on interrupt.

require 'pp' # used by the --pp, --parsetree and --vsparsetree* modes alike

output=:pp
quiet=true
while /^-/===ARGV.first
  case opt=ARGV.shift
  when "--" then break
  when "--pp" then output=:pp
  when "--lisp" then output=:lisp
  when "--parsetree" then output=:parsetree
  when "--vsparsetree" then output=:vsparsetree
  when "--vsparsetree2" then output=:vsparsetree2
  when "--update-problemfiles" then problemfiles=ProblemFiles.new
  when "-q" then quiet=true
  when "-v" then quiet=false
  when "-e"
    inputs=[ARGV.join(" ")]
    names=["-e"]
    break
  else fail "unknown option: #{opt}"
  end
end

unless inputs
  if ARGV.empty?
    inputs=[STDIN.read]
    names=["-"]
  elsif ARGV.size==1 and (Dir.entries(ARGV.first) rescue false)
    names=Dir[ARGV.first+"/**/*.rb"]
  else
    names=ARGV.dup
  end
  # Best effort: an unreadable file becomes nil and is skipped in the loop.
  inputs||=names.map{|name| File.read(name) rescue nil}
end

result=0

# Prefix for source handed to ParseTree: if that source were ever executed
# rather than just parsed, the BEGIN blocks would raise/throw immediately.
safety="BEGIN{raise NeverExecThis};BEGIN{throw :never_exec_this,1};\n"
nullsafety="\n"
# Keep nil placeholders for unreadable files so indices still line up.
safe_inputs=inputs.map{|input| input && safety+input}

inputs.each_index{|i|
  begin

    input=inputs[i] or next
    name=names[i]

    input=nullsafety+input
    #print name+"... "; STDOUT.flush

    begin
      tree=nil
      executed=catch(:never_exec_this){
        tree=RedParse.new(input,name).parse; nil
      }
      if executed
        puts "RedParse attempted to execute parse data in #{name}"
        next
      end
#    rescue RedParse::ParseError=>e
#      require 'pp'
#      pp e.stack[-[15,e.stack.size].min..-1]
#      raise
    rescue Interrupt
      exit 2
    rescue Exception=>e
      problemfiles.push name if problemfiles
      begin
        e.message << " during parse of #{name}"
      rescue StandardError
        # message not mutable (e.g. frozen); re-raise the error undecorated
      end
      raise e
    end
    tree or fail "parsetree was nil for #{name}"

    case output
    when :pp
      pp tree
    when :lisp
      puts tree.to_lisp
    when :parsetree
      pp tree.to_parsetree
    when :vsparsetree,:vsparsetree2
      begin
        require 'rubygems'
      rescue LoadError
        # rubygems is optional; parse_tree may be installed without it
      end
      require 'parse_tree'
      #require 'algorithm/diff'
      begin
        mine=tree.to_parsetree(:quirks)
        if IO===input
          input.rewind
          input=input.read
        end
        ryans=nil
        catch(:never_exec_this){
          ryans=ParseTree.new.parse_tree_for_string(safe_inputs[i],name); nil
        } and raise NeverExecThis
        delta,is_diff=arraydiff(mine,ryans)
      rescue NeverExecThis
        puts "ParseTree attempted to execute parse data in #{name}"
        next
      rescue Interrupt
        exit 2
      rescue Exception=>e
        #raise( RuntimeError.new( "#{e} during to_parsetree of #{name}" ) )
        puts "error during to_parsetree of #{name}"
        problemfiles.push name if problemfiles
        raise
      end
      if output==:vsparsetree2
        if !quiet or is_diff
          puts "mine:"
          pp mine
          puts "ryans:" if is_diff
          pp ryans if is_diff
        end
      elsif !quiet or is_diff
        puts 'differences in '+name if is_diff
        pp delta
      end
      if is_diff
        result=1
        problemfiles.push name if problemfiles
      else
        puts "no differences in "+name
        problemfiles.delete name if problemfiles
      end
    end

  rescue NeverExecThis
    puts "mysterious attempt to execute parse data in #{name}"
    next
  rescue Interrupt,SystemExit
    exit 2
  rescue Exception=>e
    # Report the failure and carry on with the next input.
    puts "#{e}:#{e.class}"
    puts e.backtrace.join("\n")
    #problemfiles.push name if problemfiles
    #raise
  ensure
    STDOUT.flush
  end
}
exit result
1061
+ end
1062
+
1063
+ =begin todo:
1064
+ v merge DotCallNode and CallSiteNode and CallWithBlockNode
1065
+ remove actual Tokens from parse tree...
1066
+ instead, each node has a corresponding range of tokens
1067
+ -in an (optional) array of all tokens printed by the tokenizer.
1068
+ split ParenedNode into ParenedNode + Rescue/EnsureNode
1069
+ 'incomplete' subtrees such as ElseNode, ElsifNode, RescueNode
1070
+ -should not appear in final output
1071
+ v split keywordopnode into loop and if varieties?
1072
+ =end
1073
+
1074
+ =begin optimization opportunities:
1075
+ top of stack slot contains mostly keywords, specific node classes, and Value
1076
+ lookahead slot contains mostly LowerOp and keywords, with a few classes and inverted keywords
1077
+ -(LowerOp is hard to optimize)
1078
+ if top of stack matcher is Value, then the next matcher down is mostly keywords, with some operators
1079
+ class membership can be optimized to test of integer within a range
1080
+ keywords could be stored as symbols instead of strings
1081
+ a few rules may need exploding (eg, ensure) to spoon feed the optimizer
1082
+ make all Nodes descendants of Array
1083
+ =end