redparse 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING.LGPL +165 -0
- data/Manifest.txt +40 -0
- data/README.txt +461 -0
- data/Rakefile +26 -0
- data/lib/redparse.rb +1083 -0
- data/lib/redparse/babynodes.rb +137 -0
- data/lib/redparse/babyparser.rb +276 -0
- data/lib/redparse/decisiontree.rb +372 -0
- data/lib/redparse/node.rb +3808 -0
- data/lib/redparse/problemfiles.rb +84 -0
- data/lib/redparse/reg_more_sugar.rb +99 -0
- data/nurli/test_control.nurli +261 -0
- data/redparse.vpj +92 -0
- data/redparse.vpw +8 -0
- data/test/data/__end.rb +5 -0
- data/test/data/__f.rb +2 -0
- data/test/data/be.rb +3 -0
- data/test/data/be2.rb +6 -0
- data/test/data/bqhd.rb +3 -0
- data/test/data/bqhd2.rb +3 -0
- data/test/data/case.rb +8 -0
- data/test/data/datetime.rb +66 -0
- data/test/data/defd.rb +9 -0
- data/test/data/hd-def.rb +8 -0
- data/test/data/hd.rb +3 -0
- data/test/data/hd2.rb +3 -0
- data/test/data/hd3.rb +3 -0
- data/test/data/hd4.rb +75 -0
- data/test/data/hd5.rb +4 -0
- data/test/data/hdcat.rb +4 -0
- data/test/data/hdx.rb +3 -0
- data/test/data/heredoc.rb +3 -0
- data/test/data/if.rb +7 -0
- data/test/data/jbridge.rb +779 -0
- data/test/data/mod.rb +3 -0
- data/test/data/nl_as_strdelim.rb +7 -0
- data/test/data/pw.rb +2 -0
- data/test/data/wvt.rb +2 -0
- data/test/rp-locatetest.rb +344 -0
- data/test/test_redparse.rb +3319 -0
- metadata +113 -0
data/Rakefile
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# Copyright (C) 2008 Caleb Clausen
# Distributed under the terms of Ruby's license.
require 'rubygems'
require 'hoe'
# Load version.rb by absolute path so this does not depend on the current
# directory being on $LOAD_PATH.
require File.expand_path('lib/redparse/version.rb', File.dirname(__FILE__))

# Extract the gem description from README.txt: skip everything up to and
# including the "== DESCRIPTION:" heading, skip the remainder of the heading
# (up to the next blank line), then take the paragraph that follows.
# The block form of File.open closes the file once the text has been read.
desc=File.open("README.txt") do |readme|
  readme.readline("\n== DESCRIPTION:")
  readme.readline("\n\n")
  readme.readline("\n\n")
end

hoe=Hoe.new("redparse", RedParse::VERSION) do |_|
  _.author = "Caleb Clausen"
  _.email = "redparse-owner @at@ inforadical .dot. net"
  _.url = ["http://redparse.rubyforge.org/", "http://rubyforge.org/projects/redparse/"]
  _.extra_deps << ['rubylexer', '>= 0.7.2']
  # _.test_globs=["test/*"]
  _.description=desc
  _.summary=desc[/\A[^.]+\./]  # everything up to and including the first period
  _.spec_extras={:bindir=>''}
  _.rdoc_pattern=/\A(README\.txt|lib\/.*\.rb)\Z/
  _.remote_rdoc_dir="/"
end
|
25
|
+
|
26
|
+
|
data/lib/redparse.rb
ADDED
@@ -0,0 +1,1083 @@
|
|
1
|
+
=begin
|
2
|
+
redparse - a ruby parser written in ruby
|
3
|
+
Copyright (C) 2008 Caleb Clausen
|
4
|
+
|
5
|
+
This program is free software: you can redistribute it and/or modify
|
6
|
+
it under the terms of the GNU Lesser General Public License as published by
|
7
|
+
the Free Software Foundation, either version 3 of the License, or
|
8
|
+
(at your option) any later version.
|
9
|
+
|
10
|
+
This program is distributed in the hope that it will be useful,
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
GNU Lesser General Public License for more details.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU Lesser General Public License
|
16
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
=end
|
18
|
+
|
19
|
+
#warn 'hacking up LOAD_PATH to include the latest RubyLexer!'
|
20
|
+
#$:.unshift Dir.pwd+'/../rubylexer/lib', Dir.pwd+'/../rubylexer'
|
21
|
+
|
22
|
+
# "faster rule compiler is untested"
|
23
|
+
|
24
|
+
require 'rubygems'
|
25
|
+
require 'rubylexer'
|
26
|
+
require 'reg'
|
27
|
+
|
28
|
+
require "redparse/node"
|
29
|
+
#require "redparse/decisiontree"
|
30
|
+
require "redparse/reg_more_sugar"
|
31
|
+
class RedParse
|
32
|
+
# include Nodes
|
33
|
+
|
34
|
+
# Probes the installed RubyLexer by lexing "return {}.size" and comparing the
# class of each token it produces, in order, with the sequence listed below.
# Returns true only when every token matches; checking stops at the first
# token that does not.
def self.has_return_hash_fix?
  expected_classes=[
    FileAndLineToken,
    MethNameToken,
    ImplicitParamListStartToken,
    WsToken,
    KeywordToken,
    KeywordToken,
    KeywordToken,
    MethNameToken,
    ImplicitParamListStartToken,
    ImplicitParamListEndToken,
    ImplicitParamListEndToken,
    EoiToken,
  ]
  lexer=RubyLexer.new("","return {}.size")
  expected_classes.all?{|klass| klass===lexer.get1token }
end
|
51
|
+
|
52
|
+
#see pickaxe, 1st ed, page 221
|
53
|
+
# Table of right-associative operators, keyed by operator text. The values
# repeat the operator's precedence number; left_op_higher only tests whether
# an operator has an entry here.
#
# Not listed (commented out in the original table): "defined?", "and", "or",
# "if", "unless", "while", "until", "rescue", "&&", "||".
def RIGHT_ASSOCIATIVE
  table={"**"=>118}
  assignment_ops=%w[= %= /= -= += |= &= >>= <<= *= &&= ||= **= ^=]
  assignment_ops.each{|op| table[op]=105 }
  table
end
|
69
|
+
|
70
|
+
# Operator precedence table, keyed by operator/keyword text. Larger numbers
# bind more tightly ("::" and "." at 121 down to ";" at 96). Consulted by
# left_op_higher through @precedence.
#
# Not listed (commented out in the original table): "(" (method param list)
# and "{" / "do" (blocks) at 122, and "defined?" at 120.5.
def PRECEDENCE
  tiers=[
    [121,   ["::", "."]],
    [120,   ["["]],                  #[] []= methods
    [119,   ["!", "~", "+@"]],
    [118,   ["**"]],
    [117,   ["-@"]],
    [116,   ["*", "/", "%"]],
    [115,   ["+", "-"]],
    [114,   ["<<", ">>"]],
    [113,   ["&"]],
    [112,   ["^", "|"]],
    [111,   ["<=", ">=", "<", ">"]],
    [110,   ["<=>", "==", "===", "!=", "=~", "!~"]],
    [109,   ["&&"]],
    [108,   ["||"]],
    [107,   ["..", "..."]],
    [106,   ["?"]],                  #not sure what to do with ":"
    [105.5, ["*@", "&@"]],           #unary * and & operators
    [105,   ["=", "%=", "/=", "-=", "+=",
             "|=", "&=", ">>=", "<<=", "*=",
             "&&=", "||=", "**=", "^="]],
    [103,   ["defined?", "not"]],
    [102,   [":", "rescue3"]],       #":" but not when used as a substitute for 'then'
    [101,   ["=>"]],
    #the 'precedence' of comma is somewhat controversial. it actually has
    #several different precedences depending on which kind of comma it is.
    #the precedence of , is higher than :, => and the assignment operators
    #in certain contexts.
    [100,   [","]],
    #"unary" prefix function names seen as operators have this precedence
    #but rubylexer handles precedence of these and outputs fake parens
    #to tell us how it's parsed
    [99,    ["or", "and"]],
    [98,    ["if", "unless", "while", "until", "rescue"]],
    [96,    [";"]],
  ]
  tiers.inject({}){|table,(level,ops)|
    ops.each{|op| table[op]=level }
    table
  }
end
|
143
|
+
|
144
|
+
module BracketsCall; end
|
145
|
+
|
146
|
+
Value= #NumberToken|SymbolToken|
|
147
|
+
#HerePlaceholderToken|
|
148
|
+
((VarNameToken|ValueNode)&-{:lvalue? =>nil})
|
149
|
+
Expr=Value
|
150
|
+
|
151
|
+
# Builds a matcher for a KeywordToken whose ident matches +ident+.
#
# ident:: an Integer is converted with #chr, a String or Regexp is used
#         unchanged, and anything else is converted with #to_s.
#
# Returns KeywordToken combined (via &) with the hash matcher on :ident.
def self.KW(ident)
  #'then' is used rather than the 'when X:' form, which parses only on ruby 1.8
  ident=case ident
        when Integer then ident.chr
        when String,Regexp then ident
        else ident.to_s
        end

  return KeywordToken & -{:ident=>ident}
end
|
160
|
+
# Instance-level shorthand: forwards to the class method of the same name.
def KW(ident)
  self.class.KW(ident)
end
|
161
|
+
UNOP=
|
162
|
+
(OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
|
163
|
+
:ident=>/^[*&+-]@$/,
|
164
|
+
:unary =>true,
|
165
|
+
}|
|
166
|
+
(OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
|
167
|
+
:ident=>/^([~!]|not|defined\?)$/,
|
168
|
+
} #|
|
169
|
+
DEFOP=
|
170
|
+
(OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
|
171
|
+
:ident=>"defined?",
|
172
|
+
}
|
173
|
+
=begin
|
174
|
+
MethNameToken&-{ #hack, shouldn't be necessary
|
175
|
+
#rubylexer should know to generally treat "defined?" as a keyword
|
176
|
+
#or operator. (like most keywords, it can also be used as a method
|
177
|
+
# name....)
|
178
|
+
:ident=>"defined?"
|
179
|
+
}
|
180
|
+
=end
|
181
|
+
|
182
|
+
# Builds a matcher for operator tokens.
#
# ident::         when given (truthy), the token's ident must also match it.
# allow_keyword:: when true, a KeywordToken is accepted as well as an
#                 OperatorToken.
def self.Op(ident=nil, allow_keyword=false)
  matcher=if allow_keyword
            OperatorToken|KeywordToken
          else
            OperatorToken
          end
  #matcher[:infix?]=true
  return matcher unless ident
  matcher & -{:ident=>ident}
end
|
189
|
+
# Instance-level shorthand: forwards all arguments to the class method Op.
def Op(*args)
  self.class.Op(*args)
end
|
190
|
+
BINOP_KEYWORDS=%w[if unless while until and or && \|\|]
|
191
|
+
|
192
|
+
#HAS_PRECEDENCE=Op(/^#{PRECEDENCE.keys.map{|k| Regexp.quote k}.join('|')}$/,true)
|
193
|
+
=begin
|
194
|
+
KeywordOp=
|
195
|
+
KeywordToken & -{
|
196
|
+
:ident=>/^(#{BINOP_KEYWORDS.join('|')})$/
|
197
|
+
}
|
198
|
+
KeywordOp2=
|
199
|
+
KeywordToken & -{
|
200
|
+
:ident=>/^([\[({!+*?:,]|\.{1,3}|::|=>)$/
|
201
|
+
}
|
202
|
+
=end
|
203
|
+
DotOp= KeywordToken & -{ :ident=>"." }
|
204
|
+
DoubleColonOp= KeywordToken & -{ :ident=>"::" }
|
205
|
+
|
206
|
+
Op=Op()
|
207
|
+
MODIFYASSIGNOP=Op( /^(([^=])\2|[^<>=!])=$/, true )
|
208
|
+
NONASSIGNOP=Op( /([^=]|[<>=!]=)$/)
|
209
|
+
KW_Op= #some of these ought to be regular operators, fer gosh sake
|
210
|
+
Op(/^((![=~])|\.\.\.?|=>)$/,true)|Op(/^(#{BINOP_KEYWORDS.join('|')})$/)
|
211
|
+
|
212
|
+
EPSILON=Float::EPSILON*10_000_000 #this should be <<1 and >0
|
213
|
+
fail unless 1+EPSILON>1
|
214
|
+
fail unless EPSILON<0.1
|
215
|
+
|
216
|
+
# Decides whether the operator on the left (+op+) binds at least as tightly
# as the lookahead +op2+.
#
# Returns true when +op2+ is neither a KeywordToken nor an OperatorToken, or
# has no entry in @precedence. Otherwise compares precedences; a
# right-associative +op2+ is nudged up by EPSILON so that it wins a tie.
def left_op_higher(op,op2)
  # (Op|KeywordOp|KeywordOp2|ASSIGNOP===op2) or return true
  return true unless KeywordToken===op2 or OperatorToken===op2

  right_key=op2.to_s
  right=@precedence[right_key]
  #or fail "unrecognized right operator: #{op2.inspect}"
  return true unless right

  right+=EPSILON if @RIGHT_ASSOCIATIVE[right_key]
  @precedence[op.to_s]>=right
end
|
224
|
+
|
225
|
+
LowerOp= proc{|parser,op2| parser.left_op_higher(parser[-3],op2) }
|
226
|
+
|
227
|
+
# The complement (~) of wants_semi_context, computed on first use and cached
# in @dps.
def dont_postpone_semi
  @dps || (@dps = ~wants_semi_context)
end
|
230
|
+
WANTS_SEMI=%w[while until if unless
|
231
|
+
def case when in rescue
|
232
|
+
elsif class module << => . ::
|
233
|
+
]
|
234
|
+
# Matcher for a '<<' operator token or a keyword token whose ident is exactly
# one of the WANTS_SEMI words.
def wants_semi_context
  lshift=Op('<<')
  alternatives=WANTS_SEMI.map{|word| Regexp.quote word }.join('|')
  lshift | KW(/^(#{alternatives})$/)
end
|
237
|
+
|
238
|
+
NeverBlockKeyword=MethNameToken&-{:ident=>/^(return|break|next)$/}
|
239
|
+
FakeBegin=KW('(')&-{:not_real? =>true}
|
240
|
+
FakeEnd=KW(')')&-{:not_real? =>true}
|
241
|
+
|
242
|
+
#rule format:
|
243
|
+
# -[syntax pattern_matchers.+, lookahead.-]>>node type
|
244
|
+
|
245
|
+
DotCall=proc{|stack|
|
246
|
+
right=stack[-2]
|
247
|
+
left,bogus=*stack.slice!(-4..-3)
|
248
|
+
|
249
|
+
right.set_receiver! left
|
250
|
+
}
|
251
|
+
|
252
|
+
Lvalue=(VarNameToken|CallSiteNode|BracketsGetNode|CommaOpNode|
|
253
|
+
ParenedNode|ConstantNode|UnaryStarNode)&-{:lvalue? =>true}
|
254
|
+
|
255
|
+
BareMethod=MethNameToken|LiteralNode&-{:val=>Symbol|StringNode}
|
256
|
+
|
257
|
+
BEGINWORDLIST=RubyLexer::BEGINWORDLIST + %w"( [ {"
|
258
|
+
ENDWORDLIST=%w"end ) ] }"
|
259
|
+
BEGIN2END={"{"=>"}", "("=>")", "["=>"]", }
|
260
|
+
endword="end"
|
261
|
+
RubyLexer::BEGINWORDLIST.each{|bw| BEGIN2END[bw]=endword }
|
262
|
+
# Regexp that matches exactly one word from BEGINWORDLIST or ENDWORDLIST.
# Built on first use and cached in @bem.
def beginsendsmatcher
  return @bem if @bem
  words=(BEGINWORDLIST+ENDWORDLIST).map{|word| Regexp.quote word }
  @bem=/^(#{words.join('|')})$/
end
|
266
|
+
|
267
|
+
MULTIASSIGN=UnaryStarNode|CommaOpNode|(ParenedNode&-{:size=>1})
|
268
|
+
WITHCOMMAS=UnaryStarNode|CommaOpNode|
|
269
|
+
(CallSiteNode&-{:real_parens=>false, :args=>-{:size=>~0.reg}}) #|(ParenedNode&-{:size=>~1.reg})
|
270
|
+
|
271
|
+
BEGINAFTEREQUALS=
|
272
|
+
ParenedNode&
|
273
|
+
-{:size =>~1.reg, :op? =>NilClass|FalseClass, :after_equals =>nil}&
|
274
|
+
(-{:body=>item_that.size>0}|-{:rescues=>item_that.size>0}|-{:ensures=>~NilClass})
|
275
|
+
# item_that{|x| x.body.size+x.rescues.size > 0 or x.ensures }
|
276
|
+
|
277
|
+
# ASSIGN_COMMA=Op(',',true)&-{:comma_type=>Symbol}
|
278
|
+
LHS_COMMA=Op(',',true)&-{:comma_type => :lhs}
|
279
|
+
RHS_COMMA=Op(',',true)&-{:comma_type => :rhs}
|
280
|
+
PARAM_COMMA=Op(',',true)&-{:comma_type => :param}
|
281
|
+
FUNCLIKE_KEYWORD=KeywordToken&-{:ident=>RubyLexer::FUNCLIKE_KEYWORDS}
|
282
|
+
|
283
|
+
def RULES
|
284
|
+
#these must be the lowest possible priority, and hence first in the rules list
|
285
|
+
BEGIN2END.map{|_beg,_end|
|
286
|
+
-[KW(_beg), KW(beginsendsmatcher).~.*, KW(_end), KW(/^(do|\{)$/).~.la]>>MisparsedNode
|
287
|
+
}+
|
288
|
+
|
289
|
+
[
|
290
|
+
-[UNOP, Value, LowerOp]>>UnOpNode,
|
291
|
+
-[DEFOP, ParenedNode&-{:size=>1}]>>UnOpNode,
|
292
|
+
-[Op('*@'), VarNameToken|ValueNode, LowerOp]>>UnaryStarNode,
|
293
|
+
|
294
|
+
-[Op('=',true)|KW(/^(rescue|when|\[)$/)|-{:comma_type=>:call.reg|:array|:param|:rhs},
|
295
|
+
Op('*@'), VarNameToken|ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
|
296
|
+
-[MethNameToken|FUNCLIKE_KEYWORD, KW('('),
|
297
|
+
Op('*@'), VarNameToken|ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
|
298
|
+
# -[KW('[')|-{:comma_type=>:call.reg|:array},
|
299
|
+
# Op('*@'), VarNameToken|ValueNode, Op('=',true).la]>>:shift,
|
300
|
+
#star should not be used in an lhs if an rhs or param list context is available to eat it.
|
301
|
+
#(including param lists for keywords such as return,break,next,continue,rescue,yield,when)
|
302
|
+
|
303
|
+
-[Op('*@'), (GoalPostNode|KW(/^(in|[=)|,;])$/)).la]>>DanglingStarNode, #dangling *
|
304
|
+
-[',', (GoalPostNode|KW(/^(in|[=)|;])$/)).la]>> #dangling ,
|
305
|
+
proc{|stack|
|
306
|
+
dcomma=DanglingCommaNode.new
|
307
|
+
dcomma.offset=stack.last.offset
|
308
|
+
stack.push dcomma, stack.pop
|
309
|
+
},
|
310
|
+
|
311
|
+
-[Value, Op|KW_Op, Value, LowerOp]>>RawOpNode, #most operators
|
312
|
+
|
313
|
+
#assignment
|
314
|
+
-[Lvalue, MODIFYASSIGNOP, Value, LowerOp]>>AssignNode,
|
315
|
+
-[Lvalue, Op('=',true), AssignmentRhsNode, LowerOp]>>AssignNode,
|
316
|
+
-[Op('=',true).lb, Value, LowerOp]>>AssignmentRhsNode,
|
317
|
+
#was: -[AssignmentRhsListStartToken, Value, AssignmentRhsListEndToken]>>AssignmentRhsNode,
|
318
|
+
-[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
|
319
|
+
Op('rescue3',true), Value, LowerOp]>>AssignNode,
|
320
|
+
-[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
|
321
|
+
Op('rescue3',true).la]>>:shift,
|
322
|
+
-[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
|
323
|
+
Op('rescue',true).la] >>proc{|stack|
|
324
|
+
resc=stack.last.dup
|
325
|
+
resc.ident += '3'
|
326
|
+
stack[-1]=resc
|
327
|
+
},
|
328
|
+
# a = b rescue c acts like a ternary,,,
|
329
|
+
#provided that both a and b are not multiple and b
|
330
|
+
#(if it is a parenless callsite) has just 1 param
|
331
|
+
|
332
|
+
# -[Op('=',true), ~WITHCOMMAS, Op('rescue',true).la]>>:shift,
|
333
|
+
#relative precedence of = and rescue are to be inverted if rescue
|
334
|
+
#is to the right and assignment is not multiple.
|
335
|
+
|
336
|
+
-[Op('=',true).~.lb, OB, Op('=',true), Value, RHS_COMMA.la]>>:shift,
|
337
|
+
-[RHS_COMMA.lb, Lvalue, Op('=',true), Value, RHS_COMMA.la ]>>AssignNode,
|
338
|
+
-[ValueNode|VarNameToken, LHS_COMMA, ValueNode|VarNameToken, Op('=',true).la]>>CommaOpNode,
|
339
|
+
#relative precedence of = and lhs/rhs , are to be inverted.
|
340
|
+
|
341
|
+
-[KW(',')&-{:comma_type=>:lhs}, (UnaryStarNode|ParenedNode)&~-{:after_comma =>true}, Op('=',true)]>>
|
342
|
+
proc{|stack| stack[-3].after_comma=true}, #mebbe this should be a lexer hack
|
343
|
+
#mark parentheses and unary stars that come after lhs commas
|
344
|
+
|
345
|
+
#-[Value, DotOp|DoubleColonOp, MethNameToken,
|
346
|
+
# ASSIGNOP, Value, LowerOp]>>AccessorAssignNode,
|
347
|
+
|
348
|
+
-[MethNameToken.~.lb, '(', Value, ')']>>ParenedNode,
|
349
|
+
-[MethNameToken.~.lb, '(', ')']>>VarLikeNode, #alias for nil
|
350
|
+
|
351
|
+
# -[Value, KeywordOp, Value, LowerOp]>>KeywordOpNode,
|
352
|
+
-[Op('=',true).~.lb, Value, Op('rescue',true), Value, LowerOp]>>ParenedNode,
|
353
|
+
|
354
|
+
#dot and double-colon
|
355
|
+
-[DoubleColonOp, VarNameToken, LowerOp]>>ConstantNode,#unary ::
|
356
|
+
-[Value, DotOp, CallNode, LowerOp]>>DotCall, #binary .
|
357
|
+
-[Value, DoubleColonOp, CallNode, LowerOp]>>DotCall, #binary ::
|
358
|
+
-[Value, DoubleColonOp, VarNameToken, LowerOp]>>ConstantNode,#binary ::
|
359
|
+
|
360
|
+
-[Value, "?", Value, ":", Value, LowerOp]>>TernaryNode,
|
361
|
+
|
362
|
+
# -[Value, /^\.\.\.?$/, Value, LowerOp]>>RangeNode,
|
363
|
+
|
364
|
+
-[MethNameToken, '(', Value.-, ')', BlockNode.-, KW(/^(do|\{)$/).~.la]>>CallNode,
|
365
|
+
-[FUNCLIKE_KEYWORD, '(', Value.-, ')',
|
366
|
+
BlockNode.-, KW(/^(do|\{)$/).~.la]>>KWCallNode,
|
367
|
+
|
368
|
+
-[ValueNode|VarNameToken, ',', ValueNode|VarNameToken, LowerOp]>>CommaOpNode,
|
369
|
+
|
370
|
+
-[dont_postpone_semi.lb,
|
371
|
+
Value, ';', Value, LowerOp]>>SequenceNode,
|
372
|
+
|
373
|
+
# -[Value, '=>', Value, LowerOp]>>ArrowOpNode,
|
374
|
+
|
375
|
+
-[KW(')').~.lb, '{', (CommaOpNode|ArrowOpNode).-, '}']>>HashLiteralNode,
|
376
|
+
|
377
|
+
# -[CallSiteNode.~.lb, '{', Value, '}']>>HashLiteralNode,
|
378
|
+
|
379
|
+
# -[KW(')').lb, '{', BlockFormalsNode.-, Value.-, '}']>>BlockNode,
|
380
|
+
-[KW(')').lb, 'do', BlockFormalsNode.-, Value.-, 'end']>>BlockNode,
|
381
|
+
#rubylexer handles the 'low precedence' of do...end
|
382
|
+
|
383
|
+
-[GoalPostNode, Value.-, GoalPostNode]>>BlockFormalsNode,
|
384
|
+
#rubylexer disambiguated operator vs keyword '|'
|
385
|
+
|
386
|
+
-[/^(while|until)$/, Value, /^([:;]|do)$/, Value.-, 'end']>>LoopNode,
|
387
|
+
|
388
|
+
-[/^(if|unless)$/, Value, /^(;|then|:)$/,
|
389
|
+
Value.-, ElsifNode.*, ElseNode.-, 'end'
|
390
|
+
]>>IfNode,
|
391
|
+
|
392
|
+
-['else', Value.-, KW(/^(ensure|end)$/).la]>>ElseNode,
|
393
|
+
|
394
|
+
-['elsif', Value, /^(;|then|:)$/, Value.-,
|
395
|
+
KW(/^e(nd|ls(e|if))$/).la
|
396
|
+
]>>ElsifNode,
|
397
|
+
|
398
|
+
-['module', ConstantNode|VarNameToken, KW(';'), Value.-, 'end']>>ModuleNode,
|
399
|
+
# -['module', ConstantNode|VarNameToken, KW(/^(;|::)$/).~.la]>>
|
400
|
+
# proc{|stack| #insert ; at end of module header if none was present
|
401
|
+
# stack.push KeywordToken.new(';'), stack.pop
|
402
|
+
# },
|
403
|
+
-['class', Value, ';', Value.-, 'end']>>ClassNode,
|
404
|
+
-['class', Value, Op('<'), Value, KW(';').~.la]>>:shift,
|
405
|
+
-['class', Op('<<'), Value, ';', Value.-, 'end']>>MetaClassNode,
|
406
|
+
|
407
|
+
-['alias', BareMethod|VarNameToken, BareMethod|VarNameToken]>>AliasNode,
|
408
|
+
-['undef', BareMethod]>>UndefNode,
|
409
|
+
-[UndefNode, ',', BareMethod]>>UndefNode,
|
410
|
+
|
411
|
+
-['def', CallSiteNode, Op('=').-, KW(';'),
|
412
|
+
Value.-, RescueNode.*, ElseNode.-, EnsureNode.-,
|
413
|
+
'end'
|
414
|
+
]>>MethodNode,
|
415
|
+
|
416
|
+
-['begin',
|
417
|
+
Value.-, RescueNode.*, ElseNode.-, EnsureNode.-,
|
418
|
+
'end'
|
419
|
+
]>>ParenedNode,
|
420
|
+
|
421
|
+
-[Op('=',true), BEGINAFTEREQUALS, Op('rescue',true).la]>>
|
422
|
+
proc{ |stack| stack[-2].after_equals=true },
|
423
|
+
#this is bs. all for an extra :begin in the parsetree
|
424
|
+
|
425
|
+
-[(KW(/^(;|begin)$/)|ParenedNode|RescueNode).lb,
|
426
|
+
'rescue', KW('=>').-, Value.-, /^([:;]|then)$/,
|
427
|
+
]>>RescueHeaderNode,
|
428
|
+
-[ RescueHeaderNode, Value.-, KW(';').-, KW(/^(rescue|else|ensure|end)$/).la
|
429
|
+
]>>RescueNode,
|
430
|
+
|
431
|
+
-['ensure', Value.-, KW('end').la]>>EnsureNode,
|
432
|
+
|
433
|
+
-['[', Value.-, ']']>>ArrayLiteralNode,
|
434
|
+
|
435
|
+
-[Value, '[', Value.-, ']']>>BracketsGetNode,
|
436
|
+
|
437
|
+
-[HereDocNode, StringToken.*, StringToken, StringToken.~.la]>>StringCatNode,
|
438
|
+
-[(StringToken|HereDocNode).~.lb, StringToken.*, StringToken, StringToken, StringToken.~.la]>>StringCatNode,
|
439
|
+
-[(StringToken|HereDocNode).~.lb, StringToken, StringToken.~.la]>>StringNode, #includes regexp, wordlist, backquotes
|
440
|
+
|
441
|
+
-['case', Value.-, KW(/^[:;]$/).-, WhenNode.*, ElseNode.-, 'end']>>CaseNode,
|
442
|
+
|
443
|
+
-['when', Value, /^([:;]|then)$/, Value.-,
|
444
|
+
KW(/^(when|else|end)$/).la
|
445
|
+
]>>WhenNode,
|
446
|
+
|
447
|
+
-['for', Value, 'in', Value, /^([:;]|do)$/, Value.-, 'end']>>ForNode,
|
448
|
+
|
449
|
+
#semicolon cleanup....
|
450
|
+
-[dont_postpone_semi.lb,
|
451
|
+
Value, ';',
|
452
|
+
(KW(/^(#{RubyLexer::INNERBOUNDINGWORDS.gsub(/(rescue|then)\|/,'')}|end|[)}\]])$/)|EoiToken).la
|
453
|
+
]>>proc{|stack| stack.delete_at -2 },
|
454
|
+
-[Value, ';', KW('then').la
|
455
|
+
]>>proc{|stack| stack.delete_at -2 },
|
456
|
+
-[dont_postpone_semi.lb, Value, ';', RescueNode
|
457
|
+
]>>proc{|stack| stack.delete_at -3 },
|
458
|
+
-[(KW(/^(begin|[;:({|]|then|do|else|ensure)$/)|StartNode|RescueHeaderNode).lb, ';'
|
459
|
+
]>>proc{|stack| stack.delete_at -2 },
|
460
|
+
#this rule is somewhat more forgiving than matz' parser...
|
461
|
+
#not all semicolons after :, (, and { keywords should
|
462
|
+
#be ignored. some should cause syntax errors.
|
463
|
+
|
464
|
+
|
465
|
+
#comma cleanup....
|
466
|
+
-[',', KW(/^[}\]]$/).la]>>proc{|stack| stack.delete_at -2},
|
467
|
+
#likewise, this is somewhat too forgiving.
|
468
|
+
#some commas before } or ] should cause syntax errors
|
469
|
+
|
470
|
+
#multiple assignment.... (handled in a subsequent stage?)
|
471
|
+
#(cause it requires that the relative priorities of = and , be reversed!)
|
472
|
+
|
473
|
+
|
474
|
+
#turn lvalues into rvalues if not followed by an assignop
|
475
|
+
-[-{:lvalue? =>true}, (Op('=',true)|MODIFYASSIGNOP).~.la]>>proc{|stack| stack[-2].lvalue=nil},
|
476
|
+
|
477
|
+
#expand the = into a separate token in calls to settors (after . or ::).
|
478
|
+
#but not in method headers
|
479
|
+
-[KW('def').~.lb, Value, DotOp|DoubleColonOp,
|
480
|
+
(MethNameToken&-{:ident=>/^[a-z_][a-z0-9_]*=$/i}).la]>>
|
481
|
+
proc{|stack|
|
482
|
+
methname=stack.pop
|
483
|
+
methname.ident.chomp!('=')
|
484
|
+
offset=methname.offset+methname.ident.size
|
485
|
+
stack.push(
|
486
|
+
CallNode.new(methname,nil,nil,nil,nil),
|
487
|
+
OperatorToken.new('=',offset)
|
488
|
+
)
|
489
|
+
},
|
490
|
+
|
491
|
+
-[NumberToken|SymbolToken]>>LiteralNode,
|
492
|
+
|
493
|
+
#lexer does the wrong thing with -22**44.5, making the - part
|
494
|
+
#of the first number token. it's actually lower precedence than
|
495
|
+
#**... this rule fixes that problem.
|
496
|
+
-[NumberToken&-{:ident=>/\A-/}, Op('**').la]>>
|
497
|
+
proc{|stack|
|
498
|
+
neg_op=OperatorToken.new("-@",stack[-2].offset)
|
499
|
+
neg_op.unary=true
|
500
|
+
stack[-2,0]=neg_op
|
501
|
+
stack[-2].ident.sub!(/\A-/,'')
|
502
|
+
stack[-2].offset+=1
|
503
|
+
},
|
504
|
+
|
505
|
+
#treat these keywords like (rvalue) variables.
|
506
|
+
-[/^(nil|false|true|__FILE__|__LINE__|self)$/]>>VarLikeNode,
|
507
|
+
|
508
|
+
#here docs
|
509
|
+
-[HerePlaceholderToken]>>HereDocNode,
|
510
|
+
-[HereBodyToken]>>proc{|stack|
|
511
|
+
stack.delete_at(-2)#.instance_eval{@headtok}.node.saw_body!
|
512
|
+
},
|
513
|
+
|
514
|
+
]
|
515
|
+
end
|
516
|
+
|
517
|
+
|
518
|
+
|
519
|
+
# Sets up a parser.
#
# input:: either an Array of ready-made tokens, or source text to hand to
#         RubyLexer. An Array is used directly as the token source: it gains
#         a singleton get1token method that shifts tokens off its front.
# name::  stored as @filename and passed to RubyLexer as the file name.
# line::  passed to RubyLexer (ignored for Array input).
# lvars:: names marked in the lexer's localvars table (ignored for Array
#         input).
def initialize(input,name="(eval)",line=1,lvars=[])
  @filename=name
  @min_sizes={}
  @compiled_rules={}
  @moretokens=[]
  @unary_or_binary_op=/^[-+&*]$/

  case input
  when Array
    class<<input
      def get1token; shift end
    end
    @lexer=input
  else
    @lexer=RubyLexer.new(name,input,line)
    lvars.each{|varname| @lexer.localvars[varname]=true }
  end

  #grammar tables are built once per parser instance
  @rules=self.RULES
  @precedence=self.PRECEDENCE
  @RIGHT_ASSOCIATIVE=self.RIGHT_ASSOCIATIVE
end
|
536
|
+
|
537
|
+
attr_accessor :lexer
|
538
|
+
|
539
|
+
# Returns the next token for the parser, adapting the lexer's raw tokens to
# what the grammar rules expect.
#
# Tokens queued in @moretokens are handed out first. Otherwise one token is
# pulled from @lexer and, depending on its class:
# * a token responding to #as with a truthy value becomes a not-real
#   KeywordToken for that value;
# * implicit param list start/end tokens become not-real '(' / ')' keywords;
# * NewlineToken becomes a ';' keyword;
# * FileAndLineToken updates @file/@line and is skipped;
# * NoWsToken is dropped and the following token, if it is '[', is tagged
#   with BracketsCall;
# * a unary -, +, & or * OperatorToken is copied with "@" appended to its ident;
# * certain KeywordTokens are rewritten (see the inner case below);
# * IgnoreToken is skipped;
# * anything else, including EoiToken and HereBodyToken, passes through.
#
# recursing:: true for the nested call made while handling NoWsToken;
#             suppresses the PRINT_TOKENS debug output for that call.
#
# Returns the token, which is also stored in @last_token; nil when the lexer
# returns nil.
def get_token(recursing=false)
  unless @moretokens.empty?
    @last_token=@moretokens.shift
    p @last_token if ENV['PRINT_TOKENS'] unless recursing
    return @last_token
  end

  #runs once; 'redo' restarts it to fetch another token, 'break' leaves early
  begin
    result=@lexer.get1token or break
    p result if ENV['RAW_PRINT_TOKENS']

    #set token's line if wanted
    result.line||=@line if result.respond_to? :line=

    if result.respond_to?(:as) and as=result.as
      result=KeywordToken.new(as,result.offset)
      result.not_real!
    else
      case result
      when ImplicitParamListStartToken #treat it like (
        result=KeywordToken.new('(', result.offset)
        result.not_real!

      when ImplicitParamListEndToken
        result=KeywordToken.new(')', result.offset)
        result.not_real!

      # when AssignmentRhsListStartToken, AssignmentRhsListEndToken:
        #do nothing, pass it thru

      when NewlineToken
        result=KeywordToken.new(';',result.offset)

      when FileAndLineToken #so __FILE__ and __LINE__ can know what their values are
        @file=result.file
        @line=result.line
        redo

      when NoWsToken
        #rubylexer disambiguates array literal from
        #call to [] or []= method with a preceding NoWsToken...
        #kind of a dumb interface.
        result=get_token(true)
        result.ident=='[' and result.extend BracketsCall

      when OperatorToken
        if @unary_or_binary_op===result.ident and result.unary
          result=result.dup
          result.ident+="@"
        end

      #more symbol table maintenance....
      when KeywordToken
        case name=result.ident
        when "do"
          #a 'do' for which has_end? is false is replaced by ';'
          result=KeywordToken.new(';',result.offset) unless result.has_end?
        when /^(#{BINOP_KEYWORDS.join '|'})$/ #should be like this in rubylexer
          result=OperatorToken.new(name,result.offset) unless result.has_end?
        when "|"
          result=GoalPostNode.new(result.offset) #is this needed still?
        when "__FILE__" #I wish rubylexer would handle this
          class<<result; attr_accessor :value; end
          result.value=@file.dup
        when "__LINE__" #I wish rubylexer would handle this
          class<<result; attr_accessor :value; end
          result.value=@line
        end

      when EoiToken then break
      when HereBodyToken then break
      when IgnoreToken then redo
      end
    end
  end while false
  p result if ENV['PRINT_TOKENS'] unless recursing
  return @last_token=result
end
|
624
|
+
|
625
|
+
# Tries to apply one grammar rule to the top of @stack.
#
# rule:: a Reg::Transform; its left side is the sequence of matchers, its
#        right side the action (a node Class, a Proc, or :shift).
#
# Returns false when the rule does not match, 0 when a matching rule's action
# is :shift, and true after a Class or Proc action has been applied.
# Any exception is reported on stdout together with the rule, then re-raised.
def evaluate(rule)
  #dissect the rule
  Reg::Transform===rule or fail
  node_type=rule.right
  rule=rule.left.subregs.dup
  #a trailing Proc or LookAhead is tested against the lookahead token only
  lookahead_processor=(rule.pop if Proc|::Reg::LookAhead===rule.last)
  #a leading LookBack is unwrapped; it must match but is not consumed
  lookback=rule[0]=rule[0].regs(0) if ::Reg::LookBack===rule[0]

  #index of data at which to start matching
  i=@stack.size-1   #-1 because last element of @stack is always lookahead

  #I could call this a JIT compiler, but that's a bit grandiose....
  #more of a JIT pre-processor
  compiled_rule=@compiled_rules[rule]||=
    rule.map{|pattern|
      String|Regexp===pattern ? KW(pattern) : pattern
    }

  #what's the minimum @stack size this rule could match?
  rule_min_size=@min_sizes[compiled_rule]||=
    compiled_rule.inject(0){|sum,pattern|
      sum + pattern.itemrange.begin
    }
  i>=rule_min_size or return false

  matching=[]

  #actually try to match rule elements against each @stack element in turn
  compiled_rule.reverse_each{|matcher|
    i.zero? and fail
    target=matching
    #is this matcher optional? looping?
    loop= matcher.itemrange.last.to_f.infinite?
    optional=matcher.itemrange.first.zero?
    matching.unshift target=[] if loop   #looping matchers collect into their own array
    if loop or optional
      matcher=matcher.regs(0)
    end

    begin
      if matcher===@stack[i-=1] #try match
        target.unshift @stack[i]
      else
        #if match failed, the whole rule fails
        #unless this match was optional, in which case, ignore it
        #but bump the data position back up, since the latest datum
        #didn't actually match anything.
        return false unless optional or loop&&!target.empty?
        i+=1
        matching.unshift nil unless loop
        break
      end
    end while loop
  }

  matchrange= i...-1  #what elems in @stack were matched?

  #give lookahead matcher (if any) a chance to fail the match
  case lookahead_processor
  when ::Reg::LookAhead
    return false unless lookahead_processor.regs(0)===@stack.last
  when Proc
    return false unless lookahead_processor[self,@stack.last]
  end

  #if there was a lookback item, don't include it in the new node
  if lookback
    matchrange= i+1...-1  #what elems in @stack were matched?
    matching.shift
  end

  #replace matching elements in @stack with node type found
  case node_type
  when Class
    node=node_type.new(*matching)
    node.line=@line
    @stack[matchrange]=[node]
  when Proc then node_type[@stack]
  when :shift then return 0
  else fail
  end

  return true #let caller know we found a match

rescue Exception=>e
  puts "error (#{e}) while executing rule: #{rule.inspect}"
  puts e.backtrace.join("\n")
  raise
end
|
721
|
+
|
722
|
+
#Error raised by the parser. Besides the message it carries the stack object
#handed to the constructor, readable through #stack.
class ParseError<RuntimeError
  #msg::   the error message
  #stack:: the stack to keep with the error (the parser passes its @stack)
  def initialize(msg,stack)
    super(msg)
    @stack=stack
  end

  attr_reader :stack
end
|
742
|
+
|
743
|
+
#Indexes into the parser stack; arguments are passed straight to @stack's [].
def [](*args)
  @stack[*args]
end
|
746
|
+
|
747
|
+
#Assigns into the parser stack; arguments are passed straight to @stack's []=.
def []=(*args)
  @stack.send(:[]=, *args)
end
|
750
|
+
|
751
|
+
# Runs the shift/reduce loop over the token stream and returns the
# resulting parse tree.
#
# * Returns a NopNode when nothing but the start node and one token
#   ended up on the stack (empty input).
# * Raises ParseError when the stack cannot be reduced to exactly
#   [StartNode, tree, EoiToken], or when the tree contains ErrorNodes.
# * Any exception is reported on stdout together with (a possibly
#   abbreviated form of) the input, then re-raised.
#
# For the duration of the call Thread.current[:$RedParse_parser] is set
# to this parser unless one is already registered there; the previous
# value is put back on exit.
def parse
  saved_parser = Thread.current[:$RedParse_parser]
  Thread.current[:$RedParse_parser] ||= self

  # the last entry of @stack is always implicitly the lookahead token
  @stack = [StartNode.new, get_token]
  loop do
    # try every rule, last one first, until one of them reduces
    reduced = nil
    @rules.reverse_each do |rule|
      reduced = evaluate(rule)
      break if reduced
    end
    next if reduced == true

    # nothing matched the current stack: fetch the next token
    tok = get_token

    # finished when the lexer is exhausted or we see a second end-of-input
    break if tok.nil? || (EoiToken === tok && EoiToken === @stack.last)

    # otherwise shift the token
    @stack.push tok
  end

  # empty parse string
  return NopNode.new if @stack.size == 2

  # anything other than start node + tree + end-of-input is a parse error
  if @stack.size != 3
    if ENV['PRINT_STACK']
      tail = [15, @stack.size].min
      pp @stack[-tail..-1]
    end
    top = MisparsedNode.new("(toplevel)", @stack[1...-1], '')
    raise ParseError.new(top.msg, @stack)
  end
  fail unless EoiToken === @stack.last
  fail unless StartNode === @stack.first

  result = @stack[1]

  # The post-parse fixups for multiple assignment and for rescue
  # assignment (where the relative precedence of = against , and
  # rescue is inverted) are currently disabled:
  #   result=result.fixup_multiple_assignments!
  #   result=result.fixup_rescue_assignments!

  # gather the messages of all error nodes in the tree
  msgs = []
  if result.respond_to? :walk #hack hack
    result.walk do |parent, i, subi, node|
      if ErrorNode === node
        msgs << @filename + ":" + node.blame.msg
        false # block value for an error node
      else
        true  # block value for every other node
      end
    end
  end
  result.errors = msgs unless msgs.empty?
  # other kinds of errors (lexer errors, exceptions in lexer or parser
  # actions) should be handled in the same way, but currently are not

rescue Exception
  if Array === @lexer
    puts "error while parsing:"
    pp @lexer
    shown = nil
  else
    shown = @lexer.original_file
    source_name = @lexer.filename
    # long inputs are replaced by their file name in the report
    shown = source_name if shown.to_s.size > 1000
  end
  puts "error while parsing: <<< #{shown} >>>"
  raise
else
  if !msgs.empty?
    if ENV['PRINT_STACK']
      tail = [15, @stack.size].min
      pp @stack[-tail..-1]
    end
    raise RedParse::ParseError.new(msgs.join("\n"), @stack)
  end

  return result
ensure
  Thread.current[:$RedParse_parser] = saved_parser
end
|
840
|
+
|
841
|
+
# Flat list of the sub-matchers found on the left-hand side of every
# rule returned by RULES. A new array is built on each call.
def LEFT_MATCHERS
  per_rule = self.RULES.map { |rule| rule.left.subregs }
  per_rule.flatten
end
|
842
|
+
# Returns the classes mentioned by the grammar rules: first the classes
# found in the left-hand matchers of every rule, then the classes that
# rules produce on their right-hand side. Duplicates are not removed.
#
# Left-hand matchers are processed as follows:
# * Reg::LookAhead / Reg::LookBack wrappers are replaced by the matcher
#   they wrap (regs(0));
# * a Class is kept as is;
# * Reg::And / Reg::Or are searched recursively through their subregs;
# * every other kind of matcher is ignored.
def STACKABLE_CLASSES
  # The original referenced _LEFT_MATCHERS without ever assigning it,
  # which raised NameError. LEFT_MATCHERS builds a new array on each
  # call, so the in-place map! below does not disturb anything shared.
  left_matchers = self.LEFT_MATCHERS

  # remove lookahead and lookback decoration
  left_matchers.map! { |m|
    case m
    when Reg::LookAhead, Reg::LookBack then m.regs(0)
    else m
    end
  }

  # extracts the class(es) named by one matcher; nil when there are none
  rule_juicer = proc { |m|
    case m
    when Class then m
    when Reg::And then m.subregs.map(&rule_juicer)
    when Reg::Or then m.subregs.map(&rule_juicer)
    else nil
    end
  }

  left_classes = left_matchers.map { |m| rule_juicer[m] }.flatten.compact
  right_classes = self.RULES.map { |r| r.right }.grep(Class) #classes in productions
  left_classes + right_classes
end
|
863
|
+
=begin
|
864
|
+
HIER=Class::FlattenedHierarchy.new *STACKABLE_CLASSES
|
865
|
+
|
866
|
+
LOOKAHEAD_MATCHERS=self.RULES.map{|r| r.left.subregs.last }.map{|la| Reg::LookAhead===la and la.regs(0) }
|
867
|
+
|
868
|
+
LOOKAHEAD_CLASSES=LOOKAHEAD_MATCHERS.map(&rule_juicer)
|
869
|
+
LOOKAHEAD_CLASSES.each_with_index{|classes,i|
|
870
|
+
case classes
|
871
|
+
when Class: huh
|
872
|
+
when Array: classes.flatten.each{huh}
|
873
|
+
else
|
874
|
+
end
|
875
|
+
}
|
876
|
+
=end
|
877
|
+
# def fixup_multiple_assignments!; end
|
878
|
+
end
|
879
|
+
|
880
|
+
|
881
|
+
if __FILE__==$0
|
882
|
+
require 'problemfiles'
|
883
|
+
# Raised by the `BEGIN{raise NeverExecThis}` guard that this script
# prepends to source text, and by the script itself when the
# :never_exec_this catch fires -- i.e. when input that should only
# have been parsed was about to be executed.
class NeverExecThis<RuntimeError; end
|
884
|
+
|
885
|
+
# Recursively compares two (nested) arrays position by position.
#
# Returns the two-element array [delta, differs]:
# * delta   - when a == b, a itself; otherwise a new array that follows
#             a, with each differing position replaced by the
#             single-pair hash {a_item => b_item}. Positions where both
#             sides hold arrays are compared recursively.
# * differs - true when a difference was recorded.
#
# A non-array a is wrapped in a one-element array first. Two Regexps
# with the same to_s and options are treated as equal. Where both
# sides are nil but the sizes differ, a {'size mismatch'=>...} marker
# is recorded.
#
# Consecutive hash entries are folded into one hash whose key and
# value are built with unary minus on an Array.
# NOTE(review): that relies on the Reg library; it is assumed that
# -[...] yields a Reg::Subseq and that Reg::Subseq#children returns its
# members -- confirm against Reg.
#
# Fixes relative to the previous version:
# * when extending an already folded entry, children is now called on
#   the Subseq key/value instead of on the enclosing keys/values
#   arrays, and the unwrapped values are actually used (they used to be
#   assigned to a misspelt, unused variable);
# * delta is built by appending, so folding two entries into one no
#   longer shifts the positions that later writes refer to.
def arraydiff(a,b)
  a==b and return [a,false]
  a=[a] unless Array===a
  result=[]
  diff=false
  size= a.size >= b.size ? a.size : b.size
  size.times{|i|
    ai=a[i]
    bi=b[i]
    entry=ai
    if Array===ai and Array===bi
      entry,diff_i= arraydiff(ai,bi)
      diff||=diff_i
    elsif ai!=bi
      if Regexp===ai and ai.to_s==bi.to_s and ai.options==bi.options
        # equivalent regexps: keep a's item, record no difference
        result<<entry
        next
      end
      diff=true
      entry={ai=>bi}
    elsif ai.nil?
      entry={'size mismatch'=>"#{a.size} for #{b.size}"} if a.size!=b.size
      diff=true
    end

    old=result.last
    if Hash===entry and Hash===old
      # fold this entry into the directly preceding hash entry
      oldkeys=old.keys
      oldvals=old.values
      if Reg::Subseq===oldkeys.first
        # the preceding entry is already folded: unwrap its members
        oldkeys=oldkeys.first.children
        oldvals=oldvals.first.children
      end
      result[-1]={-(oldkeys+entry.keys)=>-(oldvals+entry.values)}
    else
      result<<entry
    end
  }
  return result,diff
end
|
920
|
+
|
921
|
+
# Command-line driver: parses each input with RedParse and prints the
# result, or compares it with the output of the ParseTree library.
#
# Options (processed until the first argument not starting with '-'):
#   --                    stop option processing
#   --pp                  pretty-print the tree (default)
#   --lisp                print tree.to_lisp
#   --parsetree           pretty-print tree.to_parsetree
#   --vsparsetree         compare with ParseTree, print the differences
#   --vsparsetree2        compare with ParseTree, print both trees
#   --update-problemfiles create a ProblemFiles object; names are pushed
#                         to it on failure or difference and deleted
#                         from it when no difference is found
#   -q / -v               quiet (default) / verbose
#   -e                    the remaining arguments, joined by spaces, are
#                         the code to parse
#
# Without -e, input is read from stdin when no arguments remain, from
# every **/*.rb file below a single directory argument, or from the
# named files. Exits with status 1 if any comparison found differences,
# 0 otherwise (2 on interrupt).
#
# Changes from the previous version: `when x:` / `rescue X:` (removed in
# Ruby 1.9) are written with `then`; files are read with File.read so no
# handle is left open; unreadable inputs (nil) no longer make the
# safe_inputs computation raise TypeError; 'pp' is required up front
# because several output modes use it.
require 'pp'

output=:pp
quiet=true
while /^-/===ARGV.first
  case opt=ARGV.shift
  when "--" then break
  when "--pp" then output=:pp
  when "--lisp" then output=:lisp
  when "--parsetree" then output=:parsetree
  when "--vsparsetree" then output=:vsparsetree
  when "--vsparsetree2" then output=:vsparsetree2
  when "--update-problemfiles" then problemfiles=ProblemFiles.new
  when "-q" then quiet=true
  when "-v" then quiet=false
  when "-e"
    inputs=[ARGV.join(" ")]
    names=["-e"]
    break
  else fail "unknown option: #{opt}"
  end
end

unless inputs
  if ARGV.empty?
    inputs=[STDIN.read]
    names=["-"]
  elsif ARGV.size==1 and (Dir.entries(ARGV.first) rescue false)
    names=Dir[ARGV.first+"/**/*.rb"]
  else
    names=ARGV.dup
  end
  # unreadable files become nil and are skipped further down
  inputs||=names.map{|name| File.read(name) rescue nil}
end

result=0

# Prefix for the text handed to ParseTree: if that text is ever
# executed instead of merely parsed, the BEGIN blocks fire at once.
safety="BEGIN{raise NeverExecThis};BEGIN{throw :never_exec_this,1};\n"
nullsafety="\n"
safe_inputs=inputs.map{|input| input && safety+input}

inputs.each_index{|i|
  begin

    input=inputs[i] or next
    name=names[i]

    input=nullsafety+input
    #print name+"... "; STDOUT.flush

    begin
      tree=nil
      # catch returns nil on a normal parse and the thrown value (1)
      # if the :never_exec_this guard was triggered
      if catch(:never_exec_this){
           tree=RedParse.new(input,name).parse; nil
         }
        puts "RedParse attempted to execute parse data in #{name}"
        next
      end
    rescue Interrupt then exit 2
    rescue Exception=>e
      e.message << " during parse of #{name}"
      problemfiles.push name if problemfiles
      raise e
    end
    tree or fail "parsetree was nil for #{name}"

    case output
    when :pp
      pp tree
    when :lisp
      puts tree.to_lisp
    when :parsetree
      pp tree.to_parsetree
    when :vsparsetree,:vsparsetree2
      begin
        require 'rubygems'
      rescue Exception
      end
      require 'parse_tree'
      #require 'algorithm/diff'
      begin
        mine=tree.to_parsetree(:quirks)
        if IO===input
          input.rewind
          input=input.read
        end
        ryans=nil
        catch(:never_exec_this){
          ryans=ParseTree.new.parse_tree_for_string(safe_inputs[i],name); nil
        } and raise NeverExecThis
        delta,is_diff=arraydiff(mine,ryans)
      rescue NeverExecThis
        puts "ParseTree attempted to execute parse data in #{name}"
        next
      rescue Interrupt then exit 2
      rescue Exception=>e
        #raise( RuntimeError.new( "#{e} during to_parsetree of #{name}" ) )
        puts "error during to_parsetree of #{name}"
        problemfiles.push name if problemfiles
        raise
      end
      if output==:vsparsetree2
        if !quiet or is_diff
          puts "mine:"
          pp mine
          puts "ryans:" if is_diff
          pp ryans if is_diff
        end
      elsif !quiet or is_diff
        puts 'differences in '+name if is_diff
        pp delta
      end
      if is_diff
        result=1
        problemfiles.push name if problemfiles
      else
        puts "no differences in "+name
        problemfiles.delete name if problemfiles
      end
    end

  rescue NeverExecThis
    puts "mysterious attempt to execute parse data in #{name}"
    next
  rescue Interrupt,SystemExit then exit 2
  rescue Exception=>e
    # report the failure and carry on with the next input
    puts "#{e}:#{e.class}"
    puts e.backtrace.join("\n")
    #problemfiles.push name if problemfiles
    #raise
  ensure
    STDOUT.flush
  end
}
exit result
|
1061
|
+
end
|
1062
|
+
|
1063
|
+
=begin todo:
|
1064
|
+
v merge DotCallNode and CallSiteNode and CallWithBlockNode
|
1065
|
+
remove actual Tokens from parse tree...
|
1066
|
+
instead, each node has a corresponding range of tokens
|
1067
|
+
-in an (optional) array of all tokens printed by the tokenizer.
|
1068
|
+
split ParenedNode into ParenedNode + Rescue/EnsureNode
|
1069
|
+
'incomplete' subtrees such as ElseNode, ElsifNode, RescueNode
|
1070
|
+
-should not appear in final output
|
1071
|
+
v split keywordopnode into loop and if varieties?
|
1072
|
+
=end
|
1073
|
+
|
1074
|
+
=begin optimization opportunities:
|
1075
|
+
top of stack slot contains mostly keywords, specific node classes, and Value
|
1076
|
+
lookahead slot contains mostly LowerOp and keywords, with a few classes and inverted keywords
|
1077
|
+
-(LowerOp is hard to optimize)
|
1078
|
+
if top of stack matcher is Value, then the next matcher down is mostly keywords, with some operators
|
1079
|
+
class membership can be optimized to test of integer within a range
|
1080
|
+
keywords could be stored as symbols instead of strings
|
1081
|
+
a few rules may need exploding (eg, ensure) to spoon feed the optimizer
|
1082
|
+
make all Nodes descendants of Array
|
1083
|
+
=end
|