redparse 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING.LGPL +165 -0
- data/Manifest.txt +40 -0
- data/README.txt +461 -0
- data/Rakefile +26 -0
- data/lib/redparse.rb +1083 -0
- data/lib/redparse/babynodes.rb +137 -0
- data/lib/redparse/babyparser.rb +276 -0
- data/lib/redparse/decisiontree.rb +372 -0
- data/lib/redparse/node.rb +3808 -0
- data/lib/redparse/problemfiles.rb +84 -0
- data/lib/redparse/reg_more_sugar.rb +99 -0
- data/nurli/test_control.nurli +261 -0
- data/redparse.vpj +92 -0
- data/redparse.vpw +8 -0
- data/test/data/__end.rb +5 -0
- data/test/data/__f.rb +2 -0
- data/test/data/be.rb +3 -0
- data/test/data/be2.rb +6 -0
- data/test/data/bqhd.rb +3 -0
- data/test/data/bqhd2.rb +3 -0
- data/test/data/case.rb +8 -0
- data/test/data/datetime.rb +66 -0
- data/test/data/defd.rb +9 -0
- data/test/data/hd-def.rb +8 -0
- data/test/data/hd.rb +3 -0
- data/test/data/hd2.rb +3 -0
- data/test/data/hd3.rb +3 -0
- data/test/data/hd4.rb +75 -0
- data/test/data/hd5.rb +4 -0
- data/test/data/hdcat.rb +4 -0
- data/test/data/hdx.rb +3 -0
- data/test/data/heredoc.rb +3 -0
- data/test/data/if.rb +7 -0
- data/test/data/jbridge.rb +779 -0
- data/test/data/mod.rb +3 -0
- data/test/data/nl_as_strdelim.rb +7 -0
- data/test/data/pw.rb +2 -0
- data/test/data/wvt.rb +2 -0
- data/test/rp-locatetest.rb +344 -0
- data/test/test_redparse.rb +3319 -0
- metadata +113 -0
data/Rakefile
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# Copyright (C) 2008 Caleb Clausen
|
2
|
+
# Distributed under the terms of Ruby's license.
|
3
|
+
require 'rubygems'
|
4
|
+
require 'hoe'
|
5
|
+
require 'lib/redparse/version.rb'
|
6
|
+
|
7
|
+
|
8
|
+
#Pull the gem description out of README.txt. The first readline consumes
#everything up to and including the "== DESCRIPTION:" heading, the second
#consumes the rest up to the next blank line, and the third returns the
#chunk that follows (through its terminating blank line).
#The block form of File.open closes the file even if a readline raises
#(readline raises EOFError when the README is shorter than expected).
desc=File.open("README.txt"){|readme|
  readme.readline("\n== DESCRIPTION:")
  readme.readline("\n\n")
  readme.readline("\n\n")
}

#Gem specification, declared through Hoe.
hoe=Hoe.new("redparse", RedParse::VERSION) do |_|
  _.author = "Caleb Clausen"
  _.email = "redparse-owner @at@ inforadical .dot. net"
  _.url = ["http://redparse.rubyforge.org/", "http://rubyforge.org/projects/redparse/"]
  _.extra_deps << ['rubylexer', '>= 0.7.2']
#  _.test_globs=["test/*"]
  _.description=desc
  #summary is the first sentence of the description (text up to the first period)
  _.summary=desc[/\A[^.]+\./]
  _.spec_extras={:bindir=>''}
  _.rdoc_pattern=/\A(README\.txt|lib\/.*\.rb)\Z/
  _.remote_rdoc_dir="/"
end
|
25
|
+
|
26
|
+
|
data/lib/redparse.rb
ADDED
@@ -0,0 +1,1083 @@
|
|
1
|
+
=begin
|
2
|
+
redparse - a ruby parser written in ruby
|
3
|
+
Copyright (C) 2008 Caleb Clausen
|
4
|
+
|
5
|
+
This program is free software: you can redistribute it and/or modify
|
6
|
+
it under the terms of the GNU Lesser General Public License as published by
|
7
|
+
the Free Software Foundation, either version 3 of the License, or
|
8
|
+
(at your option) any later version.
|
9
|
+
|
10
|
+
This program is distributed in the hope that it will be useful,
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
GNU Lesser General Public License for more details.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU Lesser General Public License
|
16
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
=end
|
18
|
+
|
19
|
+
#warn 'hacking up LOAD_PATH to include the latest RubyLexer!'
|
20
|
+
#$:.unshift Dir.pwd+'/../rubylexer/lib', Dir.pwd+'/../rubylexer'
|
21
|
+
|
22
|
+
# "faster rule compiler is untested"
|
23
|
+
|
24
|
+
require 'rubygems'
|
25
|
+
require 'rubylexer'
|
26
|
+
require 'reg'
|
27
|
+
|
28
|
+
require "redparse/node"
|
29
|
+
#require "redparse/decisiontree"
|
30
|
+
require "redparse/reg_more_sugar"
|
31
|
+
class RedParse
|
32
|
+
# include Nodes
|
33
|
+
|
34
|
+
#Lexes the snippet "return {}.size" with RubyLexer and reports whether the
#tokens come out as exactly the expected sequence of token classes.
#Returns true when every token matches, false at the first mismatch.
def self.has_return_hash_fix?
  lexer=RubyLexer.new("","return {}.size")
  expected_classes=[
    FileAndLineToken,
    MethNameToken,
    ImplicitParamListStartToken,
    WsToken,
    KeywordToken,
    KeywordToken,
    KeywordToken,
    MethNameToken,
    ImplicitParamListStartToken,
    ImplicitParamListEndToken,
    ImplicitParamListEndToken,
    EoiToken,
  ]
  #all? stops at the first failure, so no more tokens are pulled from
  #the lexer once a mismatch has been seen
  expected_classes.all?{|token_class| token_class===lexer.get1token }
end
|
51
|
+
|
52
|
+
#see pickaxe, 1st ed, page 221
|
53
|
+
#Table of the operators treated as right-associative, mapped to the same
#numbers they carry in PRECEDENCE. A new Hash is built on each call.
#Entries that were considered but are disabled:
#  "defined?"=>120.5
#  "and"=>99, "or"=>99
#  "if"=>98, "unless"=>98, "while"=>98, "until"=>98, "rescue"=>98
#  "&&"=>109, "||"=>108
def RIGHT_ASSOCIATIVE
  table={"**"=>118}
  assignment_ops=[
    "=", "%=", "/=", "-=", "+=",
    "|=", "&=", ">>=", "<<=", "*=",
    "&&=", "||=", "**=", "^=",
  ]
  assignment_ops.each{|assign_op| table[assign_op]=105 }
  table
end
|
69
|
+
|
70
|
+
#Operator precedence table: maps each operator or keyword string to a
#number (higher number = higher precedence level in this table).
#A new Hash is built on each call.
def PRECEDENCE
  levels=[
#   [122,   ["("]],          #method param list
#   [122,   ["{", "do"]],    #blocks
    [121,   ["::", "."]],
#   [120.5, ["defined?"]],
    [120,   ["["]],          #[] []= methods
    [119,   ["!", "~", "+@"]],
    [118,   ["**"]],
    [117,   ["-@"]],
    [116,   ["*", "/", "%"]],
    [115,   ["+", "-"]],
    [114,   ["<<", ">>"]],
    [113,   ["&"]],
    [112,   ["^", "|"]],
    [111,   ["<=", ">=", "<", ">"]],
    [110,   ["<=>", "==", "===", "!=", "=~", "!~"]],
    [109,   ["&&"]],
    [108,   ["||"]],
    [107,   ["..", "..."]],
    [106,   ["?"]],          # ":"=>106, #not sure what to do with ":"
    [105.5, ["*@", "&@"]],   #unary * and & operators
    [105,   ["=", "%=", "/=", "-=", "+=",
             "|=", "&=", ">>=", "<<=", "*=",
             "&&=", "||=", "**=", "^="]],
    [103,   ["defined?", "not"]],
    [102,   [":", "rescue3"]], #":" but not when used as a substitute for 'then'
    [101,   ["=>"]],
    [100,   [","]],
    #the 'precedence' of comma is somewhat controversial. it actually has
    #several different precedences depending on which kind of comma it is.
    #the precedence of , is higher than :, => and the assignment operators
    #in certain contexts.

    #"unary" prefix function names seen as operators have this precedence
    #but, rubylexer handles precedence of these and outputs fake parens
    #to tell us how it's parsed
    [99,    ["or", "and"]],
    [98,    ["if", "unless", "while", "until", "rescue"]],
    [96,    [";"]],
  ]

  #flatten the level list into a single op=>level hash, keeping the
  #operators in the order they are listed above
  levels.inject({}){|table,(level,ops)|
    ops.each{|op| table[op]=level }
    table
  }
end
|
143
|
+
|
144
|
+
module BracketsCall; end
|
145
|
+
|
146
|
+
Value= #NumberToken|SymbolToken|
|
147
|
+
#HerePlaceholderToken|
|
148
|
+
((VarNameToken|ValueNode)&-{:lvalue? =>nil})
|
149
|
+
Expr=Value
|
150
|
+
|
151
|
+
#Build a matcher for a KeywordToken whose ident equals/matches +ident+.
#  ident: an Integer (converted to a one-character string with chr),
#         a String or Regexp (used as is), or anything else (to_s is applied).
#Returns KeywordToken combined (via &) with the -{:ident=>...} pattern.
def self.KW(ident)
  #'when X then' rather than the old 'when X:' form, which Ruby 1.9 and
  #later reject as a syntax error
  ident=case ident
        when Integer then ident.chr
        when String,Regexp then ident
        else ident.to_s
        end

  return KeywordToken&-{:ident=>ident}
end
|
160
|
+
#instance-level convenience: forwards to the class-level KW
def KW(ident)
  self.class.KW(ident)
end
|
161
|
+
UNOP=
|
162
|
+
(OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
|
163
|
+
:ident=>/^[*&+-]@$/,
|
164
|
+
:unary =>true,
|
165
|
+
}|
|
166
|
+
(OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
|
167
|
+
:ident=>/^([~!]|not|defined\?)$/,
|
168
|
+
} #|
|
169
|
+
DEFOP=
|
170
|
+
(OperatorToken|KeywordToken)&-{ #sppflt! KeywordToken here is a hack too
|
171
|
+
:ident=>"defined?",
|
172
|
+
}
|
173
|
+
=begin
|
174
|
+
MethNameToken&-{ #hack, shouldn't be necessary
|
175
|
+
#rubylexer should know to generally treat "defined?" as a keyword
|
176
|
+
#or operator. (like most keywords, it can also be used as a method
|
177
|
+
# name....)
|
178
|
+
:ident=>"defined?"
|
179
|
+
}
|
180
|
+
=end
|
181
|
+
|
182
|
+
#Build a matcher for operator tokens.
#  ident:         when given, the matcher is additionally restricted
#                 (via &) to the pattern -{:ident=>ident}
#  allow_keyword: when true, KeywordToken is accepted as well as OperatorToken
#Returns the resulting matcher.
def self.Op(ident=nil, allow_keyword=false)
  matcher=if allow_keyword
            OperatorToken|KeywordToken
          else
            OperatorToken
          end
  return matcher unless ident
  #matcher[:infix?]=true
  matcher & -{:ident=>ident}
end
|
189
|
+
#instance-level convenience: forwards all arguments to the class-level Op
def Op(*args)
  self.class.Op(*args)
end
|
190
|
+
BINOP_KEYWORDS=%w[if unless while until and or && \|\|]
|
191
|
+
|
192
|
+
#HAS_PRECEDENCE=Op(/^#{PRECEDENCE.keys.map{|k| Regexp.quote k}.join('|')}$/,true)
|
193
|
+
=begin
|
194
|
+
KeywordOp=
|
195
|
+
KeywordToken & -{
|
196
|
+
:ident=>/^(#{BINOP_KEYWORDS.join('|')})$/
|
197
|
+
}
|
198
|
+
KeywordOp2=
|
199
|
+
KeywordToken & -{
|
200
|
+
:ident=>/^([\[({!+*?:,]|\.{1,3}|::|=>)$/
|
201
|
+
}
|
202
|
+
=end
|
203
|
+
DotOp= KeywordToken & -{ :ident=>"." }
|
204
|
+
DoubleColonOp= KeywordToken & -{ :ident=>"::" }
|
205
|
+
|
206
|
+
Op=Op()
|
207
|
+
MODIFYASSIGNOP=Op( /^(([^=])\2|[^<>=!])=$/, true )
|
208
|
+
NONASSIGNOP=Op( /([^=]|[<>=!]=)$/)
|
209
|
+
KW_Op= #some of these ought to be regular operators, fer gosh sake
|
210
|
+
Op(/^((![=~])|\.\.\.?|=>)$/,true)|Op(/^(#{BINOP_KEYWORDS.join('|')})$/)
|
211
|
+
|
212
|
+
EPSILON=Float::EPSILON*10_000_000 #this should be <<1 and >0
|
213
|
+
fail unless 1+EPSILON>1
|
214
|
+
fail unless EPSILON<0.1
|
215
|
+
|
216
|
+
#Decide whether the operator on the left (op) should be reduced before
#the operator on the right (op2) is taken into account.
#Returns true when op2 is neither a KeywordToken nor an OperatorToken, or
#when op2 has no entry in @precedence. Otherwise compares the two
#precedence numbers; a right-associative op2 gets its number nudged up by
#EPSILON so that an equal-precedence left operator does not win.
def left_op_higher(op,op2)
  # (Op|KeywordOp|KeywordOp2|ASSIGNOP===op2) or return true
  return true unless KeywordToken===op2 or OperatorToken===op2

  right_name=op2.to_s
  right_level=@precedence[right_name]
  #or fail "unrecognized right operator: #{op2.inspect}"
  return true unless right_level

  right_level+=EPSILON if @RIGHT_ASSOCIATIVE[right_name]
  @precedence[op.to_s]>=right_level
end
|
224
|
+
|
225
|
+
#Proc taking the parser and a token op2; it delegates to
#parser.left_op_higher, passing the item at parser[-3] as the left operator.
LowerOp= proc{|parser,op2| parser.left_op_higher(parser[-3],op2) }
|
226
|
+
|
227
|
+
def dont_postpone_semi
|
228
|
+
@dps||=~wants_semi_context
|
229
|
+
end
|
230
|
+
WANTS_SEMI=%w[while until if unless
|
231
|
+
def case when in rescue
|
232
|
+
elsif class module << => . ::
|
233
|
+
]
|
234
|
+
#Matcher for either the '<<' operator or a keyword whose ident is exactly
#one of the words in WANTS_SEMI (each word is regexp-quoted).
def wants_semi_context
  alternatives=WANTS_SEMI.map{|word| Regexp.quote(word) }.join('|')
  Op('<<') | KW(/^(#{alternatives})$/)
end
|
237
|
+
|
238
|
+
NeverBlockKeyword=MethNameToken&-{:ident=>/^(return|break|next)$/}
|
239
|
+
FakeBegin=KW('(')&-{:not_real? =>true}
|
240
|
+
FakeEnd=KW(')')&-{:not_real? =>true}
|
241
|
+
|
242
|
+
#rule format:
|
243
|
+
# -[syntax pattern_matchers.+, lookahead.-]>>node type
|
244
|
+
|
245
|
+
DotCall=proc{|stack|
|
246
|
+
right=stack[-2]
|
247
|
+
left,bogus=*stack.slice!(-4..-3)
|
248
|
+
|
249
|
+
right.set_receiver! left
|
250
|
+
}
|
251
|
+
|
252
|
+
Lvalue=(VarNameToken|CallSiteNode|BracketsGetNode|CommaOpNode|
|
253
|
+
ParenedNode|ConstantNode|UnaryStarNode)&-{:lvalue? =>true}
|
254
|
+
|
255
|
+
BareMethod=MethNameToken|LiteralNode&-{:val=>Symbol|StringNode}
|
256
|
+
|
257
|
+
BEGINWORDLIST=RubyLexer::BEGINWORDLIST + %w"( [ {"
|
258
|
+
ENDWORDLIST=%w"end ) ] }"
|
259
|
+
BEGIN2END={"{"=>"}", "("=>")", "["=>"]", }
|
260
|
+
endword="end"
|
261
|
+
RubyLexer::BEGINWORDLIST.each{|bw| BEGIN2END[bw]=endword }
|
262
|
+
#Regexp matching exactly one word from BEGINWORDLIST or ENDWORDLIST
#(every word is regexp-quoted). Built on first use and cached in @bem.
def beginsendsmatcher
  return @bem if @bem
  words=BEGINWORDLIST+ENDWORDLIST
  alternation=words.map{|word| Regexp.quote word }.join('|')
  @bem=/^(#{alternation})$/
end
|
266
|
+
|
267
|
+
MULTIASSIGN=UnaryStarNode|CommaOpNode|(ParenedNode&-{:size=>1})
|
268
|
+
WITHCOMMAS=UnaryStarNode|CommaOpNode|
|
269
|
+
(CallSiteNode&-{:real_parens=>false, :args=>-{:size=>~0.reg}}) #|(ParenedNode&-{:size=>~1.reg})
|
270
|
+
|
271
|
+
BEGINAFTEREQUALS=
|
272
|
+
ParenedNode&
|
273
|
+
-{:size =>~1.reg, :op? =>NilClass|FalseClass, :after_equals =>nil}&
|
274
|
+
(-{:body=>item_that.size>0}|-{:rescues=>item_that.size>0}|-{:ensures=>~NilClass})
|
275
|
+
# item_that{|x| x.body.size+x.rescues.size > 0 or x.ensures }
|
276
|
+
|
277
|
+
# ASSIGN_COMMA=Op(',',true)&-{:comma_type=>Symbol}
|
278
|
+
LHS_COMMA=Op(',',true)&-{:comma_type => :lhs}
|
279
|
+
RHS_COMMA=Op(',',true)&-{:comma_type => :rhs}
|
280
|
+
PARAM_COMMA=Op(',',true)&-{:comma_type => :param}
|
281
|
+
FUNCLIKE_KEYWORD=KeywordToken&-{:ident=>RubyLexer::FUNCLIKE_KEYWORDS}
|
282
|
+
|
283
|
+
def RULES
|
284
|
+
#these must be the lowest possible priority, and hence first in the rules list
|
285
|
+
BEGIN2END.map{|_beg,_end|
|
286
|
+
-[KW(_beg), KW(beginsendsmatcher).~.*, KW(_end), KW(/^(do|\{)$/).~.la]>>MisparsedNode
|
287
|
+
}+
|
288
|
+
|
289
|
+
[
|
290
|
+
-[UNOP, Value, LowerOp]>>UnOpNode,
|
291
|
+
-[DEFOP, ParenedNode&-{:size=>1}]>>UnOpNode,
|
292
|
+
-[Op('*@'), VarNameToken|ValueNode, LowerOp]>>UnaryStarNode,
|
293
|
+
|
294
|
+
-[Op('=',true)|KW(/^(rescue|when|\[)$/)|-{:comma_type=>:call.reg|:array|:param|:rhs},
|
295
|
+
Op('*@'), VarNameToken|ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
|
296
|
+
-[MethNameToken|FUNCLIKE_KEYWORD, KW('('),
|
297
|
+
Op('*@'), VarNameToken|ValueNode, (MODIFYASSIGNOP|Op('=',true)).la]>>:shift,
|
298
|
+
# -[KW('[')|-{:comma_type=>:call.reg|:array},
|
299
|
+
# Op('*@'), VarNameToken|ValueNode, Op('=',true).la]>>:shift,
|
300
|
+
#star should not be used in an lhs if an rhs or param list context is available to eat it.
|
301
|
+
#(including param lists for keywords such as return,break,next,continue,rescue,yield,when)
|
302
|
+
|
303
|
+
-[Op('*@'), (GoalPostNode|KW(/^(in|[=)|,;])$/)).la]>>DanglingStarNode, #dangling *
|
304
|
+
-[',', (GoalPostNode|KW(/^(in|[=)|;])$/)).la]>> #dangling ,
|
305
|
+
proc{|stack|
|
306
|
+
dcomma=DanglingCommaNode.new
|
307
|
+
dcomma.offset=stack.last.offset
|
308
|
+
stack.push dcomma, stack.pop
|
309
|
+
},
|
310
|
+
|
311
|
+
-[Value, Op|KW_Op, Value, LowerOp]>>RawOpNode, #most operators
|
312
|
+
|
313
|
+
#assignment
|
314
|
+
-[Lvalue, MODIFYASSIGNOP, Value, LowerOp]>>AssignNode,
|
315
|
+
-[Lvalue, Op('=',true), AssignmentRhsNode, LowerOp]>>AssignNode,
|
316
|
+
-[Op('=',true).lb, Value, LowerOp]>>AssignmentRhsNode,
|
317
|
+
#was: -[AssignmentRhsListStartToken, Value, AssignmentRhsListEndToken]>>AssignmentRhsNode,
|
318
|
+
-[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
|
319
|
+
Op('rescue3',true), Value, LowerOp]>>AssignNode,
|
320
|
+
-[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
|
321
|
+
Op('rescue3',true).la]>>:shift,
|
322
|
+
-[Lvalue&~MULTIASSIGN, Op('=',true), AssignmentRhsNode&-{:val =>~WITHCOMMAS},
|
323
|
+
Op('rescue',true).la] >>proc{|stack|
|
324
|
+
resc=stack.last.dup
|
325
|
+
resc.ident += '3'
|
326
|
+
stack[-1]=resc
|
327
|
+
},
|
328
|
+
# a = b rescue c acts like a ternary,,,
|
329
|
+
#provided that both a and b are not multiple and b
|
330
|
+
#(if it is a parenless callsite) has just 1 param
|
331
|
+
|
332
|
+
# -[Op('=',true), ~WITHCOMMAS, Op('rescue',true).la]>>:shift,
|
333
|
+
#relative precedence of = and rescue are to be inverted if rescue
|
334
|
+
#is to the right and assignment is not multiple.
|
335
|
+
|
336
|
+
-[Op('=',true).~.lb, OB, Op('=',true), Value, RHS_COMMA.la]>>:shift,
|
337
|
+
-[RHS_COMMA.lb, Lvalue, Op('=',true), Value, RHS_COMMA.la ]>>AssignNode,
|
338
|
+
-[ValueNode|VarNameToken, LHS_COMMA, ValueNode|VarNameToken, Op('=',true).la]>>CommaOpNode,
|
339
|
+
#relative precedence of = and lhs/rhs , are to be inverted.
|
340
|
+
|
341
|
+
-[KW(',')&-{:comma_type=>:lhs}, (UnaryStarNode|ParenedNode)&~-{:after_comma =>true}, Op('=',true)]>>
|
342
|
+
proc{|stack| stack[-3].after_comma=true}, #mebbe this should be a lexer hack
|
343
|
+
#mark parentheses and unary stars that come after lhs commas
|
344
|
+
|
345
|
+
#-[Value, DotOp|DoubleColonOp, MethNameToken,
|
346
|
+
# ASSIGNOP, Value, LowerOp]>>AccessorAssignNode,
|
347
|
+
|
348
|
+
-[MethNameToken.~.lb, '(', Value, ')']>>ParenedNode,
|
349
|
+
-[MethNameToken.~.lb, '(', ')']>>VarLikeNode, #alias for nil
|
350
|
+
|
351
|
+
# -[Value, KeywordOp, Value, LowerOp]>>KeywordOpNode,
|
352
|
+
-[Op('=',true).~.lb, Value, Op('rescue',true), Value, LowerOp]>>ParenedNode,
|
353
|
+
|
354
|
+
#dot and double-colon
|
355
|
+
-[DoubleColonOp, VarNameToken, LowerOp]>>ConstantNode,#unary ::
|
356
|
+
-[Value, DotOp, CallNode, LowerOp]>>DotCall, #binary .
|
357
|
+
-[Value, DoubleColonOp, CallNode, LowerOp]>>DotCall, #binary ::
|
358
|
+
-[Value, DoubleColonOp, VarNameToken, LowerOp]>>ConstantNode,#binary ::
|
359
|
+
|
360
|
+
-[Value, "?", Value, ":", Value, LowerOp]>>TernaryNode,
|
361
|
+
|
362
|
+
# -[Value, /^\.\.\.?$/, Value, LowerOp]>>RangeNode,
|
363
|
+
|
364
|
+
-[MethNameToken, '(', Value.-, ')', BlockNode.-, KW(/^(do|\{)$/).~.la]>>CallNode,
|
365
|
+
-[FUNCLIKE_KEYWORD, '(', Value.-, ')',
|
366
|
+
BlockNode.-, KW(/^(do|\{)$/).~.la]>>KWCallNode,
|
367
|
+
|
368
|
+
-[ValueNode|VarNameToken, ',', ValueNode|VarNameToken, LowerOp]>>CommaOpNode,
|
369
|
+
|
370
|
+
-[dont_postpone_semi.lb,
|
371
|
+
Value, ';', Value, LowerOp]>>SequenceNode,
|
372
|
+
|
373
|
+
# -[Value, '=>', Value, LowerOp]>>ArrowOpNode,
|
374
|
+
|
375
|
+
-[KW(')').~.lb, '{', (CommaOpNode|ArrowOpNode).-, '}']>>HashLiteralNode,
|
376
|
+
|
377
|
+
# -[CallSiteNode.~.lb, '{', Value, '}']>>HashLiteralNode,
|
378
|
+
|
379
|
+
# -[KW(')').lb, '{', BlockFormalsNode.-, Value.-, '}']>>BlockNode,
|
380
|
+
-[KW(')').lb, 'do', BlockFormalsNode.-, Value.-, 'end']>>BlockNode,
|
381
|
+
#rubylexer handles the 'low precedence' of do...end
|
382
|
+
|
383
|
+
-[GoalPostNode, Value.-, GoalPostNode]>>BlockFormalsNode,
|
384
|
+
#rubylexer disambiguated operator vs keyword '|'
|
385
|
+
|
386
|
+
-[/^(while|until)$/, Value, /^([:;]|do)$/, Value.-, 'end']>>LoopNode,
|
387
|
+
|
388
|
+
-[/^(if|unless)$/, Value, /^(;|then|:)$/,
|
389
|
+
Value.-, ElsifNode.*, ElseNode.-, 'end'
|
390
|
+
]>>IfNode,
|
391
|
+
|
392
|
+
-['else', Value.-, KW(/^(ensure|end)$/).la]>>ElseNode,
|
393
|
+
|
394
|
+
-['elsif', Value, /^(;|then|:)$/, Value.-,
|
395
|
+
KW(/^e(nd|ls(e|if))$/).la
|
396
|
+
]>>ElsifNode,
|
397
|
+
|
398
|
+
-['module', ConstantNode|VarNameToken, KW(';'), Value.-, 'end']>>ModuleNode,
|
399
|
+
# -['module', ConstantNode|VarNameToken, KW(/^(;|::)$/).~.la]>>
|
400
|
+
# proc{|stack| #insert ; at end of module header if none was present
|
401
|
+
# stack.push KeywordToken.new(';'), stack.pop
|
402
|
+
# },
|
403
|
+
-['class', Value, ';', Value.-, 'end']>>ClassNode,
|
404
|
+
-['class', Value, Op('<'), Value, KW(';').~.la]>>:shift,
|
405
|
+
-['class', Op('<<'), Value, ';', Value.-, 'end']>>MetaClassNode,
|
406
|
+
|
407
|
+
-['alias', BareMethod|VarNameToken, BareMethod|VarNameToken]>>AliasNode,
|
408
|
+
-['undef', BareMethod]>>UndefNode,
|
409
|
+
-[UndefNode, ',', BareMethod]>>UndefNode,
|
410
|
+
|
411
|
+
-['def', CallSiteNode, Op('=').-, KW(';'),
|
412
|
+
Value.-, RescueNode.*, ElseNode.-, EnsureNode.-,
|
413
|
+
'end'
|
414
|
+
]>>MethodNode,
|
415
|
+
|
416
|
+
-['begin',
|
417
|
+
Value.-, RescueNode.*, ElseNode.-, EnsureNode.-,
|
418
|
+
'end'
|
419
|
+
]>>ParenedNode,
|
420
|
+
|
421
|
+
-[Op('=',true), BEGINAFTEREQUALS, Op('rescue',true).la]>>
|
422
|
+
proc{ |stack| stack[-2].after_equals=true },
|
423
|
+
#this is bs. all for an extra :begin in the parsetree
|
424
|
+
|
425
|
+
-[(KW(/^(;|begin)$/)|ParenedNode|RescueNode).lb,
|
426
|
+
'rescue', KW('=>').-, Value.-, /^([:;]|then)$/,
|
427
|
+
]>>RescueHeaderNode,
|
428
|
+
-[ RescueHeaderNode, Value.-, KW(';').-, KW(/^(rescue|else|ensure|end)$/).la
|
429
|
+
]>>RescueNode,
|
430
|
+
|
431
|
+
-['ensure', Value.-, KW('end').la]>>EnsureNode,
|
432
|
+
|
433
|
+
-['[', Value.-, ']']>>ArrayLiteralNode,
|
434
|
+
|
435
|
+
-[Value, '[', Value.-, ']']>>BracketsGetNode,
|
436
|
+
|
437
|
+
-[HereDocNode, StringToken.*, StringToken, StringToken.~.la]>>StringCatNode,
|
438
|
+
-[(StringToken|HereDocNode).~.lb, StringToken.*, StringToken, StringToken, StringToken.~.la]>>StringCatNode,
|
439
|
+
-[(StringToken|HereDocNode).~.lb, StringToken, StringToken.~.la]>>StringNode, #includes regexp, wordlist, backquotes
|
440
|
+
|
441
|
+
-['case', Value.-, KW(/^[:;]$/).-, WhenNode.*, ElseNode.-, 'end']>>CaseNode,
|
442
|
+
|
443
|
+
-['when', Value, /^([:;]|then)$/, Value.-,
|
444
|
+
KW(/^(when|else|end)$/).la
|
445
|
+
]>>WhenNode,
|
446
|
+
|
447
|
+
-['for', Value, 'in', Value, /^([:;]|do)$/, Value.-, 'end']>>ForNode,
|
448
|
+
|
449
|
+
#semicolon cleanup....
|
450
|
+
-[dont_postpone_semi.lb,
|
451
|
+
Value, ';',
|
452
|
+
(KW(/^(#{RubyLexer::INNERBOUNDINGWORDS.gsub(/(rescue|then)\|/,'')}|end|[)}\]])$/)|EoiToken).la
|
453
|
+
]>>proc{|stack| stack.delete_at -2 },
|
454
|
+
-[Value, ';', KW('then').la
|
455
|
+
]>>proc{|stack| stack.delete_at -2 },
|
456
|
+
-[dont_postpone_semi.lb, Value, ';', RescueNode
|
457
|
+
]>>proc{|stack| stack.delete_at -3 },
|
458
|
+
-[(KW(/^(begin|[;:({|]|then|do|else|ensure)$/)|StartNode|RescueHeaderNode).lb, ';'
|
459
|
+
]>>proc{|stack| stack.delete_at -2 },
|
460
|
+
#this rule is somewhat more forgiving than matz' parser...
|
461
|
+
#not all semicolons after :, (, and { keywords should
|
462
|
+
#be ignored. some should cause syntax errors.
|
463
|
+
|
464
|
+
|
465
|
+
#comma cleanup....
|
466
|
+
-[',', KW(/^[}\]]$/).la]>>proc{|stack| stack.delete_at -2},
|
467
|
+
#likewise, this is somewhat too forgiving.
|
468
|
+
#some commas before } or ] should cause syntax errors
|
469
|
+
|
470
|
+
#multiple assignment.... (handled in a subsequent stage?)
|
471
|
+
#(cause it requires that the relative priorities of = and , be reversed!)
|
472
|
+
|
473
|
+
|
474
|
+
#turn lvalues into rvalues if not followed by an assignop
|
475
|
+
-[-{:lvalue? =>true}, (Op('=',true)|MODIFYASSIGNOP).~.la]>>proc{|stack| stack[-2].lvalue=nil},
|
476
|
+
|
477
|
+
#expand the = into a separate token in calls to setters (after . or ::).
|
478
|
+
#but not in method headers
|
479
|
+
-[KW('def').~.lb, Value, DotOp|DoubleColonOp,
|
480
|
+
(MethNameToken&-{:ident=>/^[a-z_][a-z0-9_]*=$/i}).la]>>
|
481
|
+
proc{|stack|
|
482
|
+
methname=stack.pop
|
483
|
+
methname.ident.chomp!('=')
|
484
|
+
offset=methname.offset+methname.ident.size
|
485
|
+
stack.push(
|
486
|
+
CallNode.new(methname,nil,nil,nil,nil),
|
487
|
+
OperatorToken.new('=',offset)
|
488
|
+
)
|
489
|
+
},
|
490
|
+
|
491
|
+
-[NumberToken|SymbolToken]>>LiteralNode,
|
492
|
+
|
493
|
+
#lexer does the wrong thing with -22**44.5, making the - part
|
494
|
+
#of the first number token. it's actually lower precedence than
|
495
|
+
#**... this rule fixes that problem.
|
496
|
+
-[NumberToken&-{:ident=>/\A-/}, Op('**').la]>>
|
497
|
+
proc{|stack|
|
498
|
+
neg_op=OperatorToken.new("-@",stack[-2].offset)
|
499
|
+
neg_op.unary=true
|
500
|
+
stack[-2,0]=neg_op
|
501
|
+
stack[-2].ident.sub!(/\A-/,'')
|
502
|
+
stack[-2].offset+=1
|
503
|
+
},
|
504
|
+
|
505
|
+
#treat these keywords like (rvalue) variables.
|
506
|
+
-[/^(nil|false|true|__FILE__|__LINE__|self)$/]>>VarLikeNode,
|
507
|
+
|
508
|
+
#here docs
|
509
|
+
-[HerePlaceholderToken]>>HereDocNode,
|
510
|
+
-[HereBodyToken]>>proc{|stack|
|
511
|
+
stack.delete_at(-2)#.instance_eval{@headtok}.node.saw_body!
|
512
|
+
},
|
513
|
+
|
514
|
+
]
|
515
|
+
end
|
516
|
+
|
517
|
+
|
518
|
+
|
519
|
+
#Set up a parser.
#  input: either an Array, which is used directly as the token source
#         (it is given a get1token method that shifts off its first
#         element), or anything else, which is handed to RubyLexer.new
#         together with name and line
#  name:  stored as @filename; also passed to RubyLexer.new
#  line:  passed to RubyLexer.new
#  lvars: names marked true in the lexer's localvars; only used when a
#         RubyLexer is created
def initialize(input,name="(eval)",line=1,lvars=[])
  case input
  when Array
    def input.get1token; shift end
    @lexer=input
  else
    @lexer=RubyLexer.new(name,input,line)
    lvars.each{|local_name| @lexer.localvars[local_name]=true }
  end

  @filename=name
  @moretokens=[]
  @unary_or_binary_op=/^[-+&*]$/

  #per-instance caches that evaluate fills in
  @min_sizes={}
  @compiled_rules={}

  #grammar tables
  @rules=self.RULES
  @precedence=self.PRECEDENCE
  @RIGHT_ASSOCIATIVE=self.RIGHT_ASSOCIATIVE
end
|
536
|
+
|
537
|
+
attr_accessor :lexer
|
538
|
+
|
539
|
+
#Fetch the next token for the parser, translating the lexer's raw output
#into the forms the grammar rules expect.
#  recursing: true when called from within get_token itself; suppresses
#             the PRINT_TOKENS debug output for the nested call.
#Tokens queued in @moretokens are returned first. Otherwise a token is read
#from @lexer and adjusted (see the branches below). Returns the token,
#which is also stored in @last_token, or nil when the lexer returns nothing.
def get_token(recursing=false)
  unless @moretokens.empty?
    @last_token=@moretokens.shift
    p @last_token if ENV['PRINT_TOKENS'] unless recursing
    return @last_token
  end

  #single-pass loop: 'redo' below fetches a replacement for a token that
  #should not be passed on, 'break' leaves with the current result
  begin
    result=@lexer.get1token or break
    p result if ENV['RAW_PRINT_TOKENS']

    #set token's line if wanted
    result.line||=@line if result.respond_to? :line=

    if result.respond_to?(:as) and as=result.as
      #token asks to be seen as something else: substitute a keyword
      #token with that ident, flagged via not_real!
      result=KeywordToken.new(as,result.offset)
      result.not_real!
    else

    case result
    when ImplicitParamListStartToken then #treat it like (
      result=KeywordToken.new('(', result.offset)
      result.not_real!
    when ImplicitParamListEndToken then
      result=KeywordToken.new(')', result.offset)
      result.not_real!
#    when AssignmentRhsListStartToken, AssignmentRhsListEndToken:
      #do nothing, pass it thru
    when NewlineToken then
      result=KeywordToken.new(';',result.offset)
    when FileAndLineToken then #so __FILE__ and __LINE__ can know what their values are
      @file=result.file
      @line=result.line
      redo
    when NoWsToken then
      #rubylexer disambiguates array literal from
      #call to [] or []= method with a preceding NoWsToken...
      #kind of a dumb interface.
      result=get_token(true)
      result.ident=='[' and result.extend BracketsCall

    when OperatorToken then
      #unary use of - + & * gets "@" appended to its ident (on a copy)
      if @unary_or_binary_op===result.ident and result.unary
        result=result.dup
        result.ident+="@"
      end

    #more symbol table maintenance....
    when KeywordToken then
      case name=result.ident

      when "do" then
        #a 'do' for which has_end? is false is replaced by ';'
        result=KeywordToken.new(';',result.offset) unless result.has_end?
      when /^(#{BINOP_KEYWORDS.join '|'})$/ then #should be like this in rubylexer
        result=OperatorToken.new(name,result.offset) unless result.has_end?
      when "|" then result=GoalPostNode.new(result.offset) #is this needed still?
      when "__FILE__" then #I wish rubylexer would handle this
        class<<result; attr_accessor :value; end
        result.value=@file.dup
      when "__LINE__" then #I wish rubylexer would handle this
        class<<result; attr_accessor :value; end
        result.value=@line
      end

    when EoiToken then break
    when HereBodyToken then break
    when IgnoreToken then redo
    end
    end
  end while false
  p result if ENV['PRINT_TOKENS'] unless recursing
  return @last_token=result
end
|
624
|
+
|
625
|
+
#Try to apply one grammar rule to the top of @stack.
#  rule: a Reg::Transform; its left side is the sequence of patterns to
#        match, its right side says what to do on a match (a node Class
#        to build, a Proc to call with @stack, or :shift).
#Returns false when the rule does not match, 0 when the rule matched and
#its action is :shift, and true when a node was built or a Proc was run.
#Any exception raised along the way is reported on stdout together with
#the rule, then re-raised.
def evaluate rule
  #dissect the rule
  Reg::Transform===rule or fail
  node_type= rule.right
  rule=rule.left.subregs.dup
  #a trailing Proc or lookahead pattern is checked against the lookahead
  #token only, after the rest has matched
  lookahead_processor=(rule.pop if Proc|::Reg::LookAhead===rule.last)
  #a leading lookback pattern must match but is not part of the new node
  lookback=rule[0]=rule[0].regs(0) if ::Reg::LookBack===rule[0]

  #index of data at which to start matching
  i=@stack.size-1   #-1 because last element of @stack is always lookahead

  #I could call this a JIT compiler, but that's a bit grandiose....
  #more of a JIT pre-processor
  compiled_rule=@compiled_rules[rule]||=
    rule.map{|pattern|
      String|Regexp===pattern ? KW(pattern) : pattern
    }

  #what's the minimum @stack size this rule could match?
  rule_min_size=@min_sizes[compiled_rule]||=
    compiled_rule.inject(0){|sum,pattern|
      sum + pattern.itemrange.begin
    }
  i>=rule_min_size or return false

  matching=[]

  #actually try to match rule elements against each @stack element in turn
  compiled_rule.reverse_each{|matcher|
    i.zero? and fail
    target=matching
    #is this matcher optional? looping?
    loop= matcher.itemrange.last.to_f.infinite?
    optional=matcher.itemrange.first.zero?
    matching.unshift target=[] if loop
    if loop or optional
      matcher=matcher.regs(0)
    end

    begin
      if matcher===@stack[i-=1] #try match
        target.unshift @stack[i]
      else
        #if match failed, the whole rule fails
        #unless this match was optional, in which case, ignore it
        #but bump the data position back up, since the latest datum
        #didn't actually match anything.
        return false unless optional or loop&&!target.empty?
        i+=1
        matching.unshift nil unless loop
        break
      end
    end while loop
  }

  matchrange= i...-1  #what elems in @stack were matched?

  #give lookahead matcher (if any) a chance to fail the match
  case lookahead_processor
  when ::Reg::LookAhead
    return false unless lookahead_processor.regs(0)===@stack.last
  when Proc
    return false unless lookahead_processor[self,@stack.last]
  end

  #if there was a lookback item, don't include it in the new node
  if lookback
    matchrange= i+1...-1  #what elems in @stack were matched?
    matching.shift
  end

  #replace matching elements in @stack with node type found
  case node_type
  when Class
    node=node_type.new(*matching)
    node.line=@line
    @stack[matchrange]=[node]
  when Proc; node_type[@stack]
  when :shift; return 0
  else fail
  end

  return true #let caller know we found a match


rescue Exception=>e
  puts "error (#{e}) while executing rule: #{rule.inspect}"
  puts e.backtrace.join("\n")
  raise
end
|
721
|
+
|
722
|
+
#Raised when the parser cannot reduce its input to a single tree.
#Carries the parser stack at the point of failure in addition to the message.
class ParseError<RuntimeError
  #  msg:   the error message
  #  stack: the parser's stack when the error was detected; available
  #         afterwards through #stack
  def initialize(msg,stack)
    super(msg)
    @stack=stack
    #A disabled ('if false') diagnostic used to sit here; it would have
    #appended the classes and line ranges of stack[1..-2] to the message.
    #It never ran, so it was removed along with the second, redundant
    #super(msg) call that followed it.
  end
  attr_reader :stack
end
|
742
|
+
|
743
|
+
#index into the parser's stack; arguments are passed straight to @stack's []
def [](*args)
  @stack[*args]
end
|
746
|
+
|
747
|
+
#assign into the parser's stack; arguments are passed straight to @stack's []=
def []=(*args)
  @stack.[]=(*args)
end
|
750
|
+
|
751
|
+
#Drive the shift/reduce loop until the input is used up and return the node
#left in the middle of the stack.
#
#Returns a NopNode when the stack never grows beyond its two initial entries.
#Raises ParseError when the stack does not end up with exactly 3 entries, or
#when walking the result turns up ErrorNodes. Any exception raised in the main
#body is announced on stdout (together with the input, or the input's name if
#the input is longer than 1000 characters) and then re-raised.
#
#While running, Thread.current[:$RedParse_parser] is set to this parser unless
#another one is already registered; the previous value is put back on exit.
def parse
  oldparser= Thread.current[:$RedParse_parser]
  Thread.current[:$RedParse_parser]||=self

  #the final entry of @stack always plays the role of lookahead token
  @stack=[StartNode.new, get_token]
  loop {
    #offer the stack to every rule, in reverse order, until one applies
    shift=nil
    @rules.reverse_each{|rule|
      shift=evaluate(rule)
      break if shift
    }
    #a reduction happened; try the rules again before reading more input
    next if shift==true

    #nothing was reduced, so another token is needed
    tok=get_token

    #finished when the lexer is dry, or a second end-of-input shows up
    break if tok.nil? or (EoiToken===tok and EoiToken===@stack.last)

    @stack.push tok
  }

  #nothing was ever pushed beyond the two initial entries
  return NopNode.new if @stack.size==2

  #a clean parse leaves exactly: StartNode, result, EoiToken
  if @stack.size!=3
    pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
    top=MisparsedNode.new("(toplevel)", @stack[1...-1],'')
    raise ParseError.new(top.msg,@stack)
  end
  fail unless EoiToken===@stack.last
  fail unless StartNode===@stack.first

  result= @stack[1]

  #multiple assignment has to be sorted out after the fact by walking the
  #tree, since = and , swap relative precedence there. (currently disabled)
  #    result.respond_to? :fixup_multiple_assignments! and
  #      result=result.fixup_multiple_assignments!

  #the same sometimes goes for = versus rescue. (currently disabled)
  #    result.respond_to? :fixup_rescue_assignments! and
  #      result=result.fixup_rescue_assignments!

  #gather a message for every error node found in the tree
  msgs=[]
  if result.respond_to? :walk #hack hack
    result.walk{|parent,i,subi,node|
      if ErrorNode===node
        msgs<< @filename+":"+node.blame.msg
        false
      else
        true
      end
    }
  end
  result.errors=msgs unless msgs.empty?
  #lexer errors and exceptions in lexer or parser actions ought to be
  #collected the same way, but are not yet
  #    puts msgs.join("\n")

rescue Exception
  #say what was being parsed, then let the exception continue on its way
  source=@lexer
  if Array===source
    puts "error while parsing:"
    pp source
    source=nil
  else
    source=source.original_file
    inputname=@lexer.filename
    #don't dump big inputs; name them instead
    source=inputname if source.to_s.size>1000
  end
  puts "error while parsing: <<< #{source} >>>"
  raise
else
  #reached only when the main body ran to its end without raising
  unless msgs.empty?
    pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK']
    raise RedParse::ParseError.new(msgs.join("\n"),@stack)
  end

  return result
ensure
  Thread.current[:$RedParse_parser]=oldparser
end
|
840
|
+
|
841
|
+
#All sub-matchers found on the left-hand side of every rule, as one
#completely flattened list.
def LEFT_MATCHERS
  per_rule=self.RULES.map{|rule| rule.left.subregs }
  per_rule.flatten
end
|
842
|
+
#List the classes mentioned by the rule table: every Class that occurs in a
#rule's left-hand matchers (looking inside lookahead/lookback wrappers and
#inside And/Or combinations), followed by every Class used as a rule's
#right-hand side. Duplicates are not removed.
def STACKABLE_CLASSES
  #remove lookahead and lookback decoration
  left_matchers=self.LEFT_MATCHERS.map{|m|
    case m
    when Reg::LookAhead,Reg::LookBack then m.regs(0)
    else m
    end
  }

  #reduce a matcher to the class(es) it names; matchers of any other kind
  #yield nil and are dropped by the compact below
  rule_juicer=proc{|m|
    case m
    when Class then m
    when Reg::And,Reg::Or then m.subregs.map(&rule_juicer)
    else nil
    end
  }
  left_classes=left_matchers.map{|m| rule_juicer[m] }.flatten.compact
  right_classes= self.RULES.map{|r| r.right }.grep(Class) #classes in productions
  left_classes+right_classes
end
|
863
|
+
=begin
|
864
|
+
HIER=Class::FlattenedHierarchy.new *STACKABLE_CLASSES
|
865
|
+
|
866
|
+
LOOKAHEAD_MATCHERS=self.RULES.map{|r| r.left.subregs.last }.map{|la| Reg::LookAhead===la and la.regs(0) }
|
867
|
+
|
868
|
+
LOOKAHEAD_CLASSES=LOOKAHEAD_MATCHERS.map(&rule_juicer)
|
869
|
+
LOOKAHEAD_CLASSES.each_with_index{|classes,i|
|
870
|
+
case classes
|
871
|
+
when Class: huh
|
872
|
+
when Array: classes.flatten.each{huh}
|
873
|
+
else
|
874
|
+
end
|
875
|
+
}
|
876
|
+
=end
|
877
|
+
# def fixup_multiple_assignments!; end
|
878
|
+
end
|
879
|
+
|
880
|
+
|
881
|
+
if __FILE__==$0
|
882
|
+
require 'problemfiles'
|
883
|
+
#Marker error: raised when text that was only supposed to be parsed ends up
#being executed instead.
class NeverExecThis < RuntimeError
end
|
884
|
+
|
885
|
+
#Compare two (possibly nested) arrays position by position.
#
#a:: first value; anything that is not an Array is treated as [a]
#b:: second value; anything that is not an Array is treated as [b]
#
#Returns [result, diff]. When a==b, result is a itself and diff is false.
#Otherwise result is a copy of a in which each differing element has been
#replaced by a one-entry hash {element_of_a=>element_of_b}, nested arrays
#have been compared recursively, and a position that exists in only one of
#the arrays (and is nil in the other) is marked with a 'size mismatch' hash.
#diff is true if any difference was recorded. Two Regexps with the same
#to_s and options are not counted as different.
def arraydiff(a,b)
  a==b and return [a,false]
  a=[a] unless Array===a
  b=[b] unless Array===b  #same treatment as a, so a lone value compares sanely
  result= a.dup
  diff=false
  size= [a.size,b.size].max
  size.times{|i|
    ai=a[i]
    bi=b[i]
    if Array===ai and Array===bi
      result_i,diff_i= arraydiff(ai,bi)
      diff||=diff_i
      result[i]=result_i
    elsif ai!=bi
      next if Regexp===ai && ai.to_s==bi.to_s &&
              ai.options==bi.options
      diff=true
      result[i]={ai=>bi}
    elsif ai.nil? and (i>=a.size or i>=b.size)
      #both sides read as nil only because one array has already ended;
      #a nil that is really present in both arrays is not a difference
      result[i]={'size mismatch'=>"#{a.size} for #{b.size}"}
      diff=true
    end

    #fold a difference hash into the one directly before it, if any.
    #unary minus on an Array is not core Ruby -- presumably sugar from the
    #Reg library; confirm.
    #NOTE(review): oldkeys is an Array here, so oldkeys.children looks wrong,
    #and oldval is assigned but never read (typo for oldvals?). The splice
    #below also shortens result, so later indexes no longer line up with a.
    #Left exactly as found until the intended Reg API is confirmed.
    if i.nonzero? and Hash===result[i] and Hash===result[i-1]
      old=result[i-1]
      oldkeys=old.keys
      oldvals=old.values
      if Reg::Subseq===oldkeys.first
        oldkeys=oldkeys.children
        oldval=oldvals.children
      end
      result[i-1..i]=[ {-[*oldkeys+result[i].keys]=>-[*oldvals+result[i].values]} ]
    end
  }
  return result,diff
end
|
920
|
+
|
921
|
+
#Command-line driver: parse each input with RedParse and print the tree in
#the requested form, or compare it against ParseTree's output.
#
#options:
#  --pp / --lisp / --parsetree   choose how the tree is printed (default --pp)
#  --vsparsetree[2]              compare against ParseTree instead of printing
#  --update-problemfiles         track failing/differing inputs in a ProblemFiles
#  -q / -v                       quiet (default) / verbose
#  -e CODE...                    parse the rest of the command line as source
#  --                            end of options
#With no arguments stdin is parsed; a single directory argument means every
#.rb file below it. The exit status is 1 if any comparison differed, else 0.
output=:pp
quiet=true
inputs=names=problemfiles=nil
while /^-/===ARGV.first
  case opt=ARGV.shift
  when "--" then break
  when "--pp" then output=:pp
  when "--lisp" then output=:lisp
  when "--parsetree" then output=:parsetree
  when "--vsparsetree" then output=:vsparsetree
  when "--vsparsetree2" then output=:vsparsetree2
  when "--update-problemfiles" then problemfiles=ProblemFiles.new
  when "-q" then quiet=true
  when "-v" then quiet=false
  when "-e"
    inputs=[ARGV.join(" ")]
    names=["-e"]
    break
  else fail "unknown option: #{opt}"
  end
end

unless inputs
  if ARGV.empty?
    inputs=[STDIN.read]
    names=["-"]
  elsif ARGV.size==1 and (Dir.entries(ARGV.first) rescue false)
    names=Dir[ARGV.first+"/**/*.rb"]
  else
    names=ARGV.dup
  end
  #an unreadable file becomes nil and is skipped by the loop below;
  #File.read closes the file again, unlike File.open(name).read
  inputs||=names.map{|name|
    begin
      File.read(name)
    rescue StandardError
      nil
    end
  }
end

result=0

#prefix for text handed to ParseTree: raises/throws at once if the text is
#ever executed rather than just parsed
safety="BEGIN{raise NeverExecThis};BEGIN{throw :never_exec_this,1};\n"
nullsafety="\n"
#nil (unreadable) inputs stay nil here; safety+nil would be a TypeError
safe_inputs=inputs.map{|input| input && safety+input}

inputs.each_index{|i|
  begin

    input=inputs[i] or next
    name=names[i]

    input=nullsafety+input
    #print name+"... "; STDOUT.flush

    begin
      tree=nil
      #catch yields nil on a normal parse, and the thrown value otherwise
      if catch(:never_exec_this){
           tree=RedParse.new(input,name).parse; nil
         }
        puts "RedParse attempted to execute parse data in #{name}"
        next
      end
    rescue Interrupt then exit 2
    rescue Exception=>e
      #puts e.backtrace.join("\n")
      e.message << " during parse of #{name}"
      problemfiles.push name if problemfiles
      raise e
    end
    tree or fail "parsetree was nil for #{name}"

    case output
    when :pp
      require 'pp'
      pp tree
    when :lisp
      puts tree.to_lisp
    when :parsetree
      pp tree.to_parsetree
    when :vsparsetree,:vsparsetree2
      begin
        require 'rubygems'
      rescue Exception
      end
      require 'parse_tree'
      #require 'algorithm/diff'
      begin
        mine=tree.to_parsetree(:quirks)
        if IO===input
          input.rewind
          input=input.read
        end
        ryans=nil
        catch(:never_exec_this){
          ryans=ParseTree.new.parse_tree_for_string(safe_inputs[i],name); nil
        } and raise NeverExecThis
        delta,is_diff=arraydiff(mine,ryans)
      rescue NeverExecThis
        puts "ParseTree attempted to execute parse data in #{name}"
        next
      rescue Interrupt then exit 2
      rescue Exception=>e
        #raise( RuntimeError.new( "#{e} during to_parsetree of #{name}" ) )
        puts "error during to_parsetree of #{name}"
        problemfiles.push name if problemfiles
        raise
      end
      if output==:vsparsetree2
        if !quiet or is_diff
          puts "mine:"
          pp mine
          puts "ryans:" if is_diff
          pp ryans if is_diff
        end
      elsif !quiet or is_diff
        puts 'differences in '+name if is_diff
        pp delta
      end
      if is_diff
        result=1
        problemfiles.push name if problemfiles
      else
        puts "no differences in "+name
        problemfiles.delete name if problemfiles
      end
    end

  rescue NeverExecThis
    puts "mysterious attempt to execute parse data in #{name}"
    next
  rescue Interrupt,SystemExit then exit 2
  rescue Exception=>e
    #report the failure and carry on with the next input
    puts "#{e}:#{e.class}"
    puts e.backtrace.join("\n")
    #problemfiles.push name if problemfiles
    #raise
  ensure
    STDOUT.flush
  end
}
exit result
|
1061
|
+
end
|
1062
|
+
|
1063
|
+
=begin todo:
|
1064
|
+
v merge DotCallNode and CallSiteNode and CallWithBlockNode
|
1065
|
+
remove actual Tokens from parse tree...
|
1066
|
+
instead, each node has a corresponding range of tokens
|
1067
|
+
-in an (optional) array of all tokens printed by the tokenizer.
|
1068
|
+
split ParenedNode into ParenedNode + Rescue/EnsureNode
|
1069
|
+
'incomplete' subtrees such as ElseNode, ElsifNode, RescueNode
|
1070
|
+
-should not appear in final output
|
1071
|
+
v split keywordopnode into loop and if varieties?
|
1072
|
+
=end
|
1073
|
+
|
1074
|
+
=begin optimization opportunities:
|
1075
|
+
top of stack slot contains mostly keywords, specific node classes, and Value
|
1076
|
+
lookahead slot contains mostly LowerOp and keywords, with a few classes and inverted keywords
|
1077
|
+
-(LowerOp is hard to optimize)
|
1078
|
+
if top of stack matcher is Value, then the next matcher down is mostly keywords, with some operators
|
1079
|
+
class membership can be optimized to test of integer within a range
|
1080
|
+
keywords could be stored as symbols instead of strings
|
1081
|
+
a few rules may need exploding (eg, ensure) to spoon feed the optimizer
|
1082
|
+
make all Nodes descendants of Array
|
1083
|
+
=end
|