rubylexer 0.6.2
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +510 -0
- data/README +134 -0
- data/Rantfile +37 -0
- data/assert.rb +31 -0
- data/charhandler.rb +84 -0
- data/charset.rb +76 -0
- data/context.rb +174 -0
- data/howtouse.txt +136 -0
- data/io.each_til_charset.rb +247 -0
- data/require.rb +103 -0
- data/rlold.rb +12 -0
- data/rubycode.rb +44 -0
- data/rubylexer.rb +1589 -0
- data/rulexer.rb +532 -0
- data/symboltable.rb +65 -0
- data/testcode/deletewarns.rb +39 -0
- data/testcode/dumptokens.rb +38 -0
- data/testcode/locatetest +12 -0
- data/testcode/rubylexervsruby.rb +104 -0
- data/testcode/rubylexervsruby.sh +51 -0
- data/testcode/tokentest.rb +237 -0
- data/testcode/torment +51 -0
- data/testdata/1.rb.broken +729 -0
- data/testdata/23.rb +24 -0
- data/testdata/g.rb +15 -0
- data/testdata/newsyntax.rb +18 -0
- data/testdata/noeolatend.rb +1 -0
- data/testdata/p.rb +1227 -0
- data/testdata/pleac.rb.broken +6282 -0
- data/testdata/pre.rb +33 -0
- data/testdata/pre.unix.rb +33 -0
- data/testdata/regtest.rb +621 -0
- data/testdata/tokentest.assert.rb.can +7 -0
- data/testdata/untitled1.rb +1 -0
- data/testdata/w.rb +22 -0
- data/testdata/wsdlDriver.rb +499 -0
- data/testing.txt +130 -0
- data/testresults/placeholder +0 -0
- data/token.rb +486 -0
- data/tokenprinter.rb +152 -0
- metadata +76 -0
data/rlold.rb
ADDED
data/rubycode.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
=begin copyright
|
2
|
+
rubylexer - a ruby lexer written in ruby
|
3
|
+
Copyright (C) 2004,2005 Caleb Clausen
|
4
|
+
|
5
|
+
This library is free software; you can redistribute it and/or
|
6
|
+
modify it under the terms of the GNU Lesser General Public
|
7
|
+
License as published by the Free Software Foundation; either
|
8
|
+
version 2.1 of the License, or (at your option) any later version.
|
9
|
+
|
10
|
+
This library is distributed in the hope that it will be useful,
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
13
|
+
Lesser General Public License for more details.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU Lesser General Public
|
16
|
+
License along with this library; if not, write to the Free Software
|
17
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
|
+
=end
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
require "token.rb"
|
23
|
+
require "tokenprinter.rb"
|
24
|
+
|
25
|
+
#A Token that wraps a sequence of tokens together with the file name and
#line number they came from.
#NOTE(review): +ident+ is not defined in this class; presumably it is the
#Token accessor for the value handed to super -- confirm against token.rb.
class RubyCode < Token
  #tokens:: the wrapped token sequence (passed on to Token#initialize)
  #filename:: name of the file the tokens came from
  #linenum:: line number handed to the printer in to_s
  def initialize(tokens,filename,linenum)
    super(tokens)
    @filename=filename
    @linenum=linenum
  end

  #NOTE(review): this body looks like an unfinished placeholder (+huh+ is
  #not defined anywhere visible); it is left exactly as it was.
  def [](*args)
    exec? ident.huh
  end

  #Render every wrapped token through a KeepWsTokenPrinter and return the
  #concatenated text.
  def to_s()
    result=[]
    keepwsprinter=KeepWsTokenPrinter.new('',@linenum)
    ident.each{|tok| result << keepwsprinter.sprint(tok) }
    #Array#to_s only concatenates on ruby 1.8; on later versions it returns
    #the inspect form. join gives the concatenation on every version.
    return result.join
  end
end
|
43
|
+
|
44
|
+
|
data/rubylexer.rb
ADDED
@@ -0,0 +1,1589 @@
|
|
1
|
+
=begin copyright
|
2
|
+
rubylexer - a ruby lexer written in ruby
|
3
|
+
Copyright (C) 2004,2005 Caleb Clausen
|
4
|
+
|
5
|
+
This library is free software; you can redistribute it and/or
|
6
|
+
modify it under the terms of the GNU Lesser General Public
|
7
|
+
License as published by the Free Software Foundation; either
|
8
|
+
version 2.1 of the License, or (at your option) any later version.
|
9
|
+
|
10
|
+
This library is distributed in the hope that it will be useful,
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
13
|
+
Lesser General Public License for more details.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU Lesser General Public
|
16
|
+
License along with this library; if not, write to the Free Software
|
17
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
|
+
=end
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
require "rulexer"
|
23
|
+
require "symboltable"
|
24
|
+
require "io.each_til_charset"
|
25
|
+
require "context.rb"
|
26
|
+
|
27
|
+
|
28
|
+
|
29
|
+
#-----------------------------------
|
30
|
+
class RubyLexer < RuLexer
|
31
|
+
include NestedContexts
|
32
|
+
|
33
|
+
#matches an overridable operator at the start of a string
#(anchored at the front only; there is no trailing $)
RUBYSYMOPERATORREX=
  %r{^([&|^/%~]|=(==?|~)|>[=>]?|<(<|=>?)?|[+\-]@?|\*\*?|\[\]=?)}
# (nasty beastie, eh?)
#these are the overridable operators
#does not match flow-control operators like: || && ! or and if not
#or op= ops like: += -= ||=
#or .. ... ?:
#for that use:
RUBYNONSYMOPERATORREX=
  %r{^([%^~/\-+]=|(\|\|?|&&?)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|=>?|![=~]?)$}
#either kind of operator
RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
UNSYMOPS=/^[~!]$/ #always unary
UBSYMOPS=/^([*&+-]|::)$/ #ops that could be unary or binary
#WHSPLF is not defined in the visible source; a backslash and # are appended to it
WHSPCHARS=WHSPLF+"\\#"
#keywords usable either as a statement opener or as an infix modifier
OPORBEGINWORDS="(if|unless|while|until)"
#keywords that can open a construct closed by 'end'
BEGINWORDS=/^(def|class|module|begin|for|case|do|#{OPORBEGINWORDS})$/o
FUNCLIKE_KEYWORDS=/^(break|next|redo|return|raise|yield|defined\?|retry|super|BEGIN|END)$/
VARLIKE_KEYWORDS=/^(__FILE__|__LINE__|false|nil|self|true)$/
INNERBOUNDINGWORDS="(else|elsif|ensure|in|then|rescue|when)"
BINOPWORDS="(and|or)"
#words that, when they follow an identifier, never begin a parameter list;
#an optional trailing non-identifier char is allowed by the pattern
NEVERSTARTPARAMLISTWORDS=/^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)[^a-zA-Z0-9_!?=]?/o
NEVERSTARTPARAMLISTFIRST=CharSet[%[aoeitrwu]] #char set that begins NEVERSTARTPARAMLIST
NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST

#every keyword; built from the pieces above (extended syntax, so the
#line breaks and spaces inside the literal are not significant)
RUBYKEYWORDS=%r{
  ^(alias|#{BINOPWORDS}|not|undef|__END__|end|
  #{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
  #{INNERBOUNDINGWORDS}|#{BEGINWORDS}
  )$
}xo
#maps a leading character (or set of characters) to the name of the
#method that handles it; handed to CharHandler.new in initialize
CHARMAPPINGS = {
  ?$ => :dollar_identifier,
  ?@ => :at_identifier,
  ?a..?z => :identifier,
  ?A..?Z => :identifier,
  ?_ => :identifier,
  ?0..?9 => :number,
  ?" => :double_quote,
  ?' => :single_quote,
  ?` => :back_quote,

  WHSP => :whitespace, #includes \r
  ?, => :comma,
  ?; => :semicolon,

  ?^ => :biop,
  ?~ => :tilde,
  ?= => :equals,
  ?! => :exclam,
  ?. => :dot,

  #these ones could signal either an op or a term
  ?/ => :regex_or_div,
  "|>" => :quadriop,
  "*&" => :star_or_amp, #could be unary
  "+-" => :plusminus, #could be unary
  ?< => :lessthan,
  ?% => :percent,
  ?? => :char_literal_or_op, #single-char int literal
  ?: => :symbol_or_op,
  ?\n => :newline, #implicitly escaped after op
  #?\r => :newline, #implicitly escaped after op

  ?\\ => :escnewline,
  ?\0 => :eof,

  "[({" => :open_brace,
  "])}" => :close_brace,


  ?# => :comment
}

#reader for @incomplete_here_tokens (initialised to [] in initialize and
#appended to by here_spread_over_ruby_code)
attr :incomplete_here_tokens
|
107
|
+
|
108
|
+
|
109
|
+
#-----------------------------------
|
110
|
+
#Create a lexer reading from +file+.
#filename:: name of the input; passed on to the superclass
#file:: the input object; passed on to the superclass
#linenum:: number of the first line (defaults to 1)
def initialize(filename,file,linenum=1)
  super(filename,file, linenum)

  #per-lexer bookkeeping
  @defining_lvar=nil
  @localvars=SymbolTable.new
  @incomplete_here_tokens=[]
  @start_linenum=linenum

  #the context stack starts out holding a single top level context
  #(balanced_braces? checks for exactly that shape)
  @bracestack=[TopLevelContext.new]

  #character dispatch table built from CHARMAPPINGS
  #NOTE(review): :illegal_char is presumably the fallback handler -- confirm in charhandler.rb
  @toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)

  start_of_line_directives
end
|
122
|
+
|
123
|
+
#-----------------------------------
|
124
|
+
#Fetch the next token from the superclass lexer, then do housekeeping:
#ignorable tokens are returned untouched, anything that is not a Token
#raises, and every other token is remembered in @last_operative_token
#before being returned.
def get1token
  tok=super #most of the action's here

  #ignorable tokens do not update @last_operative_token
  return tok if IgnoreToken===tok

  #check for bizarre token types
  unless Token===tok
    raise "#{@filename}:#{linenum}:token is a #{tok.class}, last is #{@last_operative_token}"
  end

  @last_operative_token=tok
  tok
end
|
143
|
+
|
144
|
+
|
145
|
+
|
146
|
+
#-----------------------------------
|
147
|
+
#True when the context stack holds exactly one entry and that entry is
#a TopLevelContext.
def balanced_braces?
  return false unless @bracestack.size==1
  TopLevelContext===@bracestack.first
end
|
152
|
+
|
153
|
+
#-----------------------------------
|
154
|
+
#Lex a name that starts with a dollar sign.
#Returns nil when the next char is not '$'. Otherwise returns whatever
#lexerror gives back for a VarNameToken holding the text read so far;
#the error message is nil when a name followed the '$'.
def dollar_identifier(ch=nil)
  name=eat_next_if(?$)
  return nil unless name

  #ordinary identifier first, then the special globals
  rest=(identifier_as_string(?$) or special_global)
  problem=nil
  if rest
    name<<rest
  else
    problem="missing $id name"
  end

  lexerror(VarNameToken.new(name),problem)
end
|
164
|
+
|
165
|
+
#-----------------------------------
|
166
|
+
#Lex a name that starts with one or two at-signs.
#Returns nil when the next char is not '@'. Otherwise returns whatever
#lexerror gives back for a VarNameToken holding the text read so far;
#the error message is nil when a name followed the at-sign(s).
def at_identifier(ch=nil)
  name=eat_next_if(?@)
  return nil unless name

  #an optional second at-sign
  second_at=eat_next_if(?@)
  name << (second_at or '')

  rest=identifier_as_string(?@)
  problem=nil
  if rest
    name<<rest
  else
    problem="missing @id name"
  end
  lexerror(VarNameToken.new(name),problem)
end
|
175
|
+
|
176
|
+
private
|
177
|
+
#-----------------------------------
|
178
|
+
#Append the incomplete here-document tokens of another lexer (+rl+) to
#this lexer's own list. +rl+ must have at least one; +tok+ is unused.
def here_spread_over_ruby_code(rl,tok)
  pending=rl.incomplete_here_tokens
  assert(!pending.empty?)
  @incomplete_here_tokens = @incomplete_here_tokens + pending
end
|
182
|
+
|
183
|
+
#-----------------------------------
|
184
|
+
#Push an ExpectDoOrNlContext for the starter word +st+ onto the context
#stack, tagged with the current line number.
def expect_do_or_end_or_nl!(st)
  ctx=ExpectDoOrNlContext.new(st,/(do|;|:|\n)/,@linenum)
  @bracestack.push ctx
end
|
187
|
+
|
188
|
+
#-----------------------------------
|
189
|
+
#match NoWstoken, ws, comment, or (escaped?) newline repeatedly
|
190
|
+
#Read tokens for as long as they are ignorable and return them in order.
#The first token that is not ignorable is put back at the front of
#@moretokens. An end-of-input token seen among the ignorable ones is
#reported through lexerror.
def maybe_no_ws_token
  skipped=[]
  tok=get1token
  while IgnoreToken===tok
    lexerror tok,"end of file not expected here" if EoiToken===tok
    skipped << tok
    tok=get1token
  end
  assert(!(IgnoreToken===tok))
  @moretokens.unshift tok
  skipped
end
|
200
|
+
|
201
|
+
#-----------------------------------
|
202
|
+
#characters that can begin an ignorable token
WSCHARSET=CharSet["#\\\n\s\t\v\r\f"]

#Collect and return the run of ignorable tokens at the current position.
#allow_eof:: when false, an end-of-input token is reported via lexerror
#If a block is given it is called with each NewlineToken encountered.
#
#(the former 'if true ... else' wrapper is gone: its else arm could
#never run)
def ignored_tokens(allow_eof=false)
  result=[]
  #first drain ignorable tokens that were already queued
  result<<@moretokens.shift while IgnoreToken===@moretokens.first
  @moretokens.empty? or return result

  loop do
    if @moretokens.empty?
      #nothing queued: peek at the input instead
      WSCHARSET===nextchar or break
    else
      #get1token may have queued tokens during an earlier pass
      IgnoreToken===@moretokens.first or NewlineToken===@moretokens.first or
        break
    end

    tok=get1token
    result<<tok
    #order matters: NewlineToken and EoiToken are tested before IgnoreToken
    case tok
    when NewlineToken then block_given? and yield tok
    when EoiToken then allow_eof or lexerror tok,"end of file not expected here(2)"
    when IgnoreToken
    else raise "impossible"
    end
  end

  return result
end
|
241
|
+
|
242
|
+
#-----------------------------------
|
243
|
+
#Run the block with a fresh, empty @moretokens (which is also yielded to
#the block), then append whatever the block queued to the previous queue
#and make that the current queue again. Returns the block's value.
#
#The restore is done in an ensure clause so that the previous queue is
#put back even if the block raises or exits non-locally.
def safe_recurse
  old_moretokens=@moretokens
  #old_bracestack=@bracestack.dup
  @moretokens=[]
  begin
    yield @moretokens
  ensure
    #assert @incomplete_here_tokens.empty?
    #assert @bracestack==old_bracestack
    @moretokens= old_moretokens.concat @moretokens
  end
  #need to do something with @last_operative_token?
end
|
254
|
+
|
255
|
+
#-----------------------------------
|
256
|
+
#Read what follows the '$' of a special global ($-a and friends).
#Tries, in this order: a single punctuation char, a '-' followed by one
#more char, a run of digits. Returns the text read, or a false value
#when none of those is present.
def special_global #handle $-a and friends
  assert prevchar=='$'

  #order matters here, but it shouldn't
  #(but til_charset must be last)
  punct=eat_next_if(/^[!@&+`'=~\/\\,.;<>*"$?:]$/)
  return punct if punct

  if eat_next_if('-')
    return "-"+getchar
  end

  (?0..?9)===nextchar ? til_charset(/[^\d]/) : nil
end
|
266
|
+
|
267
|
+
#-----------------------------------
|
268
|
+
#Lex an alphabetic identifier and return the first resulting token.
#The text is handed to parse_keywords; when it is not a keyword the block
#below runs and var_or_meth_name decides between variable and method
#name. All resulting tokens are queued at the front of @moretokens and
#the first of them is shifted off and returned.
def identifier(context=nil)
  oldpos=@file.pos  #position of the identifier's first char
  str=identifier_as_string(context)

  #skip keyword processing if 'escaped' as it were, by def, . or ::
  #or if in a non-bare context
  #just asserts because those contexts are never encountered.
  #control goes through symbol(<...>,nil)
  assert( /^[a-z_]$/i===context)
  assert !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)

  @moretokens.unshift(*parse_keywords(str,oldpos) do
    #if not a keyword,
    case str
    when FUNCLIKE_KEYWORDS: #do nothing
    when VARLIKE_KEYWORDS,RUBYKEYWORDS: raise "shouldnt see keywords here, now"
    end
    #run with a scratch token queue; see safe_recurse
    safe_recurse { |a| var_or_meth_name(str,@last_operative_token,oldpos) }
  end)
  return @moretokens.shift
end
|
289
|
+
|
290
|
+
#-----------------------------------
|
291
|
+
#Read an identifier from the input and return it as a string, or nil
#when the next char is not a letter or underscore.
#context:: ?@ or ?$ forbid any trailing =, ? or !; ?: allows all three;
#          anything else allows only ? and !
def identifier_as_string(context)
  #must begin w/ letter or underscore
  str=eat_next_if(/^[_a-z]$/i) or return nil

  #equals, question mark, and exclamation mark
  #might be allowed at the end in some contexts.
  #(in def headers and symbols)
  #otherwise, =,?, and ! are to be considered
  #separate tokens. confusing, eh?
  #i hope i've captured all right conditions....
  #context should always be ?: right after def, ., and :: now

  #each of these is either the permitted suffix char or nil
  maybe_eq,maybe_qm,maybe_ex = case context
    when ?@,?$ then [nil,nil,nil]
    when ?: then [?=, ??, ?!]
    else [nil,??, ?!]
  end

  str<<til_charset(/[^a-z0-9_]/i)

  #look for ?, !, or =, if allowed
  case b=@file.getc
  when nil #means we're at eof
    #handling nil here prevents b from ever matching
    #a nil value of maybe_qm, maybe_ex or maybe_eq
  when maybe_qm
    str << b
  when maybe_ex
    nc=(nextchar unless @file.eof?)
    #does ex appear to be part of a larger operator?
    if nc==?= #or nc==?~
      back1char  #leave the ! in the input
    else
      str << b
    end
  when maybe_eq
    nc=(nextchar unless @file.eof?)
    #does eq appear to be part of a larger operator?
    if nc==?= or nc==?~ or nc==?>
      back1char  #leave the = in the input
    else
      str << b
    end
  else
    #not a suffix char at all: put it back
    back1char
  end


  return str
end
|
341
|
+
|
342
|
+
#-----------------------------------
|
343
|
+
#contexts in which comma may appear in ruby:
|
344
|
+
#multiple lhs (terminated by assign op)
|
345
|
+
#multiple rhs (in implicit context) (tbd)
|
346
|
+
#method actual param list (in ( or implicit context)
|
347
|
+
#method formal param list (in ( or implicit context)
|
348
|
+
#block formal param list (in | context) (tbd)
|
349
|
+
#hash immediate (in imm{ context)
|
350
|
+
#array immediate (in imm[ context)
|
351
|
+
#element reference/assignment (in [] or []= method actual parameter context)
|
352
|
+
#list after for
|
353
|
+
#list after rescue
|
354
|
+
#list after when
|
355
|
+
#list after undef
|
356
|
+
|
357
|
+
#note: comma in parens not around a param list is illegal
|
358
|
+
|
359
|
+
#-----------------------------------
|
360
|
+
#a comma has been seen. are we in an
|
361
|
+
#lvalue list or some other construct that uses commas?
|
362
|
+
#A comma counts as part of an lvalue list whenever the innermost
#context is not a ListContext.
def comma_in_lvalue_list?
  innermost=@bracestack.last
  !(ListContext===innermost)
end
|
365
|
+
|
366
|
+
#-----------------------------------
|
367
|
+
#Is the lexer at a spot where a bare name defines a local variable?
#Returns @defining_lvar when that is set; otherwise the answer depends
#on the innermost context: a ForSMContext in state :for, a
#RescueSMContext in state :arrow, or any BlockParamListContext.
#Returns nil for every other kind of context.
def in_lvar_define_state
  #@defining_lvar is a hack
  return @defining_lvar if @defining_lvar

  ctx=@bracestack.last
  if ForSMContext===ctx then ctx.state==:for
  elsif RescueSMContext===ctx then ctx.state==:arrow
  elsif BlockParamListContext===ctx then true
  end
end
|
375
|
+
|
376
|
+
#-----------------------------------
|
377
|
+
#determine if an alphabetic identifier refers to a variable
|
378
|
+
#or method name. generates implicit parenthes(es) if it is a
|
379
|
+
#call site and no explicit parens are present. starts an implicit param list
|
380
|
+
#if appropriate. adds tok to the
|
381
|
+
#local var table if its a local var being defined for the first time.
|
382
|
+
|
383
|
+
#note: what we here call variables (rather, constants) following ::
|
384
|
+
#might actually be methods at runtime, but that's immaterial to tokenization.
|
385
|
+
|
386
|
+
#note: this routine should determine the correct token type for name and
|
387
|
+
#create the appropriate token. currently this is not done because callers
|
388
|
+
#typically have done it (perhaps incorrectly) already.
|
389
|
+
#Decide whether the identifier +name+ is a variable or a method name and
#return the resulting tokens (name token first, then any implicit
#parameter list tokens, then the ignorable tokens that followed it).
#name:: the identifier text
#lasttok:: the operative token that preceded the identifier
#pos:: position recorded in the name token
def var_or_meth_name(name,lasttok,pos)
  #look for call site if not a keyword or keyword is function-like
  #look for and ignore local variable names

  assert String===name

  #fixme: keywords shouldn't be treated specially after :: and .

  #maybe_local really means 'maybe local or constant'
  #(stays nil for names ending in a non-identifier char such as ? or !)
  maybe_local=case name
    when /[^a-z_0-9]$/i: #do nothing
    when /^[a-z_]/: (@localvars===name or VARLIKE_KEYWORDS===name or in_lvar_define_state) and not lasttok===/^(\.|::)$/
    when /^[A-Z]/: is_const=true;not lasttok==='.' #this is the right algorithm for constants...
  end

  assert(@moretokens.empty?)

  tok=@last_operative_token=VarNameToken.new(name,pos)

  oldpos=@file.pos  #position just after the name; used for the implicit paren tokens
  sawnl=false
  result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
  #name is followed by a newline or end of file: it is either a variable
  #or a call with an empty implicit parameter list
  sawnl || @file.eof? and return result.unshift(
    *if maybe_local : [tok]
    else [MethNameToken.new(name,pos), #insert implicit parens right after tok
          ImplicitParamListStartToken.new( oldpos),
          ImplicitParamListEndToken.new( oldpos) ]
    end
  )

  #if next op is assignment (or comma in lvalue list)
  #then omit implicit parens
  assignment_coming=case nc=nextchar
    when ?=: not /^=[=~]$/===readahead(2)
    when ?,: comma_in_lvalue_list?
    when ?>,?<: /^([<>])\1=$/===readahead(3)
    when ?*,?|,?&: /^([*|&])\1?=/===readahead(3)
    when ?%,?/,?-,?+,?^: readahead(2)[1..1]=='='
  end
  if (assignment_coming or in_lvar_define_state)
    tok=VarNameToken.new(name,pos)
    if /[^a-z_0-9]$/i===name
      lexerror tok,"not a valid variable name: #{name}"
    elsif /^[a-z_]/===name and !(lasttok===/^(\.|::)$/)
      #remember the name as a local variable
      @localvars[name]=true
    end
    return result.unshift(tok)
  end

  #meaning of the codes, as consumed by the case statement further down:
  # 0   - emit no implicit paren tokens
  # 2   - emit an implicit start and end pair (empty parameter list)
  # 1,3 - emit only the implicit start and push a ParamListContextNoParen
  #3 is downgraded to 1 for constants, and a 2 or 3 becomes 0 when
  #the name may be a local
  implicit_parens_to_emit=case nc
    when ?!: readahead(2)=='!=' ? 2 : 1
    when NEVERSTARTPARAMLISTFIRST
      (NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
    when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~: 1
    when ?{: maybe_local=false; 2
    when ?(: maybe_local=false; 0
    when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?=: 2
    when ?+, ?-, ?*, ?&, ?%, ?/, ?:, ??: (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
    when ?<: (ws_toks.empty? || readahead(3)[/^<<[^"'`a-zA-Z_0-9-]/]) ? 2 : 3
    when ?[: ws_toks.empty? ? 2 : 3
    when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?#: raise 'failure'
    else raise "unknown char after ident: #{nextchar.chr}"
  end

  implicit_parens_to_emit==3 and is_const and implicit_parens_to_emit=1

  tok=if maybe_local and implicit_parens_to_emit>=2
    implicit_parens_to_emit=0
    VarNameToken
  else
    MethNameToken
  end.new(name,pos)


  case implicit_parens_to_emit
  when 2:
    result.unshift ImplicitParamListStartToken.new(oldpos),
                   ImplicitParamListEndToken.new(oldpos)
  when 1,3:
    result.unshift ImplicitParamListStartToken.new(oldpos)
    @bracestack.push ParamListContextNoParen.new(@linenum)
  when 0: #do nothing
  else raise 'invalid value of implicit_parens_to_emit'
  end
  return result.unshift(tok)
  # 'ok:'
  # 'if unless while until {'
  # '\n (unescaped) and or'
  # 'then else elsif rescue ensure (illegal in value context)'

  # 'need to pop noparen from bracestack on these tokens: (in operator context)'
  # 'not ok:'
  # 'not (but should it be?)'
end
|
483
|
+
|
484
|
+
#-----------------------------------
|
485
|
+
#which closing token class ends each kind of implicitly opened context
CONTEXT2ENDTOK={AssignmentRhsContext=>AssignmentRhsListEndToken,
  ParamListContextNoParen=>ImplicitParamListEndToken,
  KwParamListContext=>KwParamListEndToken
}

#Pop every context at the top of the stack that is listed in
#CONTEXT2ENDTOK and return the matching end tokens, innermost first.
#str:: text already consumed; its length is subtracted from the file
#      position to get the position given to each end token
def abort_noparens!(str='')
  #assert @moretokens.empty?
  closers=[]
  endtok=CONTEXT2ENDTOK[@bracestack.last.class]
  while endtok
    closers << endtok.new(@file.pos-str.length)
    @bracestack.pop
    endtok=CONTEXT2ENDTOK[@bracestack.last.class]
  end
  closers
end
|
498
|
+
|
499
|
+
#disabled: the method below is never defined because of the 'if false'
if false #no longer used
#-----------------------------------
#Pop any AssignmentRhsContexts and then exactly one
#ParamListContextNoParen, returning the end tokens for them.
#offs:: subtracted from the file position to place the end tokens
def abort_1_noparen!(offs=0)
  assert @moretokens.empty?
  result=[]
  while AssignmentRhsContext===@bracestack.last
    @bracestack.pop
    result << AssignmentRhsListEndToken.new(@file.pos-offs)
  end
  #NOTE(review): 'huh' is not defined anywhere visible
  ParamListContextNoParen===@bracestack.last or lexerror huh,'{} with no matching callsite'
  @bracestack.pop
  result << ImplicitParamListEndToken.new(@file.pos-offs)
  return result
end
end
|
514
|
+
|
515
|
+
#-----------------------------------
|
516
|
+
#parse keywords now, to prevent confusion over bare symbols
|
517
|
+
#and match end with corresponding preceding def or class or whatever.
|
518
|
+
#if arg is not a keyword, the block is called
|
519
|
+
#Handle +str+ as a keyword and return the resulting list of tokens.
#Keeps @bracestack and @localvars in step with the keyword seen.
#For function-like keywords and for anything that is not a keyword the
#caller's block is called and its value is returned instead.
#str:: the identifier text
#offset:: position recorded in the KeywordToken
def parse_keywords(str,offset)
  assert @moretokens.empty?
  result=[KeywordToken.new(str,offset)]

  case str
  when "end"
    #close any implicitly opened contexts first; their end tokens go in
    #front of the keyword, so the keyword is result.last from here on
    result.unshift(*abort_noparens!(str))
    @bracestack.last.see @bracestack,:semi #sorta hacky... should make an :end event instead?

=begin not needed?
    if ExpectDoOrNlContext===@bracestack.last
      @bracestack.pop
      assert @bracestack.last.starter[/^(while|until|for)$/]
    end
=end

    WantsEndContext===@bracestack.last or lexerror result.last, 'unbalanced end'
    ctx=@bracestack.pop
    start,line=ctx.starter,ctx.linenum
    BEGINWORDS===start or lexerror result.last, "end does not match #{start or "nil"}"
    #these four starters opened a local variable block (see below)
    /^(class|module|def|do)$/===start and @localvars.end_block

  when "class","module"
    result.first.has_end!
    @bracestack.push WantsEndContext.new(str,@linenum)
    @localvars.start_block

  when "if","unless" #could be infix form without end
    if after_nonid_op?{false} #prefix form
      result.first.has_end!
      @bracestack.push WantsEndContext.new(str,@linenum)


    else #infix form
      result.unshift(*abort_noparens!(str))
    end
  when "begin","case"
    result.first.has_end!
    @bracestack.push WantsEndContext.new(str,@linenum)
  when "while","until" #could be infix form without end
    if after_nonid_op?{false} #prefix form
      result.first.has_end!
      @bracestack.push WantsEndContext.new(str,@linenum)
      expect_do_or_end_or_nl! str

    else #infix form
      result.unshift(*abort_noparens!(str))
    end
  when "for"
    result.first.has_end!
    @bracestack.push WantsEndContext.new(str,@linenum)
    #expect_do_or_end_or_nl! str #handled by ForSMContext now
    @bracestack.push ForSMContext.new(@linenum)
  when "do"
    result.unshift(*abort_noparens!(str))
    if ExpectDoOrNlContext===@bracestack.last
      #this 'do' belongs to a construct already on the stack
      @bracestack.pop
      assert WantsEndContext===@bracestack.last
    else
      #a block: the keyword is result.last because of the unshift above
      result.last.has_end!
      @bracestack.push WantsEndContext.new(str,@linenum)
      @localvars.start_block
      block_param_list_lookahead
    end
  when "def"
    result.first.has_end!
    @bracestack.push WantsEndContext.new("def",@linenum)
    @localvars.start_block
    safe_recurse { |aa|
      @last_operative_token=KeywordToken.new "def" #hack
      result.concat ignored_tokens

      #read an expr like a.b.c or a::b::c
      #or (expr).b.c
      if nextchar==?( #look for optional parenthesised head
        old_size=@bracestack.size
        parencount=0
        #copy tokens until the parentheses balance again
        begin
          tok=get1token
          case tok
          when/^\($/.token_pat then parencount+=1
          when/^\)$/.token_pat then parencount-=1
          end
          EoiToken===tok and lexerror tok, "eof in def header"
          result<<tok
        end until parencount==0 #@bracestack.size==old_size
      else #no parentheses, all tail
        @last_operative_token=KeywordToken.new "." #hack hack
        result << symbol(false,false)
        #this isn't quite right.... if a.b.c.d is seen, a, b, and c
        #should be considered maybe varname instead of methnames.
        #the last (d in the example) is always considered a methname;
        #it's what's being defined.
      end
      #read tail: .b.c.d etc
      @last_operative_token=result.last
      #state alternates between :expect_op (after a name) and
      #:expect_name (after . or ::)
      state=:expect_op
      loop do

        #look for start of parameter list
        nc=(@moretokens.first or nextchar.chr)
        if state==:expect_op and /^[a-z_(&*]/i===nc
          result.concat def_param_list
          break
        end

        tok=get1token
        result<<tok
        case tok
        when EoiToken
          lexerror tok,'unexpected eof in def header'
        when IgnoreToken
        when MethNameToken #,VarNameToken # /^[a-z_]/i.token_pat
          lexerror tok,'expected . or ::' unless state==:expect_name
          state=:expect_op
        when /^(\.|::)$/.token_pat
          lexerror tok,'expected ident' unless state==:expect_op
          state=:expect_name
        when /^(;|end)$/.token_pat, NewlineToken #are we done with def name?
          state==:expect_op or lexerror tok,'expected identifier'
          break
        else
          lexerror(tok, "bizarre token in def name: " +
                   "#{tok}:#{tok.class}")
        end
      end
    }
  when "alias"
    #reads two symbols, each optionally introduced by a colon
    safe_recurse { |a|
      @last_operative_token=KeywordToken.new "alias" #hack
      result.concat ignored_tokens
      res=symbol(eat_next_if(?:),false)
      res ? result<<res : lexerror(result.first,"bad symbol in alias")
      @last_operative_token=KeywordToken.new "alias" #hack
      result.concat ignored_tokens
      res=symbol(eat_next_if(?:),false)
      res ? result<<res : lexerror(result.first,"bad symbol in alias")
    }
  when "undef"
    #reads a comma separated list of symbols; stops at a newline or at
    #the first char that is not a comma
    safe_recurse { |a|
      loop do
        @last_operative_token=KeywordToken.new "," #hack
        result.concat ignored_tokens
        tok=symbol(eat_next_if(?:),false)
        tok or lexerror(result.first,"bad symbol in undef")
        result<< tok
        @last_operative_token=tok

        sawnl=false
        result.concat ignored_tokens(true){|nl| sawnl=true}

        break if sawnl or nextchar != ?,
        tok= single_char_token(?,)
        result<< tok
      end
    }

  # when "defined?"
  # huh
  #defined? might have a baresymbol following it
  #does it need to be handled specially?

  when "when"
    result.unshift(*abort_noparens!(str))
    @bracestack.push KwParamListContext.new(str,@linenum)

  when "rescue"
    result.unshift(*abort_noparens!(str))
    @bracestack.push RescueSMContext.new(@linenum)

  when "then","in"
    result.unshift(*abort_noparens!(str))
    @bracestack.last.see @bracestack,str.to_sym

  when /^(#{BINOPWORDS}|#{INNERBOUNDINGWORDS})$/o
    result.unshift(*abort_noparens!(str))

  when FUNCLIKE_KEYWORDS: result=yield

  when RUBYKEYWORDS
    #do nothing

  else result=yield

  end

  return result
end
|
707
|
+
|
708
|
+
|
709
|
+
#-----------------------------------
|
710
|
+
#Look ahead for a block parameter list (|a,b|) and lex it if present.
#The tokens read are inserted at the front of the scratch token queue
#that safe_recurse yields; safe_recurse appends that queue to
#@moretokens when the block finishes.
def block_param_list_lookahead
  safe_recurse{ |la|
    @last_operative_token=KeywordToken.new ';'
    a=ignored_tokens

    if eat_next_if(?|)
      a<<KeywordToken.new("|",@file.pos-1)
      if eat_next_if(?|)
        #two bars in a row: an empty parameter list
        a.concat [NoWsToken.new(@file.pos-1),
                  KeywordToken.new('|',@file.pos-1)]
      else
        assert !@defining_lvar
        @defining_lvar=true
        assert((@last_operative_token===';' or NewlineToken===@last_operative_token))
        @bracestack.push BlockParamListContext.new(@linenum)
        #block param initializers are not supported here, because ruby doesn't allow them!
        #collect tokens up to and including the closing bar
        begin
          tok=get1token
          EoiToken===tok and lexerror tok,"eof in block parameter list"
          a<<tok
        end until tok==='|'
        assert@defining_lvar
        @defining_lvar=false
        BlockParamListContext===@bracestack.last or raise 'expected BlockParamListContext atop @bracestack'
        @bracestack.pop
        #NOTE(review): 'fixme' is not defined anywhere visible
        @moretokens.empty? or
          fixme %#moretokens might be set from get1token call above...might be bad#
      end
    end

    @last_operative_token=KeywordToken.new ';'
    #a.concat ignored_tokens

    #assert @last_operative_token===';'
    #a<<get1token

    #insert the collected tokens at the front of the yielded queue
    la[0,0]=a
  }
end
|
749
|
+
|
750
|
+
#-----------------------------------
|
751
|
+
#handle parameter list of a method declaration.
|
752
|
+
#parentheses are optional... if missing param list
|
753
|
+
#is ended by (unescaped) newline or semicolon (at the same bracing level)
|
754
|
+
#expect a brace as the next token,
|
755
|
+
#then match the following tokens until
|
756
|
+
#the matching endbrace is found
|
757
|
+
#Lex the parameter list of a method definition and return its tokens.
#With parentheses the list ends at the matching ')'; without them it
#ends at a ';' or newline token.
def def_param_list
  result=[]
  normal_comma_level=old_bracestack_size=@bracestack.size
  safe_recurse { |a|
    assert(@moretokens.empty?)
    assert((not IgnoreToken===@moretokens[0]))
    assert((@moretokens[0] or not nextchar.chr[WHSPCHARS]))

    #have parentheses?
    if '('==@moretokens[0] or nextchar==?(
      #get open paren token
      result.concat maybe_no_ws_token
      result << tok=get1token
      assert(tok==='(')


      #bracestack was changed by get1token above...
      normal_comma_level+=1
      assert(normal_comma_level==@bracestack.size)
      endingblock=proc{|tok| tok===')' }
    else
      endingblock=proc{|tok| tok===';' or NewlineToken===tok}
    end
    #lets endingblock be used with === (as it is in the loop below)
    class << endingblock
      alias === call
    end

    @last_operative_token=KeywordToken.new ',' #hack
    #read local parameter names
    loop do
      #a name is expected right after a comma at the list's own nesting level
      expect_name=(@last_operative_token===',' and
                   normal_comma_level==@bracestack.size)
      expect_name and @defining_lvar||=true
      result << tok=get1token
      lexerror tok, "unexpected eof in def header" if EoiToken===tok

      #break if at end of param list
      endingblock===tok and
        old_bracestack_size>=@bracestack.size and break

      #next token is a local var name
      #(or the one after that if unary ops present)
      #result.concat ignored_tokens
      expect_name and case tok
        when IgnoreToken#, /^[A-Z]/ #do nothing
        when VarNameToken
          assert@defining_lvar
          @defining_lvar=false
          assert((not @last_operative_token===','))
        when /^[&*]$/.token_pat #unary form...
          #a NoWsToken is also expected... read it now
          result.concat maybe_no_ws_token #not needed?
          @last_operative_token=KeywordToken.new ','
        else lexerror tok,"unfamiliar var name '#{tok}'"
      end
    end

    @defining_lvar=false


    assert(@bracestack.size <= old_bracestack_size)
    assert(endingblock[tok])

    #hack: force next token to look like start of a
    #new stmt, if the last ignored_tokens
    #call above did not find a newline
    #(just in case the next token parsed
    #happens to call quote_expected? or after_nonid_op)
    result.concat ignored_tokens
    if nextchar.chr[/[iuw\/<|>+\-*&%?:]/] and
       !(NewlineToken===@last_operative_token) and
       !(/^(end|;)$/===@last_operative_token)
      @last_operative_token=KeywordToken.new ';'
      result<< get1token
    end
  }

  return result
end
|
836
|
+
|
837
|
+
|
838
|
+
#-----------------------------------
|
839
|
+
#handle % in ruby code. is it part of fancy quote or a modulo operator?
|
840
|
+
# Dispatch on '%': hand off to fancy_quote when quote_expected? says a
# literal starts here, otherwise lex it as a binary operator via biop.
def percent(ch)
  return fancy_quote(ch) if quote_expected?(ch)
  biop(ch)
end
|
847
|
+
|
848
|
+
#-----------------------------------
|
849
|
+
#handle * in ruby code. is unary or binary operator?
|
850
|
+
# Lex '*' or '&'.  In unary position a single character is consumed and,
# unless whitespace/newline follows, a NoWsToken is queued in @moretokens.
# Otherwise the character is lexed by quadriop.
def star_or_amp(ch)
  assert('*&'[ch])
  return quadriop(ch) unless unary_op_expected?(ch)

  #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
  op_token = operator_or_methname_token(getchar)
  @moretokens << NoWsToken.new(@file.pos) unless WHSPLF[nextchar.chr]
  #result should distinguish unary+binary *&
  op_token
end
|
863
|
+
|
864
|
+
#-----------------------------------
|
865
|
+
#handle ? in ruby code. is it part of ?..: or a character literal?
|
866
|
+
# Lex '?'.  When colon_quote_expected? holds, the '?' and the following
# (possibly escaped) character become a NumberToken; otherwise a
# TernaryContext is pushed on @bracestack and '?' is returned as a keyword.
def char_literal_or_op(ch)
  unless colon_quote_expected?(ch)
    @bracestack.push TernaryContext.new(@linenum)
    return KeywordToken.new(getchar) #operator
  end

  getchar # discard the '?'
  NumberToken.new(getchar_maybe_escape)
end
|
875
|
+
|
876
|
+
#-----------------------------------
|
877
|
+
# Lex '/': either the start of a regex literal or the division operator
# ('/' or '/=').  A space right after the slash always means the operator.
def regex_or_div(ch)
  starts_regex = after_nonid_op? {
    !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/\s}]
  }
  return regex(ch) if starts_regex

  # '/' is an operator, optionally followed by '='
  op = getchar
  op << '=' if eat_next_if(?=)
  operator_or_methname_token(op)
end
|
889
|
+
|
890
|
+
#-----------------------------------
|
891
|
+
#return true if tok corresponds to a variable or constant, false if its for a method, nil for something else
|
892
|
+
#we assume tok is a valid token with a correctly formed name.
|
893
|
+
#...should really be called was_var_name
|
894
|
+
# Classify @last_operative_token by the text of its to_s:
#   * ends in a non-identifier character      -> false
#   * starts with a lowercase letter or '_'   -> whether @localvars or
#                                                VARLIKE_KEYWORDS matches it
#   * starts with an uppercase letter         -> whether it is a VarNameToken
#   * starts with '@', '$' or '<'             -> true
# Anything else raises a RuntimeError.
# Uses `then` rather than the `when expr: value` colon form, which only
# Ruby 1.8 accepts.
def is_var_name?
  tok = @last_operative_token

  s = tok.to_s
  case s
  when /[^a-z_0-9]$/i then false
  when /^[a-z_]/ then (@localvars === s or VARLIKE_KEYWORDS === s)
  when /^[A-Z]/ then VarNameToken === tok
  when /^[@$<]/ then true
  else raise "not var or method name: #{s}"
  end
end
|
906
|
+
|
907
|
+
#-----------------------------------
|
908
|
+
# Decide whether ':' or '?' at the current position begins a literal
# (symbol / character literal).  Returns false right away when the two
# upcoming characters cannot start one; otherwise defers to
# after_nonid_op?, answering "yes" after a name only if that name is not
# a variable.
def colon_quote_expected?(ch) #yukko hack
  assert ':?'[ch]
  return false unless readahead(2)[/^(\?[^#{WHSPLF}]|:[$@a-zA-Z_'"`\[*~+\-\/%<=>&|^])$/o]

  #possible func-call as operator
  after_nonid_op? { !is_var_name? }
end
|
918
|
+
|
919
|
+
#-----------------------------------
|
920
|
+
# Lex a token beginning with ':'.  Three outcomes:
#   * a symbol literal (when colon_quote_expected? holds),
#   * a lone ':' keyword, which also ends implicit contexts and any
#     ternary / do-or-newline context on top of @bracestack,
#   * a '::' scope operator, followed (via dot_rhs) by the name on its
#     right-hand side.
def symbol_or_op(ch)
  startpos = @file.pos
  qe = colon_quote_expected?(ch)
  lastchar = prevchar
  eat_next_if(ch) or raise "needed: "+ch

  # handle symbols, including quoted ones like :"foobar", :"[]"
  return symbol(':') if qe

  # look for another colon; return single : if not found
  unless eat_next_if(?:)
    # cancel implicit contexts...
    @moretokens.push(*abort_noparens!(':'))

    # end ternary context, if any
    @bracestack.last.see @bracestack, :colon

    @bracestack.pop if TernaryContext === @bracestack.last #should be in the context's see handler

    if ExpectDoOrNlContext === @bracestack.last #should be in the context's see handler
      @bracestack.pop
      assert @bracestack.last.starter[/^(while|until|for)$/]
    end

    @moretokens.push KeywordToken.new(':', startpos)
    return @moretokens.shift
  end

  # we definitely found a ::
  colon2 = KeywordToken.new('::', startpos)
  lasttok = @last_operative_token
  assert(!(String === lasttok))
  directly_after_name = ((VarNameToken === lasttok or MethNameToken === lasttok) and
                         lasttok === /^[$@a-zA-Z_]/ and !WHSPCHARS[lastchar])
  if directly_after_name
    # emit a NoWsToken first; the '::' itself is queued behind it
    @moretokens << colon2
    result = NoWsToken.new(startpos)
  else
    result = colon2
  end
  dot_rhs(colon2)
  return result
end
|
964
|
+
|
965
|
+
#-----------------------------------
|
966
|
+
# Read the body of a symbol (or bare method name) and wrap it in a token.
# notbare: truthy when a ':' has already been consumed, in which case the
#          token start is moved back one position and a SymbolToken is made;
#          otherwise a MethNameToken is made.
# The result is passed through lexerror together with an error message,
# which is nil unless the next character cannot start a symbol.
def symbol(notbare,couldbecallsite=!notbare)
  assert(!couldbecallsite)
  error = nil
  start = @file.pos
  start -= 1 if notbare
  klass = if notbare then SymbolToken else MethNameToken end

  # look for operators
  opmatches = readahead(3)[RUBYSYMOPERATORREX]
  result =
    if opmatches
      @file.read(opmatches.size)
    else
      case nc = nextchar
      when ?" then assert notbare; double_quote('"')
      when ?' then assert notbare; double_quote("'")
      when ?` then @file.read(1)
      when ?@ then at_identifier.to_s
      when ?$ then dollar_identifier.to_s
      when ?_, ?a..?z then identifier_as_string(?:)
      when ?A..?Z
        name = identifier_as_string(?:)
        if @last_operative_token === '::'
          assert klass == MethNameToken
          klass = VarNameToken if /[A-Z_0-9]$/i === name
        end
        name
      else
        # the message doubles as the token text in this case
        error = "unexpected char starting symbol: #{nc.chr}"
      end
    end
  return lexerror(klass.new(result, start), error)
end
|
993
|
+
|
994
|
+
#-----------------------------------
|
995
|
+
# Read the name that follows '.' or '::' at a call site.
# Returns [name, start_position]; name is nil when the next character
# cannot begin a method name, in which case an error is attached to
# tok_to_errify and @last_operative_token is reset to a ';' keyword.
def callsite_symbol(tok_to_errify)
  start = @file.pos

  # look for operators
  opmatches = readahead(3)[RUBYSYMOPERATORREX]
  name =
    if opmatches
      @file.read(opmatches.size)
    else
      case nc = nextchar
      when ?` then @file.read(1)
      when ?_, ?a..?z, ?A..?Z then identifier_as_string(?:)
      else
        @last_operative_token = KeywordToken.new(';')
        lexerror(tok_to_errify, "unexpected char starting symbol: #{nc.chr}")
        nil
      end
    end
  return [name, start]
end
|
1011
|
+
|
1012
|
+
#-----------------------------------
|
1013
|
+
# Lex the header of a here document (<<ID, <<-ID, <<"ID", <<'ID', <<`ID`).
# Builds a HerePlaceholderToken, queues it in @incomplete_here_tokens and
# then keeps lexing the rest of the line (the tokens are spliced onto the
# front of @moretokens) until the placeholder is no longer unsafe_to_use.
# The body itself is read later by newline.
def here_header
  @file.read(2) == '<<' or raise "parser insanity"

  dash = eat_next_if(?-)
  quote = eat_next_if(/^['"`]$/)
  if quote
    ender = til_charset(/[#{quote}]/)
    unless quote == getchar
      return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc")
    end
  else
    quote = '"'
    ender = til_charset(/[^a-zA-Z0-9_]/)
    unless ender.length >= 1
      return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "invalid here header")
    end
  end

  res = HerePlaceholderToken.new( dash, quote, ender )
  @incomplete_here_tokens.push res

  # hack: normally this should just be in get1token.
  # this fixup is necessary because the call to get1token below
  # makes a recursion.
  @last_operative_token = res

  safe_recurse { |a|
    assert(a.object_id == @moretokens.object_id)
    toks = []
    loop do
      # yech.
      # handle case of here header in a string inclusion, but
      # here body outside it: count the contexts on @bracestack other
      # than assignment-rhs, paren-less param list and top level.
      cnt = 0
      @bracestack.each do |ctx|
        case ctx
        when AssignmentRhsContext, ParamListContextNoParen, TopLevelContext
        else cnt += 1
        end
      end
      if nextchar == ?} and cnt == 1
        res.bodyclass = OutlinedHereBodyToken
        break
      end

      tok = get1token
      assert(a.object_id == @moretokens.object_id)
      toks << tok
      lexerror tok, "here body expected before eof" if EoiToken === tok
      break unless res.unsafe_to_use
    end
    assert(a.object_id == @moretokens.object_id)
    a[0,0] = toks #same as a=toks+a, but keeps a's id
  }

  return res

  # the action continues in newline, where
  # the rest of the here token is read after a
  # newline has been seen and res.affix is eventually called
end
|
1071
|
+
|
1072
|
+
#-----------------------------------
|
1073
|
+
# Lex a token beginning with '<': a here-document header, the '<<'
# operator, the spaceship operator '<=>', or whatever quadriop matches.
def lessthan(ch) #match quadriop('<') or here doc or spaceship op
  ahead = readahead(3)
  if /^<<['"`\-a-z0-9_]$/i === ahead
    # looks like a here doc header; make sure a literal is expected here
    return here_header if quote_expected?(ch) #and @last_operative_token!='class' #not needed?
    operator_or_methname_token @file.read(2)
  elsif "<=>" === ahead
    operator_or_methname_token @file.read(3)
  else
    quadriop(ch)
  end
end
|
1085
|
+
|
1086
|
+
#-----------------------------------
|
1087
|
+
# Lex a backslash that should be followed by a newline (a line
# continuation).  Returns an EscNlToken holding the backslash plus the
# newline; when no newline follows, the token is flagged through lexerror
# with 'illegal escape sequence'.
def escnewline(ch)
  assert ch == '\\'

  pos = @file.pos
  text = getchar
  error = nil
  nl = readnl
  if nl
    text += nl
  else
    error = 'illegal escape sequence'
  end
  lexerror EscNlToken.new(@filename, @linenum, text, pos), error
end
|
1099
|
+
|
1100
|
+
#-----------------------------------
|
1101
|
+
# Handle a newline character.
#  * If a here-document header is still waiting for its body
#    (@incomplete_here_tokens), read lines until one matches the
#    terminator and return the body token.
#  * Otherwise decide whether the newline ends a statement (delegating to
#    the superclass newline and ending implicit contexts) or is an
#    implicit line continuation (returned as an EscNlToken), then process
#    any start-of-line directives on the following line.
def newline(ch)
  assert("\r\n"[nextchar.chr])

  # handle here bodies queued up by previous line
  # (we should be more compatible with dos/mac style newlines...)
  tofill = @incomplete_here_tokens.shift
  if tofill
    tofill.string.offset = @file.pos
    loop {
      assert("\r\n"[nextchar.chr])

      # retrieve everything til next nl
      line = all_quote(/^[\r\n]$/, tofill.quote, /^[\r\n]$/, :regex_esc_seq)
      #(you didn't know all_quote could take a regex, did you?)

      # get rid of fals that otherwise appear to be in the middle of
      # a string (and are emitted out of order)
      fal = @moretokens.pop
      assert(FileAndLineToken === fal || fal.nil?)

      back1char
      assert("\r\n"[nextchar.chr])

      # matches terminating reg expr?
      break if line.elems.size == 1 and
               line.elems[0][tofill.termex]

      tofill.append_token line
      tofill.append readnl
      back1char
    }

    assert("\r\n"[nextchar.chr])
    tofill.unsafe_to_use = false

    return tofill.bodyclass.new(tofill)
  end

  # ordinary newline handling (possibly implicitly escaped)
  assert("\r\n"[nextchar.chr])
  assert @moretokens.empty?
  ends_statement = (NewlineToken === @last_operative_token or #hack
                    @last_operative_token === /^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
                    !after_nonid_op? { false })
  if ends_statement #hack-o-rama: probly cases left out above
    a = abort_noparens!
    @bracestack.pop if ExpectDoOrNlContext === @bracestack.last
    @bracestack.last.see @bracestack, :semi

    a << super(ch)
    @moretokens.replace a + @moretokens
    result = @moretokens.shift
  else
    offset = @file.pos
    #@moretokens <<
    result = EscNlToken.new(@filename, @linenum, readnl, offset)
    #WsToken.new ' ' #why? #should be "\\\n" ?
  end

  start_of_line_directives

  return result
end
|
1163
|
+
|
1164
|
+
#-----------------------------------
|
1165
|
+
# Patterns and lookahead lengths for directives that are only recognized
# at the start of a line.  Each *LENGTH is the marker text plus the one
# character that must follow it.
EQBEGIN=%r/^=begin[^a-zA-Z_0-9]$/
EQBEGINLENGTH=7
EQEND='=end'
ENDMARKER=/^__END__[\r\n]$/
ENDMARKERLENGTH=8

# Called once a newline has been consumed; looks at the start of the next
# line for:
#  * =begin ... =end block comments: each one is read whole, @linenum is
#    advanced by the number of line breaks it contains, and it is queued
#    in @moretokens as an IgnoreToken;
#  * the __END__ marker: an end-of-file token (from endoffile_detected) is
#    put at the front of @moretokens and the file position is moved to
#    the end of the file.
def start_of_line_directives
  # handle =begin...=end (at start of a line)
  while EQBEGIN===readahead(EQBEGINLENGTH)
    startpos=@file.pos
    more=@file.read(EQBEGINLENGTH-1) #get =begin

    # keep reading til /\n=end.*\n/
    @file.each(EQEND) {|cblock|
      more << cblock
      # must be at start of line
      break if /^[\r\n]#{EQEND}/o===readback(EQEND.length+1)
    }
    # read rest of line after =end
    more << @file.til_charset(/[\r\n]/)
    assert((?\r===nextchar or ?\n===nextchar))
    assert(!(/[\r\n]/===more[-1,1]))

    newls= more.scan(/\r\n?|\n\r?/)
    @linenum+= newls.size

    # inject the fresh comment into future token results
    @moretokens.push IgnoreToken.new(more,startpos)
  end

  # handle __END__
  if ENDMARKER===readahead(ENDMARKERLENGTH)
    assert(!(ImplicitContext===@bracestack.last))
    # read the whole 7-character marker (ENDMARKERLENGTH counts the
    # newline as well); a literal 6 dropped the final underscore
    @moretokens.unshift endoffile_detected(@file.read(ENDMARKERLENGTH-1))
    @file.pos=@file.stat.size
  end
end
|
1201
|
+
|
1202
|
+
|
1203
|
+
|
1204
|
+
#-----------------------------------
|
1205
|
+
#used to resolve the ambiguity of
|
1206
|
+
# unary ops (+, -, *, &, ~ !) in ruby
|
1207
|
+
#returns whether current token is to be the start of a literal
|
1208
|
+
IDBEGINCHAR=/^[a-zA-Z_$@]/
|
1209
|
+
# Decide whether the operator character at the current position is unary.
# Returns false when the character after it is '*', '&' or '='; otherwise
# defers to after_nonid_op?, answering "yes" after a name only if the name
# is not a variable and the previous character is whitespace/newline.
def unary_op_expected?(ch) #yukko hack
  return false if '*&='[readahead(2)[1..1]]

  #possible func-call as operator
  after_nonid_op? { !is_var_name? && WHSPLF[prevchar] }
end
|
1219
|
+
|
1220
|
+
#-----------------------------------
|
1221
|
+
#used to resolve the ambiguity of
|
1222
|
+
# <<, %, ? in ruby
|
1223
|
+
#returns whether current token is to be the start of a literal
|
1224
|
+
#/ is not handled right here if whitespace immediately follows the /
|
1225
|
+
# Decide whether '?', '%' or '<' at the current position begins a literal
# (character literal, %-quote, here document) rather than an operator.
# A quick look at the upcoming characters rules out the literal reading;
# if that check does not settle it, after_nonid_op? decides, answering
# "yes" after a name only if the name is not a variable, whitespace
# precedes ch and no whitespace follows it.
# Raises a RuntimeError for any other ch.  The message is double-quoted
# so that the offending ch is actually interpolated into it (it used to be
# a single-quoted string that printed '#{ch}' literally).
def quote_expected?(ch) #yukko hack
  ruled_out =
    case ch[0]
    when ?? then readahead(2)[/^\?[#{WHSPLF}]$/o] #not needed?
    when ?% then readahead(3)[/^%([a-ps-vyzA-PR-VX-Z]|[QqrwWx][a-zA-Z0-9])/]
    when ?< then !readahead(4)[/^<<-?['"`a-z0-9_]/i]
    else raise "unexpected ch (#{ch}) in quote_expected?"
    # when ?+,?-,?&,?*,?~,?! then '*&='[readahead(2)[1..1]]
    end
  return false if ruled_out

  #possible func-call as operator
  after_nonid_op? {
    not is_var_name? and
      WHSPLF[prevchar] and not WHSPLF[readahead(2)[1..1]]
  }
end
|
1241
|
+
|
1242
|
+
#-----------------------------------
|
1243
|
+
#used to resolve the ambiguity of
|
1244
|
+
# <<, %, /, ?, :, and newline in ruby
|
1245
|
+
# Classify the position by @last_operative_token:
#  * after a method name, variable name or function-like keyword the
#    caller's block decides (its value is returned);
#  * after a literal, a closing bracket or one of the listed keywords the
#    answer is false;
#  * after an operator, an inner bounding word, a newline, the start of
#    the file (nil), an opening bracket or one of the listed keywords the
#    answer is true.
# Raises if the last operative token is an IgnoreToken or is unrecognized.
def after_nonid_op?
  case @last_operative_token
  when MethNameToken, VarNameToken, FUNCLIKE_KEYWORDS.token_pat
    yield
  when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
       %r{^(class|module|do|end|self|true|false|nil|
          __FILE__|__LINE__|[\})\]]|alias|(un)?def|for
          )$}x.token_pat
    #do shouldn't be in above list... dunno about def/undef
    #maybe class/module shouldn't either?
    #for is also in NewlineToken branch, below.
    #what about rescue?
    false
  when /^(#{RUBYOPERATORREX}|#{INNERBOUNDINGWORDS})$/o.token_pat
    #regexs above must match whole string
    #assert(@last_operative_token==$&) #disabled 'cause $& is now always nil :(
    true
  when NewlineToken, nil, #nil means we're still at beginning of file
       /^([({\[]|or|not|and|if|unless|then|elsif|else|
          while|until|begin|for|in|case|when|ensure)$
       /x.token_pat
    true
  #when KeywordToken
  #  true
  when IgnoreToken
    raise "last_operative_token shouldn't be ignoreable"
  else
    raise "after_nonid_op? after #{@last_operative_token}:#{@last_operative_token.class} -- now what"
  end
end
|
1275
|
+
|
1276
|
+
#-----------------------------------
|
1277
|
+
# Lex an operator that may be doubled and/or followed by '=':
# for ch in & * | < > this matches ch, chch, ch= or chch=.
def quadriop(ch) #match /&&?=?/ (&, &&, &=, or &&=)
  assert(%w[& * | < >].include?(ch))
  # '&*'[ch] and qe=quote_expected?(ch) #not needed?
  op = getchar + (eat_next_if(ch) or '')
  op << ?= if eat_next_if(?=)
  # elsif qe and result[/^[&*]$/] #not needed?
  #   @moretokens<<NoWsToken.new(@file.pos) #not needed?
  operator_or_methname_token(op)
end
|
1288
|
+
|
1289
|
+
#-----------------------------------
|
1290
|
+
# Lex a one-character operator (%, ^ or ~) optionally followed by '='.
def biop(ch) #match /%=?/ (% or %=)
  assert(ch[/^[%^~]$/])
  op = getchar
  op << ?= if eat_next_if(?=)
  operator_or_methname_token(op)
end
|
1298
|
+
|
1299
|
+
#-----------------------------------
|
1300
|
+
# Lex '~'.  A NoWsToken is queued in @moretokens unless whitespace or a
# newline follows the operator.
def tilde(ch) #match /~=?/ (~ or ~=)
  assert(ch == '~')
  op = getchar
  # eat_next_if(?=) ?
  #   result <<?= :
  #why is the NoWsToken necessary at this point?
  @moretokens << NoWsToken.new(@file.pos) unless WHSPLF[nextchar.chr]
  #result should distinguish unary ~
  operator_or_methname_token(op)
end
|
1311
|
+
|
1312
|
+
#-----------------------------------
|
1313
|
+
# True-ish when the last operative token is one of the keywords
# alias, def, undef, '.' or '::'.
def want_op_name
  return false unless KeywordToken === @last_operative_token
  @last_operative_token === /^(alias|(un)?def|\.|::)$/
end
|
1317
|
+
|
1318
|
+
#-----------------------------------
|
1319
|
+
#match /[+\-]=?/ (+ or +=)
|
1320
|
+
#could be beginning of number, too
|
1321
|
+
#fixme: handle +@ and -@ here as well... (currently, this is done in symbol()?)
|
1322
|
+
# Lex '+' or '-'.
#  * binary position: the operator, optionally followed by '=';
#  * unary position directly before a digit: handed to number;
#  * other unary position: the bare operator, with a NoWsToken queued in
#    @moretokens unless whitespace/newline follows.
def plusminus(ch)
  assert(/^[+\-]$/ === ch)

  unless unary_op_expected?(ch)
    # binary operator
    assert(!want_op_name)
    op = getchar
    op << ?= if eat_next_if(?=)
    return operator_or_methname_token(op)
    #todo: result should distinguish unary+binary +-
  end

  return number(ch) if (?0..?9) === readahead(2)[1]

  # unary operator
  op = getchar
  @moretokens << NoWsToken.new(@file.pos) unless WHSPLF[nextchar.chr]
  #todo: result should distinguish unary+binary +-
  operator_or_methname_token(op)
end
|
1344
|
+
|
1345
|
+
#-----------------------------------
|
1346
|
+
# Lex a token beginning with '=': one of =, ==, ===, =~ or =>.
#  * '=>' notifies the context on top of @bracestack with :arrow;
#  * a plain '=' pushes an AssignmentRhsContext and puts an
#    AssignmentRhsListStartToken (positioned just after the '=') at the
#    front of @moretokens.
# Returns the token built by operator_or_methname_token.
# Uses `then`/newline in the case branches rather than the `when expr:`
# colon form, which only Ruby 1.8 accepts.
def equals(ch) #match /=(>|~|==?)?/ (= or == or =~ or === or =>)
  offset = @file.pos
  str = getchar
  assert(str == '=')
  c = (eat_next_if(/^[~=>]$/) or '')
  str << c
  case c
  when '=' then str << (eat_next_if(?=) or '')

  when '>' then @bracestack.last.see @bracestack, :arrow
  when '' #record local variable definitions
    @bracestack.push AssignmentRhsContext.new(@linenum)
    @moretokens.unshift AssignmentRhsListStartToken.new(offset + 1)
  end
  return operator_or_methname_token(str, offset)
end
|
1363
|
+
|
1364
|
+
#-----------------------------------
|
1365
|
+
# Lex '!', '!=' or '!~' and return it as a KeywordToken.  For a bare '!'
# a NoWsToken is queued in @moretokens unless whitespace/newline follows.
def exclam(ch) #match /![~=]?/ (! or != or !~)
  assert nextchar == ?!
  op = getchar
  suffix = eat_next_if(/^[~=]$/)
  if suffix
    op += suffix
  elsif !WHSPLF[nextchar.chr]
    @moretokens << NoWsToken.new(@file.pos)
  end
  #result should distinguish unary !
  KeywordToken.new(op)
end
|
1378
|
+
|
1379
|
+
#-----------------------------------
|
1380
|
+
# Lex '.', '..' or '...'.  The range operators are returned directly; for
# a single dot the name on its right-hand side is also lexed (dot_rhs).
def dot(ch)
  eat_next_if(?.) or raise "lexer confusion"

  # three lumps of sugar or two?
  if eat_next_if(?.)
    return KeywordToken.new(eat_next_if(?.) ? "..." : "..")
  end

  # saw just a single '.': match a valid ruby id after the dot
  dot_token = KeywordToken.new(".")
  dot_rhs(dot_token)
  dot_token
end
|
1394
|
+
|
1395
|
+
#-----------------------------------
|
1396
|
+
# Lex what follows a '.' or '::' token (prevtok): any ignorable tokens,
# then the call-site name.  Everything read is placed at the front of the
# token list handed to the safe_recurse block.
def dot_rhs(prevtok)
  safe_recurse { |a|
    @last_operative_token = prevtok
    pending = ignored_tokens
    name, pos = callsite_symbol(prevtok)
    pending.push(*var_or_meth_name(name, prevtok, pos)) if name
    a.unshift(*pending)
  }
end
|
1405
|
+
|
1406
|
+
#-----------------------------------
|
1407
|
+
# Single-quoted strings are lexed by the same routine as double-quoted ones.
def single_quote(ch=nil)
  return double_quote(ch)
end
|
1410
|
+
|
1411
|
+
#-----------------------------------
|
1412
|
+
# Lex a backquote.  Right after def, '::' or '.' it is a method name and
# is returned as a MethNameToken; otherwise it opens a quoted string and
# is handled by double_quote.
def back_quote(ch=nil)
  oldpos = @file.pos
  if @last_operative_token === /^(def|::|\.)$/
    tick = eat_next_if(?`) or raise "insanity"
    return MethNameToken.new(tick, oldpos)
  end
  double_quote(ch)
end
|
1419
|
+
|
1420
|
+
#-----------------------------------
|
1421
|
+
# Lex a comment using the superclass comment lexer and return it as an
# IgnoreToken.  If the text the superclass returned both starts and ends
# with '#', the trailing '#' is dropped and the input is backed up one
# character so that '#' is seen again on the next call.
def comment(str)
  text = ""
  #loop{
  text << super(nil).to_s

  if /^\#.*\#$/ === text #if comment was ended by a crunch
    # that's not a legal comment end in ruby, so just keep reading
    assert(text.to_s[-1] == ?#)
    text.chomp! '#'

    # back up one char in input so that the
    # super will see that # on the next go round.
    # this hack makes the ruma comment lexer work with ruby too.
    back1char

    assert nextchar == ?#
  #else break #not a crunch... just exit
  end
  #}

  return IgnoreToken.new(text)
end
|
1444
|
+
|
1445
|
+
#-----------------------------------
|
1446
|
+
# Lex an opening bracket: '(', '[' or '{', pushing the matching context
# onto @bracestack.
#  * '[' always pushes a ListImmedContext;
#  * '(' pushes a ParamListContext after a name or function-like keyword,
#    else a ParenContext;
#  * '{' pushes a ListImmedContext (a hash) when after_nonid_op? holds,
#    else starts a block: new local-variable scope, BlockContext, and a
#    lookahead for a block parameter list.
# When '[' or '(' directly follows a name with no whitespace between, a
# NoWsToken is returned first and the bracket token is queued behind it.
def open_brace(ch)
  assert((ch != '[' or !want_op_name))
  assert(@moretokens.empty?)
  lastchar = prevchar
  ch = eat_next_if(/^[({\[]$/) or raise "lexer confusion"
  tokch = KeywordToken.new(ch, @file.pos - 1)

  #maybe emitting of NoWsToken can be moved into var_or_meth_name ??
  case tokch.ident
  when '['
    #fixme: in contexts expecting an (operator) method name, we
    #       should match [] or []= at this point
    @bracestack.push ListImmedContext.new(ch, @linenum)
    lasttok = last_operative_token
    #could be: lasttok===/^[a-z_]/i
    after_name = (VarNameToken === lasttok or MethNameToken === lasttok or
                  lasttok === FUNCLIKE_KEYWORDS)
    if after_name and !WHSPCHARS[lastchar]
      @moretokens << tokch
      tokch = NoWsToken.new(@file.pos - 1)
    end
  when '('
    lasttok = last_operative_token
    #could be: lasttok===/^[a-z_]/i
    after_name = (VarNameToken === lasttok or MethNameToken === lasttok or
                  lasttok === FUNCLIKE_KEYWORDS)
    if after_name
      unless WHSPCHARS[lastchar]
        @moretokens << tokch
        tokch = NoWsToken.new(@file.pos - 1)
      end
      @bracestack.push ParamListContext.new(@linenum)
    else
      @bracestack.push ParenContext.new(@linenum)
    end

  when '{'
    # check if we are in a hash literal or string inclusion (#{}),
    # in which case below would be bad.
    if after_nonid_op? { false }
      @bracestack.push ListImmedContext.new(ch, @linenum) #that is, a hash
    else
      # disabled code kept from the original ("not needed now, i think"):
      #   # 'need to find matching callsite context and end it if implicit'
      #   lasttok=last_operative_token
      #   unless lasttok===')' and lasttok.callsite?
      #     @moretokens.push *(abort_1_noparen!(1).push tokch)
      #     tokch=@moretokens.shift
      #   end

      @localvars.start_block
      @bracestack.push BlockContext.new(@linenum)
      block_param_list_lookahead
    end
  end
  return tokch
end
|
1502
|
+
|
1503
|
+
#-----------------------------------
|
1504
|
+
# Lex a closing bracket: ')', ']' or '}'.  Implicit contexts are ended
# first (their tokens precede the bracket in @moretokens), then the
# context on top of @bracestack is popped and checked against the bracket
# via PAIRS.  Unmatched or mismatched brackets are reported through
# lexerror.  Returns the first queued token.
def close_brace(ch)
  ch == eat_next_if(/[)}\]]/) or raise "lexer confusion"
  @moretokens.concat abort_noparens!(ch)
  kw = KeywordToken.new(ch, @file.pos - 1)
  @moretokens << kw
  @bracestack.last.see @bracestack, :semi #hack
  if @bracestack.empty?
    lexerror kw, "unmatched brace: #{ch}"
    return @moretokens.shift
  end

  ctx = @bracestack.pop
  origch = ctx.starter
  line = ctx.linenum
  unless ch == PAIRS[origch]
    lexerror kw, "mismatched braces: #{origch}#{ch}\n" +
                 "matching brace location", @filename, line
  end
  @localvars.end_block if BlockContext === ctx
  if ParamListContext == ctx.class
    assert ch == ')'
    #kw.set_callsite! #not needed?
  end
  return @moretokens.shift
end
|
1525
|
+
|
1526
|
+
#-----------------------------------
|
1527
|
+
# Handle a NUL character, which is expected to be the last thing in the
# file.  The NUL plus any ignorable tokens after it (minus
# FileAndLineTokens) form the end-of-file text; an error is reported if
# the file position is then not at the end of the file.  Returns the
# result of endoffile_detected.
def eof(ch=nil)
  #this must be the very last character...
  oldpos = @file.pos
  assert(?\0 == @file.getc)

  result = "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"

  unless @file.pos == @file.stat.size
    lexerror result, 'nul character is not at the end of file'
  end
  @file.pos = @file.stat.size
  return endoffile_detected(result)
end
|
1539
|
+
|
1540
|
+
#-----------------------------------
|
1541
|
+
# End the token stream: tokens ending any implicit contexts, followed by
# the superclass's end-of-file token, are appended to @moretokens and the
# first queued token is returned.  An error is attached to that token if
# the braces are not balanced.
def endoffile_detected(s='')
  pending = abort_noparens!
  pending.push super(s)
  @moretokens.push(*pending)
  result = @moretokens.shift
  unless balanced_braces?
    lexerror result, "unbalanced braces at eof. bracestack=#{@bracestack.inspect}"
  end
  result
end
|
1547
|
+
|
1548
|
+
#-----------------------------------
|
1549
|
+
# Wrap the superclass's single-character result in a KeywordToken
# positioned one character before the current file position.
def single_char_token(ch)
  text = super(ch)
  KeywordToken.new(text, @file.pos - 1)
end
|
1552
|
+
|
1553
|
+
#-----------------------------------
|
1554
|
+
# A comma is lexed as an ordinary single-character keyword token.
def comma(ch)
  return single_char_token(ch)
end
|
1557
|
+
|
1558
|
+
#-----------------------------------
|
1559
|
+
# Lex ';'.  Implicit contexts are ended first (their tokens precede the
# semicolon in @moretokens), the top context is notified with :semi, and
# a pending do-or-newline context is popped.  Returns the first queued
# token.
def semicolon(ch)
  assert @moretokens.empty?
  @moretokens.push(*abort_noparens!)
  @bracestack.last.see @bracestack, :semi
  if ExpectDoOrNlContext === @bracestack.last #should be in context's see:semi handler
    @bracestack.pop
    assert @bracestack.last.starter[/^(while|until|for)$/]
  end
  semi = single_char_token(ch)
  @moretokens.push semi
  return @moretokens.shift
end
|
1570
|
+
|
1571
|
+
#-----------------------------------
|
1572
|
+
# Build the token for operator text s at the given offset:
#  * KeywordToken if s matches RUBYNONSYMOPERATORREX;
#  * MethNameToken if the last operative token is '.', '::', def, undef,
#    alias or defined?;
#  * OperatorToken otherwise.
def operator_or_methname_token(s,offset=nil)
  assert RUBYOPERATORREX === s
  token_class =
    if RUBYNONSYMOPERATORREX === s
      KeywordToken
    elsif @last_operative_token === /^(\.|::|def|undef|alias|defined\?)$/
      MethNameToken
    else
      OperatorToken
    end
  token_class.new(s, offset)
end
|
1582
|
+
|
1583
|
+
#-----------------------------------
|
1584
|
+
#tokenify_results_of :identifier
|
1585
|
+
save_offsets_in(*CHARMAPPINGS.values.uniq-[:symbol_or_op,:open_brace,:whitespace])
|
1586
|
+
#save_offsets_in :symbol
|
1587
|
+
|
1588
|
+
end
|
1589
|
+
|