rubylexer 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +510 -0
- data/README +134 -0
- data/Rantfile +37 -0
- data/assert.rb +31 -0
- data/charhandler.rb +84 -0
- data/charset.rb +76 -0
- data/context.rb +174 -0
- data/howtouse.txt +136 -0
- data/io.each_til_charset.rb +247 -0
- data/require.rb +103 -0
- data/rlold.rb +12 -0
- data/rubycode.rb +44 -0
- data/rubylexer.rb +1589 -0
- data/rulexer.rb +532 -0
- data/symboltable.rb +65 -0
- data/testcode/deletewarns.rb +39 -0
- data/testcode/dumptokens.rb +38 -0
- data/testcode/locatetest +12 -0
- data/testcode/rubylexervsruby.rb +104 -0
- data/testcode/rubylexervsruby.sh +51 -0
- data/testcode/tokentest.rb +237 -0
- data/testcode/torment +51 -0
- data/testdata/1.rb.broken +729 -0
- data/testdata/23.rb +24 -0
- data/testdata/g.rb +15 -0
- data/testdata/newsyntax.rb +18 -0
- data/testdata/noeolatend.rb +1 -0
- data/testdata/p.rb +1227 -0
- data/testdata/pleac.rb.broken +6282 -0
- data/testdata/pre.rb +33 -0
- data/testdata/pre.unix.rb +33 -0
- data/testdata/regtest.rb +621 -0
- data/testdata/tokentest.assert.rb.can +7 -0
- data/testdata/untitled1.rb +1 -0
- data/testdata/w.rb +22 -0
- data/testdata/wsdlDriver.rb +499 -0
- data/testing.txt +130 -0
- data/testresults/placeholder +0 -0
- data/token.rb +486 -0
- data/tokenprinter.rb +152 -0
- metadata +76 -0
data/rlold.rb
ADDED
data/rubycode.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
=begin copyright
|
2
|
+
rubylexer - a ruby lexer written in ruby
|
3
|
+
Copyright (C) 2004,2005 Caleb Clausen
|
4
|
+
|
5
|
+
This library is free software; you can redistribute it and/or
|
6
|
+
modify it under the terms of the GNU Lesser General Public
|
7
|
+
License as published by the Free Software Foundation; either
|
8
|
+
version 2.1 of the License, or (at your option) any later version.
|
9
|
+
|
10
|
+
This library is distributed in the hope that it will be useful,
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
13
|
+
Lesser General Public License for more details.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU Lesser General Public
|
16
|
+
License along with this library; if not, write to the Free Software
|
17
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
|
+
=end
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
require "token.rb"
|
23
|
+
require "tokenprinter.rb"
|
24
|
+
|
25
|
+
# A token that wraps a run of already-lexed ruby tokens together with the
# file name and line number it came from.
# NOTE(review): `ident` is not defined in this file; presumably it is the
# accessor Token provides for the value handed to super(tokens) -- confirm
# against token.rb.
class RubyCode < Token
  # tokens::   the wrapped tokens (passed straight to Token#initialize)
  # filename:: name of the file the tokens came from
  # linenum::  line number handed to the token printer in #to_s
  def initialize(tokens, filename, linenum)
    super(tokens)
    @filename = filename
    @linenum = linenum
  end

  # NOTE(review): unfinished placeholder carried over unchanged from the
  # original (`exec?` and `huh` are not defined anywhere visible); calling
  # this will most likely raise NoMethodError.
  def [](*args)
    exec? ident.huh
  end

  # Renders the wrapped tokens back into source text by printing each one
  # with a KeepWsTokenPrinter and concatenating the pieces.
  def to_s()
    pieces = []
    keepwsprinter = KeepWsTokenPrinter.new('', @linenum)
    ident.each { |tok| pieces << keepwsprinter.sprint(tok) }
    # Array#to_s only concatenates on ruby 1.8; on 1.9+ it returns the
    # inspect form ('["a", "b"]'). join concatenates on every version.
    return pieces.join
  end
end
|
43
|
+
|
44
|
+
|
data/rubylexer.rb
ADDED
@@ -0,0 +1,1589 @@
|
|
1
|
+
=begin copyright
|
2
|
+
rubylexer - a ruby lexer written in ruby
|
3
|
+
Copyright (C) 2004,2005 Caleb Clausen
|
4
|
+
|
5
|
+
This library is free software; you can redistribute it and/or
|
6
|
+
modify it under the terms of the GNU Lesser General Public
|
7
|
+
License as published by the Free Software Foundation; either
|
8
|
+
version 2.1 of the License, or (at your option) any later version.
|
9
|
+
|
10
|
+
This library is distributed in the hope that it will be useful,
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
13
|
+
Lesser General Public License for more details.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU Lesser General Public
|
16
|
+
License along with this library; if not, write to the Free Software
|
17
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
|
+
=end
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
require "rulexer"
|
23
|
+
require "symboltable"
|
24
|
+
require "io.each_til_charset"
|
25
|
+
require "context.rb"
|
26
|
+
|
27
|
+
|
28
|
+
|
29
|
+
#-----------------------------------
|
30
|
+
class RubyLexer < RuLexer
|
31
|
+
include NestedContexts
|
32
|
+
|
33
|
+
#regexp constants, keyword lists and the character dispatch table
#used by the lexer methods of this class.
RUBYSYMOPERATORREX=
|
34
|
+
%r{^([&|^/%~]|=(==?|~)|>[=>]?|<(<|=>?)?|[+\-]@?|\*\*?|\[\]=?)}
|
35
|
+
# (nasty beastie, eh?)
|
36
|
+
#these are the overridable operators
|
37
|
+
#does not match flow-control operators like: || && ! or and if not
|
38
|
+
#or op= ops like: += -= ||=
|
39
|
+
#or .. ... ?:
|
40
|
+
#for that use:
|
41
|
+
RUBYNONSYMOPERATORREX=
|
42
|
+
%r{^([%^~/\-+]=|(\|\|?|&&?)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|=>?|![=~]?)$}
|
43
|
+
RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
|
44
|
+
UNSYMOPS=/^[~!]$/ #always unary
|
45
|
+
UBSYMOPS=/^([*&+-]|::)$/ #ops that could be unary or binary
|
46
|
+
WHSPCHARS=WHSPLF+"\\#"
|
47
|
+
#keywords that open an end-terminated construct in prefix position
|
48
|
+
#but act as statement modifiers in infix position (see parse_keywords)
|
49
|
+
OPORBEGINWORDS="(if|unless|while|until)"
|
48
|
+
BEGINWORDS=/^(def|class|module|begin|for|case|do|#{OPORBEGINWORDS})$/o
|
49
|
+
FUNCLIKE_KEYWORDS=/^(break|next|redo|return|raise|yield|defined\?|retry|super|BEGIN|END)$/
|
50
|
+
VARLIKE_KEYWORDS=/^(__FILE__|__LINE__|false|nil|self|true)$/
|
51
|
+
INNERBOUNDINGWORDS="(else|elsif|ensure|in|then|rescue|when)"
|
52
|
+
BINOPWORDS="(and|or)"
|
53
|
+
#words that, when they directly follow an identifier, never begin an
|
54
|
+
#implicit parameter list; var_or_meth_name tests readahead text against this
|
55
|
+
NEVERSTARTPARAMLISTWORDS=/^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)[^a-zA-Z0-9_!?=]?/o
|
54
|
+
NEVERSTARTPARAMLISTFIRST=CharSet[%[aoeitrwu]] #char set that begins NEVERSTARTPARAMLIST
|
55
|
+
NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
|
56
|
+
|
57
|
+
RUBYKEYWORDS=%r{
|
58
|
+
^(alias|#{BINOPWORDS}|not|undef|__END__|end|
|
59
|
+
#{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
|
60
|
+
#{INNERBOUNDINGWORDS}|#{BEGINWORDS}
|
61
|
+
)$
|
62
|
+
}xo
|
63
|
+
#maps a character (or a range / string of characters) to the name of the
|
64
|
+
#method that lexes a token starting with it; handed to CharHandler.new
|
65
|
+
#in initialize, with :illegal_char as the fallback
|
66
|
+
CHARMAPPINGS = {
|
64
|
+
?$ => :dollar_identifier,
|
65
|
+
?@ => :at_identifier,
|
66
|
+
?a..?z => :identifier,
|
67
|
+
?A..?Z => :identifier,
|
68
|
+
?_ => :identifier,
|
69
|
+
?0..?9 => :number,
|
70
|
+
?" => :double_quote,
|
71
|
+
?' => :single_quote,
|
72
|
+
?` => :back_quote,
|
73
|
+
|
74
|
+
WHSP => :whitespace, #includes \r
|
75
|
+
?, => :comma,
|
76
|
+
?; => :semicolon,
|
77
|
+
|
78
|
+
?^ => :biop,
|
79
|
+
?~ => :tilde,
|
80
|
+
?= => :equals,
|
81
|
+
?! => :exclam,
|
82
|
+
?. => :dot,
|
83
|
+
|
84
|
+
#these ones could signal either an op or a term
|
85
|
+
?/ => :regex_or_div,
|
86
|
+
"|>" => :quadriop,
|
87
|
+
"*&" => :star_or_amp, #could be unary
|
88
|
+
"+-" => :plusminus, #could be unary
|
89
|
+
?< => :lessthan,
|
90
|
+
?% => :percent,
|
91
|
+
?? => :char_literal_or_op, #single-char int literal
|
92
|
+
?: => :symbol_or_op,
|
93
|
+
?\n => :newline, #implicitly escaped after op
|
94
|
+
#?\r => :newline, #implicitly escaped after op
|
95
|
+
|
96
|
+
?\\ => :escnewline,
|
97
|
+
?\0 => :eof,
|
98
|
+
|
99
|
+
"[({" => :open_brace,
|
100
|
+
"])}" => :close_brace,
|
101
|
+
|
102
|
+
|
103
|
+
?# => :comment
|
104
|
+
}
|
105
|
+
|
106
|
+
#reader only; here_spread_over_ruby_code reads it from another lexer instance
attr :incomplete_here_tokens
|
107
|
+
|
108
|
+
|
109
|
+
#-----------------------------------
|
110
|
+
# Sets up lexer state on top of what the superclass initializer does.
# filename:: name reported in error messages
# file::     the input being lexed (forwarded to the superclass)
# linenum::  line number of the first line of input (defaults to 1)
def initialize(filename,file,linenum=1)
  super(filename, file, linenum)

  @start_linenum = linenum

  # stack of nesting contexts; the bottom entry is always a TopLevelContext
  @bracestack = [TopLevelContext.new]
  @incomplete_here_tokens = []

  # known local variable names, plus the flag that forces the next
  # identifier to be treated as a local being defined
  @localvars = SymbolTable.new
  @defining_lvar = nil

  # per-character dispatch to the handler methods named in CHARMAPPINGS
  @toptable = CharHandler.new(self, :illegal_char, CHARMAPPINGS)

  start_of_line_directives
end
|
122
|
+
|
123
|
+
#-----------------------------------
|
124
|
+
# Fetches the next token from the superclass lexer and records it as the
# last operative token, unless it is ignorable.
# Raises RuntimeError if the superclass hands back something that is
# neither an IgnoreToken nor a Token.
def get1token
  token = super # most of the action's here

  # ignorable tokens are passed through without touching @last_operative_token
  return token if IgnoreToken === token

  # check for bizarre token types
  unless Token === token
    raise "#{@filename}:#{linenum}:token is a #{token.class}, last is #{@last_operative_token}"
  end

  @last_operative_token = token
  token
end
|
143
|
+
|
144
|
+
|
145
|
+
|
146
|
+
#-----------------------------------
|
147
|
+
# True when the context stack holds nothing but its single
# TopLevelContext entry, i.e. every opened context has been closed.
def balanced_braces?
  @bracestack.size == 1 && TopLevelContext === @bracestack.first
end
|
152
|
+
|
153
|
+
#-----------------------------------
|
154
|
+
# Lexes a global variable name ($foo, or a special global such as $-a / $1).
# Returns nil if the next char is not '$'; otherwise the result of lexerror
# applied to a VarNameToken, with an error message when no name follows '$'.
def dollar_identifier(ch=nil)
  name = eat_next_if(?$)
  return nil unless name

  # an ordinary identifier takes priority over the special globals
  rest = identifier_as_string(?$) || special_global

  error = nil
  if rest
    name << rest
  else
    error = "missing $id name"
  end

  lexerror(VarNameToken.new(name), error)
end
|
164
|
+
|
165
|
+
#-----------------------------------
|
166
|
+
# Lexes an @ivar or @@cvar name.
# Returns nil if the next char is not '@'; otherwise the result of lexerror
# applied to a VarNameToken, with an error message when no name follows.
def at_identifier(ch=nil)
  name = eat_next_if(?@)
  return nil unless name

  # optional second '@'
  name << (eat_next_if(?@) || '')

  rest = identifier_as_string(?@)
  error = nil
  if rest
    name << rest
  else
    error = "missing @id name"
  end

  lexerror(VarNameToken.new(name), error)
end
|
175
|
+
|
176
|
+
private
|
177
|
+
#-----------------------------------
|
178
|
+
# Takes over the unfinished here-document tokens of another lexer (rl) by
# appending them to this lexer's own list. rl must have at least one.
# tok is accepted but not used here.
def here_spread_over_ruby_code(rl,tok)
  assert(!rl.incomplete_here_tokens.empty?)
  @incomplete_here_tokens = @incomplete_here_tokens + rl.incomplete_here_tokens
end
|
182
|
+
|
183
|
+
#-----------------------------------
|
184
|
+
# Pushes an ExpectDoOrNlContext for the construct started by keyword st,
# built with a pattern matching do, ';', ':' or newline.
def expect_do_or_end_or_nl!(st)
  terminators = /(do|;|:|\n)/
  @bracestack.push(ExpectDoOrNlContext.new(st, terminators, @linenum))
end
|
187
|
+
|
188
|
+
#-----------------------------------
|
189
|
+
#match NoWstoken, ws, comment, or (escaped?) newline repeatedly
|
190
|
+
# Reads tokens for as long as they are ignorable and returns them as an
# array. The first non-ignorable token is pushed back onto the front of
# @moretokens so the next get1token call sees it again.
def maybe_no_ws_token
  skipped = []
  tok = get1token
  while IgnoreToken === tok
    lexerror(tok, "end of file not expected here") if EoiToken === tok
    skipped << tok
    tok = get1token
  end
  assert(!(IgnoreToken === tok))
  @moretokens.unshift tok
  skipped
end
|
200
|
+
|
201
|
+
#-----------------------------------
|
202
|
+
# characters that can start an ignorable token
WSCHARSET=CharSet["#\\\n\s\t\v\r\f"]
# Collects the run of ignorable tokens at the current position and returns
# them as an array.
# allow_eof:: when false, meeting an EoiToken is reported through lexerror
# If a block is given, every NewlineToken met along the way is yielded to it.
#
# The original wrapped the loop in `if true ... else ... end`; the else arm
# (a CharHandler-based scanner cached in @whsphandler) could never run and
# has been dropped.
def ignored_tokens(allow_eof=false)
  result = []

  # first drain ignorable tokens that were already queued
  result << @moretokens.shift while IgnoreToken === @moretokens.first
  return result unless @moretokens.empty?

  loop do
    if @moretokens.empty?
      # nothing queued: peek at the raw input instead
      break unless WSCHARSET === nextchar
    else
      queued = @moretokens.first
      break unless IgnoreToken === queued or NewlineToken === queued
    end

    tok = get1token
    result << tok
    case tok
    when NewlineToken then block_given? and yield tok
    when EoiToken then allow_eof or lexerror tok,"end of file not expected here(2)"
    when IgnoreToken
    else raise "impossible"
    end
  end

  return result
end
|
241
|
+
|
242
|
+
#-----------------------------------
|
243
|
+
# Runs the block with a fresh, empty @moretokens queue (also passed as the
# block argument), then appends whatever the block left in the queue to the
# end of the previously queued tokens. Returns the block's value.
# Open question kept from the original: should @last_operative_token be
# saved and restored as well?
def safe_recurse
  saved_queue = @moretokens
  @moretokens = []
  outcome = yield(@moretokens)
  # concat mutates saved_queue, so the original array object stays in use
  @moretokens = saved_queue.concat(@moretokens)
  outcome
end
|
254
|
+
|
255
|
+
#-----------------------------------
|
256
|
+
# Handles $-a and friends: the part of a special global name that follows
# the '$' which must already have been consumed.
# Returns the name text, or nil if nothing suitable follows.
def special_global
  assert prevchar=='$'

  # order matters here, but it shouldn't (the digit case must stay last)

  # single punctuation char: $! $@ $& ...
  punct = eat_next_if(/^[!@&+`'=~\/\\,.;<>*"$?:]$/)
  return punct if punct

  # dash plus exactly one more char: $-a
  return "-" + getchar if eat_next_if('-')

  # run of digits: $1 $23
  (?0..?9) === nextchar ? til_charset(/[^\d]/) : nil
end
|
266
|
+
|
267
|
+
#-----------------------------------
|
268
|
+
# Lexes a bare identifier or keyword and returns its first token; any
# further tokens produced along the way are left queued in @moretokens.
def identifier(context=nil)
  start_pos = @file.pos
  name = identifier_as_string(context)

  # keyword processing would be skipped if 'escaped', as it were, by def,
  # . or ::, or in a non-bare context. those cases are only asserted
  # because they are never encountered here:
  # control goes through symbol(<...>,nil)
  assert(/^[a-z_]$/i === context)
  assert !(@last_operative_token === /^(\.|::|(un)?def|alias)$/)

  @moretokens.unshift(*parse_keywords(name, start_pos) {
    # reached only if name is not a keyword (or is a function-like one)
    case name
    when FUNCLIKE_KEYWORDS then nil
    when VARLIKE_KEYWORDS, RUBYKEYWORDS then raise "shouldnt see keywords here, now"
    end
    safe_recurse { |a| var_or_meth_name(name, @last_operative_token, start_pos) }
  })
  @moretokens.shift
end
|
289
|
+
|
290
|
+
#-----------------------------------
|
291
|
+
# Reads an identifier from the input and returns its text, or nil if the
# next char cannot start one.
# context:: the char that introduced the identifier; it decides which of
#           the trailing chars '=', '?' and '!' may be absorbed into the name
def identifier_as_string(context)
  # must begin w/ letter or underscore
  ident = eat_next_if(/^[_a-z]$/i)
  return nil unless ident

  # equals, question mark, and exclamation mark might be allowed at the
  # end in some contexts (in def headers and symbols). otherwise they are
  # to be considered separate tokens.
  # context should always be ?: right after def, ., and :: now
  maybe_eq, maybe_qm, maybe_ex =
    case context
    when ?@, ?$ then [nil, nil, nil]
    when ?: then [?=, ??, ?!]
    else [nil, ??, ?!]
    end

  ident << til_charset(/[^a-z0-9_]/i)

  # look for ?, !, or =, if allowed
  suffix = @file.getc
  case suffix
  when nil
    # at eof. handling nil first keeps suffix from ever matching a nil
    # value of maybe_qm, maybe_ex or maybe_eq
  when maybe_qm
    ident << suffix
  when maybe_ex
    following = (nextchar unless @file.eof?)
    # does ! appear to be part of a larger operator (!=)?
    if following == ?=
      back1char
    else
      ident << suffix
    end
  when maybe_eq
    following = (nextchar unless @file.eof?)
    # does = appear to be part of a larger operator (== =~ =>)?
    if following == ?= or following == ?~ or following == ?>
      back1char
    else
      ident << suffix
    end
  else
    back1char
  end

  ident
end
|
341
|
+
|
342
|
+
#-----------------------------------
|
343
|
+
#contexts in which comma may appear in ruby:
|
344
|
+
#multiple lhs (terminated by assign op)
|
345
|
+
#multiple rhs (in implicit context) (tbd)
|
346
|
+
#method actual param list (in ( or implicit context)
|
347
|
+
#method formal param list (in ( or implicit context)
|
348
|
+
#block formal param list (in | context) (tbd)
|
349
|
+
#hash immediate (in imm{ context)
|
350
|
+
#array immediate (in imm[ context)
|
351
|
+
#element reference/assignment (in [] or []= method actual parameter context)
|
352
|
+
#list after for
|
353
|
+
#list after rescue
|
354
|
+
#list after when
|
355
|
+
#list after undef
|
356
|
+
|
357
|
+
#note: comma in parens not around a param list is illegal
|
358
|
+
|
359
|
+
#-----------------------------------
|
360
|
+
#a comma has been seen. are we in an
|
361
|
+
#lvalue list or some other construct that uses commas?
|
362
|
+
# A comma has been seen: it is taken to belong to an lvalue list exactly
# when the innermost context is not a ListContext.
def comma_in_lvalue_list?
  !(ListContext === @bracestack.last)
end
|
365
|
+
|
366
|
+
#-----------------------------------
|
367
|
+
# Truthy when the next identifier should be taken as a local variable
# being defined: either the @defining_lvar flag (a hack) is set, or the
# innermost context is in a state that introduces variables.
def in_lvar_define_state
  return @defining_lvar if @defining_lvar

  ctx = @bracestack.last
  case ctx
  when ForSMContext then ctx.state == :for
  when RescueSMContext then ctx.state == :arrow
  when BlockParamListContext then true
  end
end
|
375
|
+
|
376
|
+
#-----------------------------------
|
377
|
+
#determine if an alphabetic identifier refers to a variable
|
378
|
+
#or method name. generates implicit parenthes(es) if it is a
|
379
|
+
#call site and no explicit parens are present. starts an implicit param list
|
380
|
+
#if appropriate. adds tok to the
|
381
|
+
#local var table if it's a local var being defined for the first time.
|
382
|
+
|
383
|
+
#note: what we here call variables (rather, constants) following ::
|
384
|
+
#might actually be methods at runtime, but that's immaterial to tokenization.
|
385
|
+
|
386
|
+
#note: this routine should determine the correct token type for name and
|
387
|
+
#create the appropriate token. currently this is not done because callers
|
388
|
+
#typically have done it (perhaps incorrectly) already.
|
389
|
+
#name:    text of the identifier (must be a String)
#lasttok: the token that preceded it; tested against '.' and '::'
#pos:     position handed to the token constructors
#returns an array: the name token first, then any implicit paren tokens,
#then the ignorable tokens that followed the name.
#side effects: sets @last_operative_token, may record name in @localvars,
#may push a ParamListContextNoParen onto @bracestack.
def var_or_meth_name(name,lasttok,pos)
|
390
|
+
#look for call site if not a keyword or keyword is function-like
|
391
|
+
#look for and ignore local variable names
|
392
|
+
|
393
|
+
assert String===name
|
394
|
+
|
395
|
+
#fixme: keywords shouldn't be treated specially after :: and .
|
396
|
+
|
397
|
+
#maybe_local really means 'maybe local or constant'
|
398
|
+
#(left nil when name ends in a non-word char such as ? or !)
maybe_local=case name
|
399
|
+
when /[^a-z_0-9]$/i: #do nothing
|
400
|
+
when /^[a-z_]/: (@localvars===name or VARLIKE_KEYWORDS===name or in_lvar_define_state) and not lasttok===/^(\.|::)$/
|
401
|
+
when /^[A-Z]/: is_const=true;not lasttok==='.' #this is the right algorithm for constants...
|
402
|
+
end
|
403
|
+
|
404
|
+
assert(@moretokens.empty?)
|
405
|
+
|
406
|
+
tok=@last_operative_token=VarNameToken.new(name,pos)
|
407
|
+
|
408
|
+
oldpos=@file.pos
|
409
|
+
sawnl=false
|
410
|
+
result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
|
411
|
+
#a newline or end of input right after the name settles it at once:
#a maybe-local stays a variable, anything else is a call with an
#empty implicit param list
sawnl || @file.eof? and return result.unshift(
|
412
|
+
*if maybe_local : [tok]
|
413
|
+
else [MethNameToken.new(name,pos), #insert implicit parens right after tok
|
414
|
+
ImplicitParamListStartToken.new( oldpos),
|
415
|
+
ImplicitParamListEndToken.new( oldpos) ]
|
416
|
+
end
|
417
|
+
)
|
418
|
+
|
419
|
+
#if next op is assignment (or comma in lvalue list)
|
420
|
+
#then omit implicit parens
|
421
|
+
assignment_coming=case nc=nextchar
|
422
|
+
when ?=: not /^=[=~]$/===readahead(2)
|
423
|
+
when ?,: comma_in_lvalue_list?
|
424
|
+
when ?>,?<: /^([<>])\1=$/===readahead(3)
|
425
|
+
when ?*,?|,?&: /^([*|&])\1?=/===readahead(3)
|
426
|
+
when ?%,?/,?-,?+,?^: readahead(2)[1..1]=='='
|
427
|
+
end
|
428
|
+
if (assignment_coming or in_lvar_define_state)
|
429
|
+
tok=VarNameToken.new(name,pos)
|
430
|
+
if /[^a-z_0-9]$/i===name
|
431
|
+
lexerror tok,"not a valid variable name: #{name}"
|
432
|
+
elsif /^[a-z_]/===name and !(lasttok===/^(\.|::)$/)
|
433
|
+
@localvars[name]=true
|
434
|
+
end
|
435
|
+
return result.unshift(tok)
|
436
|
+
end
|
437
|
+
|
438
|
+
#decide from the next char how many implicit paren tokens to emit:
#0 = none
#1 = start token only; a param list follows, its end token comes later
#2 = start and end token (empty param list), unless name may be a local
#3 = like 1, but only if name can be neither a local nor a constant
implicit_parens_to_emit=case nc
|
439
|
+
when ?!: readahead(2)=='!=' ? 2 : 1
|
440
|
+
when NEVERSTARTPARAMLISTFIRST
|
441
|
+
(NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
|
442
|
+
when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~: 1
|
443
|
+
when ?{: maybe_local=false; 2
|
444
|
+
when ?(: maybe_local=false; 0
|
445
|
+
when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?=: 2
|
446
|
+
when ?+, ?-, ?*, ?&, ?%, ?/, ?:, ??: (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
|
447
|
+
when ?<: (ws_toks.empty? || readahead(3)[/^<<[^"'`a-zA-Z_0-9-]/]) ? 2 : 3
|
448
|
+
when ?[: ws_toks.empty? ? 2 : 3
|
449
|
+
when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?#: raise 'failure'
|
450
|
+
else raise "unknown char after ident: #{nextchar.chr}"
|
451
|
+
end
|
452
|
+
|
453
|
+
#for a constant, case 3 is downgraded to 1 (a param list follows)
implicit_parens_to_emit==3 and is_const and implicit_parens_to_emit=1
|
454
|
+
|
455
|
+
#a maybe-local name with code 2 or 3 becomes a plain variable, no parens
tok=if maybe_local and implicit_parens_to_emit>=2
|
456
|
+
implicit_parens_to_emit=0
|
457
|
+
VarNameToken
|
458
|
+
else
|
459
|
+
MethNameToken
|
460
|
+
end.new(name,pos)
|
461
|
+
|
462
|
+
|
463
|
+
case implicit_parens_to_emit
|
464
|
+
when 2:
|
465
|
+
result.unshift ImplicitParamListStartToken.new(oldpos),
|
466
|
+
ImplicitParamListEndToken.new(oldpos)
|
467
|
+
when 1,3:
|
468
|
+
result.unshift ImplicitParamListStartToken.new(oldpos)
|
469
|
+
@bracestack.push ParamListContextNoParen.new(@linenum)
|
470
|
+
when 0: #do nothing
|
471
|
+
else raise 'invalid value of implicit_parens_to_emit'
|
472
|
+
end
|
473
|
+
return result.unshift(tok)
|
474
|
+
# 'ok:'
|
475
|
+
# 'if unless while until {'
|
476
|
+
# '\n (unescaped) and or'
|
477
|
+
# 'then else elsif rescue ensure (illegal in value context)'
|
478
|
+
|
479
|
+
# 'need to pop noparen from bracestack on these tokens: (in operator context)'
|
480
|
+
# 'not ok:'
|
481
|
+
# 'not (but should it be?)'
|
482
|
+
end
|
483
|
+
|
484
|
+
#-----------------------------------
|
485
|
+
# maps each implicitly opened context class to the token class that closes it
CONTEXT2ENDTOK = {
  AssignmentRhsContext    => AssignmentRhsListEndToken,
  ParamListContextNoParen => ImplicitParamListEndToken,
  KwParamListContext      => KwParamListEndToken
}

# Closes every implicitly opened context on top of @bracestack: pops each
# one and collects its end token. Returns the end tokens, innermost first.
# str:: text of the token that caused the abort; its length is subtracted
#       from the current file position to place the end tokens before it
def abort_noparens!(str='')
  closers = []
  end_class = CONTEXT2ENDTOK[@bracestack.last.class]
  while end_class
    closers << end_class.new(@file.pos - str.length)
    @bracestack.pop
    end_class = CONTEXT2ENDTOK[@bracestack.last.class]
  end
  closers
end
|
498
|
+
|
499
|
+
if false #no longer used
  # Disabled: this definition is never evaluated, so the method does not exist.
  # Pops any AssignmentRhsContexts and then exactly one
  # ParamListContextNoParen, returning the matching end tokens.
  def abort_1_noparen!(offs=0)
    assert @moretokens.empty?
    closers = []
    while AssignmentRhsContext === @bracestack.last
      @bracestack.pop
      closers << AssignmentRhsListEndToken.new(@file.pos - offs)
    end
    unless ParamListContextNoParen === @bracestack.last
      lexerror huh,'{} with no matching callsite'
    end
    @bracestack.pop
    closers << ImplicitParamListEndToken.new(@file.pos - offs)
    closers
  end
end
|
514
|
+
|
515
|
+
#-----------------------------------
|
516
|
+
#parse keywords now, to prevent confusion over bare symbols
|
517
|
+
#and match end with corresponding preceding def or class or whatever.
|
518
|
+
#if arg is not a keyword, the block is called
|
519
|
+
#str:    text of the identifier just read
#offset: position handed to KeywordToken.new
#returns an array of tokens. for most keywords that is the KeywordToken
#for str, preceded by any end tokens from abort_noparens! and followed by
#whatever the def/alias/undef handling reads. for function-like keywords
#and for non-keywords the block's value is returned instead.
#side effects: pushes and pops contexts on @bracestack and opens or
#closes local variable scopes in @localvars.
def parse_keywords(str,offset)
|
520
|
+
assert @moretokens.empty?
|
521
|
+
result=[KeywordToken.new(str,offset)]
|
522
|
+
|
523
|
+
case str
|
524
|
+
#end: close implicit contexts, then pop the matching WantsEndContext
when "end"
|
525
|
+
result.unshift(*abort_noparens!(str))
|
526
|
+
@bracestack.last.see @bracestack,:semi #sorta hacky... should make an :end event instead?
|
527
|
+
|
528
|
+
=begin not needed?
|
529
|
+
if ExpectDoOrNlContext===@bracestack.last
|
530
|
+
@bracestack.pop
|
531
|
+
assert @bracestack.last.starter[/^(while|until|for)$/]
|
532
|
+
end
|
533
|
+
=end
|
534
|
+
|
535
|
+
WantsEndContext===@bracestack.last or lexerror result.last, 'unbalanced end'
|
536
|
+
ctx=@bracestack.pop
|
537
|
+
start,line=ctx.starter,ctx.linenum
|
538
|
+
BEGINWORDS===start or lexerror result.last, "end does not match #{start or "nil"}"
|
539
|
+
#these four openers also opened a local variable scope; close it
/^(class|module|def|do)$/===start and @localvars.end_block
|
540
|
+
|
541
|
+
when "class","module"
|
542
|
+
result.first.has_end!
|
543
|
+
@bracestack.push WantsEndContext.new(str,@linenum)
|
544
|
+
@localvars.start_block
|
545
|
+
|
546
|
+
when "if","unless" #could be infix form without end
|
547
|
+
if after_nonid_op?{false} #prefix form
|
548
|
+
result.first.has_end!
|
549
|
+
@bracestack.push WantsEndContext.new(str,@linenum)
|
550
|
+
|
551
|
+
|
552
|
+
else #infix form
|
553
|
+
result.unshift(*abort_noparens!(str))
|
554
|
+
end
|
555
|
+
when "begin","case"
|
556
|
+
result.first.has_end!
|
557
|
+
@bracestack.push WantsEndContext.new(str,@linenum)
|
558
|
+
when "while","until" #could be infix form without end
|
559
|
+
if after_nonid_op?{false} #prefix form
|
560
|
+
result.first.has_end!
|
561
|
+
@bracestack.push WantsEndContext.new(str,@linenum)
|
562
|
+
expect_do_or_end_or_nl! str
|
563
|
+
|
564
|
+
else #infix form
|
565
|
+
result.unshift(*abort_noparens!(str))
|
566
|
+
end
|
567
|
+
when "for"
|
568
|
+
result.first.has_end!
|
569
|
+
@bracestack.push WantsEndContext.new(str,@linenum)
|
570
|
+
#expect_do_or_end_or_nl! str #handled by ForSMContext now
|
571
|
+
@bracestack.push ForSMContext.new(@linenum)
|
572
|
+
when "do"
|
573
|
+
result.unshift(*abort_noparens!(str))
|
574
|
+
#a pending ExpectDoOrNlContext means this do only ends a loop header
#whose WantsEndContext is already on the stack; otherwise the do
#opens a block of its own
if ExpectDoOrNlContext===@bracestack.last
|
575
|
+
@bracestack.pop
|
576
|
+
assert WantsEndContext===@bracestack.last
|
577
|
+
else
|
578
|
+
result.last.has_end!
|
579
|
+
@bracestack.push WantsEndContext.new(str,@linenum)
|
580
|
+
@localvars.start_block
|
581
|
+
block_param_list_lookahead
|
582
|
+
end
|
583
|
+
#def: read the whole header (receiver, name, parameter list) right here
when "def"
|
584
|
+
result.first.has_end!
|
585
|
+
@bracestack.push WantsEndContext.new("def",@linenum)
|
586
|
+
@localvars.start_block
|
587
|
+
safe_recurse { |aa|
|
588
|
+
@last_operative_token=KeywordToken.new "def" #hack
|
589
|
+
result.concat ignored_tokens
|
590
|
+
|
591
|
+
#read an expr like a.b.c or a::b::c
|
592
|
+
#or (expr).b.c
|
593
|
+
if nextchar==?( #look for optional parenthesised head
|
594
|
+
old_size=@bracestack.size
|
595
|
+
parencount=0
|
596
|
+
begin
|
597
|
+
tok=get1token
|
598
|
+
case tok
|
599
|
+
when/^\($/.token_pat then parencount+=1
|
600
|
+
when/^\)$/.token_pat then parencount-=1
|
601
|
+
end
|
602
|
+
EoiToken===tok and lexerror tok, "eof in def header"
|
603
|
+
result<<tok
|
604
|
+
end until parencount==0 #@bracestack.size==old_size
|
605
|
+
else #no parentheses, all tail
|
606
|
+
@last_operative_token=KeywordToken.new "." #hack hack
|
607
|
+
result << symbol(false,false)
|
608
|
+
#this isn't quite right.... if a.b.c.d is seen, a, b, and c
|
609
|
+
#should be considered maybe varname instead of methnames.
|
610
|
+
#the last (d in the example) is always considered a methname;
|
611
|
+
#it's what's being defined.
|
612
|
+
end
|
613
|
+
#read tail: .b.c.d etc
|
614
|
+
@last_operative_token=result.last
|
615
|
+
#two-state machine: :expect_op after a name, :expect_name after . or ::
state=:expect_op
|
616
|
+
loop do
|
617
|
+
|
618
|
+
#look for start of parameter list
|
619
|
+
nc=(@moretokens.first or nextchar.chr)
|
620
|
+
if state==:expect_op and /^[a-z_(&*]/i===nc
|
621
|
+
result.concat def_param_list
|
622
|
+
break
|
623
|
+
end
|
624
|
+
|
625
|
+
tok=get1token
|
626
|
+
result<<tok
|
627
|
+
case tok
|
628
|
+
when EoiToken
|
629
|
+
lexerror tok,'unexpected eof in def header'
|
630
|
+
when IgnoreToken
|
631
|
+
when MethNameToken #,VarNameToken # /^[a-z_]/i.token_pat
|
632
|
+
lexerror tok,'expected . or ::' unless state==:expect_name
|
633
|
+
state=:expect_op
|
634
|
+
when /^(\.|::)$/.token_pat
|
635
|
+
lexerror tok,'expected ident' unless state==:expect_op
|
636
|
+
state=:expect_name
|
637
|
+
when /^(;|end)$/.token_pat, NewlineToken #are we done with def name?
|
638
|
+
state==:expect_op or lexerror tok,'expected identifier'
|
639
|
+
break
|
640
|
+
else
|
641
|
+
lexerror(tok, "bizarre token in def name: " +
|
642
|
+
"#{tok}:#{tok.class}")
|
643
|
+
end
|
644
|
+
end
|
645
|
+
}
|
646
|
+
#alias: read exactly two symbols (each with an optional leading colon)
when "alias"
|
647
|
+
safe_recurse { |a|
|
648
|
+
@last_operative_token=KeywordToken.new "alias" #hack
|
649
|
+
result.concat ignored_tokens
|
650
|
+
res=symbol(eat_next_if(?:),false)
|
651
|
+
res ? result<<res : lexerror(result.first,"bad symbol in alias")
|
652
|
+
@last_operative_token=KeywordToken.new "alias" #hack
|
653
|
+
result.concat ignored_tokens
|
654
|
+
res=symbol(eat_next_if(?:),false)
|
655
|
+
res ? result<<res : lexerror(result.first,"bad symbol in alias")
|
656
|
+
}
|
657
|
+
#undef: read a comma separated list of symbols; a newline ends the list
when "undef"
|
658
|
+
safe_recurse { |a|
|
659
|
+
loop do
|
660
|
+
@last_operative_token=KeywordToken.new "," #hack
|
661
|
+
result.concat ignored_tokens
|
662
|
+
tok=symbol(eat_next_if(?:),false)
|
663
|
+
tok or lexerror(result.first,"bad symbol in undef")
|
664
|
+
result<< tok
|
665
|
+
@last_operative_token=tok
|
666
|
+
|
667
|
+
sawnl=false
|
668
|
+
result.concat ignored_tokens(true){|nl| sawnl=true}
|
669
|
+
|
670
|
+
break if sawnl or nextchar != ?,
|
671
|
+
tok= single_char_token(?,)
|
672
|
+
result<< tok
|
673
|
+
end
|
674
|
+
}
|
675
|
+
|
676
|
+
# when "defined?"
|
677
|
+
# huh
|
678
|
+
#defined? might have a baresymbol following it
|
679
|
+
#does it need to be handled specially?
|
680
|
+
|
681
|
+
when "when"
|
682
|
+
result.unshift(*abort_noparens!(str))
|
683
|
+
@bracestack.push KwParamListContext.new(str,@linenum)
|
684
|
+
|
685
|
+
when "rescue"
|
686
|
+
result.unshift(*abort_noparens!(str))
|
687
|
+
@bracestack.push RescueSMContext.new(@linenum)
|
688
|
+
|
689
|
+
when "then","in"
|
690
|
+
result.unshift(*abort_noparens!(str))
|
691
|
+
@bracestack.last.see @bracestack,str.to_sym
|
692
|
+
|
693
|
+
when /^(#{BINOPWORDS}|#{INNERBOUNDINGWORDS})$/o
|
694
|
+
result.unshift(*abort_noparens!(str))
|
695
|
+
|
696
|
+
#function-like keywords are handed to the caller's block, like non-keywords
when FUNCLIKE_KEYWORDS: result=yield
|
697
|
+
|
698
|
+
when RUBYKEYWORDS
|
699
|
+
#do nothing
|
700
|
+
|
701
|
+
else result=yield
|
702
|
+
|
703
|
+
end
|
704
|
+
|
705
|
+
return result
|
706
|
+
end
|
707
|
+
|
708
|
+
|
709
|
+
#-----------------------------------
|
710
|
+
# Called right after a block opener: reads ahead over an optional |...|
# block parameter list so the names in it are lexed as local variables
# being defined. Everything read is placed at the front of the fresh token
# queue that safe_recurse provides.
def block_param_list_lookahead
  safe_recurse { |la|
    @last_operative_token = KeywordToken.new ';'
    ahead = ignored_tokens

    if eat_next_if(?|)
      ahead << KeywordToken.new("|", @file.pos-1)
      if eat_next_if(?|)
        # '||' : an empty parameter list
        ahead.concat [NoWsToken.new(@file.pos-1),
                      KeywordToken.new('|', @file.pos-1)]
      else
        assert !@defining_lvar
        @defining_lvar = true
        assert((@last_operative_token===';' or NewlineToken===@last_operative_token))
        @bracestack.push BlockParamListContext.new(@linenum)

        # block param initializers are not supported here, because ruby
        # doesn't allow them!
        begin
          tok = get1token
          lexerror(tok, "eof in block parameter list") if EoiToken === tok
          ahead << tok
        end until tok === '|'

        assert @defining_lvar
        @defining_lvar = false
        unless BlockParamListContext === @bracestack.last
          raise 'expected BlockParamListContext atop @bracestack'
        end
        @bracestack.pop
        fixme(%#moretokens might be set from get1token call above...might be bad#) unless @moretokens.empty?
      end
    end

    @last_operative_token = KeywordToken.new ';'

    # splice everything read so far in front of the queue
    la[0,0] = ahead
  }
end
|
749
|
+
|
750
|
+
#-----------------------------------
|
751
|
+
#handle parameter list of a method declaration.
|
752
|
+
#parentheses are optional... if missing param list
|
753
|
+
#is ended by (unescaped) newline or semicolon (at the same bracing level)
|
754
|
+
#expect a brace as the next token,
|
755
|
+
#then match the following tokens until
|
756
|
+
#the matching endbrace is found
|
757
|
+
#lex the parameter list of a method definition and return its tokens.
#the list is either parenthesized (ended by the matching close paren) or
#bare (ended by a semicolon or newline at the starting brace depth).
def def_param_list
  result=[]
  start_depth=@bracestack.size
  comma_depth=start_depth
  safe_recurse do |a|
    assert(@moretokens.empty?)
    assert((not IgnoreToken===@moretokens[0]))
    assert((@moretokens[0] or not nextchar.chr[WHSPCHARS]))

    tok=nil
    #parenthesized list?
    if '('==@moretokens[0] or nextchar==?(
      #consume the open paren itself
      result.concat(maybe_no_ws_token)
      tok=get1token
      result << tok
      assert(tok==='(')

      #get1token changed the bracestack, so parameter-separating
      #commas are now found one level deeper
      comma_depth+=1
      assert(comma_depth==@bracestack.size)
      at_end=proc{|t| t===')' }
    else
      at_end=proc{|t| t===';' or NewlineToken===t }
    end
    #let the terminator test be used with === as well as []
    class << at_end
      alias === call
    end

    #hack: pretend a comma was just seen so the first token is
    #treated as a parameter name
    @last_operative_token=KeywordToken.new ','
    loop do
      expect_name=(@last_operative_token===',' and
                   comma_depth==@bracestack.size)
      @defining_lvar||=true if expect_name
      tok=get1token
      result << tok
      lexerror tok, "unexpected eof in def header" if EoiToken===tok

      #stop once the terminator is seen back at (or below) the
      #brace depth the list started at
      break if at_end===tok and start_depth>=@bracestack.size

      next unless expect_name

      #a parameter name (possibly preceded by unary & or *) belongs here
      case tok
      when IgnoreToken
        #nothing to do
      when VarNameToken
        assert@defining_lvar
        @defining_lvar=false
        assert((not @last_operative_token===','))
      when /^[&*]$/.token_pat
        #unary form: pick up the NoWsToken too, and keep expecting a name
        result.concat(maybe_no_ws_token)
        @last_operative_token=KeywordToken.new ','
      else
        lexerror tok,"unfamiliar var name '#{tok}'"
      end
    end

    @defining_lvar=false

    assert(@bracestack.size <= start_depth)
    assert(at_end[tok])

    #hack: if what follows could be misread by quote_expected? or
    #after_nonid_op?, force it to look like the start of a new statement
    result.concat(ignored_tokens)
    if nextchar.chr[/[iuw\/<|>+\-*&%?:]/] and
       !(NewlineToken===@last_operative_token) and
       !(/^(end|;)$/===@last_operative_token)
      @last_operative_token=KeywordToken.new ';'
      result << get1token
    end
  end

  result
end
|
836
|
+
|
837
|
+
|
838
|
+
#-----------------------------------
|
839
|
+
#handle % in ruby code. is it part of fancy quote or a modulo operator?
|
840
|
+
#dispatch a % character: fancy_quote when quote_expected? says a literal
#starts here, otherwise biop (the % / %= operator).
def percent(ch)
  return fancy_quote(ch) if quote_expected?(ch)
  biop(ch)
end
|
847
|
+
|
848
|
+
#-----------------------------------
|
849
|
+
#handle * in ruby code. is unary or binary operator?
|
850
|
+
#lex a * or &. in unary position the single character becomes its own
#token (followed by a queued NoWsToken when no whitespace comes next);
#otherwise the work is handed to quadriop.
def star_or_amp(ch)
  assert('*&'[ch])
  return quadriop(ch) unless unary_op_expected?(ch)

  unary=operator_or_methname_token(getchar)
  unless WHSPLF[nextchar.chr]
    @moretokens << NoWsToken.new(@file.pos)
  end
  unary
end
|
863
|
+
|
864
|
+
#-----------------------------------
|
865
|
+
#handle ? in ruby code. is it part of ?..: or a character literal?
|
866
|
+
#lex a ? character. when colon_quote_expected? approves, it starts a
#character literal and a NumberToken is returned; otherwise it is the
#ternary operator and a TernaryContext is pushed.
def char_literal_or_op(ch)
  unless colon_quote_expected?(ch)
    @bracestack.push(TernaryContext.new(@linenum))
    return KeywordToken.new(getchar)
  end

  getchar #skip the ? itself
  NumberToken.new(getchar_maybe_escape)
end
|
875
|
+
|
876
|
+
#-----------------------------------
|
877
|
+
#lex a / character: either the start of a regex literal or the
#division operator (/ or /=).
def regex_or_div(ch)
  #after a method-like name, a slash directly followed by whitespace
  #is always treated as the operator
  starts_regex=after_nonid_op? {
    !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/\s}]
  }
  return regex(ch) if starts_regex

  op=getchar
  op << '=' if eat_next_if(?=)
  operator_or_methname_token(op)
end
|
889
|
+
|
890
|
+
#-----------------------------------
|
891
|
+
#return true if tok corresponds to a variable or constant, false if its for a method, nil for something else
|
892
|
+
#we assume tok is a valid token with a correctly formed name.
|
893
|
+
#...should really be called was_var_name
|
894
|
+
#classify @last_operative_token by the text of its name.
#returns false when the text ends in a non-identifier character; for a
#lowercase/underscore start, whether it is a known local variable or a
#variable-like keyword; for an uppercase start, whether the token is a
#VarNameToken; true when it starts with @, $ or <.
#raises RuntimeError for anything else.
#(uses 'then' rather than the ruby-1.8-only 'when expr:' colon form so
#the method parses on all ruby versions.)
def is_var_name?
  tok=@last_operative_token

  s=tok.to_s
  case s
  when /[^a-z_0-9]$/i then false
  when /^[a-z_]/ then (@localvars===s) || (VARLIKE_KEYWORDS===s)
  when /^[A-Z]/ then VarNameToken===tok
  when /^[@$<]/ then true
  else raise "not var or method name: #{s}"
  end
end
|
906
|
+
|
907
|
+
#-----------------------------------
|
908
|
+
#decide whether a : or ? begins a literal. the next two characters
#must look like the start of one; after that the answer comes from
#after_nonid_op?, counting a non-variable name as a possible call.
def colon_quote_expected?(ch) #yukko hack
  assert ':?'[ch]
  unless readahead(2)[/^(\?[^#{WHSPLF}]|:[$@a-zA-Z_'"`\[*~+\-\/%<=>&|^])$/o]
    return false
  end

  after_nonid_op? { !is_var_name? }
end
|
918
|
+
|
919
|
+
#-----------------------------------
|
920
|
+
#lex a : character. it may begin a symbol literal, be a lone colon
#keyword, or be the first half of the :: scope operator.
def symbol_or_op(ch)
  startpos=@file.pos
  qe=colon_quote_expected?(ch)
  lastchar=prevchar
  eat_next_if(ch) or raise "needed: "+ch

  #symbol literal (including quoted forms like :"foobar", :"[]")
  return symbol(':') if qe

  if eat_next_if(?:)
    #definitely a ::
    colon2=KeywordToken.new('::',startpos)
    lasttok=@last_operative_token
    assert !(String===lasttok)
    if (VarNameToken===lasttok or MethNameToken===lasttok) and
       lasttok===/^[$@a-zA-Z_]/ and !WHSPCHARS[lastchar]
      #directly attached to a preceding name: emit a NoWsToken first
      @moretokens << colon2
      result=NoWsToken.new(startpos)
    else
      result=colon2
    end
    dot_rhs(colon2)
    return result
  end

  #a single colon. cancel implicit contexts...
  @moretokens.push(*abort_noparens!(':'))

  #...and end the ternary context, if any
  @bracestack.last.see @bracestack,:colon

  #(the next two steps should be in the context's see handler)
  @bracestack.pop if TernaryContext===@bracestack.last

  if ExpectDoOrNlContext===@bracestack.last
    @bracestack.pop
    assert @bracestack.last.starter[/^(while|until|for)$/]
  end

  @moretokens.push KeywordToken.new(':',startpos)
  @moretokens.shift
end
|
964
|
+
|
965
|
+
#-----------------------------------
|
966
|
+
#read the name part of a symbol (notbare true; the token then starts one
#character earlier, at the colon) or a bare method name.
#the resulting token is passed through lexerror together with an error
#message, which is nil unless an unexpected character was found.
def symbol(notbare,couldbecallsite=!notbare)
  assert !couldbecallsite
  start=@file.pos
  start-=1 if notbare
  klass=(notbare ? SymbolToken : MethNameToken)
  error=nil

  #operator names take priority
  opmatches=readahead(3)[RUBYSYMOPERATORREX]
  if opmatches
    result=@file.read(opmatches.size)
  else
    result=
      case nc=nextchar
      when ?"
        assert notbare
        double_quote('"')
      when ?'
        assert notbare
        double_quote("'")
      when ?` then @file.read(1)
      when ?@ then at_identifier.to_s
      when ?$ then dollar_identifier.to_s
      when ?_,?a..?z then identifier_as_string(?:)
      when ?A..?Z
        name=identifier_as_string(?:)
        if @last_operative_token==='::'
          assert klass==MethNameToken
          klass=VarNameToken if /[A-Z_0-9]$/i===name
        end
        name
      else
        error="unexpected char starting symbol: #{nc.chr}"
      end
  end

  lexerror(klass.new(result,start),error)
end
|
993
|
+
|
994
|
+
#-----------------------------------
|
995
|
+
#read the name following a . or :: and return [name, start_offset].
#name is nil when the next character cannot start one; in that case a
#lex error is attached to tok_to_errify and @last_operative_token is
#reset to a ';' keyword.
def callsite_symbol(tok_to_errify)
  start=@file.pos

  #operator names take priority
  opmatches=readahead(3)[RUBYSYMOPERATORREX]
  if opmatches
    name=@file.read(opmatches.size)
  else
    name=
      case nc=nextchar
      when ?` then @file.read(1)
      when ?_,?a..?z,?A..?Z then identifier_as_string(?:)
      else
        @last_operative_token=KeywordToken.new(';')
        lexerror(tok_to_errify,"unexpected char starting symbol: #{nc.chr}")
        nil
      end
  end

  [name, start]
end
|
1011
|
+
|
1012
|
+
#-----------------------------------
|
1013
|
+
#lex the header of a here document (<<ID, <<-ID, <<'ID', etc) and
#return a HerePlaceholderToken for it. the body is filled in later, by
#newline, once the end of the header's line has been reached.
def here_header
  @file.read(2)=='<<' or raise "parser insanity"

  dash=eat_next_if(?-)
  quote=eat_next_if( /^['"`]$/)
  if quote
    ender=til_charset(/[#{quote}]/)
    unless quote==getchar
      return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc")
    end
  else
    #an unquoted terminator is treated as if it were double-quoted
    quote='"'
    ender=til_charset(/[^a-zA-Z0-9_]/)
    unless ender.length >= 1
      return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "invalid here header")
    end
  end

  res=HerePlaceholderToken.new( dash, quote, ender )
  @incomplete_here_tokens.push res

  #hack: normally get1token would do this, but the get1token call
  #below is a recursion, so it has to be fixed up here.
  @last_operative_token=res

  safe_recurse do |a|
    assert(a.object_id==@moretokens.object_id)
    toks=[]
    begin
      #yech. handle a here header inside a string inclusion whose
      #body lies outside it: count the contexts that are not one of
      #the three ignorable kinds
      cnt=0
      1.upto(@bracestack.size) do |i|
        ctx=@bracestack[-i]
        ignorable=[AssignmentRhsContext,ParamListContextNoParen,TopLevelContext].any?{|k| k===ctx }
        cnt+=1 unless ignorable
      end
      if nextchar==?} and cnt==1
        res.bodyclass=OutlinedHereBodyToken
        break
      end

      tok=get1token
      assert(a.object_id==@moretokens.object_id)
      toks << tok
      lexerror tok, "here body expected before eof" if EoiToken===tok
    end while res.unsafe_to_use
    assert(a.object_id==@moretokens.object_id)
    a[0,0]= toks #same as a=toks+a, but keeps a's id
  end

  res
end
|
1071
|
+
|
1072
|
+
#-----------------------------------
|
1073
|
+
#lex a < character: a here document header, the << operator, the
#<=> operator, or whatever quadriop makes of it.
def lessthan(ch)
  ahead=readahead(3)
  if /^<<['"`\-a-z0-9_]$/i===ahead
    return here_header if quote_expected?(ch)
    operator_or_methname_token(@file.read(2))
  elsif "<=>"===ahead
    operator_or_methname_token(@file.read(3))
  else
    quadriop(ch)
  end
end
|
1085
|
+
|
1086
|
+
#-----------------------------------
|
1087
|
+
#lex a backslash, which is expected to be followed by a newline.
#returns an EscNlToken passed through lexerror; the error message is
#nil when a newline did follow.
def escnewline(ch)
  assert ch == '\\'

  startpos=@file.pos
  text=getchar
  error=nil
  nl=readnl
  if nl
    text+=nl
  else
    error='illegal escape sequence'
  end
  lexerror(EscNlToken.new(@filename,@linenum,text,startpos), error)
end
|
1099
|
+
|
1100
|
+
#-----------------------------------
|
1101
|
+
#lex a newline. if a here document header is still waiting for its
#body, the body is read now and returned as a token. otherwise the
#newline either ends the statement or is treated as implicitly escaped.
def newline(ch)
  assert("\r\n"[nextchar.chr])

  #handle here bodies queued up by previous line
  #(we should be more compatible with dos/mac style newlines...)
  pending=@incomplete_here_tokens.shift
  if pending
    pending.string.offset=@file.pos
    loop do
      assert("\r\n"[nextchar.chr])

      #fetch everything up to the next newline
      #(all_quote is being given regexes as its delimiters here)
      line=all_quote(/^[\r\n]$/, pending.quote, /^[\r\n]$/, :regex_esc_seq)

      #discard the FileAndLineToken that would otherwise appear in the
      #middle of the string (and be emitted out of order)
      fal=@moretokens.pop
      assert FileAndLineToken===fal || fal.nil?

      back1char
      assert("\r\n"[nextchar.chr])

      #is this the terminator line?
      break if line.elems.size==1 and
               line.elems[0][pending.termex]

      pending.append_token line
      pending.append readnl
      back1char
    end

    assert("\r\n"[nextchar.chr])
    pending.unsafe_to_use=false

    return pending.bodyclass.new(pending)
  end

  #ordinary newline handling (possibly implicitly escaped)
  assert("\r\n"[nextchar.chr])
  assert @moretokens.empty?

  #hack-o-rama: there are probably cases missing from this test
  ends_statement=(NewlineToken===@last_operative_token or
    @last_operative_token===/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or
    !after_nonid_op?{false})

  if ends_statement
    a=abort_noparens!
    @bracestack.pop if ExpectDoOrNlContext===@bracestack.last
    @bracestack.last.see @bracestack,:semi

    a << super(ch)
    @moretokens.replace a+@moretokens
    result=@moretokens.shift
  else
    offset=@file.pos
    result=EscNlToken.new(@filename,@linenum,readnl,offset)
  end

  start_of_line_directives

  result
end
|
1163
|
+
|
1164
|
+
#-----------------------------------
|
1165
|
+
EQBEGIN=%r/^=begin[^a-zA-Z_0-9]$/
|
1166
|
+
EQBEGINLENGTH=7
|
1167
|
+
EQEND='=end'
|
1168
|
+
ENDMARKER=/^__END__[\r\n]$/
|
1169
|
+
ENDMARKERLENGTH=8
|
1170
|
+
#handle the directives that are only recognized at the start of a
#line: =begin...=end block comments and the __END__ marker.
#each block comment is queued on @moretokens as an IgnoreToken;
#__END__ queues the end-of-file token and moves to the end of the file.
def start_of_line_directives
  #handle =begin...=end (at start of a line)
  while EQBEGIN===readahead(EQBEGINLENGTH)
    startpos=@file.pos
    more=@file.read(EQBEGINLENGTH-1) #get =begin

    #keep reading til /\n=end.*\n/
    @file.each(EQEND) {|cblock|
      more << cblock
      #must be at start of line
      break if /^[\r\n]#{EQEND}/o===readback(EQEND.length+1)
    }
    #read rest of line after =end
    more << @file.til_charset(/[\r\n]/)
    assert((?\r===nextchar or ?\n===nextchar))
    assert !(/[\r\n]/===more[-1,1])

    #keep the line counter in step with the newlines just swallowed
    newls= more.scan(/\r\n?|\n\r?/)
    @linenum+= newls.size

    #inject the fresh comment into future token results
    @moretokens.push IgnoreToken.new(more,startpos)
  end

  #handle __END__
  if ENDMARKER===readahead(ENDMARKERLENGTH)
    assert !(ImplicitContext===@bracestack.last)
    #read the whole marker: ENDMARKERLENGTH counts the trailing newline,
    #so the marker text itself is one character shorter
    #(a literal 6 here dropped the final underscore of __END__)
    @moretokens.unshift endoffile_detected(@file.read(ENDMARKERLENGTH-1))
    @file.pos=@file.stat.size
  end
end
|
1201
|
+
|
1202
|
+
|
1203
|
+
|
1204
|
+
#-----------------------------------
|
1205
|
+
#used to resolve the ambiguity of
|
1206
|
+
# unary ops (+, -, *, &, ~ !) in ruby
|
1207
|
+
#returns whether current token is to be the start of a literal
|
1208
|
+
IDBEGINCHAR=/^[a-zA-Z_$@]/
|
1209
|
+
#decide whether the operator character at the current position is in
#unary position. never so when the following character is *, & or =.
def unary_op_expected?(ch) #yukko hack
  return false if '*&='[readahead(2)[1..1]]

  #after a name that is not a variable (a possible function call), the
  #operator counts as unary only when preceded by whitespace
  after_nonid_op? { !is_var_name? and WHSPLF[prevchar] }
end
|
1219
|
+
|
1220
|
+
#-----------------------------------
|
1221
|
+
#used to resolve the ambiguity of
|
1222
|
+
# <<, %, ? in ruby
|
1223
|
+
#returns whether current token is to be the start of a literal
|
1224
|
+
#/ is not handled right here if whitespace immediately follows the /
|
1225
|
+
#decide whether ch (one of ?, % or <) starts a literal rather than an
#operator. a quick look at the following characters can rule a literal
#out; otherwise the answer comes from after_nonid_op?.
#raises RuntimeError if ch starts with any other character.
def quote_expected?(ch) #yukko hack
  ruled_out=
    case ch[0]
    when ?? then readahead(2)[/^\?[#{WHSPLF}]$/o] #not needed?
    when ?% then readahead(3)[/^%([a-ps-vyzA-PR-VX-Z]|[QqrwWx][a-zA-Z0-9])/]
    when ?< then !readahead(4)[/^<<-?['"`a-z0-9_]/i]
    #double quotes, so that the offending character really is
    #interpolated into the message
    else raise "unexpected ch (#{ch}) in quote_expected?"
    end
  return false if ruled_out

  after_nonid_op? {
    #possible func-call as operator: needs whitespace before the
    #character and none right after it
    not is_var_name? and
      WHSPLF[prevchar] and not WHSPLF[readahead(2)[1..1]]
  }
end
|
1241
|
+
|
1242
|
+
#-----------------------------------
|
1243
|
+
#used to resolve the ambiguity of
|
1244
|
+
# <<, %, /, ?, :, and newline in ruby
|
1245
|
+
#classify @last_operative_token to help resolve characters whose
#meaning depends on what came before (<<, %, /, ?, : and newline).
#after a method name, variable name or function-like keyword the
#caller's block decides; after literals and the listed keywords and
#closing brackets the result is false; after operators, inner bounding
#words, newlines, start of file and the listed keywords and opening
#brackets it is true. raises for anything else.
def after_nonid_op?
  case @last_operative_token
  when MethNameToken,VarNameToken, FUNCLIKE_KEYWORDS.token_pat
    yield
  when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
       %r{^(class|module|do|end|self|true|false|nil|
       __FILE__|__LINE__|[\})\]]|alias|(un)?def|for
       )$}x.token_pat
    #do shouldn't be in above list... dunno about def/undef
    #maybe class/module shouldn't either?
    #for is also in the NewlineToken branch, below.
    #what about rescue?
    false
  when /^(#{RUBYOPERATORREX}|#{INNERBOUNDINGWORDS})$/o.token_pat
    #the regexes above must match the whole string
    true
  when NewlineToken, nil, #nil means we're still at beginning of file
       /^([({\[]|or|not|and|if|unless|then|elsif|else|
       while|until|begin|for|in|case|when|ensure)$
       /x.token_pat
    true
  when IgnoreToken
    raise "last_operative_token shouldn't be ignoreable"
  else
    raise "after_nonid_op? after #{@last_operative_token}:#{@last_operative_token.class} -- now what"
  end
end
|
1275
|
+
|
1276
|
+
#-----------------------------------
|
1277
|
+
#lex an operator that has up to four forms, shown here for & :
#&, &&, &= and &&=. ch must be one of & * | < >.
def quadriop(ch)
  assert(%w[& * | < >].include?(ch))
  op=getchar
  op+=(eat_next_if(ch) or '') #optional doubled character
  eat_next_if(?=) and op << ?=
  operator_or_methname_token(op)
end
|
1288
|
+
|
1289
|
+
#-----------------------------------
|
1290
|
+
#lex an operator that has two forms: the character alone or followed
#by = (for example % and %=). ch must be one of % ^ ~.
def biop(ch)
  assert(ch[/^[%^~]$/])
  op=getchar
  eat_next_if(?=) and op << ?=
  operator_or_methname_token(op)
end
|
1298
|
+
|
1299
|
+
#-----------------------------------
|
1300
|
+
#lex a ~ character as a one-character operator token. when it is not
#followed by whitespace a NoWsToken is queued after it.
def tilde(ch)
  assert(ch=='~')
  op=getchar
  #why is the NoWsToken necessary at this point?
  unless WHSPLF[nextchar.chr]
    @moretokens << NoWsToken.new(@file.pos)
  end
  operator_or_methname_token(op)
end
|
1311
|
+
|
1312
|
+
#-----------------------------------
|
1313
|
+
#is the last operative token a keyword after which a method name
#(possibly an operator) is expected: alias, def, undef, . or :: ?
def want_op_name
  return false unless KeywordToken===@last_operative_token
  @last_operative_token===/^(alias|(un)?def|\.|::)$/
end
|
1317
|
+
|
1318
|
+
#-----------------------------------
|
1319
|
+
#match /[+\-]=?/ (+ or +=)
|
1320
|
+
#could be beginning of number, too
|
1321
|
+
#fixme: handle +@ and -@ here as well... (currently, this is done in symbol()?)
|
1322
|
+
#lex a + or - character: the sign of a numeric literal, a unary
#operator, or a binary operator (optionally followed by =).
def plusminus(ch)
  assert(/^[+\-]$/===ch)

  unless unary_op_expected?(ch)
    #binary operator
    assert(! want_op_name)
    op=getchar
    eat_next_if(?=) and op << ?=
    return operator_or_methname_token(op)
  end

  #a digit right after the sign makes it part of a number
  return number(ch) if (?0..?9)===readahead(2)[1]

  #unary operator
  op=getchar
  unless WHSPLF[nextchar.chr]
    @moretokens << NoWsToken.new(@file.pos)
  end
  #todo: result should distinguish unary+binary +-
  operator_or_methname_token(op)
end
|
1344
|
+
|
1345
|
+
#-----------------------------------
|
1346
|
+
#lex an = character and whatever extends it: = == === =~ or =>.
#a => is reported to the current context (see ...,:arrow); a plain =
#pushes an AssignmentRhsContext and queues an AssignmentRhsListStartToken.
#(uses 'then' rather than the ruby-1.8-only 'when expr:' colon form so
#the method parses on all ruby versions.)
def equals(ch)
  offset=@file.pos
  str=getchar
  assert str=='='
  c=(eat_next_if(/^[~=>]$/)or'')
  str << c
  case c
  when '=' then str << (eat_next_if(?=) or '') #a third = makes ===
  when '>' then @bracestack.last.see @bracestack,:arrow
  when ''
    #plain assignment: what follows is its right hand side
    @bracestack.push AssignmentRhsContext.new(@linenum)
    @moretokens.unshift AssignmentRhsListStartToken.new( offset+1)
  end
  return operator_or_methname_token( str,offset)
end
|
1363
|
+
|
1364
|
+
#-----------------------------------
|
1365
|
+
#lex a ! character: ! != or !~. a bare ! that is not followed by
#whitespace gets a NoWsToken queued after it.
def exclam(ch)
  assert nextchar==?!
  op=getchar
  suffix=eat_next_if(/^[~=]$/)
  if suffix
    op+=suffix
  elsif !WHSPLF[nextchar.chr]
    @moretokens << NoWsToken.new(@file.pos)
  end
  #result should distinguish unary !
  KeywordToken.new(op)
end
|
1378
|
+
|
1379
|
+
#-----------------------------------
|
1380
|
+
#lex a . character: the range operators .. and ..., or a single dot,
#whose right hand side (the name after it) is then lexed by dot_rhs.
def dot(ch)
  eat_next_if(?.) or raise "lexer confusion"

  #three lumps of sugar or two?
  if eat_next_if(?.)
    return KeywordToken.new(eat_next_if(?.) ? "..." : "..")
  end

  #just a single . ; match a valid ruby id after it
  single=KeywordToken.new(".")
  dot_rhs(single)
  single
end
|
1394
|
+
|
1395
|
+
#-----------------------------------
|
1396
|
+
#lex what follows a . or :: (prevtok): any ignorable tokens and then
#the name, if there is one. the tokens are put at the front of the
#token queue handed over by safe_recurse.
def dot_rhs(prevtok)
  safe_recurse do |a|
    @last_operative_token=prevtok
    aa=ignored_tokens
    tok,pos=callsite_symbol(prevtok)
    aa.push(*var_or_meth_name(tok,prevtok,pos)) if tok
    a.unshift(*aa)
  end
end
|
1405
|
+
|
1406
|
+
#-----------------------------------
|
1407
|
+
#a single-quoted string is lexed by the same routine as a
#double-quoted one.
def single_quote(ch=nil)
  return double_quote(ch)
end
|
1410
|
+
|
1411
|
+
#-----------------------------------
|
1412
|
+
#lex a ` character. right after def, :: or . it is a method name;
#anywhere else it starts a string handled by double_quote.
def back_quote(ch=nil)
  oldpos=@file.pos
  if @last_operative_token===/^(def|::|\.)$/
    tick=(eat_next_if(?`) or raise "insanity")
    return MethNameToken.new(tick, oldpos)
  end
  double_quote(ch)
end
|
1419
|
+
|
1420
|
+
#-----------------------------------
|
1421
|
+
#lex a comment by way of the superclass's comment lexer and return it
#as an IgnoreToken.
def comment(str)
  text=""
  text << super(nil).to_s

  #if the comment was ended by a crunch (#): that's not a legal comment
  #end in ruby, so drop it from the text and back up one char in the
  #input so that the # is seen again on the next go round.
  #this hack makes the ruma comment lexer work with ruby too.
  if /^\#.*\#$/===text
    assert(text.to_s[-1]==?#)
    text.chomp! '#'

    back1char

    assert nextchar==?#
  end

  IgnoreToken.new(text)
end
|
1444
|
+
|
1445
|
+
#-----------------------------------
|
1446
|
+
#lex an opening ( [ or { and push the matching context on @bracestack.
#returns the brace's KeywordToken, or a NoWsToken (with the brace
#queued behind it) when a [ or ( directly follows a name.
def open_brace(ch)
  assert((ch!='[' or !want_op_name))
  assert(@moretokens.empty?)
  lastchar=prevchar
  ch=eat_next_if(/^[({\[]$/)or raise "lexer confusion"
  tokch=KeywordToken.new(ch,@file.pos-1)

  #maybe emitting of NoWsToken can be moved into var_or_meth_name ??
  case tokch.ident
  when '['
    #fixme: in contexts expecting an (operator) method name, we
    #  should match [] or []= at this point
    @bracestack.push ListImmedContext.new(ch,@linenum)
    lasttok=last_operative_token
    #could be: lasttok===/^[a-z_]/i
    after_name=(VarNameToken===lasttok or MethNameToken===lasttok or
                lasttok===FUNCLIKE_KEYWORDS)
    if after_name and !WHSPCHARS[lastchar]
      @moretokens << tokch
      tokch=NoWsToken.new(@file.pos-1)
    end

  when '('
    lasttok=last_operative_token
    #could be: lasttok===/^[a-z_]/i
    after_name=(VarNameToken===lasttok or MethNameToken===lasttok or
                lasttok===FUNCLIKE_KEYWORDS)
    if after_name
      unless WHSPCHARS[lastchar]
        @moretokens << tokch
        tokch=NoWsToken.new(@file.pos-1)
      end
      @bracestack.push ParamListContext.new(@linenum)
    else
      @bracestack.push ParenContext.new(@linenum)
    end

  when '{'
    #check if we are in a hash literal or string inclusion (#{}),
    #in which case starting a block would be bad.
    if after_nonid_op?{false}
      @bracestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
    else
      #(code that ended an implicit callsite context at this point
      #used to live here, disabled as "not needed now, i think")
      @localvars.start_block
      @bracestack.push BlockContext.new(@linenum)
      block_param_list_lookahead
    end
  end

  tokch
end
|
1502
|
+
|
1503
|
+
#-----------------------------------
|
1504
|
+
#lex a closing ) ] or } : end any implicit contexts, pop the context
#that the brace closes and return the first token queued as a result.
#a brace with nothing to match, or one that does not pair with its
#opener, is reported through lexerror.
def close_brace(ch)
  ch==eat_next_if(/[)}\]]/) or raise "lexer confusion"
  @moretokens.concat abort_noparens!(ch)
  kw=KeywordToken.new( ch,@file.pos-1)
  @moretokens << kw

  #hack. guarded, because with an empty stack .last is nil and the
  #call would raise before the unmatched brace could be reported
  @bracestack.empty? or @bracestack.last.see @bracestack,:semi
  if @bracestack.empty?
    lexerror kw,"unmatched brace: #{ch}"
    return @moretokens.shift
  end

  ctx=@bracestack.pop
  origch,line=ctx.starter,ctx.linenum
  ch==PAIRS[origch] or
    lexerror kw,"mismatched braces: #{origch}#{ch}\n" +
      "matching brace location", @filename, line
  BlockContext===ctx and @localvars.end_block
  if ParamListContext==ctx.class
    assert ch==')'
    #kw.set_callsite! #not needed?
  end
  return @moretokens.shift
end
|
1525
|
+
|
1526
|
+
#-----------------------------------
|
1527
|
+
#lex a nul character, which is taken to mark the end of the file.
#a nul anywhere but at the very end is reported through lexerror.
def eof(ch=nil)
  #this must be the very last character...
  oldpos=@file.pos
  assert(?\0==@file.getc)

  trailing=ignored_tokens(true).delete_if{|t| FileAndLineToken===t }
  text="\0#{trailing}"

  unless @file.pos==@file.stat.size
    lexerror text,'nul character is not at the end of file'
  end
  @file.pos=@file.stat.size
  endoffile_detected(text)
end
|
1539
|
+
|
1540
|
+
#-----------------------------------
|
1541
|
+
#queue the end-of-file token made by the superclass, preceded by
#whatever abort_noparens! returns, and return the first token in the
#queue. braces still open at this point are reported through lexerror.
def endoffile_detected(s='')
  closers=abort_noparens!
  closers.push(super(s))
  @moretokens.push(*closers)

  first=@moretokens.shift
  unless balanced_braces?
    lexerror first,"unbalanced braces at eof. bracestack=#{@bracestack.inspect}"
  end
  first
end
|
1547
|
+
|
1548
|
+
#-----------------------------------
|
1549
|
+
#wrap the superclass's single character result in a KeywordToken
#whose offset is the position just before the current one.
def single_char_token(ch)
  text=super(ch)
  KeywordToken.new(text, @file.pos-1)
end
|
1552
|
+
|
1553
|
+
#-----------------------------------
|
1554
|
+
#a comma is lexed as an ordinary single character token.
def comma(ch)
  return single_char_token(ch)
end
|
1557
|
+
|
1558
|
+
#-----------------------------------
|
1559
|
+
#lex a semicolon: queue whatever abort_noparens! returns, tell the
#current context that a statement ended, then queue the ; token itself
#and return the first token in the queue.
def semicolon(ch)
  assert @moretokens.empty?
  @moretokens.push(*abort_noparens!)
  @bracestack.last.see @bracestack,:semi

  #(this should be in the context's see :semi handler)
  if ExpectDoOrNlContext===@bracestack.last
    @bracestack.pop
    assert @bracestack.last.starter[/^(while|until|for)$/]
  end

  @moretokens.push(single_char_token(ch))
  @moretokens.shift
end
|
1570
|
+
|
1571
|
+
#-----------------------------------
|
1572
|
+
#build the token for operator text s: a KeywordToken if s matches
#RUBYNONSYMOPERATORREX, a MethNameToken right after . :: def undef
#alias or defined?, and an OperatorToken otherwise.
def operator_or_methname_token(s,offset=nil)
  assert RUBYOPERATORREX===s
  klass=
    if RUBYNONSYMOPERATORREX===s
      KeywordToken
    elsif @last_operative_token===/^(\.|::|def|undef|alias|defined\?)$/
      MethNameToken
    else
      OperatorToken
    end
  klass.new(s,offset)
end
|
1582
|
+
|
1583
|
+
#-----------------------------------
|
1584
|
+
#tokenify_results_of :identifier
|
1585
|
+
save_offsets_in(*CHARMAPPINGS.values.uniq-[:symbol_or_op,:open_brace,:whitespace])
|
1586
|
+
#save_offsets_in :symbol
|
1587
|
+
|
1588
|
+
end
|
1589
|
+
|