rubylexer 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
data/rlold.rb ADDED
@@ -0,0 +1,12 @@
1
#lexer variant handed out by RubyLexer.version for versions 0.0..0.6.
#the only difference visible here: lexerror is routed to lexerror_exception.
class RubyLexer06 < RubyLexer
  alias_method :lexerror, :lexerror_exception
end
4
+
5
class << RubyLexer
  #pick the lexer class that matches a requested version number.
  #num:: numeric version, e.g. 0.6
  #returns RubyLexer06 when num lies in 0.0..0.6, RubyLexer otherwise.
  def version(num)
    #'when x then' is used because the older 'when x:' spelling
    #is a syntax error on ruby 1.9 and later
    case num
    when 0.0..0.6 then RubyLexer06
    else RubyLexer
    end
  end
end
data/rubycode.rb ADDED
@@ -0,0 +1,44 @@
1
+ =begin copyright
2
+ rubylexer - a ruby lexer written in ruby
3
+ Copyright (C) 2004,2005 Caleb Clausen
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+
21
+
22
+ require "token.rb"
23
+ require "tokenprinter.rb"
24
+
25
#a token that wraps a list of tokens, remembering the
#file name and line number passed to the constructor.
class RubyCode < Token
  #tokens:: forwarded unchanged to Token#initialize
  #filename, linenum:: stored in @filename and @linenum
  def initialize(tokens,filename,linenum)
    super(tokens)
    @filename=filename
    @linenum=linenum
  end

  #NOTE(review): looks unfinished -- 'huh' reads like a placeholder; args is unused
  def [](*args)
    exec? ident.huh
  end

  #render every element of ident through a KeepWsTokenPrinter
  #and return the pieces concatenated into one string.
  def to_s()
    result=[]
    keepwsprinter=KeepWsTokenPrinter.new('',@linenum)
    ident.each{|tok| result << keepwsprinter.sprint(tok) }
    #join, not Array#to_s: on ruby 1.9+ Array#to_s returns the
    #inspect form ('["a", "b"]') instead of the concatenation
    return result.join
  end
end
43
+
44
+
data/rubylexer.rb ADDED
@@ -0,0 +1,1589 @@
1
+ =begin copyright
2
+ rubylexer - a ruby lexer written in ruby
3
+ Copyright (C) 2004,2005 Caleb Clausen
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+
21
+
22
+ require "rulexer"
23
+ require "symboltable"
24
+ require "io.each_til_charset"
25
+ require "context.rb"
26
+
27
+
28
+
29
+ #-----------------------------------
30
+ class RubyLexer < RuLexer
31
+ include NestedContexts
32
+
33
#the overridable operators. (nasty beastie, eh?)
#does not match flow-control operators like: || && ! or and if not
#or op= ops like: += -= ||=
#or .. ... ?:
RUBYSYMOPERATORREX =
  %r{^([&|^/%~]|=(==?|~)|>[=>]?|<(<|=>?)?|[+\-]@?|\*\*?|\[\]=?)}

#for the operators left out above, use:
RUBYNONSYMOPERATORREX =
  %r{^([%^~/\-+]=|(\|\|?|&&?)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|=>?|![=~]?)$}

#either kind of operator
RUBYOPERATORREX = /#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o

UNSYMOPS = /^[~!]$/            #always unary
UBSYMOPS = /^([*&+-]|::)$/     #ops that could be unary or binary
WHSPCHARS = WHSPLF+"\\#"

#keyword groups; the string-valued ones are interpolated into the regexps below
OPORBEGINWORDS = "(if|unless|while|until)"
BEGINWORDS = /^(def|class|module|begin|for|case|do|#{OPORBEGINWORDS})$/o
FUNCLIKE_KEYWORDS = /^(break|next|redo|return|raise|yield|defined\?|retry|super|BEGIN|END)$/
VARLIKE_KEYWORDS = /^(__FILE__|__LINE__|false|nil|self|true)$/
INNERBOUNDINGWORDS = "(else|elsif|ensure|in|then|rescue|when)"
BINOPWORDS = "(and|or)"

NEVERSTARTPARAMLISTWORDS = /^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)[^a-zA-Z0-9_!?=]?/o
NEVERSTARTPARAMLISTFIRST = CharSet[%[aoeitrwu]] #char set that begins NEVERSTARTPARAMLIST
NEVERSTARTPARAMLISTMAXLEN = 7                   #max len of a NEVERSTARTPARAMLIST

RUBYKEYWORDS = %r{
  ^(alias|#{BINOPWORDS}|not|undef|__END__|end|
    #{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
    #{INNERBOUNDINGWORDS}|#{BEGINWORDS}
  )$
}xo

#first character of a token => name of the method that lexes it.
#(handed to CharHandler.new in initialize)
CHARMAPPINGS = {
  ?$ => :dollar_identifier,
  ?@ => :at_identifier,
  ?a..?z => :identifier,
  ?A..?Z => :identifier,
  ?_ => :identifier,
  ?0..?9 => :number,
  ?" => :double_quote,
  ?' => :single_quote,
  ?` => :back_quote,

  WHSP => :whitespace, #includes \r
  ?, => :comma,
  ?; => :semicolon,

  ?^ => :biop,
  ?~ => :tilde,
  ?= => :equals,
  ?! => :exclam,
  ?. => :dot,

  #these ones could signal either an op or a term
  ?/ => :regex_or_div,
  "|>" => :quadriop,
  "*&" => :star_or_amp,      #could be unary
  "+-" => :plusminus,        #could be unary
  ?< => :lessthan,
  ?% => :percent,
  ?? => :char_literal_or_op, #single-char int literal
  ?: => :symbol_or_op,
  ?\n => :newline,           #implicitly escaped after op
  #?\r => :newline,          #implicitly escaped after op

  ?\\ => :escnewline,
  ?\0 => :eof,

  "[({" => :open_brace,
  "])}" => :close_brace,

  ?# => :comment
}

#read-only access to @incomplete_here_tokens
attr_reader :incomplete_here_tokens
107
+
108
+
109
+ #-----------------------------------
110
#set up lexer state, then run start_of_line_directives.
#filename, file, linenum:: passed straight through to the superclass;
#linenum (default 1) is also remembered as @start_linenum.
def initialize(filename, file, linenum=1)
  super(filename, file, linenum)

  @start_linenum = linenum
  @bracestack = [TopLevelContext.new]   #context stack starts with a single top-level context
  @incomplete_here_tokens = []
  @localvars = SymbolTable.new
  @defining_lvar = nil

  #dispatch table: CHARMAPPINGS entries, with :illegal_char as the other handler name given
  @toptable = CharHandler.new(self, :illegal_char, CHARMAPPINGS)

  start_of_line_directives
end
122
+
123
+ #-----------------------------------
124
#fetch one token from the superclass (most of the action's there),
#then do cleanup and housekeeping:
#* IgnoreTokens are returned untouched
#* any other Token is recorded in @last_operative_token and returned
#* anything that is not a Token raises a RuntimeError
def get1token
  tok = super

  return tok if IgnoreToken === tok

  #check for bizarre token types
  unless Token === tok
    raise "#{@filename}:#{linenum}:token is a #{tok.class}, last is #{@last_operative_token}"
  end

  @last_operative_token = tok
  tok
end
143
+
144
+
145
+
146
+ #-----------------------------------
147
#true when the only thing on @bracestack is a TopLevelContext
def balanced_braces?
  @bracestack.size == 1 && TopLevelContext === @bracestack.first
end
152
+
153
+ #-----------------------------------
154
#lex a $-prefixed name.
#returns nil when the next char is not '$'; otherwise returns whatever
#lexerror gives back for a VarNameToken built from the text read,
#with the message "missing $id name" when no name follows the '$'.
def dollar_identifier(ch=nil)
  name = eat_next_if(?$)
  return nil unless name

  error = nil
  rest = (identifier_as_string(?$) or special_global)
  if rest
    name << rest
  else
    error = "missing $id name"
  end

  lexerror(VarNameToken.new(name), error)
end
164
+
165
+ #-----------------------------------
166
#lex an @- or @@-prefixed name.
#returns nil when the next char is not '@'; otherwise returns whatever
#lexerror gives back for a VarNameToken built from the text read,
#with the message "missing @id name" when no name follows the prefix.
def at_identifier(ch=nil)
  name = eat_next_if(?@)
  return nil unless name

  #optional second @
  name << (eat_next_if(?@) || '')

  error = nil
  rest = identifier_as_string(?@)
  if rest
    name << rest
  else
    error = "missing @id name"
  end

  lexerror(VarNameToken.new(name), error)
end
175
+
176
private

#append the incomplete here-document tokens of another lexer (rl)
#to this lexer's own list. rl must have at least one. tok is unused here.
def here_spread_over_ruby_code(rl, tok)
  assert(!rl.incomplete_here_tokens.empty?)
  @incomplete_here_tokens = @incomplete_here_tokens + rl.incomplete_here_tokens
end
182
+
183
+ #-----------------------------------
184
#push an ExpectDoOrNlContext for starter st onto @bracestack
def expect_do_or_end_or_nl!(st)
  ctx = ExpectDoOrNlContext.new(st, /(do|;|:|\n)/, @linenum)
  @bracestack.push ctx
end
187
+
188
+ #-----------------------------------
189
#match NoWstoken, ws, comment, or (escaped?) newline repeatedly.
#returns the IgnoreTokens read; the first token that is not an
#IgnoreToken is pushed back onto the front of @moretokens.
def maybe_no_ws_token
  skipped = []
  tok = get1token
  while IgnoreToken === tok
    lexerror tok, "end of file not expected here" if EoiToken === tok
    skipped << tok
    tok = get1token
  end
  assert(!(IgnoreToken === tok))
  @moretokens.unshift tok
  skipped
end
200
+
201
+ #-----------------------------------
202
#characters tested against nextchar before reading another ignorable token
WSCHARSET=CharSet["#\\\n\s\t\v\r\f"]

#collect and return the ignorable tokens coming up next.
#allow_eof:: when false, an EoiToken in the run is reported through lexerror
#if a block is given it is called with each NewlineToken encountered.
def ignored_tokens(allow_eof=false)
  result=[]
  #first drain IgnoreTokens that are already queued
  result<<@moretokens.shift while IgnoreToken===@moretokens.first
  @moretokens.empty? or return result
  if true
    loop do
      unless @moretokens.empty?
        IgnoreToken===@moretokens.first or NewlineToken===@moretokens.first or
          break
      else
        WSCHARSET===nextchar or break
      end

      tok=get1token
      result<<tok
      #'when x then' replaces the 'when x :' spelling, which ruby 1.9+ rejects
      case tok
      when NewlineToken then block_given? and yield tok
      when EoiToken then allow_eof or lexerror tok,"end of file not expected here(2)"
      when IgnoreToken
      else raise "impossible"
      end
    end

  else
    #alternative implementation, currently switched off by the 'if true' above
    @whsphandler||=CharHandler.new(self, :==,
      "#" => :comment,
      "\n" => :newline,
      "\\" => :escnewline,
      "\s\t\v\r\f" => :whitespace
    )
    #tok=nil
    while tok=@whsphandler.go((nextchar or return result))
      block_given? and NewlineToken===tok and yield tok
      result << tok
    end
  end
  return result
end
241
+
242
+ #-----------------------------------
243
#run the block with a fresh, empty @moretokens (also passed as the
#block argument). afterwards @moretokens is the previous queue with
#whatever the block left queued appended. returns the block's value.
def safe_recurse
  saved = @moretokens
  #old_bracestack=@bracestack.dup
  @moretokens = []
  value = yield(@moretokens)
  #assert @incomplete_here_tokens.empty?
  #assert @bracestack==old_bracestack
  @moretokens = saved.concat(@moretokens)
  value
  #need to do something with @last_operative_token?
end
254
+
255
+ #-----------------------------------
256
#handle $-a and friends: reads the part after the '$' of a global
#whose name is punctuation, '-' plus one char, or digits.
#returns the text read, or nil if none of those forms is next.
def special_global
  assert prevchar=='$'

  #order matters here, but it shouldn't
  #(but til_charset must be last)
  punct = eat_next_if(/^[!@&+`'=~\/\\,.;<>*"$?:]$/)
  return punct if punct

  dashed = (eat_next_if('-') and ("-"+getchar))
  return dashed if dashed

  if (?0..?9)===nextchar
    til_charset(/[^\d]/)
  else
    nil
  end
end
266
+
267
+ #-----------------------------------
268
#lex an identifier or keyword starting at the current position.
#context:: asserted to be a single letter or underscore
#the token(s) produced by parse_keywords (or, for non-keywords, by
#var_or_meth_name) are queued on @moretokens; the first is returned.
def identifier(context=nil)
  oldpos=@file.pos
  str=identifier_as_string(context)

  #skip keyword processing if 'escaped' as it were, by def, . or ::
  #or if in a non-bare context
  #just asserts because those contexts are never encountered.
  #control goes through symbol(<...>,nil)
  assert(/^[a-z_]$/i===context)
  assert !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)

  @moretokens.unshift(*parse_keywords(str,oldpos) {
    #if not a keyword,
    #('when x then' replaces the 'when x:' spelling, which ruby 1.9+ rejects)
    case str
    when FUNCLIKE_KEYWORDS then nil #do nothing
    when VARLIKE_KEYWORDS,RUBYKEYWORDS then raise "shouldnt see keywords here, now"
    end
    safe_recurse { |a| var_or_meth_name(str,@last_operative_token,oldpos) }
  })
  return @moretokens.shift
end
289
+
290
+ #-----------------------------------
291
#read an identifier and return it as a string; nil if the next
#char is not a letter or underscore.
#
#equals, question mark, and exclamation mark
#might be allowed at the end in some contexts.
#(in def headers and symbols)
#otherwise, =,?, and ! are to be considered
#separate tokens. confusing, eh?
#i hope i've captured all right conditions....
#context should always be ?: right after def, ., and :: now
def identifier_as_string(context)
  #must begin w/ letter or underscore
  first = eat_next_if(/^[_a-z]$/i)
  return nil unless first
  name = first

  #which trailing chars may be absorbed into the name
  allow_eq, allow_qm, allow_ex =
    case context
    when ?@,?$ then [nil,nil,nil]
    when ?: then [?=, ??, ?!]
    else [nil,??, ?!]
    end

  name << til_charset(/[^a-z0-9_]/i)

  #look for ?, !, or =, if allowed
  suffix = @file.getc
  case suffix
  when nil #means we're at eof
    #handling nil here prevents suffix from ever matching
    #a nil value of allow_qm, allow_ex or allow_eq
  when allow_qm
    name << suffix
  when allow_ex
    following = (nextchar unless @file.eof?)
    #does ex appear to be part of a larger operator?
    if following == ?= #or following==?~
      back1char
    else
      name << suffix
    end
  when allow_eq
    following = (nextchar unless @file.eof?)
    #does eq appear to be part of a larger operator?
    if following == ?= || following == ?~ || following == ?>
      back1char
    else
      name << suffix
    end
  else
    #not part of the name: put it back
    back1char
  end

  name
end
341
+
342
+ #-----------------------------------
343
+ #contexts in which comma may appear in ruby:
344
+ #multiple lhs (terminated by assign op)
345
+ #multiple rhs (in implicit context) (tbd)
346
+ #method actual param list (in ( or implicit context)
347
+ #method formal param list (in ( or implicit context)
348
+ #block formal param list (in | context) (tbd)
349
+ #hash immediate (in imm{ context)
350
+ #array immediate (in imm[ context)
351
+ #element reference/assignment (in [] or []= method actual parameter context)
352
+ #list after for
353
+ #list after rescue
354
+ #list after when
355
+ #list after undef
356
+
357
+ #note: comma in parens not around a param list is illegal
358
+
359
+ #-----------------------------------
360
#a comma has been seen. are we in an
#lvalue list or some other construct that uses commas?
#answers true unless the innermost context is a ListContext.
def comma_in_lvalue_list?
  !(ListContext === @bracestack.last)
end
365
+
366
+ #-----------------------------------
367
#is the lexer at a spot where a bare name defines a local variable?
#truthy when @defining_lvar is set, or when the innermost context is
#* a ForSMContext in state :for
#* a RescueSMContext in state :arrow
#* a BlockParamListContext
#nil for any other context.
def in_lvar_define_state
  #@defining_lvar is a hack
  #('when x then' replaces the 'when x:' spelling, which ruby 1.9+ rejects)
  @defining_lvar or case ctx=@bracestack.last
    when ForSMContext then ctx.state==:for
    when RescueSMContext then ctx.state==:arrow
    when BlockParamListContext then true
  end
end
375
+
376
+ #-----------------------------------
377
#determine if an alphabetic identifier refers to a variable
#or method name. generates implicit parentheses if it is a
#call site and no explicit parens are present. starts an implicit param list
#if appropriate. adds tok to the
#local var table if it's a local var being defined for the first time.

#note: what we here call variables (rather, constants) following ::
#might actually be methods at runtime, but that's immaterial to tokenization.

#note: this routine should determine the correct token type for name and
#create the appropriate token. currently this is not done because callers
#typically have done it (perhaps incorrectly) already.

#name:: the identifier text (must be a String)
#lasttok:: the token preceding the identifier
#pos:: position handed to the tokens created for name
#returns an array: the name token, any implicit-paren tokens,
#then the ignorable tokens that followed the name.
#
#all 'when x:' / 'if x :' spellings were replaced by 'then';
#the colon forms are syntax errors on ruby 1.9 and later.
def var_or_meth_name(name,lasttok,pos)
  #look for call site if not a keyword or keyword is function-like
  #look for and ignore local variable names

  assert String===name

  #fixme: keywords shouldn't be treated specially after :: and .

  #maybe_local really means 'maybe local or constant'
  maybe_local=case name
    when /[^a-z_0-9]$/i then nil #do nothing
    when /^[a-z_]/
      (@localvars===name or VARLIKE_KEYWORDS===name or in_lvar_define_state) and !(lasttok===/^(\.|::)$/)
    when /^[A-Z]/
      is_const=true
      !(lasttok==='.') #this is the right algorithm for constants...
  end

  assert(@moretokens.empty?)

  tok=@last_operative_token=VarNameToken.new(name,pos)

  oldpos=@file.pos
  sawnl=false
  result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }

  #name is the last thing on its line (or in the file)
  if sawnl || @file.eof?
    return result.unshift(
      *if maybe_local then [tok]
       else [MethNameToken.new(name,pos), #insert implicit parens right after tok
             ImplicitParamListStartToken.new( oldpos),
             ImplicitParamListEndToken.new( oldpos) ]
       end
    )
  end

  #if next op is assignment (or comma in lvalue list)
  #then omit implicit parens
  assignment_coming=case nc=nextchar
    when ?= then !(/^=[=~]$/===readahead(2))
    when ?, then comma_in_lvalue_list?
    when ?>,?< then /^([<>])\1=$/===readahead(3)
    when ?*,?|,?& then /^([*|&])\1?=/===readahead(3)
    when ?%,?/,?-,?+,?^ then readahead(2)[1..1]=='='
  end
  if (assignment_coming or in_lvar_define_state)
    tok=VarNameToken.new(name,pos)
    if /[^a-z_0-9]$/i===name
      lexerror tok,"not a valid variable name: #{name}"
    elsif /^[a-z_]/===name and !(lasttok===/^(\.|::)$/)
      @localvars[name]=true
    end
    return result.unshift(tok)
  end

  #decide from the next char which implicit paren tokens to emit
  #(values are interpreted by the case statement further down)
  implicit_parens_to_emit=case nc
    when ?! then readahead(2)=='!=' ? 2 : 1
    when NEVERSTARTPARAMLISTFIRST
      (NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
    when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~ then 1
    when ?{ then maybe_local=false; 2
    when ?( then maybe_local=false; 0
    when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?= then 2
    when ?+, ?-, ?*, ?&, ?%, ?/, ?:, ?? then (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
    when ?< then (ws_toks.empty? || readahead(3)[/^<<[^"'`a-zA-Z_0-9-]/]) ? 2 : 3
    when ?[ then ws_toks.empty? ? 2 : 3
    when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?# then raise 'failure'
    else raise "unknown char after ident: #{nextchar.chr}"
  end

  implicit_parens_to_emit==3 and is_const and implicit_parens_to_emit=1

  tok=if maybe_local and implicit_parens_to_emit>=2
    implicit_parens_to_emit=0
    VarNameToken
  else
    MethNameToken
  end.new(name,pos)


  case implicit_parens_to_emit
  when 2
    #empty implicit param list: start and end right away
    result.unshift ImplicitParamListStartToken.new(oldpos),
                   ImplicitParamListEndToken.new(oldpos)
  when 1,3
    #open an implicit param list and note it on @bracestack
    result.unshift ImplicitParamListStartToken.new(oldpos)
    @bracestack.push ParamListContextNoParen.new(@linenum)
  when 0 then nil #do nothing
  else raise 'invalid value of implicit_parens_to_emit'
  end
  return result.unshift(tok)
  # 'ok:'
  # 'if unless while until {'
  # '\n (unescaped) and or'
  # 'then else elsif rescue ensure (illegal in value context)'

  # 'need to pop noparen from bracestack on these tokens: (in operator context)'
  # 'not ok:'
  # 'not (but should it be?)'
end
483
+
484
+ #-----------------------------------
485
#implicit context class => class of the token that closes it
CONTEXT2ENDTOK = {
  AssignmentRhsContext => AssignmentRhsListEndToken,
  ParamListContextNoParen => ImplicitParamListEndToken,
  KwParamListContext => KwParamListEndToken
}

#pop every context listed in CONTEXT2ENDTOK off the top of @bracestack,
#returning the matching end tokens (innermost first).
#str:: text just read; its length is subtracted from the file
#      position to place the end tokens.
def abort_noparens!(str='')
  #assert @moretokens.empty?
  closers = []
  while (end_class = CONTEXT2ENDTOK[@bracestack.last.class])
    closers << end_class.new(@file.pos - str.length)
    @bracestack.pop
  end
  closers
end
498
+
499
if false #no longer used
  #pop any AssignmentRhsContexts, then exactly one ParamListContextNoParen,
  #off @bracestack; returns the corresponding end tokens.
  #offs:: subtracted from the file position to place the end tokens
  def abort_1_noparen!(offs=0)
    assert @moretokens.empty?
    closers = []
    while AssignmentRhsContext === @bracestack.last
      @bracestack.pop
      closers << AssignmentRhsListEndToken.new(@file.pos - offs)
    end
    ParamListContextNoParen === @bracestack.last or lexerror huh,'{} with no matching callsite'
    @bracestack.pop
    closers << ImplicitParamListEndToken.new(@file.pos - offs)
    closers
  end
end
514
+
515
+ #-----------------------------------
516
#parse keywords now, to prevent confusion over bare symbols
#and match end with corresponding preceding def or class or whatever.
#if arg is not a keyword, the block is called
#
#str:: the word just read
#offset:: position handed to the KeywordToken created for str
#returns an array of tokens; for function-like keywords and
#non-keywords it is whatever the block returns.
def parse_keywords(str,offset)
  assert @moretokens.empty?
  result=[KeywordToken.new(str,offset)]

  case str
  when "end"
    result.unshift(*abort_noparens!(str))
    @bracestack.last.see @bracestack,:semi #sorta hacky... should make an :end event instead?

    #not needed?
    #  if ExpectDoOrNlContext===@bracestack.last
    #    @bracestack.pop
    #    assert @bracestack.last.starter[/^(while|until|for)$/]
    #  end

    WantsEndContext===@bracestack.last or lexerror result.last, 'unbalanced end'
    ctx=@bracestack.pop
    start,line=ctx.starter,ctx.linenum
    BEGINWORDS===start or lexerror result.last, "end does not match #{start or "nil"}"
    /^(class|module|def|do)$/===start and @localvars.end_block

  when "class","module"
    result.first.has_end!
    @bracestack.push WantsEndContext.new(str,@linenum)
    @localvars.start_block

  when "if","unless" #could be infix form without end
    if after_nonid_op?{false} #prefix form
      result.first.has_end!
      @bracestack.push WantsEndContext.new(str,@linenum)
    else #infix form
      result.unshift(*abort_noparens!(str))
    end

  when "begin","case"
    result.first.has_end!
    @bracestack.push WantsEndContext.new(str,@linenum)

  when "while","until" #could be infix form without end
    if after_nonid_op?{false} #prefix form
      result.first.has_end!
      @bracestack.push WantsEndContext.new(str,@linenum)
      expect_do_or_end_or_nl! str
    else #infix form
      result.unshift(*abort_noparens!(str))
    end

  when "for"
    result.first.has_end!
    @bracestack.push WantsEndContext.new(str,@linenum)
    #expect_do_or_end_or_nl! str #handled by ForSMContext now
    @bracestack.push ForSMContext.new(@linenum)

  when "do"
    result.unshift(*abort_noparens!(str))
    if ExpectDoOrNlContext===@bracestack.last
      #this 'do' belongs to a loop header already on the stack
      @bracestack.pop
      assert WantsEndContext===@bracestack.last
    else
      #a block: needs its own end
      result.last.has_end!
      @bracestack.push WantsEndContext.new(str,@linenum)
      @localvars.start_block
      block_param_list_lookahead
    end

  when "def"
    result.first.has_end!
    @bracestack.push WantsEndContext.new("def",@linenum)
    @localvars.start_block
    safe_recurse { |aa|
      @last_operative_token=KeywordToken.new "def" #hack
      result.concat ignored_tokens

      #read an expr like a.b.c or a::b::c
      #or (expr).b.c
      if nextchar==?( #look for optional parenthesised head
        old_size=@bracestack.size
        parencount=0
        begin
          tok=get1token
          case tok
          when /^\($/.token_pat then parencount+=1
          when /^\)$/.token_pat then parencount-=1
          end
          EoiToken===tok and lexerror tok, "eof in def header"
          result<<tok
        end until parencount==0 #@bracestack.size==old_size
      else #no parentheses, all tail
        @last_operative_token=KeywordToken.new "." #hack hack
        result << symbol(false,false)
        #this isn't quite right.... if a.b.c.d is seen, a, b, and c
        #should be considered maybe varname instead of methnames.
        #the last (d in the example) is always considered a methname;
        #it's what's being defined.
      end
      #read tail: .b.c.d etc
      @last_operative_token=result.last
      state=:expect_op
      loop do

        #look for start of parameter list
        nc=(@moretokens.first or nextchar.chr)
        if state==:expect_op and /^[a-z_(&*]/i===nc
          result.concat def_param_list
          break
        end

        tok=get1token
        result<<tok
        case tok
        when EoiToken
          lexerror tok,'unexpected eof in def header'
        when IgnoreToken
        when MethNameToken #,VarNameToken # /^[a-z_]/i.token_pat
          lexerror tok,'expected . or ::' unless state==:expect_name
          state=:expect_op
        when /^(\.|::)$/.token_pat
          lexerror tok,'expected ident' unless state==:expect_op
          state=:expect_name
        when /^(;|end)$/.token_pat, NewlineToken #are we done with def name?
          state==:expect_op or lexerror tok,'expected identifier'
          break
        else
          lexerror(tok, "bizarre token in def name: " +
                   "#{tok}:#{tok.class}")
        end
      end
    }

  when "alias"
    #two symbols follow, each optionally written with a leading colon
    safe_recurse { |a|
      @last_operative_token=KeywordToken.new "alias" #hack
      result.concat ignored_tokens
      res=symbol(eat_next_if(?:),false)
      res ? result<<res : lexerror(result.first,"bad symbol in alias")
      @last_operative_token=KeywordToken.new "alias" #hack
      result.concat ignored_tokens
      res=symbol(eat_next_if(?:),false)
      res ? result<<res : lexerror(result.first,"bad symbol in alias")
    }

  when "undef"
    #a comma-separated list of symbols follows
    safe_recurse { |a|
      loop do
        @last_operative_token=KeywordToken.new "," #hack
        result.concat ignored_tokens
        tok=symbol(eat_next_if(?:),false)
        tok or lexerror(result.first,"bad symbol in undef")
        result<< tok
        @last_operative_token=tok

        sawnl=false
        result.concat ignored_tokens(true){|nl| sawnl=true}

        break if sawnl or nextchar != ?,
        tok= single_char_token(?,)
        result<< tok
      end
    }

  # when "defined?"
  #   huh
  #defined? might have a baresymbol following it
  #does it need to be handled specially?

  when "when"
    result.unshift(*abort_noparens!(str))
    @bracestack.push KwParamListContext.new(str,@linenum)

  when "rescue"
    result.unshift(*abort_noparens!(str))
    @bracestack.push RescueSMContext.new(@linenum)

  when "then","in"
    result.unshift(*abort_noparens!(str))
    @bracestack.last.see @bracestack,str.to_sym

  when /^(#{BINOPWORDS}|#{INNERBOUNDINGWORDS})$/o
    result.unshift(*abort_noparens!(str))

  #'then' replaces the 'when x:' spelling, which ruby 1.9+ rejects
  when FUNCLIKE_KEYWORDS then result=yield

  when RUBYKEYWORDS
    #do nothing

  else result=yield

  end

  return result
end
707
+
708
+
709
+ #-----------------------------------
710
#look just past a block start for a |...| parameter list and lex it
#right away, with @defining_lvar set while the parameters are read.
#everything read is put at the front of the token queue.
def block_param_list_lookahead
  safe_recurse{ |la|
    @last_operative_token=KeywordToken.new ';'
    toks=ignored_tokens

    if eat_next_if(?|)
      toks << KeywordToken.new("|",@file.pos-1)
      if eat_next_if(?|)
        #'||': an empty parameter list
        toks.concat([NoWsToken.new(@file.pos-1),
                     KeywordToken.new('|',@file.pos-1)])
      else
        assert !@defining_lvar
        @defining_lvar=true
        assert((@last_operative_token===';' or NewlineToken===@last_operative_token))
        @bracestack.push BlockParamListContext.new(@linenum)
        #block param initializers are not supported here, because ruby doesn't allow them!
        begin
          tok=get1token
          EoiToken===tok and lexerror tok,"eof in block parameter list"
          toks << tok
        end until tok==='|'
        assert @defining_lvar
        @defining_lvar=false
        BlockParamListContext===@bracestack.last or raise 'expected BlockParamListContext atop @bracestack'
        @bracestack.pop
        unless @moretokens.empty?
          fixme "moretokens might be set from get1token call above...might be bad"
        end
      end
    end

    @last_operative_token=KeywordToken.new ';'
    #toks.concat ignored_tokens

    #assert @last_operative_token===';'
    #toks<<get1token

    la[0,0]=toks
  }
end
749
+
750
+ #-----------------------------------
751
#handle parameter list of a method declaration.
#parentheses are optional... if missing param list
#is ended by (unescaped) newline or semicolon (at the same bracing level)
#expect a brace as the next token,
#then match the following tokens until
#the matching endbrace is found
#
#returns the array of tokens read.
def def_param_list
  result=[]
  normal_comma_level=old_bracestack_size=@bracestack.size
  safe_recurse { |a|
    tok=nil
    assert(@moretokens.empty?)
    assert(!(IgnoreToken===@moretokens[0]))
    #NOTE(review): receiver and argument look reversed here compared to
    #the WHSPLF[nextchar.chr] / WHSPCHARS[lastchar] uses elsewhere -- confirm
    assert((@moretokens[0] or not nextchar.chr[WHSPCHARS]))

    #have parentheses?
    if '('==@moretokens[0] or nextchar==?(
      #get open paren token
      result.concat maybe_no_ws_token
      tok=get1token
      result << tok
      assert(tok==='(')

      #bracestack was changed by get1token above...
      normal_comma_level+=1
      assert(normal_comma_level==@bracestack.size)
      endingblock=proc{|t| t===')' }
    else
      endingblock=proc{|t| t===';' or NewlineToken===t}
    end
    #let the proc be used with === as well
    class << endingblock
      alias === call
    end

    @last_operative_token=KeywordToken.new ',' #hack
    #read local parameter names
    loop do
      expect_name=(@last_operative_token===',' and
                   normal_comma_level==@bracestack.size)
      @defining_lvar||=true if expect_name
      tok=get1token
      result << tok
      lexerror tok, "unexpected eof in def header" if EoiToken===tok

      #break if at end of param list
      break if endingblock===tok and old_bracestack_size>=@bracestack.size

      #next token is a local var name
      #(or the one after that if unary ops present)
      #result.concat ignored_tokens
      if expect_name
        case tok
        when IgnoreToken #, /^[A-Z]/ #do nothing
        when VarNameToken
          assert @defining_lvar
          @defining_lvar=false
          assert(!(@last_operative_token===','))
        when /^[&*]$/.token_pat #unary form...
          #a NoWsToken is also expected... read it now
          result.concat maybe_no_ws_token #not needed?
          @last_operative_token=KeywordToken.new ','
        else lexerror tok,"unfamiliar var name '#{tok}'"
        end
      end
    end

    @defining_lvar=false

    assert(@bracestack.size <= old_bracestack_size)
    assert(endingblock[tok])

    #hack: force next token to look like start of a
    #new stmt, if the last ignored_tokens
    #call above did not find a newline
    #(just in case the next token parsed
    #happens to call quote_expected? or after_nonid_op)
    result.concat ignored_tokens
    if nextchar.chr[/[iuw\/<|>+\-*&%?:]/] and
       !(NewlineToken===@last_operative_token) and
       !(/^(end|;)$/===@last_operative_token)
      @last_operative_token=KeywordToken.new ';'
      result << get1token
    end
  }

  return result
end
836
+
837
+
838
+ #-----------------------------------
839
#handle % in ruby code. is it part of fancy quote or a modulo operator?
#delegates to fancy_quote when quote_expected? says so, else to biop.
def percent(ch)
  quote_expected?(ch) ? fancy_quote(ch) : biop(ch)
end
847
+
848
+ #-----------------------------------
849
#handle * or & in ruby code. is it a unary or binary operator?
#binary: handled by quadriop.
#unary: returns an operator/method-name token, and queues a NoWsToken
#when the operator is not followed by a WHSPLF character.
def star_or_amp(ch)
  assert('*&'[ch])
  return(quadriop ch) unless unary_op_expected? ch

  #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
  optok = operator_or_methname_token getchar
  unless WHSPLF[nextchar.chr]
    @moretokens << NoWsToken.new(@file.pos)
  end
  optok
  #result should distinguish unary+binary *&
end
863
+
864
+ #-----------------------------------
865
#handle ? in ruby code. is it part of ?..: or a character literal?
#literal: returns a NumberToken for the (maybe escaped) char after the ?.
#operator: pushes a TernaryContext and returns a KeywordToken for the ?.
def char_literal_or_op(ch)
  unless colon_quote_expected? ch
    @bracestack.push TernaryContext.new(@linenum)
    return KeywordToken.new(getchar) #operator
  end

  getchar #skip the ?
  NumberToken.new(getchar_maybe_escape)
end
875
+
876
+ #-----------------------------------
877
#handle / in ruby code: start of a regex, or the / (or /=) operator?
def regex_or_div(ch)
  #space after slash always means / operator, rather than regex start
  starts_regex = after_nonid_op? {
    !is_var_name? && WHSPLF[prevchar] && !readahead(2)[%r{^/\s}]
  }
  return regex(ch) if starts_regex

  #/ is operator
  op = getchar
  op << '=' if eat_next_if(?=)
  operator_or_methname_token(op)
end
889
+
890
+ #-----------------------------------
891
+ #return true if tok corresponds to a variable or constant, false if its for a method, nil for something else
892
+ #we assume tok is a valid token with a correctly formed name.
893
+ #...should really be called was_var_name
894
+ def is_var_name?
895
+ (tok=@last_operative_token)
896
+
897
+ s=tok.to_s
898
+ case s
899
+ when /[^a-z_0-9]$/i: false
900
+ when /^[a-z_]/: @localvars===s or VARLIKE_KEYWORDS===s
901
+ when /^[A-Z]/: VarNameToken===tok
902
+ when /^[@$<]/: true
903
+ else raise "not var or method name: #{s}"
904
+ end
905
+ end
906
+
907
+ #-----------------------------------
908
#yukko hack
#ch must be ':' or '?'. returns false unless the next two chars look
#like the start of a char literal or symbol; otherwise returns the
#result of after_nonid_op? (whose block says 'not a variable name').
def colon_quote_expected?(ch)
  assert ':?'[ch]
  unless readahead(2)[/^(\?[^#{WHSPLF}]|:[$@a-zA-Z_'"`\[*~+\-\/%<=>&|^])$/o]
    return false
  end

  #possible func-call as operator
  after_nonid_op? { !is_var_name? }
end
918
+
919
+ #-----------------------------------
920
#handle : in ruby code: start of a symbol, a lone : operator, or ::
def symbol_or_op(ch)
  startpos=@file.pos
  quote_coming=colon_quote_expected?(ch)
  lastchar=prevchar
  eat_next_if(ch) or raise "needed: "+ch

  #handle quoted symbols like :"foobar", :"[]"
  return symbol(':') if quote_coming

  #look for another colon; return single : if not found
  unless eat_next_if(?:)
    #cancel implicit contexts...
    @moretokens.push(*abort_noparens!(':'))

    #end ternary context, if any
    @bracestack.last.see @bracestack,:colon

    @bracestack.pop if TernaryContext===@bracestack.last #should be in the context's see handler

    if ExpectDoOrNlContext===@bracestack.last #should be in the context's see handler
      @bracestack.pop
      assert @bracestack.last.starter[/^(while|until|for)$/]
    end

    @moretokens.push KeywordToken.new(':',startpos)
    return @moretokens.shift
  end

  #we definitely found a ::

  colon2=KeywordToken.new('::',startpos)
  lasttok=@last_operative_token
  assert !(String===lasttok)

  #:: directly after a name (no whitespace char before it):
  #return a NoWsToken first and queue the :: behind it
  directly_after_name=((VarNameToken===lasttok or MethNameToken===lasttok) and
                       lasttok===/^[$@a-zA-Z_]/ and !WHSPCHARS[lastchar])
  if directly_after_name
    @moretokens << colon2
    result=NoWsToken.new(startpos)
  else
    result=colon2
  end
  dot_rhs(colon2)
  return result
end
964
+
965
+ #-----------------------------------
966
  #read the name part of a symbol (or of a bare method name).
  #notbare:: true value if the name was introduced by a leading colon;
  #          the token is then a SymbolToken, otherwise a MethNameToken.
  #couldbecallsite:: must currently be false (asserted below).
  #returns whatever lexerror returns for the new token; the error message
  #is nil unless the name starts with an unexpected character.
  def symbol(notbare,couldbecallsite=!notbare)
    assert !couldbecallsite
    start=@file.pos
    #presumably backs the start offset up over the already-consumed colon
    notbare and start-=1
    klass=(notbare ? SymbolToken : MethNameToken)

    #look for operators
    opmatches=readahead(3)[RUBYSYMOPERATORREX]
    result= opmatches ? @file.read(opmatches.size) :
      case nc=nextchar
      when ?" then assert notbare;double_quote('"')
      when ?' then assert notbare;double_quote("'")
      when ?` then @file.read(1)
      when ?@ then at_identifier.to_s
      when ?$ then dollar_identifier.to_s
      when ?_,?a..?z then identifier_as_string(?:)
      when ?A..?Z then
        result=identifier_as_string(?:)
        #a capitalized name right after :: that ends in a letter, digit or
        #underscore is reclassified as a VarNameToken
        if @last_operative_token==='::'
          assert klass==MethNameToken
          /[A-Z_0-9]$/i===result and klass=VarNameToken
        end
        result
      else error= "unexpected char starting symbol: #{nc.chr}" #the message also becomes the token text
      end
    return lexerror(klass.new(result,start),error)
  end
993
+
994
+ #-----------------------------------
995
+ def callsite_symbol(tok_to_errify)
996
+ start=@file.pos
997
+
998
+ #look for operators
999
+ opmatches=readahead(3)[RUBYSYMOPERATORREX]
1000
+ return [opmatches ? @file.read(opmatches.size) :
1001
+ case nc=nextchar
1002
+ when ?` then @file.read(1)
1003
+ when ?_,?a..?z,?A..?Z then identifier_as_string(?:)
1004
+ else
1005
+ @last_operative_token=KeywordToken.new(';')
1006
+ lexerror(tok_to_errify,"unexpected char starting symbol: #{nc.chr}")
1007
+ nil
1008
+ end, start
1009
+ ]
1010
+ end
1011
+
1012
+ #-----------------------------------
1013
  #parse the header of a here document: <<, an optional -, and the
  #terminator word, which may be quoted with ' " or `.
  #returns a HerePlaceholderToken, which is also queued in
  #@incomplete_here_tokens so that the body can be filled in later
  #(see the note at the end of this method).
  def here_header
    @file.read(2)=='<<' or raise "parser insanity"

    dash=eat_next_if(?-)
    quote=eat_next_if( /^['"`]$/)
    if quote
      #quoted terminator: everything up to the matching quote char
      ender=til_charset(/[#{quote}]/)
      (quote==getchar) or
        return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc")
    else
      #unquoted terminator is treated like a double-quoted one
      quote='"'
      ender=til_charset(/[^a-zA-Z0-9_]/)
      ender.length >= 1 or
        return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "invalid here header")
    end

    res= HerePlaceholderToken.new( dash, quote, ender )
    @incomplete_here_tokens.push res

    #hack: normally this should just be in get1token
    #this fixup is necessary because the call to get1token below
    #makes a recursion.
    @last_operative_token=res

    #lex ahead through the rest of the header's line, collecting the tokens,
    #until the placeholder is no longer flagged unsafe_to_use
    safe_recurse { |a|
      assert(a.object_id==@moretokens.object_id)
      toks=[]
      begin
        #yech.
        #handle case of here header in a string inclusion, but
        #here body outside it.
        #count the contexts on the stack other than the three kinds listed
        cnt=0
        1.upto @bracestack.size do |i|
          case @bracestack[-i]
          when AssignmentRhsContext,ParamListContextNoParen,TopLevelContext
          else cnt+=1
          end
        end
        if nextchar==?} and cnt==1
          res.bodyclass=OutlinedHereBodyToken
          break
        end

        tok=get1token
        assert(a.object_id==@moretokens.object_id)
        toks<<tok
        EoiToken===tok and lexerror tok, "here body expected before eof"
      end while res.unsafe_to_use
      assert(a.object_id==@moretokens.object_id)
      a[0,0]= toks #same as a=toks+a, but keeps a's id
    }

    return res

    #the action continues in newline, where
    #the rest of the here token is read after a
    #newline has been seen and res.affix is eventually called
  end
1071
+
1072
+ #-----------------------------------
1073
+ def lessthan(ch) #match quadriop('<') or here doc or spaceship op
1074
+ case readahead(3)
1075
+ when /^<<['"`\-a-z0-9_]$/i
1076
+ if quote_expected?(ch) #and @last_operative_token!='class' #not needed?
1077
+ here_header
1078
+ else
1079
+ operator_or_methname_token @file.read(2)
1080
+ end
1081
+ when "<=>" then operator_or_methname_token @file.read(3)
1082
+ else quadriop(ch)
1083
+ end
1084
+ end
1085
+
1086
+ #-----------------------------------
1087
+ def escnewline(ch)
1088
+ assert ch == '\\'
1089
+
1090
+ pos=@file.pos
1091
+ result=getchar
1092
+ if nl=readnl
1093
+ result+=nl
1094
+ else
1095
+ error='illegal escape sequence'
1096
+ end
1097
+ lexerror EscNlToken.new(@filename,@linenum,result,pos), error
1098
+ end
1099
+
1100
+ #-----------------------------------
1101
  #handle a newline in ruby code. one of three things happens:
  # * if a here document header is waiting for its body, the body is read
  #   here and returned as a token of the placeholder's bodyclass;
  # * if the newline ends a statement, implicit contexts are closed and the
  #   superclass's newline token is emitted;
  # * otherwise the newline is an implicit line continuation and an
  #   EscNlToken is returned instead.
  def newline(ch)
    assert("\r\n"[nextchar.chr])

    #handle here bodies queued up by previous line
    #(we should be more compatible with dos/mac style newlines...)
    if tofill=@incomplete_here_tokens.shift
      tofill.string.offset=@file.pos
      loop {
        assert("\r\n"[nextchar.chr])

        #retrieve everything til next nl
        line=all_quote(/^[\r\n]$/, tofill.quote, /^[\r\n]$/, :regex_esc_seq)
        #(you didn't know all_quote could take a regex, did you?)

        #get rid of fals that otherwise appear to be in the middle of
        #a string (and are emitted out of order)
        fal=@moretokens.pop
        assert FileAndLineToken===fal || fal.nil?

        #step back so the newline ending this line is the next char again
        back1char
        assert("\r\n"[nextchar.chr])

        #matches terminating reg expr?
        break if line.elems.size==1 and
          line.elems[0][tofill.termex]

        #not the terminator: the line and its newline belong to the body
        tofill.append_token line
        tofill.append readnl
        back1char
      }

      assert("\r\n"[nextchar.chr])
      #body is complete; this flag is what ends the lookahead loop in here_header
      tofill.unsafe_to_use=false

      return tofill.bodyclass.new(tofill)
    end

    #ordinary newline handling (possibly implicitly escaped)
    assert("\r\n"[nextchar.chr])
    assert @moretokens.empty?
    result=if NewlineToken===@last_operative_token or #hack
              @last_operative_token===/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
              !after_nonid_op?{false}
           then #hack-o-rama: probably cases left out above
             #statement ends here: close implicit contexts, then emit them
             #followed by the newline token from the superclass
             a= abort_noparens!
             ExpectDoOrNlContext===@bracestack.last and @bracestack.pop
             @bracestack.last.see @bracestack,:semi

             a << super(ch)
             @moretokens.replace a+@moretokens
             @moretokens.shift
           else
             #the line continues: treat the newline as if it were escaped
             offset=@file.pos
             #@moretokens <<
             EscNlToken.new(@filename,@linenum,readnl,offset)
             #WsToken.new ' ' #why? #should be "\\\n" ?
           end

    #look for =begin and __END__ at the start of the next line
    start_of_line_directives

    return result
  end
1163
+
1164
+ #-----------------------------------
1165
+ EQBEGIN=%r/^=begin[^a-zA-Z_0-9]$/
1166
+ EQBEGINLENGTH=7
1167
+ EQEND='=end'
1168
+ ENDMARKER=/^__END__[\r\n]$/
1169
+ ENDMARKERLENGTH=8
1170
+ def start_of_line_directives
1171
+ #handle =begin...=end (at start of a line)
1172
+ while EQBEGIN===readahead(EQBEGINLENGTH)
1173
+ startpos=@file.pos
1174
+ more=@file.read(EQBEGINLENGTH-1) #get =begin
1175
+
1176
+ #keep reading til /\n=end.*\n/
1177
+ @file.each(EQEND) {|cblock|
1178
+ more << cblock
1179
+ #must be at start of line
1180
+ break if /^[\r\n]#{EQEND}/o===readback(EQEND.length+1)
1181
+ }
1182
+ #read rest of line after =end
1183
+ more << @file.til_charset(/[\r\n]/)
1184
+ assert((?\r===nextchar or ?\n===nextchar))
1185
+ assert !(/[\r\n]/===more[-1,1])
1186
+
1187
+ newls= more.scan(/\r\n?|\n\r?/)
1188
+ @linenum+= newls.size
1189
+
1190
+ #inject the fresh comment into future token results
1191
+ @moretokens.push IgnoreToken.new(more,startpos)
1192
+ end
1193
+
1194
+ #handle __END__
1195
+ if ENDMARKER===readahead(ENDMARKERLENGTH)
1196
+ assert !(ImplicitContext===@bracestack.last)
1197
+ @moretokens.unshift endoffile_detected(@file.read(6))
1198
+ @file.pos=@file.stat.size
1199
+ end
1200
+ end
1201
+
1202
+
1203
+
1204
+ #-----------------------------------
1205
+ #used to resolve the ambiguity of
1206
+ # unary ops (+, -, *, &, ~ !) in ruby
1207
+ #returns whether current token is to be the start of a literal
1208
+ IDBEGINCHAR=/^[a-zA-Z_$@]/
1209
+ def unary_op_expected?(ch) #yukko hack
1210
+ '*&='[readahead(2)[1..1]] and return false
1211
+
1212
+ after_nonid_op? {
1213
+ #possible func-call as operator
1214
+
1215
+ not is_var_name? and
1216
+ WHSPLF[prevchar]
1217
+ }
1218
+ end
1219
+
1220
+ #-----------------------------------
1221
+ #used to resolve the ambiguity of
1222
+ # <<, %, ? in ruby
1223
+ #returns whether current token is to be the start of a literal
1224
+ #/ is not handled right here if whitespace immediately follows the /
1225
+ def quote_expected?(ch) #yukko hack
1226
+ case ch[0]
1227
+ when ?? then readahead(2)[/^\?[#{WHSPLF}]$/o] #not needed?
1228
+ when ?% then readahead(3)[/^%([a-ps-vyzA-PR-VX-Z]|[QqrwWx][a-zA-Z0-9])/]
1229
+ when ?< then !readahead(4)[/^<<-?['"`a-z0-9_]/i]
1230
+ else raise 'unexpected ch (#{ch}) in quote_expected?'
1231
+ # when ?+,?-,?&,?*,?~,?! then '*&='[readahead(2)[1..1]]
1232
+ end and return false
1233
+
1234
+ after_nonid_op? {
1235
+ #possible func-call as operator
1236
+
1237
+ not is_var_name? and
1238
+ WHSPLF[prevchar] and not WHSPLF[readahead(2)[1..1]]
1239
+ }
1240
+ end
1241
+
1242
+ #-----------------------------------
1243
  #used to resolve the ambiguity of
  # <<, %, /, ?, :, and newline in ruby
  #the answer depends on @last_operative_token:
  # * after a method name, variable name or function-like keyword the
  #   decision is left to the caller's block, whose result is returned;
  # * after a literal, a closing bracket or one of the keywords in the
  #   second list below, the result is false;
  # * after an operator, an opening bracket, a newline, the listed keywords
  #   or at the very start of the file, the result is true.
  #raises RuntimeError for any token not covered by the patterns below.
  def after_nonid_op?
    case @last_operative_token
    when MethNameToken,VarNameToken, FUNCLIKE_KEYWORDS.token_pat
      return yield
    when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
         %r{^(class|module|do|end|self|true|false|nil|
              __FILE__|__LINE__|[\})\]]|alias|(un)?def|for
            )$}x.token_pat
      #do shouldn't be in above list... dunno about def/undef
      #maybe class/module shouldn't either?
      #for is also in NewlineToken branch, below.
      #what about rescue?
      return false
    when /^(#{RUBYOPERATORREX}|#{INNERBOUNDINGWORDS})$/o.token_pat
      #regexs above must match whole string
      #assert(@last_operative_token==$&) #disabled 'cause $& is now always nil :(
      return true
    when NewlineToken, nil, #nil means we're still at beginning of file
         /^([({\[]|or|not|and|if|unless|then|elsif|else|
            while|until|begin|for|in|case|when|ensure)$
         /x.token_pat
      return true
    #when KeywordToken
    # return true
    when IgnoreToken
      raise "last_operative_token shouldn't be ignoreable"
    else
      raise "after_nonid_op? after #{@last_operative_token}:#{@last_operative_token.class} -- now what"
    end
  end
1275
+
1276
+ #-----------------------------------
1277
+ def quadriop(ch) #match /&&?=?/ (&, &&, &=, or &&=)
1278
+ assert(%w[& * | < >].include?(ch))
1279
+ # '&*'[ch] and qe=quote_expected?(ch) #not needed?
1280
+ result=getchar + (eat_next_if(ch)or'')
1281
+ if eat_next_if(?=)
1282
+ result << ?=
1283
+ # elsif qe and result[/^[&*]$/] #not needed?
1284
+ # @moretokens<<NoWsToken.new(@file.pos) #not needed?
1285
+ end
1286
+ return operator_or_methname_token(result)
1287
+ end
1288
+
1289
+ #-----------------------------------
1290
+ def biop(ch) #match /%=?/ (% or %=)
1291
+ assert(ch[/^[%^~]$/])
1292
+ result=getchar
1293
+ if eat_next_if(?=)
1294
+ result <<?=
1295
+ end
1296
+ return operator_or_methname_token( result)
1297
+ end
1298
+
1299
+ #-----------------------------------
1300
+ def tilde(ch) #match /~=?/ (~ or ~=)
1301
+ assert(ch=='~')
1302
+ result=getchar
1303
+ # eat_next_if(?=) ?
1304
+ # result <<?= :
1305
+ WHSPLF[nextchar.chr] ||
1306
+ @moretokens << NoWsToken.new(@file.pos)
1307
+ #why is the NoWsToken necessary at this point?
1308
+ return operator_or_methname_token( result)
1309
+ #result should distinguish unary ~
1310
+ end
1311
+
1312
+ #-----------------------------------
1313
+ def want_op_name
1314
+ KeywordToken===@last_operative_token and
1315
+ @last_operative_token===/^(alias|(un)?def|\.|::)$/
1316
+ end
1317
+
1318
+ #-----------------------------------
1319
+ #match /[+\-]=?/ (+ or +=)
1320
+ #could be beginning of number, too
1321
+ #fixme: handle +@ and -@ here as well... (currently, this is done in symbol()?)
1322
+ def plusminus(ch)
1323
+ assert(/^[+\-]$/===ch)
1324
+ if unary_op_expected?(ch)
1325
+ if (?0..?9)===readahead(2)[1]
1326
+ return number(ch)
1327
+ else #unary operator
1328
+ result=getchar
1329
+ WHSPLF[nextchar.chr] or
1330
+ @moretokens << NoWsToken.new(@file.pos)
1331
+ return(operator_or_methname_token result)
1332
+ #todo: result should distinguish unary+binary +-
1333
+ end
1334
+ else #binary operator
1335
+ assert(! want_op_name)
1336
+ result=getchar
1337
+ if eat_next_if(?=)
1338
+ result << ?=
1339
+ end
1340
+ return(operator_or_methname_token result)
1341
+ #todo: result should distinguish unary+binary +-
1342
+ end
1343
+ end
1344
+
1345
+ #-----------------------------------
1346
+ def equals(ch) #match /=(>|~|==?)?/ (= or == or =~ or === or =>)
1347
+ offset=@file.pos
1348
+ str=getchar
1349
+ assert str=='='
1350
+ c=(eat_next_if(/^[~=>]$/)or'')
1351
+ str << c
1352
+ case c
1353
+ when '=': str<< (eat_next_if(?=)or'')
1354
+
1355
+ when '>': @bracestack.last.see @bracestack,:arrow
1356
+ when '': #record local variable definitions
1357
+
1358
+ @bracestack.push AssignmentRhsContext.new(@linenum)
1359
+ @moretokens.unshift AssignmentRhsListStartToken.new( offset+1)
1360
+ end
1361
+ return operator_or_methname_token( str,offset)
1362
+ end
1363
+
1364
+ #-----------------------------------
1365
+ def exclam(ch) #match /![~=]?/ (! or != or !~)
1366
+ assert nextchar==?!
1367
+ result=getchar
1368
+ k=eat_next_if(/^[~=]$/)
1369
+ if k
1370
+ result+=k
1371
+ else
1372
+ WHSPLF[nextchar.chr] or
1373
+ @moretokens << NoWsToken.new(@file.pos)
1374
+ end
1375
+ return KeywordToken.new(result)
1376
+ #result should distinguish unary !
1377
+ end
1378
+
1379
+ #-----------------------------------
1380
+ def dot(ch)
1381
+ str=''
1382
+ eat_next_if(?.) or raise "lexer confusion"
1383
+
1384
+ #three lumps of sugar or two?
1385
+ eat_next_if(?.) and
1386
+ return KeywordToken.new(eat_next_if(?.)? "..." : "..")
1387
+
1388
+ #else saw just single .
1389
+ #match a valid ruby id after the dot
1390
+ result= KeywordToken.new( ".")
1391
+ dot_rhs(result)
1392
+ return result
1393
+ end
1394
+
1395
+ #-----------------------------------
1396
+ def dot_rhs(prevtok)
1397
+ safe_recurse { |a|
1398
+ @last_operative_token=prevtok
1399
+ aa= ignored_tokens
1400
+ tok,pos=callsite_symbol(prevtok)
1401
+ tok and aa.push(*var_or_meth_name(tok,prevtok,pos))
1402
+ a.unshift(*aa)
1403
+ }
1404
+ end
1405
+
1406
+ #-----------------------------------
1407
+ def single_quote(ch=nil)
1408
+ double_quote(ch)
1409
+ end
1410
+
1411
+ #-----------------------------------
1412
+ def back_quote(ch=nil)
1413
+ oldpos=@file.pos
1414
+ @last_operative_token===/^(def|::|\.)$/ and return MethNameToken.new(
1415
+ (eat_next_if(?`) or raise "insanity"), oldpos
1416
+ )
1417
+ double_quote(ch)
1418
+ end
1419
+
1420
+ #-----------------------------------
1421
+ def comment(str)
1422
+ result=""
1423
+ #loop{
1424
+ result<<super(nil).to_s
1425
+
1426
+ if /^\#.*\#$/===result #if comment was ended by a crunch
1427
+
1428
+ #that's not a legal comment end in ruby, so just keep reading
1429
+ assert(result.to_s[-1]==?#)
1430
+ result.chomp! '#'
1431
+
1432
+ #back up one char in input so that the
1433
+ #super will see that # on the next go round.
1434
+ #this hack makes the ruma comment lexer work with ruby too.
1435
+ back1char
1436
+
1437
+ assert nextchar==?#
1438
+ #else break #not a crunch... just exit
1439
+ end
1440
+ #}
1441
+
1442
+ return IgnoreToken.new(result)
1443
+ end
1444
+
1445
+ #-----------------------------------
1446
  #handle an opening ( [ or { in ruby code: push the matching context
  #onto @bracestack and return the token to emit. when [ or ( comes right
  #after a name or function-like keyword with no whitespace in between, a
  #NoWsToken is returned first and the bracket itself is queued in
  #@moretokens.
  #raises RuntimeError ("lexer confusion") if no opening bracket is next.
  def open_brace(ch)
    assert((ch!='[' or !want_op_name))
    assert(@moretokens.empty?)
    lastchar=prevchar
    ch=eat_next_if(/^[({\[]$/)or raise "lexer confusion"
    tokch=KeywordToken.new(ch,@file.pos-1)

    #maybe emitting of NoWsToken can be moved into var_or_meth_name ??
    case tokch.ident
    when '['
      #fixme: in contexts expecting an (operator) method name, we
      #       should match [] or []= at this point
      @bracestack.push ListImmedContext.new(ch,@linenum)
      lasttok=last_operative_token
      #could be: lasttok===/^[a-z_]/i
      if (VarNameToken===lasttok or MethNameToken===lasttok or
          lasttok===FUNCLIKE_KEYWORDS) and !WHSPCHARS[lastchar]
        @moretokens << (tokch)
        tokch= NoWsToken.new(@file.pos-1)
      end
    when '('
      lasttok=last_operative_token
      #could be: lasttok===/^[a-z_]/i
      if (VarNameToken===lasttok or MethNameToken===lasttok or
          lasttok===FUNCLIKE_KEYWORDS)
        #paren after a name: a parameter list
        unless WHSPCHARS[lastchar]
          @moretokens << tokch
          tokch= NoWsToken.new(@file.pos-1)
        end
        @bracestack.push ParamListContext.new(@linenum)
      else
        #any other paren
        @bracestack.push ParenContext.new(@linenum)
      end

    when '{'
      #check if we are in a hash literal or string inclusion (#{}),
      #in which case below would be bad.
      if after_nonid_op?{false}
        @bracestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
      else
        #not needed now, i think:
        # # 'need to find matching callsite context and end it if implicit'
        # lasttok=last_operative_token
        # unless lasttok===')' and lasttok.callsite?
        #   @moretokens.push *(abort_1_noparen!(1).push tokch)
        #   tokch=@moretokens.shift
        # end

        #start of a block: open a new scope for local variables
        @localvars.start_block
        @bracestack.push BlockContext.new(@linenum)
        block_param_list_lookahead
      end
    end
    return (tokch)
  end
1502
+
1503
+ #-----------------------------------
1504
+ def close_brace(ch)
1505
+ ch==eat_next_if(/[)}\]]/) or raise "lexer confusion"
1506
+ @moretokens.concat abort_noparens!(ch)
1507
+ @moretokens<< kw=KeywordToken.new( ch,@file.pos-1)
1508
+ @bracestack.last.see @bracestack,:semi #hack
1509
+ if @bracestack.empty?
1510
+ lexerror kw,"unmatched brace: #{ch}"
1511
+ return @moretokens.shift
1512
+ end
1513
+ ctx=@bracestack.pop
1514
+ origch,line=ctx.starter,ctx.linenum
1515
+ ch==PAIRS[origch] or
1516
+ lexerror kw,"mismatched braces: #{origch}#{ch}\n" +
1517
+ "matching brace location", @filename, line
1518
+ BlockContext===ctx and @localvars.end_block
1519
+ if ParamListContext==ctx.class
1520
+ assert ch==')'
1521
+ #kw.set_callsite! #not needed?
1522
+ end
1523
+ return @moretokens.shift
1524
+ end
1525
+
1526
+ #-----------------------------------
1527
+ def eof(ch=nil)
1528
+ #this must be the very last character...
1529
+ oldpos=@file.pos
1530
+ assert(?\0==@file.getc)
1531
+
1532
+ result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
1533
+
1534
+ @file.pos==@file.stat.size or
1535
+ lexerror result,'nul character is not at the end of file'
1536
+ @file.pos=@file.stat.size
1537
+ return(endoffile_detected result)
1538
+ end
1539
+
1540
+ #-----------------------------------
1541
+ def endoffile_detected(s='')
1542
+ @moretokens.push( *(abort_noparens!.push super(s)))
1543
+ result= @moretokens.shift
1544
+ balanced_braces? or (lexerror result,"unbalanced braces at eof. bracestack=#{@bracestack.inspect}")
1545
+ result
1546
+ end
1547
+
1548
+ #-----------------------------------
1549
+ def single_char_token(ch)
1550
+ KeywordToken.new super(ch), @file.pos-1
1551
+ end
1552
+
1553
+ #-----------------------------------
1554
+ def comma(ch)
1555
+ single_char_token(ch)
1556
+ end
1557
+
1558
+ #-----------------------------------
1559
+ def semicolon(ch)
1560
+ assert @moretokens.empty?
1561
+ @moretokens.push(*abort_noparens!)
1562
+ @bracestack.last.see @bracestack,:semi
1563
+ if ExpectDoOrNlContext===@bracestack.last #should be in context's see:semi handler
1564
+ @bracestack.pop
1565
+ assert @bracestack.last.starter[/^(while|until|for)$/]
1566
+ end
1567
+ @moretokens.push single_char_token(ch)
1568
+ return @moretokens.shift
1569
+ end
1570
+
1571
+ #-----------------------------------
1572
+ def operator_or_methname_token(s,offset=nil)
1573
+ assert RUBYOPERATORREX===s
1574
+ if RUBYNONSYMOPERATORREX===s
1575
+ KeywordToken
1576
+ elsif @last_operative_token===/^(\.|::|def|undef|alias|defined\?)$/
1577
+ MethNameToken
1578
+ else
1579
+ OperatorToken
1580
+ end.new(s,offset)
1581
+ end
1582
+
1583
+ #-----------------------------------
1584
  #tokenify_results_of :identifier
  #apply save_offsets_in to every distinct handler named in CHARMAPPINGS,
  #leaving out symbol_or_op, open_brace and whitespace.
  #NOTE(review): presumably save_offsets_in makes each handler's token record
  #its starting offset, and symbol_or_op/open_brace are left out because they
  #pass offsets to their tokens themselves -- confirm against save_offsets_in
  save_offsets_in(*CHARMAPPINGS.values.uniq-[:symbol_or_op,:open_brace,:whitespace])
  #save_offsets_in :symbol
1587
+
1588
+ end
1589
+