rubylexer 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/rlold.rb ADDED
@@ -0,0 +1,12 @@
1
+ class RubyLexer06 < RubyLexer
2
+ alias lexerror lexerror_exception
3
+ end
4
+
5
+ class<<RubyLexer
6
+ def version(num)
7
+ case num
8
+ when 0.0..0.6: RubyLexer06
9
+ else RubyLexer
10
+ end
11
+ end
12
+ end
data/rubycode.rb ADDED
@@ -0,0 +1,44 @@
1
+ =begin copyright
2
+ rubylexer - a ruby lexer written in ruby
3
+ Copyright (C) 2004,2005 Caleb Clausen
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+
21
+
22
+ require "token.rb"
23
+ require "tokenprinter.rb"
24
+
25
+ class RubyCode < Token
26
+ def initialize(tokens,filename,linenum)
27
+ super(tokens)
28
+ @filename=filename
29
+ @linenum=linenum
30
+ end
31
+
32
+ def [](*args)
33
+ exec? ident.huh
34
+ end
35
+
36
+ def to_s()
37
+ result=[]
38
+ keepwsprinter=KeepWsTokenPrinter.new('',@linenum)
39
+ ident.each{|tok| result << keepwsprinter.sprint(tok) }
40
+ return result.to_s
41
+ end
42
+ end
43
+
44
+
data/rubylexer.rb ADDED
@@ -0,0 +1,1589 @@
1
+ =begin copyright
2
+ rubylexer - a ruby lexer written in ruby
3
+ Copyright (C) 2004,2005 Caleb Clausen
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+
21
+
22
+ require "rulexer"
23
+ require "symboltable"
24
+ require "io.each_til_charset"
25
+ require "context.rb"
26
+
27
+
28
+
29
+ #-----------------------------------
30
+ class RubyLexer < RuLexer
31
+ include NestedContexts
32
+
33
+ RUBYSYMOPERATORREX=
34
+ %r{^([&|^/%~]|=(==?|~)|>[=>]?|<(<|=>?)?|[+\-]@?|\*\*?|\[\]=?)}
35
+ # (nasty beastie, eh?)
36
+ #these are the overridable operators
37
+ #does not match flow-control operators like: || && ! or and if not
38
+ #or op= ops like: += -= ||=
39
+ #or .. ... ?:
40
+ #for that use:
41
+ RUBYNONSYMOPERATORREX=
42
+ %r{^([%^~/\-+]=|(\|\|?|&&?)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|=>?|![=~]?)$}
43
+ RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
44
+ UNSYMOPS=/^[~!]$/ #always unary
45
+ UBSYMOPS=/^([*&+-]|::)$/ #ops that could be unary or binary
46
+ WHSPCHARS=WHSPLF+"\\#" #WHSPLF (defined elsewhere) plus backslash and '#'
47
+ OPORBEGINWORDS="(if|unless|while|until)" #keywords that parse_keywords treats as either prefix (needs end) or infix form
48
+ BEGINWORDS=/^(def|class|module|begin|for|case|do|#{OPORBEGINWORDS})$/o
49
+ FUNCLIKE_KEYWORDS=/^(break|next|redo|return|raise|yield|defined\?|retry|super|BEGIN|END)$/
50
+ VARLIKE_KEYWORDS=/^(__FILE__|__LINE__|false|nil|self|true)$/
51
+ INNERBOUNDINGWORDS="(else|elsif|ensure|in|then|rescue|when)"
52
+ BINOPWORDS="(and|or)"
53
+ NEVERSTARTPARAMLISTWORDS=/^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)[^a-zA-Z0-9_!?=]?/o
54
+ NEVERSTARTPARAMLISTFIRST=CharSet[%[aoeitrwu]] #char set that begins NEVERSTARTPARAMLIST
55
+ NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
56
+ #RUBYKEYWORDS: one pattern built from the keyword groups defined above
57
+ RUBYKEYWORDS=%r{
58
+ ^(alias|#{BINOPWORDS}|not|undef|__END__|end|
59
+ #{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
60
+ #{INNERBOUNDINGWORDS}|#{BEGINWORDS}
61
+ )$
62
+ }xo
63
+ CHARMAPPINGS = { #character (or range / set of characters) => symbol; handed to CharHandler.new in initialize. several symbols name methods of this class
64
+ ?$ => :dollar_identifier,
65
+ ?@ => :at_identifier,
66
+ ?a..?z => :identifier,
67
+ ?A..?Z => :identifier,
68
+ ?_ => :identifier,
69
+ ?0..?9 => :number,
70
+ ?" => :double_quote,
71
+ ?' => :single_quote,
72
+ ?` => :back_quote,
73
+
74
+ WHSP => :whitespace, #includes \r
75
+ ?, => :comma,
76
+ ?; => :semicolon,
77
+
78
+ ?^ => :biop,
79
+ ?~ => :tilde,
80
+ ?= => :equals,
81
+ ?! => :exclam,
82
+ ?. => :dot,
83
+
84
+ #these ones could signal either an op or a term
85
+ ?/ => :regex_or_div,
86
+ "|>" => :quadriop,
87
+ "*&" => :star_or_amp, #could be unary
88
+ "+-" => :plusminus, #could be unary
89
+ ?< => :lessthan,
90
+ ?% => :percent,
91
+ ?? => :char_literal_or_op, #single-char int literal
92
+ ?: => :symbol_or_op,
93
+ ?\n => :newline, #implicitly escaped after op
94
+ #?\r => :newline, #implicitly escaped after op
95
+
96
+ ?\\ => :escnewline,
97
+ ?\0 => :eof,
98
+
99
+ "[({" => :open_brace,
100
+ "])}" => :close_brace,
101
+
102
+
103
+ ?# => :comment
104
+ }
105
+
106
+ attr :incomplete_here_tokens #reader for @incomplete_here_tokens (read by here_spread_over_ruby_code)
107
+
108
+
109
+ #-----------------------------------
110
+ def initialize(filename,file,linenum=1)
111
+ super(filename,file, linenum)
112
+ @start_linenum=linenum
113
+ @bracestack=[TopLevelContext.new]
114
+ @incomplete_here_tokens=[]
115
+ @localvars=SymbolTable.new
116
+ @defining_lvar=nil
117
+
118
+ @toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)
119
+
120
+ start_of_line_directives
121
+ end
122
+
123
+ #-----------------------------------
124
+ def get1token
125
+ result=super #most of the action's here
126
+
127
+ #now cleanup and housekeeping
128
+
129
+
130
+ #check for bizarre token types
131
+ case result
132
+ when IgnoreToken#,nil
133
+ return result
134
+ when Token#,String
135
+ else
136
+ raise "#{@filename}:#{linenum}:token is a #{result.class}, last is #{@last_operative_token}"
137
+ end
138
+
139
+ @last_operative_token=result
140
+
141
+ return result
142
+ end
143
+
144
+
145
+
146
+ #-----------------------------------
147
+ def balanced_braces?
148
+
149
+ #@bracestack.empty?
150
+ @bracestack.size==1 and TopLevelContext===@bracestack.first
151
+ end
152
+
153
+ #-----------------------------------
154
+ def dollar_identifier(ch=nil)
155
+ s=eat_next_if(?$) or return nil
156
+
157
+ if t=((identifier_as_string(?$) or special_global))
158
+ s<<t
159
+ else error= "missing $id name"
160
+ end
161
+
162
+ return lexerror(VarNameToken.new(s),error)
163
+ end
164
+
165
+ #-----------------------------------
166
+ def at_identifier(ch=nil)
167
+ result = (eat_next_if(?@) or return nil)
168
+ result << (eat_next_if(?@)or'')
169
+ if t=identifier_as_string(?@)
170
+ result<<t
171
+ else error= "missing @id name"
172
+ end
173
+ return lexerror(VarNameToken.new(result),error)
174
+ end
175
+
176
+ private
177
+ #-----------------------------------
178
+ def here_spread_over_ruby_code(rl,tok)
179
+ assert(!rl.incomplete_here_tokens.empty?)
180
+ @incomplete_here_tokens += rl.incomplete_here_tokens
181
+ end
182
+
183
+ #-----------------------------------
184
+ def expect_do_or_end_or_nl!(st)
185
+ @bracestack.push ExpectDoOrNlContext.new(st,/(do|;|:|\n)/,@linenum)
186
+ end
187
+
188
+ #-----------------------------------
189
+ #match NoWstoken, ws, comment, or (escaped?) newline repeatedly
190
+ def maybe_no_ws_token
191
+ result=[]
192
+ while IgnoreToken===(tok=get1token)
193
+ EoiToken===tok and lexerror tok,"end of file not expected here"
194
+ result << tok
195
+ end
196
+ assert((not IgnoreToken===tok))
197
+ @moretokens.unshift tok
198
+ return result
199
+ end
200
+
201
+ #-----------------------------------
202
+ WSCHARSET=CharSet["#\\\n\s\t\v\r\f"] #'#', backslash, newline, space, tab, vtab, cr, formfeed
203
+ def ignored_tokens(allow_eof=false) #returns the run of ignorable tokens found at the current position; yields each NewlineToken if a block is given
204
+ result=[]
205
+ result<<@moretokens.shift while IgnoreToken===@moretokens.first #queued ignorable tokens come first
206
+ @moretokens.empty? or return result #a non-ignorable token is already queued: stop
207
+ if true #NOTE(review): constant condition, so the else branch below never runs
208
+ loop do
209
+ unless @moretokens.empty?
210
+ IgnoreToken===@moretokens.first or NewlineToken===@moretokens.first or
211
+ break
212
+ else
213
+ WSCHARSET===nextchar or break
214
+ end
215
+ #reached only when the next token is expected to be ignorable (or a newline)
216
+ tok=get1token
217
+ result<<tok
218
+ case tok
219
+ when NewlineToken : block_given? and yield tok
220
+ when EoiToken : allow_eof or lexerror tok,"end of file not expected here(2)"
221
+ when IgnoreToken
222
+ else raise "impossible"
223
+ end
224
+ end
225
+
226
+ else
227
+ @whsphandler||=CharHandler.new(self, :==,
228
+ "#" => :comment,
229
+ "\n" => :newline,
230
+ "\\" => :escnewline,
231
+ "\s\t\v\r\f" => :whitespace
232
+ )
233
+ #tok=nil
234
+ while tok=@whsphandler.go((nextchar or return result))
235
+ block_given? and NewlineToken===tok and yield tok
236
+ result << tok
237
+ end
238
+ end
239
+ return result
240
+ end
241
+
242
+ #-----------------------------------
243
+ def safe_recurse
244
+ old_moretokens=@moretokens
245
+ #old_bracestack=@bracestack.dup
246
+ @moretokens=[]
247
+ result= yield @moretokens
248
+ #assert @incomplete_here_tokens.empty?
249
+ #assert @bracestack==old_bracestack
250
+ @moretokens= old_moretokens.concat @moretokens
251
+ return result
252
+ #need to do something with @last_operative_token?
253
+ end
254
+
255
+ #-----------------------------------
256
+ def special_global #handle $-a and friends
257
+ assert prevchar=='$'
258
+ result = ((
259
+ #order matters here, but it shouldn't
260
+ #(but til_charset must be last)
261
+ eat_next_if(/^[!@&+`'=~\/\\,.;<>*"$?:]$/) or
262
+ (eat_next_if('-') and ("-"+getchar)) or
263
+ (?0..?9)===nextchar ? til_charset(/[^\d]/) : nil
264
+ ))
265
+ end
266
+
267
+ #-----------------------------------
268
+ def identifier(context=nil) #lex an identifier and return the first resulting token; any further tokens stay queued in @moretokens
269
+ oldpos=@file.pos
270
+ str=identifier_as_string(context)
271
+
272
+ #skip keyword processing if 'escaped' as it were, by def, . or ::
273
+ #or if in a non-bare context
274
+ #just asserts because those contexts are never encountered.
275
+ #control goes through symbol(<...>,nil)
276
+ assert( /^[a-z_]$/i===context)
277
+ assert !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
278
+ #parse_keywords returns the tokens for a keyword, or calls the block below and returns its value
279
+ @moretokens.unshift(*parse_keywords(str,oldpos) do
280
+ #if not a keyword,
281
+ case str
282
+ when FUNCLIKE_KEYWORDS: #do nothing
283
+ when VARLIKE_KEYWORDS,RUBYKEYWORDS: raise "shouldnt see keywords here, now"
284
+ end
285
+ safe_recurse { |a| var_or_meth_name(str,@last_operative_token,oldpos) }
286
+ end)
287
+ return @moretokens.shift
288
+ end
289
+
290
+ #-----------------------------------
291
+ def identifier_as_string(context) #read an identifier at the current position; returns it as a string, or nil if none starts here
292
+ #must begin w/ letter or underscore
293
+ str=eat_next_if(/^[_a-z]$/i) or return nil
294
+
295
+ #equals, question mark, and exclamation mark
296
+ #might be allowed at the end in some contexts.
297
+ #(in def headers and symbols)
298
+ #otherwise, =,?, and ! are to be considered
299
+ #separate tokens. confusing, eh?
300
+ #i hope i've captured all right conditions....
301
+ #context should always be ?: right after def, ., and :: now
302
+ #each maybe_* is the suffix char allowed in this context, or nil if not allowed
303
+ maybe_eq,maybe_qm,maybe_ex = case context
304
+ when ?@,?$ then [nil,nil,nil]
305
+ when ?: then [?=, ??, ?!]
306
+ else [nil,??, ?!]
307
+ end
308
+ #the body of the name: everything up to the first non-word char
309
+ str<<til_charset(/[^a-z0-9_]/i)
310
+
311
+ #look for ?, !, or =, if allowed
312
+ case b=@file.getc
313
+ when nil #means we're at eof
314
+ #handling nil here prevents b from ever matching
315
+ #a nil value of maybe_qm, maybe_ex or maybe_eq
316
+ when maybe_qm
317
+ str << b
318
+ when maybe_ex
319
+ nc=(nextchar unless @file.eof?)
320
+ #does ex appear to be part of a larger operator?
321
+ if nc==?= #or nc==?~
322
+ back1char
323
+ else
324
+ str << b
325
+ end
326
+ when maybe_eq
327
+ nc=(nextchar unless @file.eof?)
328
+ #does eq appear to be part of a larger operator?
329
+ if nc==?= or nc==?~ or nc==?>
330
+ back1char
331
+ else
332
+ str << b
333
+ end
334
+ else
335
+ back1char
336
+ end
337
+
338
+
339
+ return str
340
+ end
341
+
342
+ #-----------------------------------
343
+ #contexts in which comma may appear in ruby:
344
+ #multiple lhs (terminated by assign op)
345
+ #multiple rhs (in implicit context) (tbd)
346
+ #method actual param list (in ( or implicit context)
347
+ #method formal param list (in ( or implicit context)
348
+ #block formal param list (in | context) (tbd)
349
+ #hash immediate (in imm{ context)
350
+ #array immediate (in imm[ context)
351
+ #element reference/assignment (in [] or []= method actual parameter context)
352
+ #list after for
353
+ #list after rescue
354
+ #list after when
355
+ #list after undef
356
+
357
+ #note: comma in parens not around a param list is illegal
358
+
359
+ #-----------------------------------
360
+ #a comma has been seen. are we in an
361
+ #lvalue list or some other construct that uses commas?
362
+ def comma_in_lvalue_list?
363
+ not ListContext===@bracestack.last
364
+ end
365
+
366
+ #-----------------------------------
367
+ def in_lvar_define_state
368
+ #@defining_lvar is a hack
369
+ @defining_lvar or case ctx=@bracestack.last
370
+ when ForSMContext: ctx.state==:for
371
+ when RescueSMContext: ctx.state==:arrow
372
+ when BlockParamListContext: true
373
+ end
374
+ end
375
+
376
+ #-----------------------------------
377
+ #determine if an alphabetic identifier refers to a variable
378
+ #or method name. generates implicit parenthes(es) if it is a
379
+ #call site and no explicit parens are present. starts an implicit param list
380
+ #if appropriate. adds tok to the
381
+ #local var table if it's a local var being defined for the first time.
382
+
383
+ #note: what we here call variables (rather, constants) following ::
384
+ #might actually be methods at runtime, but that's immaterial to tokenization.
385
+
386
+ #note: this routine should determine the correct token type for name and
387
+ #create the appropriate token. currently this is not done because callers
388
+ #typically have done it (perhaps incorrectly) already.
389
+ def var_or_meth_name(name,lasttok,pos)
390
+ #look for call site if not a keyword or keyword is function-like
391
+ #look for and ignore local variable names
392
+ #returns an array: the name token, any implicit paren tokens, then the ignored tokens read after the name
393
+ assert String===name
394
+
395
+ #fixme: keywords shouldn't be treated specially after :: and .
396
+
397
+ #maybe_local really means 'maybe local or constant'
398
+ maybe_local=case name
399
+ when /[^a-z_0-9]$/i: #do nothing
400
+ when /^[a-z_]/: (@localvars===name or VARLIKE_KEYWORDS===name or in_lvar_define_state) and not lasttok===/^(\.|::)$/
401
+ when /^[A-Z]/: is_const=true;not lasttok==='.' #this is the right algorithm for constants...
402
+ end
403
+
404
+ assert(@moretokens.empty?)
405
+
406
+ tok=@last_operative_token=VarNameToken.new(name,pos)
407
+ #read the whitespace/comments after the name, noting whether a newline was among them
408
+ oldpos=@file.pos
409
+ sawnl=false
410
+ result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
411
+ sawnl || @file.eof? and return result.unshift(
412
+ *if maybe_local : [tok]
413
+ else [MethNameToken.new(name,pos), #insert implicit parens right after tok
414
+ ImplicitParamListStartToken.new( oldpos),
415
+ ImplicitParamListEndToken.new( oldpos) ]
416
+ end
417
+ )
418
+
419
+ #if next op is assignment (or comma in lvalue list)
420
+ #then omit implicit parens
421
+ assignment_coming=case nc=nextchar
422
+ when ?=: not /^=[=~]$/===readahead(2)
423
+ when ?,: comma_in_lvalue_list?
424
+ when ?>,?<: /^([<>])\1=$/===readahead(3)
425
+ when ?*,?|,?&: /^([*|&])\1?=/===readahead(3)
426
+ when ?%,?/,?-,?+,?^: readahead(2)[1..1]=='='
427
+ end
428
+ if (assignment_coming or in_lvar_define_state)
429
+ tok=VarNameToken.new(name,pos)
430
+ if /[^a-z_0-9]$/i===name
431
+ lexerror tok,"not a valid variable name: #{name}"
432
+ elsif /^[a-z_]/===name and !(lasttok===/^(\.|::)$/)
433
+ @localvars[name]=true
434
+ end
435
+ return result.unshift(tok)
436
+ end
437
+ #codes used below: 0 = emit nothing, 2 = emit start and end tokens at once, 1 or 3 = emit only the start token and push ParamListContextNoParen
438
+ implicit_parens_to_emit=case nc
439
+ when ?!: readahead(2)=='!=' ? 2 : 1
440
+ when NEVERSTARTPARAMLISTFIRST
441
+ (NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
442
+ when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~: 1
443
+ when ?{: maybe_local=false; 2
444
+ when ?(: maybe_local=false; 0
445
+ when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?=: 2
446
+ when ?+, ?-, ?*, ?&, ?%, ?/, ?:, ??: (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
447
+ when ?<: (ws_toks.empty? || readahead(3)[/^<<[^"'`a-zA-Z_0-9-]/]) ? 2 : 3
448
+ when ?[: ws_toks.empty? ? 2 : 3
449
+ when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?#: raise 'failure'
450
+ else raise "unknown char after ident: #{nextchar.chr}"
451
+ end
452
+ #a constant never keeps code 3; it is turned into 1
453
+ implicit_parens_to_emit==3 and is_const and implicit_parens_to_emit=1
454
+ #a possible local/constant with code 2 or 3 becomes a VarNameToken with no implicit parens
455
+ tok=if maybe_local and implicit_parens_to_emit>=2
456
+ implicit_parens_to_emit=0
457
+ VarNameToken
458
+ else
459
+ MethNameToken
460
+ end.new(name,pos)
461
+
462
+ #put the implicit paren token(s) in front of the ignored tokens; tok itself is prepended last
463
+ case implicit_parens_to_emit
464
+ when 2:
465
+ result.unshift ImplicitParamListStartToken.new(oldpos),
466
+ ImplicitParamListEndToken.new(oldpos)
467
+ when 1,3:
468
+ result.unshift ImplicitParamListStartToken.new(oldpos)
469
+ @bracestack.push ParamListContextNoParen.new(@linenum)
470
+ when 0: #do nothing
471
+ else raise 'invalid value of implicit_parens_to_emit'
472
+ end
473
+ return result.unshift(tok)
474
+ # 'ok:'
475
+ # 'if unless while until {'
476
+ # '\n (unescaped) and or'
477
+ # 'then else elsif rescue ensure (illegal in value context)'
478
+
479
+ # 'need to pop noparen from bracestack on these tokens: (in operator context)'
480
+ # 'not ok:'
481
+ # 'not (but should it be?)'
482
+ end
483
+
484
+ #-----------------------------------
485
+ CONTEXT2ENDTOK={AssignmentRhsContext=>AssignmentRhsListEndToken,
486
+ ParamListContextNoParen=>ImplicitParamListEndToken,
487
+ KwParamListContext=>KwParamListEndToken
488
+ }
489
+ def abort_noparens!(str='')
490
+ #assert @moretokens.empty?
491
+ result=[]
492
+ while klass=CONTEXT2ENDTOK[@bracestack.last.class]
493
+ result << klass.new(@file.pos-str.length)
494
+ @bracestack.pop
495
+ end
496
+ return result
497
+ end
498
+
499
+ if false #no longer used
500
+ #----------------------------------- (the def below is never evaluated because of the 'if false' above)
501
+ def abort_1_noparen!(offs=0) #pops any AssignmentRhsContexts and then one ParamListContextNoParen, returning their end tokens
502
+ assert @moretokens.empty?
503
+ result=[]
504
+ while AssignmentRhsContext===@bracestack.last
505
+ @bracestack.pop
506
+ result << AssignmentRhsListEndToken.new(@file.pos-offs)
507
+ end
508
+ ParamListContextNoParen===@bracestack.last or lexerror huh,'{} with no matching callsite'
509
+ @bracestack.pop
510
+ result << ImplicitParamListEndToken.new(@file.pos-offs)
511
+ return result
512
+ end
513
+ end
514
+
515
+ #-----------------------------------
516
+ #parse keywords now, to prevent confusion over bare symbols
517
+ #and match end with corresponding preceding def or class or whatever.
518
+ #if arg is not a keyword, the block is called
519
+ def parse_keywords(str,offset) #returns an array of tokens for keyword str; for a non-keyword (or function-like keyword) returns the block's value instead
520
+ assert @moretokens.empty?
521
+ result=[KeywordToken.new(str,offset)]
522
+ #branches below may put end-of-implicit-list tokens before the keyword token and may push/pop @bracestack
523
+ case str
524
+ when "end"
525
+ result.unshift(*abort_noparens!(str))
526
+ @bracestack.last.see @bracestack,:semi #sorta hacky... should make an :end event instead?
527
+
528
+ =begin not needed?
529
+ if ExpectDoOrNlContext===@bracestack.last
530
+ @bracestack.pop
531
+ assert @bracestack.last.starter[/^(while|until|for)$/]
532
+ end
533
+ =end
534
+
535
+ WantsEndContext===@bracestack.last or lexerror result.last, 'unbalanced end'
536
+ ctx=@bracestack.pop
537
+ start,line=ctx.starter,ctx.linenum
538
+ BEGINWORDS===start or lexerror result.last, "end does not match #{start or "nil"}"
539
+ /^(class|module|def|do)$/===start and @localvars.end_block
540
+ #class/module: wants an end, and starts a block in @localvars
541
+ when "class","module"
542
+ result.first.has_end!
543
+ @bracestack.push WantsEndContext.new(str,@linenum)
544
+ @localvars.start_block
545
+
546
+ when "if","unless" #could be infix form without end
547
+ if after_nonid_op?{false} #prefix form
548
+ result.first.has_end!
549
+ @bracestack.push WantsEndContext.new(str,@linenum)
550
+
551
+
552
+ else #infix form
553
+ result.unshift(*abort_noparens!(str))
554
+ end
555
+ when "begin","case"
556
+ result.first.has_end!
557
+ @bracestack.push WantsEndContext.new(str,@linenum)
558
+ when "while","until" #could be infix form without end
559
+ if after_nonid_op?{false} #prefix form
560
+ result.first.has_end!
561
+ @bracestack.push WantsEndContext.new(str,@linenum)
562
+ expect_do_or_end_or_nl! str
563
+
564
+ else #infix form
565
+ result.unshift(*abort_noparens!(str))
566
+ end
567
+ when "for"
568
+ result.first.has_end!
569
+ @bracestack.push WantsEndContext.new(str,@linenum)
570
+ #expect_do_or_end_or_nl! str #handled by ForSMContext now
571
+ @bracestack.push ForSMContext.new(@linenum)
572
+ when "do"
573
+ result.unshift(*abort_noparens!(str))
574
+ if ExpectDoOrNlContext===@bracestack.last
575
+ @bracestack.pop
576
+ assert WantsEndContext===@bracestack.last
577
+ else
578
+ result.last.has_end!
579
+ @bracestack.push WantsEndContext.new(str,@linenum)
580
+ @localvars.start_block
581
+ block_param_list_lookahead
582
+ end
583
+ when "def"
584
+ result.first.has_end!
585
+ @bracestack.push WantsEndContext.new("def",@linenum)
586
+ @localvars.start_block
587
+ safe_recurse { |aa|
588
+ @last_operative_token=KeywordToken.new "def" #hack
589
+ result.concat ignored_tokens
590
+
591
+ #read an expr like a.b.c or a::b::c
592
+ #or (expr).b.c
593
+ if nextchar==?( #look for optional parenthesised head
594
+ old_size=@bracestack.size
595
+ parencount=0
596
+ begin
597
+ tok=get1token
598
+ case tok
599
+ when/^\($/.token_pat then parencount+=1
600
+ when/^\)$/.token_pat then parencount-=1
601
+ end
602
+ EoiToken===tok and lexerror tok, "eof in def header"
603
+ result<<tok
604
+ end until parencount==0 #@bracestack.size==old_size
605
+ else #no parentheses, all tail
606
+ @last_operative_token=KeywordToken.new "." #hack hack
607
+ result << symbol(false,false)
608
+ #this isn't quite right.... if a.b.c.d is seen, a, b, and c
609
+ #should be considered maybe varname instead of methnames.
610
+ #the last (d in the example) is always considered a methname;
611
+ #it's what's being defined.
612
+ end
613
+ #read tail: .b.c.d etc
614
+ @last_operative_token=result.last
615
+ state=:expect_op
616
+ loop do
617
+
618
+ #look for start of parameter list
619
+ nc=(@moretokens.first or nextchar.chr)
620
+ if state==:expect_op and /^[a-z_(&*]/i===nc
621
+ result.concat def_param_list
622
+ break
623
+ end
624
+ #state alternates between :expect_op (after a name) and :expect_name (after . or ::)
625
+ tok=get1token
626
+ result<<tok
627
+ case tok
628
+ when EoiToken
629
+ lexerror tok,'unexpected eof in def header'
630
+ when IgnoreToken
631
+ when MethNameToken #,VarNameToken # /^[a-z_]/i.token_pat
632
+ lexerror tok,'expected . or ::' unless state==:expect_name
633
+ state=:expect_op
634
+ when /^(\.|::)$/.token_pat
635
+ lexerror tok,'expected ident' unless state==:expect_op
636
+ state=:expect_name
637
+ when /^(;|end)$/.token_pat, NewlineToken #are we done with def name?
638
+ state==:expect_op or lexerror tok,'expected identifier'
639
+ break
640
+ else
641
+ lexerror(tok, "bizarre token in def name: " +
642
+ "#{tok}:#{tok.class}")
643
+ end
644
+ end
645
+ }
646
+ when "alias"
647
+ safe_recurse { |a|
648
+ @last_operative_token=KeywordToken.new "alias" #hack
649
+ result.concat ignored_tokens
650
+ res=symbol(eat_next_if(?:),false)
651
+ res ? result<<res : lexerror(result.first,"bad symbol in alias")
652
+ @last_operative_token=KeywordToken.new "alias" #hack
653
+ result.concat ignored_tokens
654
+ res=symbol(eat_next_if(?:),false)
655
+ res ? result<<res : lexerror(result.first,"bad symbol in alias")
656
+ }
657
+ when "undef"
658
+ safe_recurse { |a|
659
+ loop do
660
+ @last_operative_token=KeywordToken.new "," #hack
661
+ result.concat ignored_tokens
662
+ tok=symbol(eat_next_if(?:),false)
663
+ tok or lexerror(result.first,"bad symbol in undef")
664
+ result<< tok
665
+ @last_operative_token=tok
666
+
667
+ sawnl=false
668
+ result.concat ignored_tokens(true){|nl| sawnl=true}
669
+ #the list of names continues only while a comma follows on the same line
670
+ break if sawnl or nextchar != ?,
671
+ tok= single_char_token(?,)
672
+ result<< tok
673
+ end
674
+ }
675
+
676
+ # when "defined?"
677
+ # huh
678
+ #defined? might have a baresymbol following it
679
+ #does it need to be handled specially?
680
+
681
+ when "when"
682
+ result.unshift(*abort_noparens!(str))
683
+ @bracestack.push KwParamListContext.new(str,@linenum)
684
+
685
+ when "rescue"
686
+ result.unshift(*abort_noparens!(str))
687
+ @bracestack.push RescueSMContext.new(@linenum)
688
+
689
+ when "then","in"
690
+ result.unshift(*abort_noparens!(str))
691
+ @bracestack.last.see @bracestack,str.to_sym
692
+
693
+ when /^(#{BINOPWORDS}|#{INNERBOUNDINGWORDS})$/o
694
+ result.unshift(*abort_noparens!(str))
695
+
696
+ when FUNCLIKE_KEYWORDS: result=yield
697
+
698
+ when RUBYKEYWORDS
699
+ #do nothing
700
+
701
+ else result=yield
702
+
703
+ end
704
+
705
+ return result
706
+ end
707
+
708
+
709
+ #-----------------------------------
710
+ def block_param_list_lookahead #reads a |...| block parameter list, if one follows, and queues its tokens at the front of @moretokens
711
+ safe_recurse{ |la|
712
+ @last_operative_token=KeywordToken.new ';'
713
+ a=ignored_tokens
714
+ #a collects every token read here
715
+ if eat_next_if(?|)
716
+ a<<KeywordToken.new("|",@file.pos-1)
717
+ if eat_next_if(?|)
718
+ a.concat [NoWsToken.new(@file.pos-1),
719
+ KeywordToken.new('|',@file.pos-1)]
720
+ else
721
+ assert !@defining_lvar
722
+ @defining_lvar=true
723
+ assert((@last_operative_token===';' or NewlineToken===@last_operative_token))
724
+ @bracestack.push BlockParamListContext.new(@linenum)
725
+ #block param initializers are not supported here, because ruby doesn't allow them!
726
+ begin
727
+ tok=get1token
728
+ EoiToken===tok and lexerror tok,"eof in block parameter list"
729
+ a<<tok
730
+ end until tok==='|'
731
+ assert@defining_lvar
732
+ @defining_lvar=false
733
+ BlockParamListContext===@bracestack.last or raise 'expected BlockParamListContext atop @bracestack'
734
+ @bracestack.pop
735
+ @moretokens.empty? or
736
+ fixme %#moretokens might be set from get1token call above...might be bad#
737
+ end
738
+ end
739
+
740
+ @last_operative_token=KeywordToken.new ';'
741
+ #a.concat ignored_tokens
742
+
743
+ #assert @last_operative_token===';'
744
+ #a<<get1token
745
+ #insert the collected tokens at the front of the queue that safe_recurse passed in
746
+ la[0,0]=a
747
+ }
748
+ end
749
+
750
+ #-----------------------------------
751
+ #handle parameter list of a method declaration.
752
+ #parentheses are optional... if missing param list
753
+ #is ended by (unescaped) newline or semicolon (at the same bracing level)
754
+ #expect a brace as the next token,
755
+ #then match the following tokens until
756
+ #the matching endbrace is found
757
+ def def_param_list #returns the tokens of a method definition's parameter list, through its terminator
758
+ result=[]
759
+ normal_comma_level=old_bracestack_size=@bracestack.size
760
+ safe_recurse { |a|
761
+ assert(@moretokens.empty?)
762
+ assert((not IgnoreToken===@moretokens[0]))
763
+ assert((@moretokens[0] or not nextchar.chr[WHSPCHARS]))
764
+
765
+ #have parentheses?
766
+ if '('==@moretokens[0] or nextchar==?(
767
+ #get open paren token
768
+ result.concat maybe_no_ws_token
769
+ result << tok=get1token
770
+ assert(tok==='(')
771
+
772
+
773
+ #bracestack was changed by get1token above...
774
+ normal_comma_level+=1
775
+ assert(normal_comma_level==@bracestack.size)
776
+ endingblock=proc{|tok| tok===')' }
777
+ else
778
+ endingblock=proc{|tok| tok===';' or NewlineToken===tok}
779
+ end
780
+ class << endingblock
781
+ alias === call
782
+ end
783
+ #(the alias above lets 'endingblock===tok' invoke the proc)
784
+ @last_operative_token=KeywordToken.new ',' #hack
785
+ #read local parameter names
786
+ loop do
787
+ expect_name=(@last_operative_token===',' and
788
+ normal_comma_level==@bracestack.size)
789
+ expect_name and @defining_lvar||=true
790
+ result << tok=get1token
791
+ lexerror tok, "unexpected eof in def header" if EoiToken===tok
792
+
793
+ #break if at end of param list
794
+ endingblock===tok and
795
+ old_bracestack_size>=@bracestack.size and break
796
+
797
+ #next token is a local var name
798
+ #(or the one after that if unary ops present)
799
+ #result.concat ignored_tokens
800
+ expect_name and case tok
801
+ when IgnoreToken#, /^[A-Z]/ #do nothing
802
+ when VarNameToken
803
+ assert@defining_lvar
804
+ @defining_lvar=false
805
+ assert((not @last_operative_token===','))
806
+ when /^[&*]$/.token_pat #unary form...
807
+ #a NoWsToken is also expected... read it now
808
+ result.concat maybe_no_ws_token #not needed?
809
+ @last_operative_token=KeywordToken.new ','
810
+ else lexerror tok,"unfamiliar var name '#{tok}'"
811
+ end
812
+ end
813
+
814
+ @defining_lvar=false
815
+
816
+
817
+ assert(@bracestack.size <= old_bracestack_size)
818
+ assert(endingblock[tok])
819
+
820
+ #hack: force next token to look like start of a
821
+ #new stmt, if the last ignored_tokens
822
+ #call above did not find a newline
823
+ #(just in case the next token parsed
824
+ #happens to call quote_expected? or after_nonid_op)
825
+ result.concat ignored_tokens
826
+ if nextchar.chr[/[iuw\/<|>+\-*&%?:]/] and
827
+ !(NewlineToken===@last_operative_token) and
828
+ !(/^(end|;)$/===@last_operative_token)
829
+ @last_operative_token=KeywordToken.new ';'
830
+ result<< get1token
831
+ end
832
+ }
833
+
834
+ return result
835
+ end
836
+
837
+
838
+ #-----------------------------------
839
+ #handle % in ruby code. is it part of fancy quote or a modulo operator?
840
+ def percent(ch)
841
+ if quote_expected? ch
842
+ fancy_quote ch
843
+ else
844
+ biop ch
845
+ end
846
+ end
847
+
848
+ #-----------------------------------
849
+ #handle * in ruby code. is unary or binary operator?
850
+ def star_or_amp(ch)
851
+ assert('*&'[ch])
852
+ if unary_op_expected? ch
853
+ #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
854
+ result=operator_or_methname_token getchar
855
+ WHSPLF[nextchar.chr] or
856
+ @moretokens << NoWsToken.new(@file.pos)
857
+ return result
858
+ else
859
+ return(quadriop ch)
860
+ end
861
+ #result should distinguish unary+binary *&
862
+ end
863
+
864
+ #-----------------------------------
865
+ #handle ? in ruby code. is it part of ?..: or a character literal?
866
+ def char_literal_or_op(ch)
867
+ if colon_quote_expected? ch
868
+ getchar
869
+ NumberToken.new getchar_maybe_escape
870
+ else
871
+ @bracestack.push TernaryContext.new(@linenum)
872
+ KeywordToken.new getchar #operator
873
+ end
874
+ end
875
+
876
+ #-----------------------------------
877
+ def regex_or_div(ch)
878
+ #space after slash always means / operator, rather than regex start
879
+ if after_nonid_op?{ !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/\s}] }
880
+ return regex(ch)
881
+ else #/ is operator
882
+ result=getchar
883
+ if eat_next_if(?=)
884
+ result << '='
885
+ end
886
+ return(operator_or_methname_token result)
887
+ end
888
+ end
889
+
890
+ #-----------------------------------
891
+ #return true if tok corresponds to a variable or constant, false if its for a method, nil for something else
892
+ #we assume tok is a valid token with a correctly formed name.
893
+ #...should really be called was_var_name
894
+ def is_var_name?
895
+ (tok=@last_operative_token)
896
+
897
+ s=tok.to_s
898
+ case s
899
+ when /[^a-z_0-9]$/i: false
900
+ when /^[a-z_]/: @localvars===s or VARLIKE_KEYWORDS===s
901
+ when /^[A-Z]/: VarNameToken===tok
902
+ when /^[@$<]/: true
903
+ else raise "not var or method name: #{s}"
904
+ end
905
+ end
906
+
907
+ #-----------------------------------
908
+ def colon_quote_expected?(ch) #yukko hack
909
+ assert ':?'[ch]
910
+ readahead(2)[/^(\?[^#{WHSPLF}]|:[$@a-zA-Z_'"`\[*~+\-\/%<=>&|^])$/o] or return false
911
+
912
+ after_nonid_op? {
913
+ #possible func-call as operator
914
+
915
+ !is_var_name?
916
+ }
917
+ end
918
+
919
+ #-----------------------------------
920
+ def symbol_or_op(ch) #handles ':' -- returns a symbol token, a ':' keyword token, or the first token of a '::'
921
+ startpos=@file.pos
922
+ qe= colon_quote_expected?(ch)
923
+ lastchar=prevchar
924
+ eat_next_if(ch) or raise "needed: "+ch
925
+
926
+ #handle quoted symbols like :"foobar", :"[]"
927
+ qe and return symbol(':')
928
+
929
+ #look for another colon; return single : if not found
930
+ unless eat_next_if(?:)
931
+ #cancel implicit contexts...
932
+ @moretokens.push(*abort_noparens!(':'))
933
+
934
+ #end ternary context, if any
935
+ @bracestack.last.see @bracestack,:colon
936
+
937
+ TernaryContext===@bracestack.last and @bracestack.pop #should be in the context's see handler
938
+
939
+ if ExpectDoOrNlContext===@bracestack.last #should be in the context's see handler
940
+ @bracestack.pop
941
+ assert @bracestack.last.starter[/^(while|until|for)$/]
942
+ end
943
+ #the ':' token is queued after any end tokens from abort_noparens!; the first queued token is returned
944
+ @moretokens.push KeywordToken.new(':',startpos)
945
+ return @moretokens.shift
946
+ end
947
+
948
+ #we definitely found a ::
949
+
950
+ colon2=KeywordToken.new( '::',startpos)
951
+ lasttok=@last_operative_token
952
+ assert !(String===lasttok)
953
+ if (VarNameToken===lasttok or MethNameToken===lasttok) and
954
+ lasttok===/^[$@a-zA-Z_]/ and !WHSPCHARS[lastchar]
955
+ then
956
+ @moretokens << colon2
957
+ result= NoWsToken.new(startpos)
958
+ else
959
+ result=colon2
960
+ end
961
+ dot_rhs(colon2)
962
+ return result
963
+ end
964
+
965
+ #-----------------------------------
966
# Lexes the body of a symbol (or, when notbare is false, a bare method
# name) starting at the current position.
#
# notbare         - truthy when a ':' was just consumed; the token then
#                   starts one position earlier and is a SymbolToken,
#                   otherwise a MethNameToken.
# couldbecallsite - must be falsy (asserted).
#
# The text is an operator matched by RUBYSYMOPERATORREX in the next
# three characters, or is chosen by the next character: a quoted string,
# a backquote, an @/$ identifier, or a plain identifier. A capitalised
# identifier right after '::' that ends in a letter, digit or underscore
# becomes a VarNameToken. For any other starting character nothing is
# consumed and the error message itself is used as the token text.
#
# Returns whatever lexerror(token, error) returns; error is nil when
# lexing succeeded.
def symbol(notbare, couldbecallsite = !notbare)
  assert(!couldbecallsite)
  start = @file.pos
  start -= 1 if notbare
  klass = notbare ? SymbolToken : MethNameToken
  error = nil

  # look for operators first
  opmatches = readahead(3)[RUBYSYMOPERATORREX]
  if opmatches
    result = @file.read(opmatches.size)
  else
    nc = nextchar
    result =
      case nc
      when ?"
        assert notbare
        double_quote('"')
      when ?'
        assert notbare
        double_quote("'")
      when ?` then @file.read(1)
      when ?@ then at_identifier.to_s
      when ?$ then dollar_identifier.to_s
      when ?_, ?a..?z then identifier_as_string(?:)
      when ?A..?Z
        name = identifier_as_string(?:)
        if @last_operative_token === '::'
          assert(klass == MethNameToken)
          klass = VarNameToken if /[A-Z_0-9]$/i === name
        end
        name
      else
        error = "unexpected char starting symbol: #{nc.chr}"
      end
  end

  lexerror(klass.new(result, start), error)
end
993
+
994
+ #-----------------------------------
995
# Reads the method name that follows a '.' or '::'.
#
# Returns a two-element array [name, start]: start is the file position
# before the name, and name is an operator matched by RUBYSYMOPERATORREX,
# a lone backquote, or the result of identifier_as_string. If the next
# character cannot start a name, @last_operative_token is reset to a ';'
# keyword, lexerror is called on tok_to_errify, and name is nil.
def callsite_symbol(tok_to_errify)
  start = @file.pos

  # look for operators first
  opmatches = readahead(3)[RUBYSYMOPERATORREX]
  name =
    if opmatches
      @file.read(opmatches.size)
    else
      nc = nextchar
      case nc
      when ?` then @file.read(1)
      when ?_, ?a..?z, ?A..?Z then identifier_as_string(?:)
      else
        @last_operative_token = KeywordToken.new(';')
        lexerror(tok_to_errify, "unexpected char starting symbol: #{nc.chr}")
        nil
      end
    end

  [name, start]
end
1011
+
1012
+ #-----------------------------------
1013
# Lexes the header of a here document (<<ID, <<-ID, <<"ID", <<'ID',
# <<`ID`) and returns a HerePlaceholderToken for it.
#
# The placeholder is appended to @incomplete_here_tokens; its body is
# read later, by newline, once the end of the current line is reached.
# Meanwhile this method lexes the rest of the header line with get1token
# inside safe_recurse, prepending those tokens to the array safe_recurse
# yields, until the placeholder reports it is no longer unsafe_to_use.
#
# Malformed headers (unterminated quote, empty terminator) are returned
# through lexerror instead. Raises RuntimeError if the input does not
# start with '<<'.
def here_header
  @file.read(2) == '<<' or raise "parser insanity"

  dash = eat_next_if(?-)
  quote = eat_next_if(/^['"`]$/)
  if quote
    ender = til_charset(/[#{quote}]/)
    unless quote == getchar
      return lexerror(HerePlaceholderToken.new(dash, quote, ender), "mismatched quotes in here doc")
    end
  else
    # an unquoted terminator behaves like a double-quoted one
    quote = '"'
    ender = til_charset(/[^a-zA-Z0-9_]/)
    unless ender.length >= 1
      return lexerror(HerePlaceholderToken.new(dash, quote, ender), "invalid here header")
    end
  end

  res = HerePlaceholderToken.new(dash, quote, ender)
  @incomplete_here_tokens.push res

  # hack: normally get1token would record this, but the get1token calls
  # below recurse, so the last operative token is fixed up here.
  @last_operative_token = res

  safe_recurse { |a|
    assert(a.object_id == @moretokens.object_id)
    toks = []
    begin
      # yech. handle a here header inside a string inclusion whose body
      # lies outside it: count the contexts that are not one of the
      # three classes tested below.
      cnt = 0
      1.upto(@bracestack.size) do |i|
        ctx = @bracestack[-i]
        skipped = AssignmentRhsContext === ctx ||
                  ParamListContextNoParen === ctx ||
                  TopLevelContext === ctx
        cnt += 1 unless skipped
      end
      if nextchar == ?} and cnt == 1
        res.bodyclass = OutlinedHereBodyToken
        break
      end

      tok = get1token
      assert(a.object_id == @moretokens.object_id)
      toks << tok
      lexerror tok, "here body expected before eof" if EoiToken === tok
    end while res.unsafe_to_use
    assert(a.object_id == @moretokens.object_id)
    a[0, 0] = toks # same as a=toks+a, but keeps a's id
  }

  return res
end
1071
+
1072
+ #-----------------------------------
1073
# Lexes a '<': a here document header, the '<<' operator, the spaceship
# operator '<=>', or one of the forms handled by quadriop.
#
# '<<' followed by a quote, dash, letter, digit or underscore is a here
# document when quote_expected?(ch) is truthy; otherwise just the two
# characters '<<' are read as an operator.
def lessthan(ch)
  peek = readahead(3)
  if /^<<['"`\-a-z0-9_]$/i === peek
    return here_header if quote_expected?(ch)
    operator_or_methname_token(@file.read(2))
  elsif "<=>" === peek
    operator_or_methname_token(@file.read(3))
  else
    quadriop(ch)
  end
end
1085
+
1086
+ #-----------------------------------
1087
# Lexes a backslash that is expected to escape a line ending.
#
# ch must be a backslash (asserted). The backslash is read and, if readnl
# yields a newline, that newline is appended to the token text; otherwise
# the error 'illegal escape sequence' is recorded. Returns the result of
# lexerror applied to the EscNlToken and the (possibly nil) error.
def escnewline(ch)
  assert(ch == '\\')

  startpos = @file.pos
  text = getchar
  nl = readnl
  error = nil
  if nl
    text += nl
  else
    error = 'illegal escape sequence'
  end

  lexerror(EscNlToken.new(@filename, @linenum, text, startpos), error)
end
1099
+
1100
+ #-----------------------------------
1101
# Lexes a line ending (the next character must be \r or \n).
#
# If a here document header is waiting in @incomplete_here_tokens, its
# body is read here: lines are pulled with all_quote until one consists
# of a single element matching the placeholder's termex, each earlier
# line being appended to the placeholder. The placeholder is then marked
# safe to use and an instance of its bodyclass is returned.
#
# Otherwise the newline is ordinary. When the last operative token is a
# NewlineToken, matches ; / begin / do / INNERBOUNDINGWORDS, or
# after_nonid_op? is falsy, the newline ends a statement: implicit
# contexts are aborted, the innermost context sees :semi, and the token
# from super(ch) is queued behind the abort tokens. In the remaining
# case the newline is read as an EscNlToken (an implicitly continued
# line). Either way start_of_line_directives runs before returning.
def newline(ch)
  assert("\r\n"[nextchar.chr])

  # handle here bodies queued up by previous line
  # (we should be more compatible with dos/mac style newlines...)
  tofill = @incomplete_here_tokens.shift
  if tofill
    tofill.string.offset = @file.pos
    loop {
      assert("\r\n"[nextchar.chr])

      # retrieve everything until the next newline
      # (all_quote accepts regexes for its delimiters)
      line = all_quote(/^[\r\n]$/, tofill.quote, /^[\r\n]$/, :regex_esc_seq)

      # drop the file-and-line token that would otherwise appear in the
      # middle of the string (and be emitted out of order)
      fal = @moretokens.pop
      assert(FileAndLineToken === fal || fal.nil?)

      back1char
      assert("\r\n"[nextchar.chr])

      # stop once the line matches the terminating regex
      break if line.elems.size == 1 and
               line.elems[0][tofill.termex]

      tofill.append_token line
      tofill.append readnl
      back1char
    }

    assert("\r\n"[nextchar.chr])
    tofill.unsafe_to_use = false

    return tofill.bodyclass.new(tofill)
  end

  # ordinary newline handling (possibly implicitly escaped)
  assert("\r\n"[nextchar.chr])
  assert(@moretokens.empty?)

  # hack-o-rama: there are probably cases left out of this test
  ends_statement =
    NewlineToken === @last_operative_token ||
    @last_operative_token === /^(;|begin|do|#{INNERBOUNDINGWORDS})$/ ||
    !after_nonid_op? { false }

  if ends_statement
    a = abort_noparens!
    @bracestack.pop if ExpectDoOrNlContext === @bracestack.last
    @bracestack.last.see @bracestack, :semi

    a << super(ch)
    @moretokens.replace a + @moretokens
    result = @moretokens.shift
  else
    offset = @file.pos
    result = EscNlToken.new(@filename, @linenum, readnl, offset)
  end

  start_of_line_directives

  return result
end
1163
+
1164
+ #-----------------------------------
1165
# '=begin' plus the one character that must follow it (anything that
# cannot continue an identifier); EQBEGINLENGTH covers both.
EQBEGIN=%r/^=begin[^a-zA-Z_0-9]$/
EQBEGINLENGTH=7
EQEND='=end'
# '__END__' plus the line ending that must follow it; ENDMARKERLENGTH
# covers both.
ENDMARKER=/^__END__[\r\n]$/
ENDMARKERLENGTH=8

# Handles the constructs that are only recognised at the start of a line.
#
# * =begin ... =end blocks: each one is read in full (through the rest of
#   the line holding =end), @linenum is advanced by the number of line
#   endings it contains, and it is queued on @moretokens as an
#   IgnoreToken.
# * __END__: the marker is read and handed to endoffile_detected, whose
#   result is put at the front of @moretokens; the file position is then
#   moved to the end of the file.
#
# The marker is read with ENDMARKERLENGTH-1 (seven characters, the length
# of '__END__'), mirroring the EQBEGINLENGTH-1 used for '=begin'. The
# previous hard-coded read(6) dropped the final underscore from the text
# passed to endoffile_detected.
def start_of_line_directives
  # handle =begin...=end (at start of a line)
  while EQBEGIN===readahead(EQBEGINLENGTH)
    startpos=@file.pos
    more=@file.read(EQBEGINLENGTH-1) #get =begin

    # keep reading until an =end that sits at the start of a line
    @file.each(EQEND) {|cblock|
      more << cblock
      break if /^[\r\n]#{EQEND}/o===readback(EQEND.length+1)
    }
    # read rest of line after =end
    more << @file.til_charset(/[\r\n]/)
    assert((?\r===nextchar or ?\n===nextchar))
    assert !(/[\r\n]/===more[-1,1])

    newls= more.scan(/\r\n?|\n\r?/)
    @linenum+= newls.size

    # inject the fresh comment into future token results
    @moretokens.push IgnoreToken.new(more,startpos)
  end

  # handle __END__
  if ENDMARKER===readahead(ENDMARKERLENGTH)
    assert !(ImplicitContext===@bracestack.last)
    @moretokens.unshift endoffile_detected(@file.read(ENDMARKERLENGTH-1))
    @file.pos=@file.stat.size
  end
end
1201
+
1202
+
1203
+
1204
+ #-----------------------------------
1205
+ #used to resolve the ambiguity of
1206
+ # unary ops (+, -, *, &, ~ !) in ruby
1207
+ #returns whether current token is to be the start of a literal
1208
# Characters that may begin an identifier, including the $ and @ sigils.
IDBEGINCHAR=/^[a-zA-Z_$@]/

# Decides whether an operator character such as + or - at the current
# position should be lexed as a unary operator (the start of a literal
# or operand) rather than a binary one.
#
# Returns false outright when the character after the operator is one of
# * & or =. Otherwise the answer comes from after_nonid_op?; for
# name-like previous tokens its block requires that the previous token is
# not a variable name and that the character before the operator is in
# WHSPLF (the "method call with an argument" reading).
def unary_op_expected?(ch) #yukko hack
  return false if '*&='[readahead(2)[1..1]]

  # possible func-call as operator
  after_nonid_op? { !is_var_name? && WHSPLF[prevchar] }
end
1219
+
1220
+ #-----------------------------------
1221
+ #used to resolve the ambiguity of
1222
+ # <<, %, ? in ruby
1223
+ #returns whether current token is to be the start of a literal
1224
+ #/ is not handled right here if whitespace immediately follows the /
1225
# Decides whether '<<', '%' or '?' at the current position starts a
# literal (here document, %-quoted string, character literal) rather
# than an operator.
#
# ch - the character being lexed; only its first character is examined
#      and it must be '?', '%' or '<'.
#
# A first, purely lexical test on the upcoming characters can rule the
# literal out, in which case false is returned. Otherwise the answer
# comes from after_nonid_op?; for name-like previous tokens its block
# requires that the previous token is not a variable name, that the
# character before ch is in WHSPLF and that the character after ch is
# not.
#
# Raises RuntimeError for any other ch. The message is double-quoted so
# the offending character is actually interpolated; the original
# single-quoted literal printed '#{ch}' verbatim.
def quote_expected?(ch) #yukko hack
  ruled_out =
    case ch[0]
    when ?? then readahead(2)[/^\?[#{WHSPLF}]$/o] #not needed?
    when ?% then readahead(3)[/^%([a-ps-vyzA-PR-VX-Z]|[QqrwWx][a-zA-Z0-9])/]
    when ?< then !readahead(4)[/^<<-?['"`a-z0-9_]/i]
    else raise "unexpected ch (#{ch}) in quote_expected?"
    end
  return false if ruled_out

  # possible func-call as operator
  after_nonid_op? {
    !is_var_name? && WHSPLF[prevchar] && !WHSPLF[readahead(2)[1..1]]
  }
end
1241
+
1242
+ #-----------------------------------
1243
+ #used to resolve the ambiguity of
1244
+ # <<, %, /, ?, :, and newline in ruby
1245
# Classifies @last_operative_token to help decide whether the upcoming
# ambiguous character starts an operand (truthy result) or an operator
# (falsy result). Must be called with a block.
#
# * After a method name, variable name or FUNCLIKE keyword the decision
#   is delegated to the block and its value is returned.
# * After a string, symbol, number, here-document placeholder, closing
#   bracket or one of the listed keywords the result is false.
# * After an operator, an INNERBOUNDINGWORDS keyword, a newline, an
#   opening bracket, one of the listed statement keywords, or at the very
#   start of the file (nil) the result is true.
#
# Raises RuntimeError if the last operative token is an IgnoreToken or
# anything not covered above.
def after_nonid_op?
  tok = @last_operative_token
  case tok
  when MethNameToken, VarNameToken, FUNCLIKE_KEYWORDS.token_pat
    yield
  when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
       %r{^(class|module|do|end|self|true|false|nil|
            __FILE__|__LINE__|[\})\]]|alias|(un)?def|for
          )$}x.token_pat
    # 'do' probably should not be in this list; def/undef and
    # class/module are doubtful too. 'for' also appears in the newline
    # branch below. What about rescue?
    false
  when /^(#{RUBYOPERATORREX}|#{INNERBOUNDINGWORDS})$/o.token_pat
    # the regexes above must match the whole string
    true
  when NewlineToken, nil, # nil means we're still at beginning of file
       /^([({\[]|or|not|and|if|unless|then|elsif|else|
           while|until|begin|for|in|case|when|ensure)$
       /x.token_pat
    true
  when IgnoreToken
    raise "last_operative_token shouldn't be ignoreable"
  else
    raise "after_nonid_op? after #{tok}:#{tok.class} -- now what"
  end
end
1275
+
1276
+ #-----------------------------------
1277
# Lexes an operator that comes in four forms -- single, doubled, and
# either of those followed by '=' (for example &, &&, &=, &&=).
#
# ch must be one of & * | < > (asserted). The resulting text is passed
# to operator_or_methname_token, whose result is returned.
def quadriop(ch) #match /&&?=?/ (&, &&, &=, or &&=)
  assert(%w[& * | < >].include?(ch))

  op = getchar + (eat_next_if(ch) || '')
  op << ?= if eat_next_if(?=)
  operator_or_methname_token(op)
end
1288
+
1289
+ #-----------------------------------
1290
# Lexes an operator that comes in two forms: alone or followed by '='
# (for example % and %=).
#
# ch must be exactly one of % ^ ~ (asserted). Returns the result of
# operator_or_methname_token for the operator text.
def biop(ch) #match /%=?/ (% or %=)
  assert(ch[/^[%^~]$/])

  op = getchar
  op << ?= if eat_next_if(?=)
  operator_or_methname_token(op)
end
1298
+
1299
+ #-----------------------------------
1300
# Lexes a '~' operator.
#
# ch must be '~' (asserted). Unless the character after the tilde is in
# WHSPLF, a NoWsToken at the current position is queued on @moretokens.
# A following '=' is not folded into the operator here. Returns the
# result of operator_or_methname_token('~').
def tilde(ch) #match /~=?/ (~ or ~=)
  assert(ch == '~')

  op = getchar
  # why is the NoWsToken necessary at this point?
  @moretokens << NoWsToken.new(@file.pos) unless WHSPLF[nextchar.chr]

  # result should distinguish unary ~
  operator_or_methname_token(op)
end
1311
+
1312
+ #-----------------------------------
1313
# Truthy when the last operative token is a KeywordToken matching
# alias, def, undef, '.' or '::' -- positions where an operator would be
# read as a method name.
def want_op_name
  tok = @last_operative_token
  KeywordToken === tok && tok === /^(alias|(un)?def|\.|::)$/
end
1317
+
1318
+ #-----------------------------------
1319
+ #match /[+\-]=?/ (+ or +=)
1320
+ #could be beginning of number, too
1321
+ #fixme: handle +@ and -@ here as well... (currently, this is done in symbol()?)
1322
# Lexes a '+' or '-' (ch must be one of the two; asserted).
#
# * Binary position (unary_op_expected? is falsy): the operator,
#   optionally followed by '=', is returned through
#   operator_or_methname_token.
# * Unary position directly before a digit: the sign belongs to a
#   numeric literal and number(ch) lexes it.
# * Unary position otherwise: the bare operator is returned and, unless
#   the next character is in WHSPLF, a NoWsToken is queued behind it.
#
# todo: the result should distinguish unary from binary + and -.
def plusminus(ch)
  assert(/^[+\-]$/ === ch)

  unless unary_op_expected?(ch)
    # binary operator
    assert(!want_op_name)
    op = getchar
    op << ?= if eat_next_if(?=)
    return operator_or_methname_token(op)
  end

  # sign of a numeric literal
  return number(ch) if (?0..?9) === readahead(2)[1]

  # unary operator
  op = getchar
  @moretokens << NoWsToken.new(@file.pos) unless WHSPLF[nextchar.chr]
  operator_or_methname_token(op)
end
1344
+
1345
+ #-----------------------------------
1346
# Lexes an operator starting with '=': one of = == === =~ =>.
#
# Side effects by operator:
#   '=>'  - the innermost context on @bracestack is told about :arrow.
#   '='   - a plain assignment: an AssignmentRhsContext is pushed on
#           @bracestack and an AssignmentRhsListStartToken positioned
#           just after the '=' is placed at the front of @moretokens.
#
# Returns the result of operator_or_methname_token for the operator text
# and the offset at which it began.
#
# The `when` clauses use `then`/newline rather than the Ruby 1.8-only
# trailing colon, which is a syntax error on Ruby 1.9 and later.
def equals(ch) #match /=(>|~|==?)?/ (= or == or =~ or === or =>)
  offset = @file.pos
  str = getchar
  assert(str == '=')

  c = (eat_next_if(/^[~=>]$/) or '')
  str << c
  case c
  when '=' then str << (eat_next_if(?=) or '')
  when '>' then @bracestack.last.see @bracestack, :arrow
  when ''
    # record the start of an assignment's right-hand side
    @bracestack.push AssignmentRhsContext.new(@linenum)
    @moretokens.unshift AssignmentRhsListStartToken.new(offset + 1)
  end

  return operator_or_methname_token(str, offset)
end
1363
+
1364
+ #-----------------------------------
1365
# Lexes an operator starting with '!': one of ! != !~.
#
# The next character must be '!' (asserted). When the '!' stands alone
# and the character after it is not in WHSPLF, a NoWsToken is queued on
# @moretokens. Always returns a KeywordToken holding the operator text.
def exclam(ch) #match /![~=]?/ (! or != or !~)
  assert(nextchar == ?!)

  op = getchar
  suffix = eat_next_if(/^[~=]$/)
  if suffix
    op += suffix
  elsif !WHSPLF[nextchar.chr]
    @moretokens << NoWsToken.new(@file.pos)
  end

  # result should distinguish unary !
  KeywordToken.new(op)
end
1378
+
1379
+ #-----------------------------------
1380
# Lexes a token starting with '.': the range operators '..' and '...',
# or a single method-call dot.
#
# For a single dot a KeywordToken('.') is returned and dot_rhs lexes the
# name that follows it. Raises RuntimeError if the next character is not
# a dot.
def dot(ch)
  eat_next_if(?.) or raise "lexer confusion"

  # three lumps of sugar or two?
  if eat_next_if(?.)
    return KeywordToken.new(eat_next_if(?.) ? "..." : "..")
  end

  # saw just a single '.'; match a valid ruby id after it
  single = KeywordToken.new(".")
  dot_rhs(single)
  single
end
1394
+
1395
+ #-----------------------------------
1396
# Lexes what follows a '.' or '::' token (prevtok).
#
# Inside safe_recurse: prevtok becomes @last_operative_token, the tokens
# returned by ignored_tokens are collected, callsite_symbol reads the
# name, and -- if a name was found -- the tokens from var_or_meth_name
# are appended. Everything collected is then placed at the front of the
# array that safe_recurse yields.
def dot_rhs(prevtok)
  safe_recurse { |a|
    @last_operative_token = prevtok
    collected = ignored_tokens
    tok, pos = callsite_symbol(prevtok)
    collected.push(*var_or_meth_name(tok, prevtok, pos)) if tok
    a.unshift(*collected)
  }
end
1405
+
1406
+ #-----------------------------------
1407
# Lexes a single-quoted string by handing ch straight to double_quote,
# which handles both quote styles.
def single_quote(ch=nil)
  return double_quote(ch)
end
1410
+
1411
+ #-----------------------------------
1412
# Lexes a backquote.
#
# Directly after def, '::' or '.', the backquote is a method name and is
# returned as a MethNameToken positioned where it started (RuntimeError
# if the next character is not a backquote). Anywhere else it opens a
# backquoted string, which double_quote lexes.
def back_quote(ch=nil)
  oldpos = @file.pos
  if @last_operative_token === /^(def|::|\.)$/
    return MethNameToken.new((eat_next_if(?`) or raise "insanity"), oldpos)
  end

  double_quote(ch)
end
1419
+
1420
+ #-----------------------------------
1421
# Lexes a comment and returns it as an IgnoreToken.
#
# The text comes from the superclass implementation. If that text both
# starts and ends with '#', the superclass stopped at a second '#',
# which does not end a comment in ruby: the trailing '#' is removed from
# the text and the input is backed up one character so the '#' is seen
# again on the next round. (This lets the inherited comment lexer be
# reused for ruby.)
def comment(str)
  text = ""
  text << super(nil).to_s

  # not ended by a crunch: nothing to fix up
  return IgnoreToken.new(text) unless /^\#.*\#$/ === text

  assert(text.to_s[-1] == ?#)
  text.chomp! '#'

  # back up one char so the '#' is lexed again next time
  back1char
  assert(nextchar == ?#)

  IgnoreToken.new(text)
end
1444
+
1445
+ #-----------------------------------
1446
# Lexes an opening bracket -- '(', '[' or '{' -- pushes the matching
# context on @bracestack and returns the token to emit first.
#
# '[' pushes a ListImmedContext. '(' pushes a ParamListContext when the
# previous operative token is a variable name, method name or FUNCLIKE
# keyword, and a ParenContext otherwise. '{' pushes a ListImmedContext
# (a hash) when after_nonid_op? is truthy; otherwise it starts a block:
# a new local-variable scope is opened, a BlockContext is pushed and
# block_param_list_lookahead runs.
#
# For '[' and '(' directly after such a name with no whitespace in
# between, a NoWsToken is returned and the bracket's KeywordToken is
# queued on @moretokens behind it.
#
# Requires @moretokens to be empty (asserted); raises RuntimeError if
# the next character is not an opening bracket.
def open_brace(ch)
  assert((ch != '[' or !want_op_name))
  assert(@moretokens.empty?)
  lastchar = prevchar
  ch = eat_next_if(/^[({\[]$/) or raise "lexer confusion"
  tokch = KeywordToken.new(ch, @file.pos - 1)

  # maybe emitting of NoWsToken can be moved into var_or_meth_name ??
  case tokch.ident
  when '['
    # fixme: in contexts expecting an (operator) method name, we
    # should match [] or []= at this point
    @bracestack.push ListImmedContext.new(ch, @linenum)
    lasttok = last_operative_token
    # could be: lasttok===/^[a-z_]/i
    after_name = (VarNameToken === lasttok || MethNameToken === lasttok ||
                  lasttok === FUNCLIKE_KEYWORDS)
    if after_name && !WHSPCHARS[lastchar]
      @moretokens << tokch
      tokch = NoWsToken.new(@file.pos - 1)
    end

  when '('
    lasttok = last_operative_token
    # could be: lasttok===/^[a-z_]/i
    if VarNameToken === lasttok || MethNameToken === lasttok ||
       lasttok === FUNCLIKE_KEYWORDS
      unless WHSPCHARS[lastchar]
        @moretokens << tokch
        tokch = NoWsToken.new(@file.pos - 1)
      end
      @bracestack.push ParamListContext.new(@linenum)
    else
      @bracestack.push ParenContext.new(@linenum)
    end

  when '{'
    # check if we are in a hash literal or string inclusion (#{}),
    # in which case starting a block would be bad.
    if after_nonid_op? { false }
      @bracestack.push ListImmedContext.new(ch, @linenum) # that is, a hash
    else
      # disabled ("not needed now, i think"):
      #   # 'need to find matching callsite context and end it if implicit'
      #   lasttok=last_operative_token
      #   unless lasttok===')' and lasttok.callsite?
      #     @moretokens.push *(abort_1_noparen!(1).push tokch)
      #     tokch=@moretokens.shift
      #   end

      @localvars.start_block
      @bracestack.push BlockContext.new(@linenum)
      block_param_list_lookahead
    end
  end

  return tokch
end
1502
+
1503
+ #-----------------------------------
1504
# Lexes a closing bracket -- ')', ']' or '}' -- and pops the context it
# closes from @bracestack.
#
# Implicit contexts are first aborted with abort_noparens!(ch); the
# tokens that produces and the bracket's own KeywordToken are queued on
# @moretokens, and the first queued token is returned.
#
# Errors reported through lexerror on the bracket token:
#   "unmatched brace"   - there is no open context to close
#   "mismatched braces" - the popped context was opened by a different
#                         kind of bracket (PAIRS maps opener to closer)
# Closing a BlockContext also ends the current local-variable scope.
# Raises RuntimeError if the next character is not ch.
#
# The innermost context is only sent :semi when one exists. Previously
# `@bracestack.last.see` ran unconditionally before the emptiness check,
# so an empty stack failed with NoMethodError on nil and the "unmatched
# brace" report could never be reached.
def close_brace(ch)
  ch == eat_next_if(/[)}\]]/) or raise "lexer confusion"
  @moretokens.concat abort_noparens!(ch)
  @moretokens << kw = KeywordToken.new(ch, @file.pos - 1)

  @bracestack.last.see @bracestack, :semi unless @bracestack.empty? #hack
  # checked after `see` as well, since the handler is handed the stack
  if @bracestack.empty?
    lexerror kw, "unmatched brace: #{ch}"
    return @moretokens.shift
  end

  ctx = @bracestack.pop
  origch, line = ctx.starter, ctx.linenum
  ch == PAIRS[origch] or
    lexerror kw, "mismatched braces: #{origch}#{ch}\n" +
                 "matching brace location", @filename, line
  BlockContext === ctx and @localvars.end_block
  if ParamListContext == ctx.class
    assert ch == ')'
    #kw.set_callsite! #not needed?
  end
  return @moretokens.shift
end
1525
+
1526
+ #-----------------------------------
1527
# Lexes the NUL character that marks the end of input.
#
# The next byte must be NUL (asserted). The token text is that NUL
# followed by the string form of the tokens from ignored_tokens(true),
# minus any FileAndLineTokens. If the file position is then not at the
# end of the file, lexerror is called with that text; the position is
# moved to the end of the file either way. Returns the result of
# endoffile_detected for the text.
def eof(ch=nil)
  # this must be the very last character...
  assert(?\0 == @file.getc)

  trailing = ignored_tokens(true).delete_if { |t| FileAndLineToken === t }
  text = "\0#{trailing}"

  unless @file.pos == @file.stat.size
    lexerror text, 'nul character is not at the end of file'
  end
  @file.pos = @file.stat.size

  endoffile_detected(text)
end
1539
+
1540
+ #-----------------------------------
1541
# Finishes lexing at end of input.
#
# The tokens from abort_noparens!, followed by the token the superclass
# builds for s, are queued on @moretokens; the first queued token is
# returned. If balanced_braces? is falsy, lexerror is called on that
# token with a message that includes the remaining @bracestack.
def endoffile_detected(s='')
  pending = abort_noparens!
  pending.push(super(s))
  @moretokens.push(*pending)

  first = @moretokens.shift
  unless balanced_braces?
    lexerror first, "unbalanced braces at eof. bracestack=#{@bracestack.inspect}"
  end
  first
end
1547
+
1548
+ #-----------------------------------
1549
# Wraps the superclass's result for ch in a KeywordToken positioned one
# character before the file position reached after the superclass call.
def single_char_token(ch)
  text = super(ch)
  KeywordToken.new(text, @file.pos - 1)
end
1552
+
1553
+ #-----------------------------------
1554
# Lexes a ',' as an ordinary single-character token.
def comma(ch)
  return single_char_token(ch)
end
1557
+
1558
+ #-----------------------------------
1559
# Lexes a ';'.
#
# Requires @moretokens to be empty (asserted). Implicit contexts are
# aborted via abort_noparens!, the innermost context is told about
# :semi, and an ExpectDoOrNlContext on top of @bracestack is popped (the
# context beneath it must have been started by while, until or for).
# The ';' token is queued last and the first queued token is returned.
def semicolon(ch)
  assert(@moretokens.empty?)
  @moretokens.push(*abort_noparens!)
  @bracestack.last.see @bracestack, :semi

  # should be in context's see:semi handler
  if ExpectDoOrNlContext === @bracestack.last
    @bracestack.pop
    assert @bracestack.last.starter[/^(while|until|for)$/]
  end

  @moretokens.push(single_char_token(ch))
  @moretokens.shift
end
1570
+
1571
+ #-----------------------------------
1572
# Builds the token for operator text s (which must match
# RUBYOPERATORREX; asserted), optionally at a given offset.
#
# The token class is:
#   KeywordToken  - s matches RUBYNONSYMOPERATORREX
#   MethNameToken - otherwise, when the last operative token is one of
#                   . :: def undef alias defined?
#   OperatorToken - in every other case
def operator_or_methname_token(s,offset=nil)
  assert(RUBYOPERATORREX === s)

  klass =
    if RUBYNONSYMOPERATORREX === s
      KeywordToken
    elsif @last_operative_token === /^(\.|::|def|undef|alias|defined\?)$/
      MethNameToken
    else
      OperatorToken
    end

  klass.new(s, offset)
end
1582
+
1583
+ #-----------------------------------
1584
+ #tokenify_results_of :identifier
1585
# Applies save_offsets_in to every distinct handler named in
# CHARMAPPINGS except :symbol_or_op, :open_brace and :whitespace.
# NOTE(review): presumably this wraps those handlers so their tokens
# record a starting offset -- confirm against save_offsets_in.
save_offsets_in(*(CHARMAPPINGS.values.uniq - [:symbol_or_op, :open_brace, :whitespace]))
1586
+ #save_offsets_in :symbol
1587
+
1588
+ end
1589
+