yard 0.2.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of yard might be problematic. Click here for more details.

data/lib/ruby_lex.rb ADDED
@@ -0,0 +1,1318 @@
1
+ require "e2mmap"
2
+ require "irb/slex"
3
+
4
+ module RubyToken
5
+ EXPR_BEG = :EXPR_BEG
6
+ EXPR_MID = :EXPR_MID
7
+ EXPR_END = :EXPR_END
8
+ EXPR_ARG = :EXPR_ARG
9
+ EXPR_FNAME = :EXPR_FNAME
10
+ EXPR_DOT = :EXPR_DOT
11
+ EXPR_CLASS = :EXPR_CLASS
12
+
13
# Base class of every lexical token. A token stores the line number and
# character number it was created with, the source text assigned to it via
# #set_text, and a lexer state that can be attached afterwards.
class Token
  # Placeholder text a token carries until #set_text is called.
  NO_TEXT = "??".freeze

  # Position given to the constructor and the text last assigned.
  # (The reader for +text+ used to be declared twice; once is enough.)
  attr_reader :line_no, :char_no, :text

  # Lexer state associated with this token; assigned from outside.
  attr_accessor :lex_state

  # line_no:: line number to record for the token
  # char_no:: character number to record for the token
  def initialize(line_no, char_no)
    @line_no = line_no
    @char_no = char_no
    @text = NO_TEXT
  end

  # Because we're used in contexts that expect to return a token,
  # we set the text string and then return ourselves
  def set_text(text)
    @text = text
    self
  end
end
33
+
34
# Token subclass that exposes a read-only +node+ attribute.
class TkNode < Token
  attr_reader :node
end
37
+
38
# Token that additionally carries a name given at construction time.
class TkId < Token
  attr_reader :name

  # name:: value stored and returned by #name
  def initialize(line_no, char_no, name)
    super(line_no, char_no)
    @name = name
  end
end
45
+
46
+ class TkKW < TkId
47
+ end
48
+
49
# Token whose text is initialised from an optional value.
class TkVal < Token
  # value:: becomes the token text immediately (nil when omitted)
  def initialize(line_no, char_no, value = nil)
    super(line_no, char_no)
    set_text(value)
  end
end
55
+
56
# Operator token. Its name is whatever the concrete class reports through
# its class-level +op_name+ method.
class TkOp < Token
  def name
    self.class.op_name
  end
end
61
+
62
# Operator-assignment token; +op+ identifies the underlying operator.
class TkOPASGN < TkOp
  attr_reader :op

  # op:: a Symbol is stored unchanged; anything else is looked up in
  #      TkReading2Token and the lookup result is stored instead.
  def initialize(line_no, char_no, op)
    super(line_no, char_no)
    @op = op.kind_of?(Symbol) ? op : TkReading2Token[op]
  end
end
70
+
71
# Token for a character that has no dedicated token class.
class TkUnknownChar < Token
  attr_reader :name

  def initialize(line_no, char_no, id)
    super(line_no, char_no)
    # NOTE(review): the name is derived from char_no and +id+ is never read;
    # Integer#chr also raises for values outside the character range.
    # Confirm the intended source of the name.
    @name = char_no.chr
  end
end
78
+
79
+ class TkError < Token
80
+ end
81
+
82
# Remember the position that the next token built by #Token will receive.
def set_token_position(line, char)
  @prev_line_no, @prev_char_no = line, char
end
86
+
87
# Build a token positioned at the location last given to
# #set_token_position.
#
# token:: a token class, or a String / Symbol that is first resolved through
#         TkReading2Token / TkSymbol2Token respectively
# value:: passed to the constructor only for classes descending from TkId,
#         TkVal, TkOPASGN or TkUnknownChar
def Token(token, value = nil)
  unless token.kind_of?(String) || token.kind_of?(Symbol)
    wants_value = !(token.ancestors & [TkId, TkVal, TkOPASGN, TkUnknownChar]).empty?
    return token.new(@prev_line_no, @prev_char_no, value) if wants_value
    return token.new(@prev_line_no, @prev_char_no)
  end

  table = token.kind_of?(String) ? TkReading2Token : TkSymbol2Token
  entry = table[token]
  IRB.fail TkReading2TokenNoKey, token if entry.nil?
  # The first element of the entry is taken as the token class.
  Token(entry[0], value)
end
105
+
106
+ TokenDefinitions = [
107
+ [:TkCLASS, TkKW, "class", EXPR_CLASS],
108
+ [:TkMODULE, TkKW, "module", EXPR_BEG],
109
+ [:TkDEF, TkKW, "def", EXPR_FNAME],
110
+ [:TkUNDEF, TkKW, "undef", EXPR_FNAME],
111
+ [:TkBEGIN, TkKW, "begin", EXPR_BEG],
112
+ [:TkRESCUE, TkKW, "rescue", EXPR_MID],
113
+ [:TkENSURE, TkKW, "ensure", EXPR_BEG],
114
+ [:TkEND, TkKW, "end", EXPR_END],
115
+ [:TkIF, TkKW, "if", EXPR_BEG, :TkIF_MOD],
116
+ [:TkUNLESS, TkKW, "unless", EXPR_BEG, :TkUNLESS_MOD],
117
+ [:TkTHEN, TkKW, "then", EXPR_BEG],
118
+ [:TkELSIF, TkKW, "elsif", EXPR_BEG],
119
+ [:TkELSE, TkKW, "else", EXPR_BEG],
120
+ [:TkCASE, TkKW, "case", EXPR_BEG],
121
+ [:TkWHEN, TkKW, "when", EXPR_BEG],
122
+ [:TkWHILE, TkKW, "while", EXPR_BEG, :TkWHILE_MOD],
123
+ [:TkUNTIL, TkKW, "until", EXPR_BEG, :TkUNTIL_MOD],
124
+ [:TkFOR, TkKW, "for", EXPR_BEG],
125
+ [:TkBREAK, TkKW, "break", EXPR_END],
126
+ [:TkNEXT, TkKW, "next", EXPR_END],
127
+ [:TkREDO, TkKW, "redo", EXPR_END],
128
+ [:TkRETRY, TkKW, "retry", EXPR_END],
129
+ [:TkIN, TkKW, "in", EXPR_BEG],
130
+ [:TkDO, TkKW, "do", EXPR_BEG],
131
+ [:TkRETURN, TkKW, "return", EXPR_MID],
132
+ [:TkYIELD, TkKW, "yield", EXPR_END],
133
+ [:TkSUPER, TkKW, "super", EXPR_END],
134
+ [:TkSELF, TkKW, "self", EXPR_END],
135
+ [:TkNIL, TkKW, "nil", EXPR_END],
136
+ [:TkTRUE, TkKW, "true", EXPR_END],
137
+ [:TkFALSE, TkKW, "false", EXPR_END],
138
+ [:TkAND, TkKW, "and", EXPR_BEG],
139
+ [:TkOR, TkKW, "or", EXPR_BEG],
140
+ [:TkNOT, TkKW, "not", EXPR_BEG],
141
+ [:TkIF_MOD, TkKW],
142
+ [:TkUNLESS_MOD, TkKW],
143
+ [:TkWHILE_MOD, TkKW],
144
+ [:TkUNTIL_MOD, TkKW],
145
+ [:TkALIAS, TkKW, "alias", EXPR_FNAME],
146
+ [:TkDEFINED, TkKW, "defined?", EXPR_END],
147
+ [:TklBEGIN, TkKW, "BEGIN", EXPR_END],
148
+ [:TklEND, TkKW, "END", EXPR_END],
149
+ [:Tk__LINE__, TkKW, "__LINE__", EXPR_END],
150
+ [:Tk__FILE__, TkKW, "__FILE__", EXPR_END],
151
+
152
+ [:TkIDENTIFIER, TkId],
153
+ [:TkFID, TkId],
154
+ [:TkGVAR, TkId],
155
+ [:TkIVAR, TkId],
156
+ [:TkCONSTANT, TkId],
157
+
158
+ [:TkINTEGER, TkVal],
159
+ [:TkFLOAT, TkVal],
160
+ [:TkSTRING, TkVal],
161
+ [:TkXSTRING, TkVal],
162
+ [:TkREGEXP, TkVal],
163
+ [:TkCOMMENT, TkVal],
164
+
165
+ [:TkDSTRING, TkNode],
166
+ [:TkDXSTRING, TkNode],
167
+ [:TkDREGEXP, TkNode],
168
+ [:TkNTH_REF, TkId],
169
+ [:TkBACK_REF, TkId],
170
+
171
+ [:TkUPLUS, TkOp, "+@"],
172
+ [:TkUMINUS, TkOp, "-@"],
173
+ [:TkPOW, TkOp, "**"],
174
+ [:TkCMP, TkOp, "<=>"],
175
+ [:TkEQ, TkOp, "=="],
176
+ [:TkEQQ, TkOp, "==="],
177
+ [:TkNEQ, TkOp, "!="],
178
+ [:TkGEQ, TkOp, ">="],
179
+ [:TkLEQ, TkOp, "<="],
180
+ [:TkANDOP, TkOp, "&&"],
181
+ [:TkOROP, TkOp, "||"],
182
+ [:TkMATCH, TkOp, "=~"],
183
+ [:TkNMATCH, TkOp, "!~"],
184
+ [:TkDOT2, TkOp, ".."],
185
+ [:TkDOT3, TkOp, "..."],
186
+ [:TkAREF, TkOp, "[]"],
187
+ [:TkASET, TkOp, "[]="],
188
+ [:TkLSHFT, TkOp, "<<"],
189
+ [:TkRSHFT, TkOp, ">>"],
190
+ [:TkCOLON2, TkOp],
191
+ [:TkCOLON3, TkOp],
192
+ [:OPASGN, TkOp], # +=, -= etc. #
193
+ [:TkASSOC, TkOp, "=>"],
194
+ [:TkQUESTION, TkOp, "?"], #?
195
+ [:TkCOLON, TkOp, ":"], #:
196
+
197
+ [:TkfLPAREN], # func( #
198
+ [:TkfLBRACK], # func[ #
199
+ [:TkfLBRACE], # func{ #
200
+ [:TkSTAR], # *arg
201
+ [:TkAMPER], # &arg #
202
+ [:TkSYMBOL, TkId], # :SYMBOL
203
+ [:TkSYMBEG, TkId],
204
+ [:TkGT, TkOp, ">"],
205
+ [:TkLT, TkOp, "<"],
206
+ [:TkPLUS, TkOp, "+"],
207
+ [:TkMINUS, TkOp, "-"],
208
+ [:TkMULT, TkOp, "*"],
209
+ [:TkDIV, TkOp, "/"],
210
+ [:TkMOD, TkOp, "%"],
211
+ [:TkBITOR, TkOp, "|"],
212
+ [:TkBITXOR, TkOp, "^"],
213
+ [:TkBITAND, TkOp, "&"],
214
+ [:TkBITNOT, TkOp, "~"],
215
+ [:TkNOTOP, TkOp, "!"],
216
+
217
+ [:TkBACKQUOTE, TkOp, "`"],
218
+
219
+ [:TkASSIGN, Token, "="],
220
+ [:TkDOT, Token, "."],
221
+ [:TkLPAREN, Token, "("], #(exp)
222
+ [:TkLBRACK, Token, "["], #[arry]
223
+ [:TkLBRACE, Token, "{"], #{hash}
224
+ [:TkRPAREN, Token, ")"],
225
+ [:TkRBRACK, Token, "]"],
226
+ [:TkRBRACE, Token, "}"],
227
+ [:TkCOMMA, Token, ","],
228
+ [:TkSEMICOLON, Token, ";"],
229
+
230
+ [:TkRD_COMMENT],
231
+ [:TkSPACE],
232
+ [:TkNL],
233
+ [:TkEND_OF_SCRIPT],
234
+
235
+ [:TkBACKSLASH, TkUnknownChar, "\\"],
236
+ [:TkAT, TkUnknownChar, "@"],
237
+ [:TkDOLLAR, TkUnknownChar, "\$"], #"
238
+ ]
239
+
240
+ # {reading => token_class}
241
+ # {reading => [token_class, *opt]}
242
+ TkReading2Token = {}
243
+ TkSymbol2Token = {}
244
+
245
# Define a token class named +token_n+ inside RubyToken and register it.
#
# token_n::     class name (String or Symbol)
# super_token:: superclass of the new class
# reading::     source text that maps to the token; when given, the class is
#               registered in TkReading2Token as [class, *opts]
# opts::        extra values stored after the class in the reading entry
#
# The class is always registered in TkSymbol2Token under its name. Classes
# descending from TkOp also get a class-level +op_name+ returning the
# reading as a string (empty when there is no reading).
def RubyToken.def_token(token_n, super_token = Token, reading = nil, *opts)
  token_n = token_n.id2name unless token_n.kind_of?(String)
  # An already-defined constant of the same name is deliberately tolerated.

  token_c = Class.new super_token
  RubyToken.const_set token_n, token_c

  if reading
    if TkReading2Token[reading]
      IRB.fail TkReading2TokenDuplicateError, token_n, reading
    end
    TkReading2Token[reading] = [token_c].concat(opts)
  end
  TkSymbol2Token[token_n.intern] = token_c

  if token_c <= TkOp
    # Define op_name with a closure instead of evaluating generated source,
    # so a reading containing quotes, backslashes or "#{" cannot corrupt it.
    op_text = "#{reading}"
    (class << token_c; self; end).send(:define_method, :op_name) { op_text.dup }
  end
  nil
end
273
+
274
+ for defs in TokenDefinitions
275
+ def_token(*defs)
276
+ end
277
+
278
+ NEWLINE_TOKEN = TkNL.new(0,0)
279
+ NEWLINE_TOKEN.set_text("\n")
280
+
281
+ end
282
+
283
+
284
+
285
+ # Lexical analyzer for Ruby source
286
+
287
+ class RubyLex
288
+
289
+ ######################################################################
290
+ #
291
+ # Read an input stream character by character. We allow for unlimited
292
+ # ungetting of characters just read.
293
+ #
294
+ # We simplify the implementation greatly by reading the entire input
295
+ # into a buffer initially, and then simply traversing it using
296
+ # pointers.
297
+ #
298
+ # We also have to allow for the <i>here document diversion</i>. This
299
+ # little gem comes about when the lexer encounters a here
300
+ # document. At this point we effectively need to split the input
301
+ # stream into two parts: one to read the body of the here document,
302
+ # the other to read the rest of the input line where the here
303
+ # document was initially encountered. For example, we might have
304
+ #
305
+ # do_something(<<-A, <<-B)
306
+ # stuff
307
+ # for
308
+ # A
309
+ # stuff
310
+ # for
311
+ # B
312
+ #
313
+ # When the lexer encounters the <<A, it reads until the end of the
314
+ # line, and keeps it around for later. It then reads the body of the
315
+ # here document. Once complete, it needs to read the rest of the
316
+ # original line, but then skip the here document body.
317
+ #
318
+
319
# Character reader over an in-memory copy of the input. The whole content is
# held in one string and traversed with an offset, so characters can be
# pushed back freely. Tab runs are expanded to spaces on construction and a
# trailing newline is guaranteed.
class BufferedReader

  # Current line number (starts at 1).
  attr_reader :line_num

  # content:: the source text. It is never modified: the reader works on its
  #           own copy, so frozen strings are accepted as well.
  def initialize(content)
    if /\t/ =~ content
      tab_width = 2
      content = content.split(/\n/).map do |line|
        1 while line.gsub!(/\t+/) { ' ' * (tab_width*$&.length - $`.length % tab_width)} && $~
        line
      end .join("\n")
    else
      # Private copy: the buffer is appended to below and spliced into by
      # #divert_read_from, which must not leak into the caller's string.
      content = content.dup
    end
    @content = content
    @content << "\n" unless @content[-1,1] == "\n"
    @size = @content.size
    @offset = 0
    @hwm = 0
    @line_num = 1
    @read_back_offset = 0
    @last_newline = 0
    @newline_pending = false
  end

  # Distance of the read offset from the last recorded line start.
  def column
    @offset - @last_newline
  end

  # Return the next character as a one-character string, or nil at the end
  # of the buffer. The line number is advanced lazily: a newline only bumps
  # the counter when the character following it is read.
  def getc
    return nil if @offset >= @size
    ch = @content[@offset, 1]

    @offset += 1
    @hwm = @offset if @hwm < @offset

    if @newline_pending
      @line_num += 1
      @last_newline = @offset - 1
      @newline_pending = false
    end

    @newline_pending = true if ch == "\n"
    ch
  end

  # Same as #getc.
  def getc_already_read
    getc
  end

  # Step back one character. +ch+ is ignored; the buffer itself is the
  # source of truth.
  def ungetc(ch)
    raise "unget past beginning of file" if @offset <= 0
    @offset -= 1
    # Substring comparison behaves the same whether String#[] with a single
    # index yields an Integer or a String.
    @newline_pending = false if @content[@offset, 1] == "\n"
  end

  # Return everything consumed since the previous call and mark it as
  # handed out.
  def get_read
    res = @content[@read_back_offset...@offset]
    @read_back_offset = @offset
    res
  end

  # Look +at+ characters ahead of the read offset without consuming;
  # nil when that position is at or beyond the end of the buffer.
  def peek(at)
    pos = @offset + at
    pos >= @size ? nil : @content[pos, 1]
  end

  # True when the unread input starts with +str+.
  def peek_equal(str)
    @content[@offset, str.length] == str
  end

  # Insert +reserve+ at the current read offset so it is read next.
  def divert_read_from(reserve)
    @content[@offset, 0] = reserve
    @size = @content.size
  end
end
401
+
402
+ # end of nested class BufferedReader
403
+
404
+ extend Exception2MessageMapper
405
+ def_exception(:AlreadyDefinedToken, "Already defined token(%s)")
406
+ def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')")
407
+ def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')")
408
+ def_exception(:TkReading2TokenDuplicateError,
409
+ "key duplicate(token_n='%s', key='%s')")
410
+ def_exception(:SyntaxError, "%s")
411
+
412
+ include RubyToken
413
+ include IRB
414
+
415
+ attr_reader :continue
416
+ attr_reader :lex_state
417
+
418
+ def RubyLex.debug?
419
+ false
420
+ end
421
+
422
# Set up the rule table and a BufferedReader over +content+, then reset all
# lexer state to its starting values.
def initialize(content)
  lex_init

  @reader = BufferedReader.new(content)

  # position bookkeeping
  @line_no = 1
  @exp_line_no = 1
  @base_char_no = 0
  @indent = 0

  # literal / expression state
  @ltype = nil
  @quoted = nil
  @lex_state = EXPR_BEG
  @space_seen = false

  @continue = false
  @line = ""

  # behaviour switches (see the attr declarations below)
  @skip_space = false
  @read_auto_clean_up = false
  @exception_on_syntax_error = true
end
443
+
444
+ attr :skip_space, true
445
+ attr :read_auto_clean_up, true
446
+ attr :exception_on_syntax_error, true
447
+
448
+ attr :indent
449
+
450
+ # io functions
451
# Line number reported by the reader.
def line_no
  @reader.line_num
end

# Column reported by the reader.
def char_no
  @reader.column
end

# Text consumed since the previous call (delegates to the reader).
def get_read
  @reader.get_read
end

# Next character from the reader, or nil at end of input.
def getc
  @reader.getc
end

# Delegates to the reader's getc_already_read.
def getc_of_rests
  @reader.getc_already_read
end

# Read up to and including the next newline. Carriage returns are dropped.
# Returns nil when no character is left at all.
def gets
  ch = getc
  return if ch.nil?

  line = ""
  loop do
    line.concat ch unless ch == "\r"
    break if ch == "\n"
    ch = getc
    break unless ch
  end
  line
end


# Push the last character back onto the reader.
def ungetc(c = nil)
  @reader.ungetc(c)
end

# True when the unread input starts with +str+.
def peek_equal?(str)
  @reader.peek_equal(str)
end

# Character +i+ positions ahead, without consuming it.
def peek(i = 0)
  @reader.peek(i)
end
493
+
494
# Consume tokens up to the end of a logical line and return the text that
# was read. Reading stops at a newline or end-of-script token while no
# continuation is pending, or as soon as #token yields nil. Returns nil
# when nothing was read and the input is exhausted.
def lex
  tk = nil
  loop do
    tk = token
    break if tk.nil?
    line_end = tk.kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)
    break if line_end && !@continue
  end

  line = get_read
  exhausted = tk.nil? || tk.kind_of?(TkEND_OF_SCRIPT)
  return nil if line == "" && exhausted
  line
end
507
+
508
# Return the next token, or nil when the rule table yields none.
#
# Space tokens are skipped while +skip_space+ is set. When the lexer raises
# SyntaxError the error is re-raised if +exception_on_syntax_error+ is set
# (previously the whole process was aborted without a message); otherwise a
# TkError token is returned. The current lexer state is attached to the
# token before it is returned.
def token
  set_token_position(line_no, char_no)
  begin
    begin
      tk = @OP.match(self)
      @space_seen = tk.kind_of?(TkSPACE)
    rescue SyntaxError
      # Propagate to the caller rather than terminating the interpreter.
      raise if @exception_on_syntax_error
      tk = TkError.new(line_no, char_no)
    end
  end while @skip_space and tk.kind_of?(TkSPACE)
  get_read if @read_auto_clean_up
  p tk if $DEBUG
  tk.lex_state = lex_state if tk
  tk
end
527
+
528
+ ENINDENT_CLAUSE = [
529
+ "case", "class", "def", "do", "for", "if",
530
+ "module", "unless", "until", "while", "begin" #, "when"
531
+ ]
532
+ DEINDENT_CLAUSE = ["end" #, "when"
533
+ ]
534
+
535
+ PERCENT_LTYPE = {
536
+ "q" => "\'",
537
+ "Q" => "\"",
538
+ "x" => "\`",
539
+ "r" => "/",
540
+ "w" => "]"
541
+ }
542
+
543
+ PERCENT_PAREN = {
544
+ "{" => "}",
545
+ "[" => "]",
546
+ "<" => ">",
547
+ "(" => ")"
548
+ }
549
+
550
+ Ltype2Token = {
551
+ "\'" => TkSTRING,
552
+ "\"" => TkSTRING,
553
+ "\`" => TkXSTRING,
554
+ "/" => TkREGEXP,
555
+ "]" => TkDSTRING
556
+ }
557
+ Ltype2Token.default = TkSTRING
558
+
559
+ DLtype2Token = {
560
+ "\"" => TkDSTRING,
561
+ "\`" => TkDXSTRING,
562
+ "/" => TkDREGEXP,
563
+ }
564
+
565
# Build the rule table (@OP) for end-of-script markers, whitespace,
# comments, =begin blocks, newlines and the first group of operators, then
# hand over to #lex_int2 for the remaining rules.
#
# Each rule block receives the matched text and returns the token for it,
# updating @lex_state as a side effect.
def lex_init()
  @OP = SLex.new
  @OP.def_rules("\0", "\004", "\032") do |chars, io|
    Token(TkEND_OF_SCRIPT).set_text(chars)
  end

  @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |chars, io|
    @space_seen = true
    while (ch = getc) =~ /[ \t\f\r\13]/
      chars << ch
    end
    ungetc
    Token(TkSPACE).set_text(chars)
  end

  @OP.def_rule("#") do |op, io|
    identify_comment
  end

  @OP.def_rule("=begin", proc{@prev_char_no == 0 && peek(0) =~ /\s/}) do |op, io|
    str = op
    @ltype = "="

    # Collect whole lines until one starts with =end. An unterminated block
    # simply runs to the end of input instead of appending nil to the text.
    ch = nil
    loop do
      line = ""
      while (ch = getc)
        line << ch
        break if ch == "\n"
      end
      str << line
      break if ch.nil? || line =~ /^=end/
    end

    # Give the closing newline back (nothing to give back at end of input).
    ungetc unless ch.nil?

    @ltype = nil

    if str =~ /\A=begin\s+rdoc/i
      str.sub!(/\A=begin.*\n/, '')
      str.sub!(/^=end.*/m, '')
      Token(TkCOMMENT).set_text(str)
    else
      Token(TkRD_COMMENT)#.set_text(str)
    end
  end

  @OP.def_rule("\n") do
    print "\\n\n" if RubyLex.debug?
    case @lex_state
    when EXPR_BEG, EXPR_FNAME, EXPR_DOT
      @continue = true
    else
      @continue = false
      @lex_state = EXPR_BEG
    end
    Token(TkNL).set_text("\n")
  end

  @OP.def_rules("*", "**",
                "!", "!=", "!~",
                "=", "==", "===",
                "=~", "<=>",
                "<", "<=",
                ">", ">=", ">>") do |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  @OP.def_rules("<<") do |op, io|
    tk = nil
    if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
       (@lex_state != EXPR_ARG || @space_seen)
      c = peek(0)
      tk = identify_here_document if /[-\w_\"\'\`]/ =~ c
    end
    if !tk
      @lex_state = EXPR_BEG
      tk = Token(op).set_text(op)
    end
    tk
  end

  @OP.def_rules("'", '"') do |op, io|
    identify_string(op)
  end

  @OP.def_rules("`") do |op, io|
    if @lex_state == EXPR_FNAME
      Token(op).set_text(op)
    else
      identify_string(op)
    end
  end

  @OP.def_rules('?') do |op, io|
    if @lex_state == EXPR_END
      @lex_state = EXPR_BEG
      Token(TkQUESTION).set_text(op)
    else
      ch = getc
      if @lex_state == EXPR_ARG && ch !~ /\s/
        ungetc
        @lex_state = EXPR_BEG
        Token(TkQUESTION).set_text(op)
      else
        # character literal such as ?a or ?\n
        str = op
        str << ch
        if (ch == '\\')
          str << read_escape
        end
        @lex_state = EXPR_END
        Token(TkINTEGER).set_text(str)
      end
    end
  end

  @OP.def_rules("&", "&&", "|", "||") do |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  @OP.def_rules("+=", "-=", "*=", "**=",
                "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do |op, io|
    @lex_state = EXPR_BEG
    op =~ /^(.*)=$/
    Token(TkOPASGN, $1).set_text(op)
  end

  @OP.def_rule("+@", proc{@lex_state == EXPR_FNAME}) do |op, io|
    Token(TkUPLUS).set_text(op)
  end

  @OP.def_rule("-@", proc{@lex_state == EXPR_FNAME}) do |op, io|
    Token(TkUMINUS).set_text(op)
  end

  @OP.def_rules("+", "-") do |op, io|
    catch(:RET) do
      if @lex_state == EXPR_ARG
        if @space_seen and peek(0) =~ /[0-9]/
          throw :RET, identify_number(op)
        else
          @lex_state = EXPR_BEG
        end
      elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
        throw :RET, identify_number(op)
      else
        @lex_state = EXPR_BEG
      end
      Token(op).set_text(op)
    end
  end

  @OP.def_rule(".") do
    @lex_state = EXPR_BEG
    if peek(0) =~ /[0-9]/
      ungetc
      identify_number("")
    else
      # for obj.if
      @lex_state = EXPR_DOT
      Token(TkDOT).set_text(".")
    end
  end

  @OP.def_rules("..", "...") do |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  lex_int2
end
739
+
740
# Second half of the rule table: closing/opening brackets, colons, slash,
# percent, tilde, punctuation, backslash, sigils, __END__ and the catch-all
# rule for numbers and identifiers.
def lex_int2
  @OP.def_rules("]", "}", ")") do |op, io|
    @lex_state = EXPR_END
    @indent -= 1
    Token(op).set_text(op)
  end

  @OP.def_rule(":") do
    if @lex_state == EXPR_END || peek(0) =~ /\s/
      @lex_state = EXPR_BEG
      tk = Token(TkCOLON)
    else
      @lex_state = EXPR_FNAME
      tk = Token(TkSYMBEG)
    end
    tk.set_text(":")
  end

  @OP.def_rule("::") do
    if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
      @lex_state = EXPR_BEG
      tk = Token(TkCOLON3)
    else
      @lex_state = EXPR_DOT
      tk = Token(TkCOLON2)
    end
    tk.set_text("::")
  end

  @OP.def_rule("/") do |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_string(op)
    elsif peek(0) == '='
      getc
      @lex_state = EXPR_BEG
      Token(TkOPASGN, :/).set_text("/=")
    elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
      identify_string(op)
    else
      @lex_state = EXPR_BEG
      Token("/").set_text(op)
    end
  end

  @OP.def_rules("^") do
    @lex_state = EXPR_BEG
    Token("^").set_text("^")
  end

  @OP.def_rules(",", ";") do |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  @OP.def_rule("~") do
    @lex_state = EXPR_BEG
    Token("~").set_text("~")
  end

  # The guard must compare, not assign: with "=" the rule matched
  # everywhere and overwrote @lex_state as a side effect of being tested.
  @OP.def_rule("~@", proc{@lex_state == EXPR_FNAME}) do
    @lex_state = EXPR_BEG
    Token("~").set_text("~@")
  end

  @OP.def_rule("(") do
    @indent += 1
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      @lex_state = EXPR_BEG
      tk = Token(TkfLPAREN)
    else
      @lex_state = EXPR_BEG
      tk = Token(TkLPAREN)
    end
    tk.set_text("(")
  end

  @OP.def_rule("[]", proc{@lex_state == EXPR_FNAME}) do
    Token("[]").set_text("[]")
  end

  @OP.def_rule("[]=", proc{@lex_state == EXPR_FNAME}) do
    Token("[]=").set_text("[]=")
  end

  @OP.def_rule("[") do
    @indent += 1
    if @lex_state == EXPR_FNAME
      t = Token(TkfLBRACK)
    else
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
        t = Token(TkLBRACK)
      elsif @lex_state == EXPR_ARG && @space_seen
        t = Token(TkLBRACK)
      else
        t = Token(TkfLBRACK)
      end
      @lex_state = EXPR_BEG
    end
    t.set_text("[")
  end

  @OP.def_rule("{") do
    @indent += 1
    if @lex_state != EXPR_END && @lex_state != EXPR_ARG
      t = Token(TkLBRACE)
    else
      t = Token(TkfLBRACE)
    end
    @lex_state = EXPR_BEG
    t.set_text("{")
  end

  @OP.def_rule('\\') do
    if getc == "\n"
      # backslash-newline: line continuation, reported as space
      @space_seen = true
      @continue = true
      Token(TkSPACE).set_text("\\\n")
    else
      ungetc
      Token("\\").set_text("\\")
    end
  end

  @OP.def_rule('%') do |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_quotation('%')
    elsif peek(0) == '='
      getc
      Token(TkOPASGN, "%").set_text("%=")
    elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
      identify_quotation('%')
    else
      @lex_state = EXPR_BEG
      Token("%").set_text("%")
    end
  end

  @OP.def_rule('$') do
    identify_gvar
  end

  @OP.def_rule('@') do
    if peek(0) =~ /[@\w_]/
      ungetc
      identify_identifier
    else
      Token("@").set_text("@")
    end
  end

  @OP.def_rule("__END__", proc{@prev_char_no == 0 && peek(0) =~ /[\r\n]/}) do
    throw :eof
  end

  # Catch-all: anything not matched above is a number or an identifier.
  @OP.def_rule("") do |op, io|
    printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
    if peek(0) =~ /[0-9]/
      t = identify_number("")
    elsif peek(0) =~ /[\w_]/
      t = identify_identifier
    end
    printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
    t
  end

  p @OP if RubyLex.debug?
end
926
+
927
# Lex what follows a "$" that has already been consumed: punctuation
# globals, "$-x" option globals, back references, numbered references, or
# an ordinary global name (delegated to #identify_identifier).
# Sets the lexer state to EXPR_END.
def identify_gvar
  @lex_state = EXPR_END
  text = "$"
  lead = getc

  tk =
    case lead
    when /[~_*$?!@\/\\;,=:<>".]/
      text << lead
      Token(TkGVAR, text)
    when "-"
      text << "-" << getc
      Token(TkGVAR, text)
    when "&", "`", "'", "+"
      text << lead
      Token(TkBACK_REF, text)
    when /[1-9]/
      text << lead
      digit = nil
      text << digit while (digit = getc) =~ /[0-9]/
      ungetc
      Token(TkNTH_REF)
    when /\w/
      # Rewind over the character and the "$" so the identifier lexer sees
      # the complete name.
      2.times { ungetc }
      return identify_identifier
    else
      ungetc
      Token("$")
    end

  tk.set_text(text)
end
961
+
962
# Lex an identifier-like word at the current position: global and instance
# variables, reserved words (including their modifier forms), constants,
# names ending in "!" or "?", and plain identifiers. Updates @lex_state and
# @indent on the way.
def identify_identifier
  name = ""
  name.concat getc if peek(0) =~ /[$@]/
  name.concat getc if peek(0) == "@"

  ch = nil
  while (ch = getc) =~ /\w|_/
    print ":", ch, ":" if RubyLex.debug?
    name.concat ch
  end
  ungetc

  # a trailing "!" or "?" belongs to the name
  name.concat getc if ch == "!" || ch == "?"

  if name =~ /^\$/
    return Token(TkGVAR, name).set_text(name)
  elsif name =~ /^\@/
    @lex_state = EXPR_END
    return Token(TkIVAR, name).set_text(name)
  end

  if @lex_state != EXPR_DOT
    print name, "\n" if RubyLex.debug?

    klass, *trans = TkReading2Token[name]
    if klass
      # reserved word
      naming = (@lex_state == EXPR_FNAME)
      if @lex_state != EXPR_BEG && !naming && trans[1]
        # modifier form (e.g. trailing if/unless/while/until)
        klass = TkSymbol2Token[trans[1]]
        @lex_state = trans[0]
      elsif naming
        @lex_state = EXPR_END
      else
        if ENINDENT_CLAUSE.include?(name)
          @indent += 1
        elsif DEINDENT_CLAUSE.include?(name)
          @indent -= 1
        end
        @lex_state = trans[0]
      end
      return Token(klass, name).set_text(name)
    end
  end

  case @lex_state
  when EXPR_FNAME
    @lex_state = EXPR_END
    # setter method name: take the "=" as part of it
    name.concat getc if peek(0) == '='
  when EXPR_BEG, EXPR_DOT
    @lex_state = EXPR_ARG
  else
    @lex_state = EXPR_END
  end

  kind =
    if name[0, 1] =~ /[A-Z]/
      TkCONSTANT
    elsif name[name.size - 1, 1] =~ /[!?]/
      TkFID
    else
      TkIDENTIFIER
    end
  Token(kind, name).set_text(name)
end
1036
+
1037
# Lex a here document whose "<<" has already been consumed. Reads the
# terminator, sets aside the remainder of the introducing line, collects
# the body lines up to the terminator, then feeds the set-aside text back
# into the reader so lexing continues with it.
def identify_here_document
  first = getc
  if first == "-"
    first = getc
    strip_lines = true
  end

  if /['"`]/ =~ first
    # quoted terminator: everything up to the matching quote
    lt = first
    terminator = ""
    while (c = getc) && c != lt
      terminator.concat c
    end
  else
    # bare terminator: a run of word characters
    lt = '"'
    terminator = first.dup
    while (c = getc) && c =~ /\w/
      terminator.concat c
    end
    ungetc
  end

  saved_ltype = @ltype
  @ltype = lt

  # remainder of the line that introduced the here document
  rest_of_line = ""
  while (c = getc)
    rest_of_line << c
    if c == "\\"
      c = getc
      rest_of_line << c
    elsif c == "\n"
      break
    end
  end

  body = ""
  while (line = gets)
    line.chomp!
    line.strip! if strip_lines
    break if line == terminator
    body << line.chomp << "\n"
  end

  @reader.divert_read_from(rest_of_line)

  @ltype = saved_ltype
  @lex_state = EXPR_END
  Token(Ltype2Token[lt], body).set_text(body.dump)
end
1085
+
1086
# Lex a %-literal whose "%" has already been consumed. A type letter listed
# in PERCENT_LTYPE selects the literal kind; a non-word character directly
# after "%" means a double-quoted string. Anything else raises SyntaxError.
def identify_quotation(initial_char)
  ch = getc
  lt = PERCENT_LTYPE[ch]
  if lt
    initial_char += ch
    ch = getc
  elsif ch =~ /\W/
    lt = "\""
  else
    RubyLex.fail SyntaxError, "unknown type of %string ('#{ch}')"
  end

  # Bracket-style openers close with their counterpart; any other
  # delimiter closes with itself.
  @quoted = PERCENT_PAREN[ch] || ch
  identify_string(lt, @quoted, ch, initial_char)
end
1104
+
1105
# Lex a numeric literal and return a TkINTEGER or TkFLOAT token whose text
# is the literal. Sets the lexer state to EXPR_END.
#
# start:: text already consumed for the literal: "+" or "-" for a sign, or
#         "" when nothing was consumed; in those cases the first character
#         is read here.
#
# Handles hexadecimal ("0x"/"0X", either letter case) and octal literals,
# decimal integers, and floats with a fraction and/or exponent. A "." that
# is not followed by a digit (range, method call) is left unread and does
# not turn the literal into a float.
def identify_number(start)
  str = start.dup

  if start == "+" or start == "-" or start == ""
    start = getc
    str << start
  end

  @lex_state = EXPR_END

  # A leading zero starts a hex/octal literal unless it is the integer part
  # of a float such as 0.5 or 0e3.
  if start == "0" && peek(0) !~ /[.eE]/
    if peek(0) =~ /[xX]/
      str << getc
      digits = /[0-9a-fA-F_]/
    else
      digits = /[0-7_]/
    end
    while ch = getc
      if ch !~ digits
        ungetc
        break
      end
      str << ch
    end
    return Token(TkINTEGER).set_text(str)
  end

  type = TkINTEGER
  allow_point = true
  allow_e = true
  while ch = getc
    if ch =~ /[0-9_]/
      str << ch
    elsif allow_point && ch == "."
      if peek(0) !~ /[0-9]/
        # not a fraction: leave the "." for the next token
        ungetc
        break
      end
      type = TkFLOAT
      str << ch
      allow_point = false
    elsif allow_e && (ch == "e" || ch == "E")
      str << ch
      type = TkFLOAT
      str << getc if peek(0) =~ /[+-]/
      allow_e = false
      allow_point = false
    else
      ungetc
      break
    end
  end
  Token(type).set_text(str)
end
1166
+
1167
# Lex a quoted literal (string, backtick command, regexp or %-literal)
# whose opening delimiter has already been consumed, and return its token.
#
# ltype::        literal type key, used to pick the token class from
#                Ltype2Token / DLtype2Token
# quoted::       closing delimiter
# opener::       opening delimiter when it differs from the closer; nested
#                opener/closer pairs inside the literal are balanced
# initial_char:: text that preceded the opener (e.g. "%q"), prepended to
#                the token text
#
# A "#{" inside a literal other than the ' and ] types marks the token as
# the interpolated variant. Regexp literals take all trailing option
# letters. @ltype, @quoted and @lex_state are always reset on exit.
def identify_string(ltype, quoted = ltype, opener=nil, initial_char = nil)
  @ltype = ltype
  @quoted = quoted
  subtype = nil

  str = ""
  str << initial_char if initial_char
  str << (opener||quoted)

  nest = 0
  begin
    while ch = getc
      str << ch
      if @quoted == ch
        if nest == 0
          break
        else
          nest -= 1
        end
      elsif opener == ch
        nest += 1
      elsif @ltype != "'" && @ltype != "]" and ch == "#"
        ch = getc
        if ch == "{"
          subtype = true
          str << ch << skip_inner_expression
        else
          ungetc(ch)
        end
      elsif ch == '\\'
        str << read_escape
      end
    end
    if @ltype == "/"
      # Consume every option letter (e.g. /x/mi), not just the first one.
      str << getc while peek(0) =~ /[imxounes]/
    end
    if subtype
      Token(DLtype2Token[ltype], str)
    else
      Token(Ltype2Token[ltype], str)
    end.set_text(str)
  ensure
    @ltype = nil
    @quoted = nil
    @lex_state = EXPR_END
  end
end
1216
+
1217
# Consume input up to and including the "}" that balances an already
# consumed "{", keeping track of nested braces. Returns the consumed text
# (everything up to the end of input if no balancing brace is found).
def skip_inner_expression
  consumed = ""
  depth = 0
  loop do
    ch = getc
    break if ch.nil?
    consumed << ch
    case ch
    when '{'
      depth += 1
    when '}'
      break if depth.zero?
      depth -= 1
    end
  end
  consumed
end
1231
+
1232
# Lex a "#" comment whose "#" has already been consumed and return a
# TkCOMMENT token. The terminating newline is left unread. A backslash
# directly before a newline is replaced by a single space and the comment
# continues onto the following line.
def identify_comment
  @ltype = "#"
  text = "#"

  while (ch = getc)
    if ch == "\n"
      @ltype = nil
      ungetc
      break
    end

    if ch == "\\"
      ch = getc
      if ch == "\n"
        ch = " "
      else
        text << "\\"
      end
    end
    text << ch
  end

  Token(TkCOMMENT).set_text(text)
end
1254
+
1255
# Read the remainder of a backslash escape (the backslash itself has been
# consumed) and return the characters that belong to it:
#
# * up to three octal digits
# * "x" followed by up to two hex digits
# * "M-" followed by a character or a nested escape
# * "C-" or "c" followed by a character or a nested escape
# * any other single character
#
# The control forms used to append the introducing "c" twice; the returned
# text now mirrors the input exactly. Nothing is appended once the input is
# exhausted.
def read_escape
  res = ""
  case ch = getc
  when /[0-7]/
    ungetc ch
    3.times do
      case ch = getc
      when /[0-7]/
      when nil
        break
      else
        ungetc
        break
      end
      res << ch
    end

  when "x"
    res << ch
    2.times do
      case ch = getc
      when /[0-9a-fA-F]/
      when nil
        break
      else
        ungetc
        break
      end
      res << ch
    end

  when "M"
    res << ch
    if (ch = getc) != '-'
      ungetc unless ch.nil?
    else
      res << ch
      if (ch = getc) == "\\"
        res << ch
        res << read_escape
      elsif ch
        res << ch
      end
    end

  when "C", "c" #, "^"
    res << ch
    if ch == "C"
      # the long form requires a "-" before the operand
      if (ch = getc) != "-"
        ungetc unless ch.nil?
        return res
      end
      res << ch
    end
    if (ch = getc) == "\\"
      res << ch
      res << read_escape
    elsif ch
      res << ch
    end
  else
    res << ch if ch
  end
  res
end
1318
+ end