fabiokung-ruby_parser 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ruby_lexer.rb ADDED
@@ -0,0 +1,1331 @@
1
+ $: << File.expand_path("~/Work/p4/zss/src/ParseTree/dev/lib") # for me, not you.
2
+ require 'sexp'
3
+ require 'ruby_parser_extras'
4
+
5
+ class RubyLexer
6
+ attr_accessor :command_start
7
+ attr_accessor :cmdarg
8
+ attr_accessor :cond
9
+ attr_accessor :nest
10
+
11
+ ESC_RE = /\\([0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-.|(C-|c)\?|(C-|c).|[^0-7xMCc])/
12
+
13
+ # Additional context surrounding tokens that both the lexer and
14
+ # grammar use.
15
+ attr_reader :lex_state
16
+
17
+ attr_accessor :lex_strterm
18
+
19
+ attr_accessor :parser # HACK for very end of lexer... *sigh*
20
+
21
+ # Stream of data that yylex examines.
22
+ attr_reader :src
23
+
24
+ # Last token read via yylex.
25
+ attr_accessor :token
26
+
27
+ attr_accessor :string_buffer
28
+
29
+ # Value of last token which had a value associated with it.
30
+ attr_accessor :yacc_value
31
+
32
+ # What handles warnings
33
+ attr_accessor :warnings
34
+
35
+ EOF = :eof_haha!
36
+
37
+ # ruby constants for strings (should this be moved somewhere else?)
38
+ STR_FUNC_BORING = 0x00
39
+ STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
40
+ STR_FUNC_EXPAND = 0x02
41
+ STR_FUNC_REGEXP = 0x04
42
+ STR_FUNC_AWORDS = 0x08
43
+ STR_FUNC_SYMBOL = 0x10
44
+ STR_FUNC_INDENT = 0x20 # <<-HEREDOC
45
+
46
+ STR_SQUOTE = STR_FUNC_BORING
47
+ STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
48
+ STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
49
+ STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
50
+ STR_SSYM = STR_FUNC_SYMBOL
51
+ STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
52
+
53
+ # How the parser advances to the next token.
54
+ #
55
+ # @return true if not at end of file (EOF).
56
+
57
# Fetches the next token from yylex and stores it in +token+.
#
# Returns true while the token is not the end-of-file marker.
# Raises RuntimeError when yylex hands back nil/false.
def advance
  self.token = fetched = yylex

  raise "yylex returned nil" unless fetched

  RubyLexer::EOF != fetched
end
65
+
66
# Emits the "ambiguous first argument" warning through #warning.
def arg_ambiguous
  warning "Ambiguous first argument. make sure."
end
69
+
70
# Returns every comment collected so far as one string and empties the
# collection (in place) so the next call starts fresh.
def comments
  @comments.join.tap { @comments.clear }
end
75
+
76
# Pushes +false+ onto both the cond and cmdarg stacks, moves the lexer to
# :expr_beg and records +val+ as the current yacc_value.
def expr_beg_push val
  [cond, cmdarg].each { |stack| stack.push false }
  self.lex_state = :expr_beg
  self.yacc_value = val
end
82
+
83
# Sets lex_state to :expr_arg when the lexer is currently at :expr_fname or
# :expr_dot, and to :expr_beg in every other state.
def fix_arg_lex_state
  after_name_or_dot = [:expr_fname, :expr_dot].include?(lex_state)
  self.lex_state = after_name_or_dot ? :expr_arg : :expr_beg
end
90
+
91
# Lexes the body of a here document, one token per call.
#
# +here+ is the lex_strterm array built by heredoc_identifier:
# [:heredoc, eos, func, last_line], where +eos+ is the terminating
# identifier, +func+ the STR_FUNC_* bit mask and +last_line+ the remainder
# of the line on which the heredoc was opened (or nil).
#
# Returns :tSTRING_END when the terminator is found at the start of a line,
# :tSTRING_DVAR / :tSTRING_DBEG when an interpolation starts, and
# :tSTRING_CONTENT (with the text in yacc_value) otherwise.
# Calls rb_compile_error when input runs out before the terminator.
def heredoc here # 63 lines
  _, eos, func, last_line = here

  indent  = (func & STR_FUNC_INDENT) != 0
  expand  = (func & STR_FUNC_EXPAND) != 0
  # The identifier is literal text: a quoted identifier may contain regexp
  # metacharacters (e.g. <<'E.S'), so it must be escaped before being
  # embedded in the terminator pattern.
  eos_pat = Regexp.escape(eos)
  eos_re  = indent ? /[ \t]*#{eos_pat}(\r?\n|\z)/ : /#{eos_pat}(\r?\n|\z)/
  err_msg = "can't match #{eos_re.inspect} anywhere in "

  rb_compile_error err_msg if
    src.eos?

  if src.beginning_of_line? && src.scan(eos_re) then
    # Terminator reached: put the saved rest-of-line back so lexing resumes
    # after the heredoc opener.
    src.unread_many last_line # TODO: figure out how to remove this
    self.yacc_value = eos
    return :tSTRING_END
  end

  self.string_buffer = []

  if expand then
    case
    when src.scan(/#[$@]/) then
      src.pos -= 1 # FIX omg stupid
      self.yacc_value = src.matched
      return :tSTRING_DVAR
    when src.scan(/#[{]/) then
      self.yacc_value = src.matched
      return :tSTRING_DBEG
    when src.scan(/#/) then
      string_buffer << '#'
    end

    until src.scan(eos_re) do
      c = tokadd_string func, "\n", nil

      rb_compile_error err_msg if
        c == RubyLexer::EOF

      if c != "\n" then
        # Stopped before the end of the line (interpolation ahead): hand
        # back what has been collected so far.
        self.yacc_value = string_buffer.join.delete("\r")
        return :tSTRING_CONTENT
      else
        string_buffer << src.scan(/\n/)
      end

      rb_compile_error err_msg if
        src.eos?
    end

    # tack on a NL after the heredoc token - FIX NL should not be needed
    src.unread_many(eos + "\n") # TODO: remove this... stupid stupid stupid
  else
    # Non-interpolating heredoc: copy whole lines until the terminator is
    # next in the input.
    until src.check(eos_re) do
      string_buffer << src.scan(/.*(\n|\z)/)
      rb_compile_error err_msg if
        src.eos?
    end
  end

  self.lex_strterm = [:heredoc, eos, func, last_line]
  self.yacc_value = string_buffer.join.delete("\r")

  return :tSTRING_CONTENT
end
155
+
156
# Reads the identifier that follows "<<" and, when one is found, arms
# lex_strterm for the heredoc body.
#
# Returns :tXSTRING_BEG for a backtick-quoted identifier, :tSTRING_BEG for
# every other identifier, and nil when no identifier follows (the caller
# then treats "<<" as an operator). An opening quote without its partner
# is reported through rb_compile_error.
def heredoc_identifier # 51 lines
  term = nil
  func = STR_FUNC_BORING
  self.string_buffer = []

  if src.scan(/(-?)(['"`])(.*?)\2/) then
    # Quoted identifier; the quote character selects the string flavour.
    term = src[2]
    func |= STR_FUNC_INDENT unless src[1].empty?
    func |= { "\'" => STR_SQUOTE, '"' => STR_DQUOTE }.fetch(term, STR_XQUOTE)
    string_buffer << src[3]
  elsif src.scan(/-?(['"`])(?!\1*\Z)/) then
    rb_compile_error "unterminated here document identifier"
  elsif src.scan(/(-?)(\w+)/) then
    # Bare identifier behaves like a double-quoted one.
    term = '"'
    func |= STR_DQUOTE
    func |= STR_FUNC_INDENT unless src[1].empty?
    string_buffer << src[2]
  else
    return nil
  end

  # Cut the rest of the opener's line out of the source (leaving a lone
  # newline) and keep it so it can be pushed back after the heredoc body.
  line = nil
  if src.check(/.*\n/) then
    # TODO: think about storing off the char range instead
    width = src.matched_size
    line = src.string[src.pos, width]
    src.string[src.pos, width] = "\n"
    src.pos += 1
  end

  self.lex_strterm = [:heredoc, string_buffer.join, func, line]

  backtick = term == '`'
  self.yacc_value = backtick ? "`" : "\""
  backtick ? :tXSTRING_BEG : :tSTRING_BEG
end
207
+
208
# Builds a lexer with empty cond/cmdarg stacks, a nesting depth of zero and
# no collected comments, then clears the per-source state via #reset.
def initialize
  @comments   = []
  self.nest   = 0
  self.cond   = StackState.new(:cond)
  self.cmdarg = StackState.new(:cmdarg)

  reset
end
216
+
217
# Converts the text of the most recent scanner match to an Integer in the
# given +base+ and stores it in yacc_value.
#
# Returns :tINTEGER. A doubled underscore in the literal is reported through
# rb_compile_error.
def int_with_base base
  literal = src.matched
  rb_compile_error "Invalid numeric format" if literal =~ /__/
  self.yacc_value = literal.to_i(base)
  :tINTEGER
end
222
+
223
# Writer for lex_state that only accepts Symbols; anything else raises a
# RuntimeError.
def lex_state= o
  case o
  when Symbol
    @lex_state = o
  else
    raise "wtf?"
  end
end
227
+
228
+ attr_writer :lineno
229
# Current line number. The value is cached in @lineno and only re-read from
# the scanner after the cache has been cleared (lineno = nil).
def lineno
  return @lineno if @lineno

  @lineno = src.lineno
end
232
+
233
+ ##
234
+ # Parse a number from the input stream.
235
+ #
236
+ # Takes no arguments; the literal is read directly from src.
237
+ # @return A Symbol naming the token type (:tINTEGER or :tFLOAT).
238
+
239
# Lexes a numeric literal at the current scanner position.
#
# Sets lex_state to :expr_end, stores the numeric value in yacc_value and
# returns :tINTEGER or :tFLOAT. Malformed literals are reported through
# rb_compile_error. The patterns are tried strictly in the order below.
def parse_number
  self.lex_state = :expr_end

  if src.scan(/[+-]?0[xbd]\b/)
    # radix prefix with no digits after it
    rb_compile_error "Invalid numeric format"
  elsif src.scan(/[+-]?0x[a-f0-9_]+/i)
    int_with_base(16)
  elsif src.scan(/[+-]?0b[01_]+/)
    int_with_base(2)
  elsif src.scan(/[+-]?0d[0-9_]+/)
    int_with_base(10)
  elsif src.scan(/[+-]?0o?[0-7_]*[89]/)
    rb_compile_error "Illegal octal digit."
  elsif src.scan(/[+-]?0o?[0-7_]+|0o/)
    int_with_base(8)
  elsif src.scan(/[+-]?[\d_]+_(e|\.)/)
    rb_compile_error "Trailing '_' in number."
  elsif src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i)
    literal = src.matched
    rb_compile_error "Invalid numeric format" if literal =~ /__/
    self.yacc_value = literal.to_f
    :tFLOAT
  elsif src.scan(/[+-]?0\b/) || src.scan(/[+-]?[\d_]+\b/)
    int_with_base(10)
  else
    rb_compile_error "Bad number format"
  end
end
272
+
273
# Lexes the opener of a %-literal; the scanner is positioned just after "%".
#
# Arms lex_strterm with [:strterm, string_type, closer, opener], stores the
# literal's opening text in yacc_value and returns the matching *_BEG token.
# An opener "\0" marks a delimiter that has no distinct closing partner.
# Unknown type letters and premature end of input go to rb_compile_error.
def parse_quote # 58 lines
  if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
    rb_compile_error "unknown type of %string" if src.matched_size == 2
    kind       = src.matched
    opener     = src.getch
    short_hand = false
  else # Short-hand (e.g. %{, %., %!, etc)
    kind       = 'Q'
    opener     = src.getch
    short_hand = true
  end

  if src.eos? || kind == RubyLexer::EOF || opener == RubyLexer::EOF then
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # Bracket-style openers have a distinct closer; any other delimiter closes
  # itself and is flagged by setting the opener to "\0".
  closer = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[opener]
  if closer.nil?
    closer = opener
    opener = "\0"
  end

  self.yacc_value = "%#{kind}#{opener}"

  token_type = string_type = nil
  case kind
  when 'Q'
    self.yacc_value = "%#{short_hand ? closer : kind + opener}"
    token_type, string_type = :tSTRING_BEG, STR_DQUOTE
  when 'q'
    token_type, string_type = :tSTRING_BEG, STR_SQUOTE
  when 'W'
    src.scan(/\s*/)
    token_type, string_type = :tWORDS_BEG, STR_DQUOTE | STR_FUNC_AWORDS
  when 'w'
    src.scan(/\s*/)
    token_type, string_type = :tAWORDS_BEG, STR_SQUOTE | STR_FUNC_AWORDS
  when 'x'
    token_type, string_type = :tXSTRING_BEG, STR_XQUOTE
  when 'r'
    token_type, string_type = :tREGEXP_BEG, STR_REGEXP
  when 's'
    self.lex_state = :expr_fname
    token_type, string_type = :tSYMBEG, STR_SSYM
  end

  rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{kind}'." if
    token_type.nil?

  self.lex_strterm = [:strterm, string_type, closer, opener]

  token_type
end
321
+
322
# Lexes the next piece of a string-like literal described by +quote+, the
# lex_strterm array [:strterm, string_type, term, open].
#
# Returns one of :tSTRING_END, :tREGEXP_END, :tSPACE, :tSTRING_DVAR,
# :tSTRING_DBEG or :tSTRING_CONTENT; for content the collected text is put
# in yacc_value. Running out of input inside the literal is reported through
# rb_compile_error.
def parse_string(quote) # 65 lines
  _, func, term, paren = quote

  term_re = Regexp.escape term

  # A word list marks itself finished by setting quote[1] to nil (see the
  # awords branch below); the following call lands here and closes it.
  unless func then
    self.lineno = nil
    return :tSTRING_END
  end

  awords = (func & STR_FUNC_AWORDS) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  expand = (func & STR_FUNC_EXPAND) != 0

  # Only word lists treat whitespace as a separator.
  saw_space = !!(awords && src.scan(/\s+/))

  if self.nest == 0 && src.scan(Regexp.new(term_re)) then
    if awords then
      quote[1] = nil
      return :tSPACE
    end

    self.yacc_value = regexp ? self.regx_options : term
    self.lineno = nil
    return regexp ? :tREGEXP_END : :tSTRING_END
  end

  return :tSPACE if saw_space

  self.string_buffer = []

  if expand
    if src.scan(/#(?=[$@])/) then
      return :tSTRING_DVAR
    elsif src.scan(/#[{]/) then
      return :tSTRING_DBEG
    elsif src.scan(/#/) then
      string_buffer << '#'
    end
  end

  if tokadd_string(func, term, paren) == RubyLexer::EOF then
    rb_compile_error "unterminated string meets end of file"
  end

  self.yacc_value = string_buffer.join

  :tSTRING_CONTENT
end
382
+
383
# Raises SyntaxError with +msg+ followed by the current line number and the
# unread remainder of the current source line.
def rb_compile_error msg
  location = ". near line #{lineno}: #{src.rest[/^.*/].inspect}"
  raise SyntaxError, msg + location
end
387
+
388
# Decodes one escape sequence; the scanner is positioned just after the
# backslash. Returns the resulting character as a String.
#
# Handles the single-letter escapes, octal and hex codes, and the
# meta (M-) and control (C- / c) forms, which may wrap a further escape.
# Malformed M/C/x/digit sequences and end of input are reported through
# rb_compile_error; any other character stands for itself.
def read_escape # 51 lines
  # Built per call so that every caller gets its own mutable string.
  simple = {
    "\\" => '\\',   # Backslash
    "n"  => "\n",   # newline
    "t"  => "\t",   # horizontal tab
    "r"  => "\r",   # carriage-return
    "f"  => "\f",   # form-feed
    "v"  => "\13",  # vertical tab
    "a"  => "\007", # alarm(bell)
    "e"  => "\033", # escape
    "b"  => "\010", # backspace
    "s"  => " ",    # space
  }

  if src.scan(/[\\ntrfvaebs]/) then
    simple[src.matched]
  elsif src.scan(/[0-7]{1,3}/) then # octal constant
    src.matched.to_i(8).chr
  elsif src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
    src[1].to_i(16).chr
  elsif src.scan(/M-\\/) then
    # meta applied to a nested escape: set the high bit
    inner = self.read_escape
    inner[0] = (inner[0].ord | 0x80).chr
    inner
  elsif src.scan(/M-(.)/) then
    ch = src[1]
    ch[0] = (ch[0].ord | 0x80).chr
    ch
  elsif src.scan(/C-\\|c\\/) then
    # control applied to a nested escape: mask with 0x9f
    inner = self.read_escape
    inner[0] = (inner[0].ord & 0x9f).chr
    inner
  elsif src.scan(/C-\?|c\?/) then
    0177.chr
  elsif src.scan(/(C-|c)(.)/) then
    ch = src[2]
    ch[0] = (ch[0].ord & 0x9f).chr
    ch
  elsif src.scan(/[McCx0-9]/) || src.eos? then
    rb_compile_error("Invalid escape character syntax")
  else
    src.getch
  end
end
438
+
439
# Reads the run of lowercase option letters that follows a regexp's closing
# delimiter and returns the recognised ones (i x m o n e s u) as a String.
# Any other letter is reported through rb_compile_error.
def regx_options # 15 lines
  letters = src.scan(/[a-z]+/) ? src.matched.split(//) : []
  known, unknown = letters.partition { |flag| flag =~ /^[ixmonesu]$/ }

  unless unknown.empty? then
    plural = unknown.size > 1 ? "s" : ""
    rb_compile_error("unknown regexp option%s - %s" %
                     [plural, unknown.join.inspect])
  end

  known.join
end
453
+
454
# Returns the lexer to its pre-lexing state: no source, no lex state, no
# pending string terminator or token, and command_start set to true.
def reset
  @src = nil

  self.token = nil
  self.yacc_value = nil
  self.lex_strterm = nil
  self.command_start = true

  @lex_state = nil
end
463
+
464
# Installs new source text, wrapping it in an RPStringScanner.
# Raises RuntimeError when given anything but a String.
def src= src
  case src
  when String
    @src = RPStringScanner.new(src)
  else
    raise "bad src: #{src.inspect}"
  end
end
468
+
469
# Copies one backslash escape verbatim (backslash included) from the source
# into string_buffer; tokadd_string uses it for regexp literals, where the
# escape is kept as written. A backslash-newline pair is dropped. Nested
# meta/control prefixes are followed recursively. Malformed escapes are
# reported through rb_compile_error. +term+ is only passed along on
# recursion.
def tokadd_escape term # 20 lines
  if src.scan(/\\\n/) then
    # just ignore
    nil
  elsif src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
    string_buffer << src.matched
  elsif src.scan(/\\([MC]-|c)(?=\\)/) then
    # prefix followed by another escape: keep the prefix, then recurse
    string_buffer << src.matched
    tokadd_escape term
  elsif src.scan(/\\([MC]-|c)(.)/) then
    string_buffer << src.matched
  elsif src.scan(/\\[McCx]/) then
    rb_compile_error "Invalid escape character syntax"
  elsif src.scan(/\\(.)/m) then
    string_buffer << src.matched
  else
    rb_compile_error "Invalid escape character syntax"
  end
end
488
+
489
# Accumulates literal string content from src into string_buffer.
#
# func::  STR_FUNC_* bit mask describing the literal being lexed.
# term::  terminator string; scanning stops in front of it when nest is 0.
# paren:: opening delimiter that increases nest, or nil for none.
#
# Scanning also stops in front of whitespace (word lists) and in front of a
# "#" that begins an interpolation (expanding literals).
#
# Returns the last chunk handled, or RubyLexer::EOF when the source is
# exhausted once the loop finishes.
def tokadd_string(func, term, paren) # 105 lines
  awords = (func & STR_FUNC_AWORDS) != 0
  escape = (func & STR_FUNC_ESCAPE) != 0
  expand = (func & STR_FUNC_EXPAND) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  symbol = (func & STR_FUNC_SYMBOL) != 0

  paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
  term_re = Regexp.new(Regexp.escape(term))

  until src.eos? do
    c = nil
    handled = true
    case
    when self.nest == 0 && src.scan(term_re) then
      # Unnested terminator: step back so the caller sees it.
      # NOTE(review): the one-position rewind assumes a one-byte terminator.
      src.pos -= 1
      break
    when paren_re && src.scan(paren_re) then
      self.nest += 1
    when src.scan(term_re) then
      # Terminator while nested closes one nesting level.
      self.nest -= 1
    when awords && src.scan(/\s/) then
      src.pos -= 1
      break
    when expand && src.scan(/#(?=[\$\@\{])/) then
      # Start of an interpolation: leave the '#' for the caller.
      src.pos -= 1
      break
    when expand && src.scan(/#(?!\n)/) then
      # do nothing
    when src.check(/\\/) then
      case
      when awords && src.scan(/\\\n/) then
        string_buffer << "\n"
        next
      when awords && src.scan(/\\\s/) then
        c = ' '
      when expand && src.scan(/\\\n/) then
        next
      when regexp && src.check(/\\/) then
        # Regexps keep their escapes verbatim.
        self.tokadd_escape term
        next
      when expand && src.scan(/\\/) then
        c = self.read_escape
      when src.scan(/\\\n/) then
        # do nothing
      when src.scan(/\\\\/) then
        string_buffer << '\\' if escape
        c = '\\'
      when src.scan(/\\/) then
        # Keep the backslash unless it precedes the terminator or the
        # opening delimiter.
        unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
          string_buffer << "\\"
        end
      else
        handled = false
      end
    else
      handled = false
    end # case

    unless handled then

      # Plain text: take the longest run of characters that cannot start any
      # of the special cases above, or failing that a single character.
      t = Regexp.escape term
      x = Regexp.escape(paren) if paren && paren != "\000"
      re = if awords then
             /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
           else
             /[^#{t}#{x}\#\0\\]+|./
           end

      src.scan re
      c = src.matched

      rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
    end # unless handled

    # When no branch set c, the text of the most recent scan is appended.
    c ||= src.matched
    string_buffer << c
  end # until

  c ||= src.matched
  c = RubyLexer::EOF if src.eos?


  return c
end
574
+
575
# Translates the body of an escape sequence (the text after the backslash)
# into the character it denotes.
#
# Known single-letter escapes, octal and hex codes, and meta/control forms
# are decoded; a backslash-newline body yields the empty string; malformed
# M/C/x/digit bodies go to rb_compile_error; anything else is returned
# unchanged.
def unescape s
  # Rebuilt on every call so callers always receive fresh strings.
  simple = {
    "a"    => "\007",
    "b"    => "\010",
    "e"    => "\033",
    "f"    => "\f",
    "n"    => "\n",
    "r"    => "\r",
    "s"    => " ",
    "t"    => "\t",
    "v"    => "\13",
    "\\"   => '\\',
    "\n"   => "",
    "C-\?" => 0177.chr,
    "c\?"  => 0177.chr,
  }
  return simple[s] if simple.key?(s)

  case s
  when /^[0-7]{1,3}/ then
    Regexp.last_match(0).to_i(8).chr
  when /^x([0-9a-fA-F]{1,2})/ then
    Regexp.last_match(1).to_i(16).chr
  when /^M-(.)/ then
    (Regexp.last_match(1)[0].ord | 0x80).chr
  when /^(C-|c)(.)/ then
    (Regexp.last_match(2)[0].ord & 0x9f).chr
  when /^[McCx0-9]/ then
    rb_compile_error("Invalid escape character syntax")
  else
    s
  end
end
610
+
611
# Hook for lexer warnings. The message +s+ is currently discarded.
def warning(s)
  # do nothing for now
  nil
end
614
+
615
+ ##
616
+ # Returns the next token. Also sets yacc_value if needed.
617
+ #
618
+ # @return The token type (normally a Symbol such as :tNL), or RubyLexer::EOF at end of input.
619
+
620
# Main tokenizer entry point: consumes input from src and returns the type
# of the next token, leaving the token's value in yacc_value.
#
# When a string-like literal is in progress (lex_strterm is set) the work is
# handed to yylex_string. Otherwise whitespace is skipped and the next
# character decides the branch: everything that does not start with an ASCII
# letter is handled inline below, while identifiers and keywords fall
# through to process_token. Returns RubyLexer::EOF at end of input; lexical
# errors are reported through rb_compile_error.
def yylex # 826 lines

  c = ''
  space_seen = false
  command_state = false
  src = self.src

  self.token = nil
  self.yacc_value = nil

  return yylex_string if lex_strterm

  # Remember whether this token starts a command, then clear the flag.
  command_state = self.command_start
  self.command_start = false

  last_state = lex_state

  loop do # START OF CASE
    if src.scan(/\ |\t|\r|\f|\13/) then # white spaces, 13 = '\v
      space_seen = true
      next
    elsif src.check(/[^a-zA-Z]/) then
      if src.scan(/\n|#/) then
        # Newline or comment.
        self.lineno = nil
        c = src.matched
        if c == '#' then
          src.unread c # ok

          # Collect consecutive comment lines into @comments.
          while src.scan(/\s*#.*(\n+|\z)/) do
            @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
          end

          if src.eos? then
            return RubyLexer::EOF
          end
        end

        # Replace a string of newlines with a single one
        src.scan(/\n+/)

        # In these states the newline is skipped rather than returned.
        if [:expr_beg, :expr_fname,
            :expr_dot, :expr_class].include? lex_state then
          next
        end

        self.command_start = true
        self.lex_state = :expr_beg
        return :tNL
      elsif src.scan(/[\]\)\}]/) then
        # Closing bracket: pop the cond/cmdarg stacks.
        cond.lexpop
        cmdarg.lexpop
        self.lex_state = :expr_end
        self.yacc_value = src.matched
        result = {
          ")" => :tRPAREN,
          "]" => :tRBRACK,
          "}" => :tRCURLY
        }[src.matched]
        return result
      elsif src.check(/\./) then
        if src.scan(/\.\.\./) then
          self.lex_state = :expr_beg
          self.yacc_value = "..."
          return :tDOT3
        elsif src.scan(/\.\./) then
          self.lex_state = :expr_beg
          self.yacc_value = ".."
          return :tDOT2
        elsif src.scan(/\.\d/) then
          rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
        elsif src.scan(/\./) then
          self.lex_state = :expr_dot
          self.yacc_value = "."
          return :tDOT
        end
      elsif src.scan(/\,/) then
        self.lex_state = :expr_beg
        self.yacc_value = ","
        return :tCOMMA
      elsif src.scan(/\(/) then
        # The paren token type depends on lex_state and preceding whitespace.
        result = :tLPAREN2
        self.command_start = true
        if lex_state == :expr_beg || lex_state == :expr_mid then
          result = :tLPAREN
        elsif space_seen then
          if lex_state == :expr_cmdarg then
            result = :tLPAREN_ARG
          elsif lex_state == :expr_arg then
            warning("don't put space before argument parentheses")
            result = :tLPAREN2
          end
        end

        self.expr_beg_push "("

        return result
      elsif src.check(/\=/) then
        if src.scan(/\=\=\=/) then
          self.fix_arg_lex_state
          self.yacc_value = "==="
          return :tEQQ
        elsif src.scan(/\=\=/) then
          self.fix_arg_lex_state
          self.yacc_value = "=="
          return :tEQ
        elsif src.scan(/\=~/) then
          self.fix_arg_lex_state
          self.yacc_value = "=~"
          return :tMATCH
        elsif src.scan(/\=>/) then
          self.fix_arg_lex_state
          self.yacc_value = "=>"
          return :tASSOC
        elsif src.scan(/\=/) then
          # "=begin" at the start of a line opens an embedded document that
          # runs to "=end"; it is stored with the comments.
          if src.was_begin_of_line and src.scan(/begin(?=\s)/) then
            @comments << '=' << src.matched

            unless src.scan(/.*?\n=end\s*(\n|\z)/m) then
              @comments.clear
              rb_compile_error("embedded document meets end of file")
            end

            @comments << src.matched

            next
          else
            self.fix_arg_lex_state
            self.yacc_value = '='
            return :tEQL
          end
        end
      elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/o) then
        # Double-quoted string without interpolation, lexed in a single step.
        self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
        self.lex_state = :expr_end
        return :tSTRING
      elsif src.scan(/\"/) then # FALLBACK
        self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\0"] # TODO: question this
        self.yacc_value = "\""
        return :tSTRING_BEG
      elsif src.scan(/\@\@?\w*/) then
        # Instance or class variable.
        self.token = src.matched

        rb_compile_error "`#{token}` is not allowed as a variable name" if
          token =~ /\@\d/

        return process_token(command_state)
      elsif src.scan(/\:\:/) then
        if (lex_state == :expr_beg ||
            lex_state == :expr_mid ||
            lex_state == :expr_class ||
            (lex_state.is_argument && space_seen)) then
          self.lex_state = :expr_beg
          self.yacc_value = "::"
          return :tCOLON3
        end

        self.lex_state = :expr_dot
        self.yacc_value = "::"
        return :tCOLON2
      elsif lex_state != :expr_end && lex_state != :expr_endarg && src.scan(/:([a-zA-Z_]\w*(?:[?!]|=(?!>))?)/) then
        # Plain symbol literal.
        self.yacc_value = src[1]
        self.lex_state = :expr_end
        return :tSYMBOL
      elsif src.scan(/\:/) then
        # ?: / then / when
        if (lex_state == :expr_end || lex_state == :expr_endarg||
            src.check(/\s/)) then
          self.lex_state = :expr_beg
          self.yacc_value = ":"
          return :tCOLON
        end

        # A quote after the colon starts a quoted symbol.
        case
        when src.scan(/\'/) then
          self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\0"]
        when src.scan(/\"/) then
          self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\0"]
        end

        self.lex_state = :expr_fname
        self.yacc_value = ":"
        return :tSYMBEG
      elsif src.check(/[0-9]/) then
        return parse_number
      elsif src.scan(/\[/) then
        result = src.matched

        if lex_state == :expr_fname || lex_state == :expr_dot then
          # Method names "[]" and "[]=".
          self.lex_state = :expr_arg
          case
          when src.scan(/\]\=/) then
            self.yacc_value = "[]="
            return :tASET
          when src.scan(/\]/) then
            self.yacc_value = "[]"
            return :tAREF
          else
            rb_compile_error "unexpected '['"
          end
        elsif lex_state == :expr_beg || lex_state == :expr_mid then
          result = :tLBRACK
        elsif lex_state.is_argument && space_seen then
          result = :tLBRACK
        end

        self.expr_beg_push "["

        return result
      elsif src.scan(/\'(\\.|[^\'])*\'/) then
        # Single-quoted string, lexed in a single step.
        self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
        self.lex_state = :expr_end
        return :tSTRING
      elsif src.check(/\|/) then
        if src.scan(/\|\|\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "||"
          return :tOP_ASGN
        elsif src.scan(/\|\|/) then
          self.lex_state = :expr_beg
          self.yacc_value = "||"
          return :tOROP
        elsif src.scan(/\|\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "|"
          return :tOP_ASGN
        elsif src.scan(/\|/) then
          self.fix_arg_lex_state
          self.yacc_value = "|"
          return :tPIPE
        end
      elsif src.scan(/\{/) then
        result = if lex_state.is_argument || lex_state == :expr_end then
                   :tLCURLY # block (primary)
                 elsif lex_state == :expr_endarg then
                   :tLBRACE_ARG # block (expr)
                 else
                   :tLBRACE # hash
                 end

        self.expr_beg_push "{"

        return result
      elsif src.scan(/[+-]/) then
        # utype is the unary token, type the binary one.
        sign = src.matched
        utype, type = if sign == "+" then
                        [:tUPLUS, :tPLUS]
                      else
                        [:tUMINUS, :tMINUS]
                      end

        if lex_state == :expr_fname || lex_state == :expr_dot then
          self.lex_state = :expr_arg
          if src.scan(/@/) then
            self.yacc_value = "#{sign}@"
            return utype
          else
            self.yacc_value = sign
            return type
          end
        end

        if src.scan(/\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = sign
          return :tOP_ASGN
        end

        if (lex_state == :expr_beg || lex_state == :expr_mid ||
            (lex_state.is_argument && space_seen && !src.check(/\s/))) then
          if lex_state.is_argument then
            arg_ambiguous
          end

          self.lex_state = :expr_beg
          self.yacc_value = sign

          if src.check(/\d/) then
            if utype == :tUPLUS then
              return self.parse_number
            else
              return :tUMINUS_NUM
            end
          end

          return utype
        end

        self.lex_state = :expr_beg
        self.yacc_value = sign
        return type
      elsif src.check(/\*/) then
        if src.scan(/\*\*=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "**"
          return :tOP_ASGN
        elsif src.scan(/\*\*/) then
          self.yacc_value = "**"
          self.fix_arg_lex_state
          return :tPOW
        elsif src.scan(/\*\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "*"
          return :tOP_ASGN
        elsif src.scan(/\*/) then
          result = if lex_state.is_argument && space_seen && src.check(/\S/) then
                     warning("`*' interpreted as argument prefix")
                     :tSTAR
                   elsif lex_state == :expr_beg || lex_state == :expr_mid then
                     :tSTAR
                   else
                     :tSTAR2
                   end
          self.yacc_value = "*"
          self.fix_arg_lex_state

          return result
        end
      elsif src.check(/\!/) then
        if src.scan(/\!\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "!="
          return :tNEQ
        elsif src.scan(/\!~/) then
          self.lex_state = :expr_beg
          self.yacc_value = "!~"
          return :tNMATCH
        elsif src.scan(/\!/) then
          self.lex_state = :expr_beg
          self.yacc_value = "!"
          return :tBANG
        end
      elsif src.check(/\</) then
        if src.scan(/\<\=\>/) then
          self.fix_arg_lex_state
          self.yacc_value = "<=>"
          return :tCMP
        elsif src.scan(/\<\=/) then
          self.fix_arg_lex_state
          self.yacc_value = "<="
          return :tLEQ
        elsif src.scan(/\<\<\=/) then
          self.fix_arg_lex_state
          self.lex_state = :expr_beg
          self.yacc_value = "\<\<"
          return :tOP_ASGN
        elsif src.scan(/\<\</) then
          # "<<" is tried as a heredoc opener first; when no identifier
          # follows it is the shift operator.
          if (! [:expr_end, :expr_dot,
                 :expr_endarg, :expr_class].include?(lex_state) &&
              (!lex_state.is_argument || space_seen)) then
            tok = self.heredoc_identifier
            if tok then
              return tok
            end
          end

          self.fix_arg_lex_state
          self.yacc_value = "\<\<"
          return :tLSHFT
        elsif src.scan(/\</) then
          self.fix_arg_lex_state
          self.yacc_value = "<"
          return :tLT
        end
      elsif src.check(/\>/) then
        if src.scan(/\>\=/) then
          self.fix_arg_lex_state
          self.yacc_value = ">="
          return :tGEQ
        elsif src.scan(/\>\>=/) then
          self.fix_arg_lex_state
          self.lex_state = :expr_beg
          self.yacc_value = ">>"
          return :tOP_ASGN
        elsif src.scan(/\>\>/) then
          self.fix_arg_lex_state
          self.yacc_value = ">>"
          return :tRSHFT
        elsif src.scan(/\>/) then
          self.fix_arg_lex_state
          self.yacc_value = ">"
          return :tGT
        end
      elsif src.scan(/\`/) then
        # Backtick: a method name after "def" or ".", otherwise the start of
        # a command string.
        self.yacc_value = "`"
        case lex_state
        when :expr_fname then
          self.lex_state = :expr_end
          return :tBACK_REF2
        when :expr_dot then
          self.lex_state = if command_state then
                             :expr_cmdarg
                           else
                             :expr_arg
                           end
          return :tBACK_REF2
        end
        self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
        return :tXSTRING_BEG
      elsif src.scan(/\?/) then
        # "?" is either the ternary operator or a character literal.
        if lex_state == :expr_end || lex_state == :expr_endarg then
          self.lex_state = :expr_beg
          self.yacc_value = "?"
          return :tEH
        end

        if src.eos? then
          rb_compile_error "incomplete character syntax"
        end

        if src.check(/\s|\v/) then
          unless lex_state.is_argument then
            c2 = { " " => 's',
                   "\n" => 'n',
                   "\t" => 't',
                   "\v" => 'v',
                   "\r" => 'r',
                   "\f" => 'f' }[src.matched]

            if c2 then
              warning("invalid character syntax; use ?\\" + c2)
            end
          end

          # ternary
          self.lex_state = :expr_beg
          self.yacc_value = "?"
          return :tEH
        elsif src.check(/\w(?=\w)/) then # ternary, also
          self.lex_state = :expr_beg
          self.yacc_value = "?"
          return :tEH
        end

        # Character literal: the value is the character's code masked to
        # eight bits.
        c = if src.scan(/\\/) then
              self.read_escape
            else
              src.getch
            end
        self.lex_state = :expr_end
        self.yacc_value = c[0].ord & 0xff
        return :tINTEGER
      elsif src.check(/\&/) then
        if src.scan(/\&\&\=/) then
          self.yacc_value = "&&"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        elsif src.scan(/\&\&/) then
          self.lex_state = :expr_beg
          self.yacc_value = "&&"
          return :tANDOP
        elsif src.scan(/\&\=/) then
          self.yacc_value = "&"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        elsif src.scan(/&/) then
          result = if lex_state.is_argument && space_seen &&
                       !src.check(/\s/) then
                     warning("`&' interpreted as argument prefix")
                     :tAMPER
                   elsif lex_state == :expr_beg || lex_state == :expr_mid then
                     :tAMPER
                   else
                     :tAMPER2
                   end

          self.fix_arg_lex_state
          self.yacc_value = "&"
          return result
        end
      elsif src.scan(/\//) then
        # "/" starts a regexp at the beginning of an expression; otherwise it
        # is "/=", an ambiguous regexp argument, or division.
        if lex_state == :expr_beg || lex_state == :expr_mid then
          self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
          self.yacc_value = "/"
          return :tREGEXP_BEG
        end

        if src.scan(/\=/) then
          self.yacc_value = "/"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        end

        if lex_state.is_argument && space_seen then
          unless src.scan(/\s/) then
            arg_ambiguous
            self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
            self.yacc_value = "/"
            return :tREGEXP_BEG
          end
        end

        self.fix_arg_lex_state
        self.yacc_value = "/"

        return :tDIVIDE
      elsif src.scan(/\^=/) then
        self.lex_state = :expr_beg
        self.yacc_value = "^"
        return :tOP_ASGN
      elsif src.scan(/\^/) then
        self.fix_arg_lex_state
        self.yacc_value = "^"
        return :tCARET
      elsif src.scan(/\;/) then
        self.command_start = true
        self.lex_state = :expr_beg
        self.yacc_value = ";"
        return :tSEMI
      elsif src.scan(/\~/) then
        if lex_state == :expr_fname || lex_state == :expr_dot then
          src.scan(/@/)
        end

        self.fix_arg_lex_state
        self.yacc_value = "~"

        return :tTILDE
      elsif src.scan(/\\/) then
        # Backslash-newline continues the line and counts as whitespace.
        if src.scan(/\n/) then
          self.lineno = nil
          space_seen = true
          next
        end
        rb_compile_error "bare backslash only allowed before newline"
      elsif src.scan(/\%/) then
        # "%" starts a %-literal at the beginning of an expression; otherwise
        # it is "%=", an ambiguous literal argument, or modulo.
        if lex_state == :expr_beg || lex_state == :expr_mid then
          return parse_quote
        end

        if src.scan(/\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "%"
          return :tOP_ASGN
        end

        if lex_state.is_argument && space_seen && ! src.check(/\s/) then
          return parse_quote
        end

        self.fix_arg_lex_state
        self.yacc_value = "%"

        return :tPERCENT
      elsif src.check(/\$/) then
        # Global variables, back references and numbered match references.
        if src.scan(/(\$_)(\w+)/) then
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        elsif src.scan(/\$_/) then
          self.lex_state = :expr_end
          self.token = src.matched
          self.yacc_value = src.matched
          return :tGVAR
        elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
          self.lex_state = :expr_end
          self.yacc_value = src.matched
          return :tGVAR
        elsif src.scan(/\$([\&\`\'\+])/) then
          self.lex_state = :expr_end
          # Explicit reference to these vars as symbols...
          if last_state == :expr_fname then
            self.yacc_value = src.matched
            return :tGVAR
          else
            self.yacc_value = src[1].to_sym
            return :tBACK_REF
          end
        elsif src.scan(/\$([1-9]\d*)/) then
          self.lex_state = :expr_end
          if last_state == :expr_fname then
            self.yacc_value = src.matched
            return :tGVAR
          else
            self.yacc_value = src[1].to_i
            return :tNTH_REF
          end
        elsif src.scan(/\$0/) then
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
          # NOTE(review): this branch returns the String "$", not a Symbol.
          self.lex_state = :expr_end
          self.yacc_value = "$"
          return "$"
        elsif src.scan(/\$\w+/)
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        end
      elsif src.check(/\_/) then
        # "__END__" on a line of its own ends the program text.
        if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
          self.lineno = nil
          return RubyLexer::EOF
        elsif src.scan(/\_\w*/) then
          self.token = src.matched
          return process_token(command_state)
        end
      end
    end # END OF CASE

    if src.scan(/\004|\032|\000/) || src.eos? then # ^D, ^Z, EOF
      return RubyLexer::EOF
    else # alpha check
      if src.scan(/\W/) then
        rb_compile_error "Invalid char #{src.matched.inspect} in expression"
      end
    end

    # Identifier or keyword: classification happens in process_token.
    self.token = src.matched if self.src.scan(/\w+/)

    return process_token(command_state)
  end
end
1233
+
1234
# Classify the text accumulated in +token+ and return its token type.
#
# Side effects visible in this method: +token+ may grow (a trailing `!`/`?`,
# or a trailing `=` while in :expr_fname), +lex_state+ and +yacc_value+ are
# updated, and +command_start+ is set when the keyword's id0 is :kDO.
#
# command_state:: when truthy, a plain identifier seen in one of the
#                 beg/mid/dot/arg/cmdarg states moves the lexer to
#                 :expr_cmdarg instead of :expr_arg.
#
# Returns :tGVAR, :tCVAR, :tIVAR, :tFID, :tCONSTANT or :tIDENTIFIER; for a
# reserved word it returns keyword.id0, keyword.id1 or one of the :kDO*
# symbols.
def process_token(command_state)
  # Absorb a trailing `!` or `?` into a token that starts with a word
  # character, but not when the character after it is `=`.
  if token =~ /^\w/ && src.scan(/[\!\?](?!=)/) then
    token << src.matched
  end

  last_state = lex_state

  # Sigil-prefixed names map straight to a variable token type.
  result = case token
           when /^\$/ then :tGVAR
           when /^@@/ then :tCVAR
           when /^@/  then :tIVAR
           end

  if result then
    self.lex_state = :expr_end
  else
    if token =~ /[!?]$/ then
      result = :tFID
    elsif lex_state == :expr_fname && src.scan(/=(?:(?![~>=])|(?==>))/) then
      # ident= : a `=` that is not followed by `~`, `>` or `=`, or one that
      # is followed by `=>`.
      # TODO test lexing of a=>b vs a==>b
      token << src.matched
      result = :tIDENTIFIER
    elsif token =~ /^[A-Z]/ then
      result = :tCONSTANT
    else
      result = :tIDENTIFIER
    end

    # Reserved words are only looked up when the state is not :expr_dot.
    keyword = Keyword.keyword(token) unless lex_state == :expr_dot

    if keyword then
      prior_state = lex_state
      self.lex_state = keyword.state
      self.yacc_value = token

      if keyword.id0 == :kDO then
        self.command_start = true
        # The flavour of `do` is chosen from cond, cmdarg and the state that
        # was in effect before the keyword's own state was installed.
        if cond.is_in_state then
          return :kDO_COND
        elsif (cmdarg.is_in_state && prior_state != :expr_cmdarg) ||
              prior_state == :expr_endarg then
          return :kDO_BLOCK
        else
          return :kDO
        end
      end

      return keyword.id0 if prior_state == :expr_beg

      # When the two ids differ and id1 is being returned, the state is
      # reset to :expr_beg.
      self.lex_state = :expr_beg if keyword.id0 != keyword.id1

      return keyword.id1
    end

    # Not a keyword: pick the follow-up state from the current one.
    self.lex_state =
      case lex_state
      when :expr_beg, :expr_mid, :expr_dot, :expr_arg, :expr_cmdarg then
        command_state ? :expr_cmdarg : :expr_arg
      else
        :expr_end
      end
  end

  self.yacc_value = token

  # A name the parser's env records as :lvar forces :expr_end, unless the
  # state on entry was :expr_dot.
  if last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar then
    self.lex_state = :expr_end
  end

  result
end
1315
+
1316
# Produce the next token while a string terminator (+lex_strterm+) is
# pending.
#
# Delegates to +heredoc+ when the first element of +lex_strterm+ is
# :heredoc and to +parse_string+ otherwise, passing +lex_strterm+ along.
# When the delegate reports :tSTRING_END or :tREGEXP_END, +lineno+ and
# +lex_strterm+ are set to nil and +lex_state+ becomes :expr_end.
#
# Returns whatever the delegate returned.
def yylex_string
  strterm = lex_strterm

  if strterm[0] == :heredoc then
    result = self.heredoc(strterm)
  else
    result = self.parse_string(strterm)
  end

  finished = result == :tSTRING_END || result == :tREGEXP_END

  if finished then
    # NOTE(review): presumably nil makes lineno get recomputed later;
    # confirm against the lineno accessor.
    self.lineno = nil
    self.lex_strterm = nil
    self.lex_state = :expr_end
  end

  result
end
1331
+ end