rb-ruby_parser 2.0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 2.0.4.1
@@ -0,0 +1,88 @@
1
#!/usr/bin/ruby -s

# Parses every file named on the command line with RubyParser and prints
# per-file and total timing / throughput figures.
#
# Switches (populated by ruby's -s option):
#   -q  quiet: print a dot per file instead of the file name and its sexp
#   -g  call GC.start before each file is timed

$q ||= false
$g ||= false

require 'rubygems'
require 'ruby_parser'
require 'pp'

good = bad = 0

multi = ARGV.size != 1
total_time = 0
total_loc = 0
total_kbytes = 0
times = {}
locs = {}
kbytes = {}

begin
  ARGV.each do |file|
    rp = RubyParser.new

    # Measure the file in-process instead of shelling out to `wc`, so
    # names containing spaces or shell metacharacters are handled
    # correctly. A missing/unreadable file counts as empty here; the
    # File.read in the parse step below reports the actual error.
    if File.file?(file) && File.readable?(file) then
      loc  = File.open(file, "rb") { |io| io.read.count("\n") }
      size = File.size(file) / 1024.0
    else
      loc  = 0
      size = 0.0
    end

    locs[file] = loc
    kbytes[file] = size
    total_loc += loc
    total_kbytes += size

    if $q then
      $stderr.print "."
    else
      warn "# file = #{file} loc = #{loc}"
    end

    GC.start if $g

    t = Time.now
    begin
      begin
        rp.reset
        r = rp.parse(File.read(file), file)
        pp r unless $q
        good += 1
      rescue SyntaxError => e
        # SyntaxError is not a StandardError, so it needs its own clause
        warn "SyntaxError for #{file}: #{e.message}"
        bad += 1
      end
    rescue => e
      warn "#{e.backtrace.first} #{e.inspect.gsub(/\n/, ' ')} for #{file}"
      warn " #{e.backtrace.join("\n ")}"
      bad += 1
    end

    t = Time.now - t
    times[file] = t
    total_time += t
  end
rescue Interrupt
  # ^C stops processing further files; fall through to the report
end

warn "done"

puts
puts "good = #{good} bad = #{bad}" if multi
puts

format = "%5.2fs:%9.2f l/s:%8.2f Kb/s:%5d Kb:%5d loc:%s"

# slowest files first; files that took under 5ms are not listed
times.sort_by { |f, t| -t }.each do |f, t|
  next if t < 0.005
  loc = locs[f]
  size = kbytes[f]
  puts format % [t, loc / t, size / t, size, loc, f]
end

puts

puts format % [total_time,
               total_loc / total_time,
               total_kbytes / total_time,
               total_kbytes,
               total_loc,
               "TOTAL"] unless total_time == 0
@@ -0,0 +1,120 @@
1
+ #!/usr/bin/ruby -ws
2
+
3
+ $f ||= false
4
+
5
+ $:.unshift "../../ruby_parser/dev/lib"
6
+ $:.unshift "../../ruby2ruby/dev/lib"
7
+
8
+ require 'rubygems'
9
+ require 'ruby2ruby'
10
+ require 'ruby_parser'
11
+
12
+ require 'gauntlet'
13
+
14
##
# Gauntlet subclass that round-trips Ruby source through
# RubyParser -> Ruby2Ruby -> RubyParser and records whether the sexp
# parsed from the regenerated source equals the original sexp.
#
# Results live in +data+, keyed by project name and then by file path.
# A file's entry is +true+ (round-trip matched), +:unparsable+ (the
# original source did not parse) or a String describing the failure.
# A project's entry is replaced by +true+ once every file in it is good.
#
# NOTE(review): +data+, +dirty=+, +load_yaml+, +data_file+ and
# +run_the_gauntlet+ are presumably provided by Gauntlet; they are not
# defined in this file.

class RubyParserGauntlet < Gauntlet
  # Starts with an auto-vivifying Hash (missing project => {}) and merges
  # in whatever load_yaml returns for data_file.
  def initialize
    super

    self.data = Hash.new { |h,k| h[k] = {} }
    old_data = load_yaml data_file
    self.data.merge! old_data
  end

  # Whether project +name+ can be skipped. With -f ($f set), anything
  # that already has recorded results is skipped; otherwise only projects
  # whose entry is exactly +true+.
  def should_skip? name
    if $f then
      if Hash === data[name] then
        ! data[name].empty?
      else
        data[name]
      end
    else
      data[name] == true # yes, == true on purpose
    end
  end

  # Pretty-prints +o1+ and +o2+ into two temporary files and returns the
  # output of `diff -u` between them.
  #
  # Tempfile is used rather than fixed /tmp/a.PID and /tmp/b.PID paths so
  # that the names are not predictable and cannot collide.
  def diff_pp o1, o2
    require 'pp'
    require 'tempfile'

    a = Tempfile.new "ruby_parser_gauntlet_a"
    b = Tempfile.new "ruby_parser_gauntlet_b"

    PP.pp o1, a
    PP.pp o2, b

    # make sure diff sees the complete output
    a.flush
    b.flush

    `diff -u #{a.path} #{b.path}`
  ensure
    a.close! if a
    b.close! if b
  end

  # Records +msg+ as the failure for +file+ in project +name+ and marks
  # the data as dirty.
  def broke name, file, msg
    warn "bad"
    self.data[name][file] = msg
    self.dirty = true
  end

  # Round-trips the file at +path+ and records the outcome under project
  # +name+. Exits the process with status 1 on Interrupt.
  def process path, name
    begin
      $stderr.print " #{path}: "
      rp = RubyParser.new
      r2r = Ruby2Ruby.new

      old_ruby = File.read(path)

      begin
        old_sexp = rp.process old_ruby
      rescue Racc::ParseError
        self.data[name][path] = :unparsable
        self.dirty = true
        return
      end

      new_ruby = r2r.process old_sexp.deep_clone

      begin
        new_sexp = rp.process new_ruby
      rescue Racc::ParseError => e
        broke name, path, "couldn't parse new_ruby: #{e.message.strip}"
        return
      end

      if old_sexp != new_sexp then
        broke name, path, diff_pp(old_sexp, new_sexp)
        return
      end

      self.data[name][path] = true
      self.dirty = true

      warn "good"
    rescue Interrupt
      puts "User cancelled"
      exit 1
    rescue Exception => e
      # Deliberately broad: the lexer raises SyntaxError, which is not a
      # StandardError, and one bad file must not abort the whole run.
      # Report the exception class (interpolating e itself would just
      # repeat the message).
      broke name, path, " UNKNOWN ERROR: #{e.class}: #{e.message.strip}"
    end
  end

  # Processes every .rb file below the current directory that is not
  # already recorded as good for +name+, then collapses the project's
  # entry to +true+ if every recorded file is good.
  def run name
    warn name
    Dir["**/*.rb"].sort.each do |path|
      next if path =~ /gemspec\.rb/ # HACK
      next if data[name][path] == true
      process path, name
    end

    if self.data[name].values.all? { |v| v == true } then
      warn " ALL GOOD!"
      self.data[name] = true
      self.dirty = true
    end
  end
end
115
+
116
# The optional first command-line argument is compiled into a Regexp and
# handed to run_the_gauntlet; nil is passed when no argument was given.
pattern = ARGV.shift
pattern = Regexp.new(pattern) unless pattern.nil?

RubyParserGauntlet.new.run_the_gauntlet pattern
@@ -0,0 +1,1329 @@
1
+ class RubyLexer
2
+ attr_accessor :command_start
3
+ attr_accessor :cmdarg
4
+ attr_accessor :cond
5
+ attr_accessor :nest
6
+
7
+ ESC_RE = /\\([0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|[^0-7xMCc])/
8
+
9
+ # Additional context surrounding tokens that both the lexer and
10
+ # grammar use.
11
+ attr_reader :lex_state
12
+
13
+ attr_accessor :lex_strterm
14
+
15
+ attr_accessor :parser # HACK for very end of lexer... *sigh*
16
+
17
+ # Stream of data that yylex examines.
18
+ attr_reader :src
19
+
20
+ # Last token read via yylex.
21
+ attr_accessor :token
22
+
23
+ attr_accessor :string_buffer
24
+
25
+ # Value of last token which had a value associated with it.
26
+ attr_accessor :yacc_value
27
+
28
+ # What handles warnings
29
+ attr_accessor :warnings
30
+
31
+ EOF = :eof_haha!
32
+
33
+ # ruby constants for strings (should this be moved somewhere else?)
34
+ STR_FUNC_BORING = 0x00
35
+ STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
36
+ STR_FUNC_EXPAND = 0x02
37
+ STR_FUNC_REGEXP = 0x04
38
+ STR_FUNC_AWORDS = 0x08
39
+ STR_FUNC_SYMBOL = 0x10
40
+ STR_FUNC_INDENT = 0x20 # <<-HEREDOC
41
+
42
+ STR_SQUOTE = STR_FUNC_BORING
43
+ STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
44
+ STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
45
+ STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
46
+ STR_SSYM = STR_FUNC_SYMBOL
47
+ STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
48
+
49
##
# Fetches the next token from yylex and stores it in +token+.
#
# Raises RuntimeError if yylex returns nil or false.
#
# @return true unless the token is RubyLexer::EOF.

def advance
  tok = self.token = yylex

  raise "yylex returned nil" unless tok

  !(RubyLexer::EOF == tok)
end
61
+
62
# Reports an ambiguous first argument through #warning.
def arg_ambiguous
  warning "Ambiguous first argument. make sure."
end
65
+
66
# Returns everything collected in @comments joined into one String and
# empties the buffer.
def comments
  @comments.join.tap { @comments.clear }
end
71
+
72
# Pushes false onto both the cond and cmdarg stacks, switches the lexer
# to :expr_beg and stores +val+ as the current yacc_value.
def expr_beg_push val
  [cond, cmdarg].each { |stack| stack.push false }
  self.lex_state  = :expr_beg
  self.yacc_value = val
end
78
+
79
# Sets lex_state to :expr_arg when the lexer is currently in
# :expr_fname or :expr_dot, and to :expr_beg otherwise.
def fix_arg_lex_state
  after_name_or_dot = [:expr_fname, :expr_dot].include? lex_state
  self.lex_state = after_name_or_dot ? :expr_arg : :expr_beg
end
86
+
87
# Lexes the next piece of a heredoc body.
#
# +here+ is the [:heredoc, eos, func, last_line] array built by
# heredoc_identifier: +eos+ is the terminator text, +func+ the STR_FUNC_*
# bit flags and +last_line+ the remainder of the line on which the
# heredoc was opened.
#
# Returns :tSTRING_END when the terminator is found at the beginning of
# a line, :tSTRING_DVAR / :tSTRING_DBEG at the start of an interpolation
# (expanding heredocs only), and :tSTRING_CONTENT otherwise, with the
# text in yacc_value. Calls rb_compile_error if the input ends before
# the terminator is seen.
def heredoc here # 63 lines
  _, eos, func, last_line = here

  indent  = (func & STR_FUNC_INDENT) != 0
  expand  = (func & STR_FUNC_EXPAND) != 0

  # The terminator is literal text: quoted identifiers such as <<"a.b"
  # may contain regexp metacharacters, so escape it before interpolating.
  eos_lit = Regexp.escape eos
  eos_re  = indent ? /[ \t]*#{eos_lit}(\r?\n|\z)/ : /#{eos_lit}(\r?\n|\z)/
  err_msg = "can't match #{eos_re.inspect} anywhere in "

  rb_compile_error err_msg if src.eos?

  if src.beginning_of_line? && src.scan(eos_re) then
    src.unread_many last_line # TODO: figure out how to remove this
    self.yacc_value = eos
    return :tSTRING_END
  end

  self.string_buffer = []

  if expand then
    case
    when src.scan(/#[$@]/) then
      src.pos -= 1 # FIX omg stupid
      self.yacc_value = src.matched
      return :tSTRING_DVAR
    when src.scan(/#[{]/) then
      self.yacc_value = src.matched
      return :tSTRING_DBEG
    when src.scan(/#/) then
      string_buffer << '#'
    end

    until src.scan(eos_re) do
      c = tokadd_string func, "\n", nil

      rb_compile_error err_msg if c == RubyLexer::EOF

      if c != "\n" then
        # stopped at an interpolation: hand back what we have so far
        self.yacc_value = string_buffer.join.delete("\r")
        return :tSTRING_CONTENT
      else
        string_buffer << src.scan(/\n/)
      end

      rb_compile_error err_msg if src.eos?
    end

    # tack on a NL after the heredoc token - FIX NL should not be needed
    src.unread_many(eos + "\n") # TODO: remove this... stupid stupid stupid
  else
    # non-expanding heredoc: take whole lines verbatim up to the terminator
    until src.check(eos_re) do
      string_buffer << src.scan(/.*(\n|\z)/)
      rb_compile_error err_msg if src.eos?
    end
  end

  self.lex_strterm = [:heredoc, eos, func, last_line]
  self.yacc_value = string_buffer.join.delete("\r")

  return :tSTRING_CONTENT
end
151
+
152
# Lexes the identifier that follows "<<".
#
# Handles the quoted forms ('ID', "ID", `ID`) and the bare \w+ form,
# each with an optional leading "-" that adds STR_FUNC_INDENT. On
# success the rest of the current line is cut out of the source string
# (leaving just a newline), lex_strterm is set to
# [:heredoc, id, func, rest_of_line], and :tSTRING_BEG or :tXSTRING_BEG
# is returned. Returns nil when what follows is not a heredoc identifier.
def heredoc_identifier # 51 lines
  term = nil
  func = STR_FUNC_BORING
  self.string_buffer = []

  if src.scan(/(-?)(['"`])(.*?)\2/) then
    term = src[2]
    func |= STR_FUNC_INDENT unless src[1].empty?
    func |= case term
            when "\'" then STR_SQUOTE
            when '"' then STR_DQUOTE
            else STR_XQUOTE
            end
    string_buffer << src[3]
  elsif src.scan(/-?(['"`])(?!\1*\Z)/) then
    rb_compile_error "unterminated here document identifier"
  elsif src.scan(/(-?)(\w+)/) then
    term = '"'
    func |= STR_DQUOTE
    func |= STR_FUNC_INDENT unless src[1].empty?
    string_buffer << src[2]
  else
    return nil
  end

  line = nil
  if src.check(/.*\n/) then
    # TODO: think about storing off the char range instead
    line = src.string[src.pos, src.matched_size]
    src.string[src.pos, src.matched_size] = "\n"
    src.pos += 1
  end

  self.lex_strterm = [:heredoc, string_buffer.join, func, line]

  quote, tok = term == '`' ? ["`", :tXSTRING_BEG] : ["\"", :tSTRING_BEG]
  self.yacc_value = quote
  return tok
end
203
+
204
# Sets up an empty comment buffer, a nesting depth of 0 and fresh
# :cond / :cmdarg StackStates, then calls #reset.
def initialize
  @comments   = []
  self.nest   = 0
  self.cond   = StackState.new(:cond)
  self.cmdarg = StackState.new(:cmdarg)

  reset
end
212
+
213
# Converts the text most recently matched by src to an Integer in the
# given +base+, stores it in yacc_value and returns :tINTEGER. A doubled
# underscore in the matched text is reported via rb_compile_error.
def int_with_base base
  rb_compile_error "Invalid numeric format" if /__/ =~ src.matched
  self.yacc_value = src.matched.to_i base
  :tINTEGER
end
218
+
219
# Writer for lex_state; anything other than a Symbol raises
# RuntimeError ("wtf?").
def lex_state= o
  raise "wtf?" unless o.kind_of? Symbol
  @lex_state = o
end
223
+
224
attr_writer :lineno

# Current line number. It is read from src.lineno the first time it is
# needed and cached until the writer resets it (assigning nil forces a
# fresh read on the next call).
def lineno
  @lineno || (@lineno = src.lineno)
end
228
+
229
##
# Parse a number from the input stream (src), starting at the current
# scan position.
#
# Sets lex_state to :expr_end and stores the numeric value in
# yacc_value. Malformed literals are reported through rb_compile_error.
#
# The 0x / 0b / 0d base prefixes are matched case-insensitively, so
# 0X1F, 0B101 and 0D19 are accepted as well.
#
# @return :tINTEGER or :tFLOAT.

def parse_number
  self.lex_state = :expr_end

  case
  when src.scan(/[+-]?0[xbd]\b/i) then
    # a base prefix with no digits after it
    rb_compile_error "Invalid numeric format"
  when src.scan(/[+-]?0x[a-f0-9_]+/i) then
    int_with_base(16)
  when src.scan(/[+-]?0b[01_]+/i) then
    int_with_base(2)
  when src.scan(/[+-]?0d[0-9_]+/i) then
    int_with_base(10)
  when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
    rb_compile_error "Illegal octal digit."
  when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
    int_with_base(8)
  when src.scan(/[+-]?[\d_]+_(e|\.)/) then
    rb_compile_error "Trailing '_' in number."
  when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
    number = src.matched
    if number =~ /__/ then
      rb_compile_error "Invalid numeric format"
    end
    self.yacc_value = number.to_f
    :tFLOAT
  when src.scan(/[+-]?0\b/) then
    int_with_base(10)
  when src.scan(/[+-]?[\d_]+\b/) then
    int_with_base(10)
  else
    rb_compile_error "Bad number format"
  end
end
268
+
269
# Lexes the opening of a %-literal (the "%" itself has already been
# consumed by the caller).
#
# Accepts the long-hand form (a type letter followed by a delimiter,
# e.g. %q( ) and the short-hand form (delimiter only, treated as %Q).
# Sets lex_strterm to [:strterm, string_type, closing_delim, opening_delim]
# where the opening delimiter is "\0" for non-bracket delimiters, and
# returns the matching *_BEG token type.
def parse_quote # 58 lines
  short_hand = !src.scan(/[a-z0-9]{1,2}/i)

  if short_hand then # Short-hand (e.g. %{, %., %!, etc)
    c = 'Q'
  else               # Long-hand (e.g. %Q{}).
    rb_compile_error "unknown type of %string" if src.matched_size == 2
    c = src.matched
  end
  beg = src.getch

  if src.eos? || c == RubyLexer::EOF || beg == RubyLexer::EOF then
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
  closers = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }
  nnd = closers[beg]
  nnd, beg = beg, "\0" if nnd.nil?

  self.yacc_value = "%#{c}#{beg}"

  token_type, string_type =
    case c
    when 'Q' then
      self.yacc_value = "%#{short_hand ? nnd : c + beg}"
      [:tSTRING_BEG, STR_DQUOTE]
    when 'q' then
      [:tSTRING_BEG, STR_SQUOTE]
    when 'W' then
      src.scan(/\s*/)
      [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_AWORDS]
    when 'w' then
      src.scan(/\s*/)
      [:tAWORDS_BEG, STR_SQUOTE | STR_FUNC_AWORDS]
    when 'x' then
      [:tXSTRING_BEG, STR_XQUOTE]
    when 'r' then
      [:tREGEXP_BEG, STR_REGEXP]
    when 's' then
      self.lex_state = :expr_fname
      [:tSYMBEG, STR_SSYM]
    end

  if token_type.nil? then
    rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{c}'."
  end

  self.lex_strterm = [:strterm, string_type, nnd, beg]

  token_type
end
317
+
318
# Lexes the next piece of the string-like literal described by +quote+,
# an array of the form [:strterm, string_type, term, open].
#
# Returns :tSTRING_END / :tREGEXP_END when the terminator is reached,
# :tSPACE between the elements of a word list, :tSTRING_DVAR /
# :tSTRING_DBEG at the start of an interpolation, and :tSTRING_CONTENT
# (text in yacc_value) otherwise.
def parse_string(quote) # 65 lines
  _, func, term, paren = quote
  term_re = Regexp.escape term

  # func is nil after a word list has seen its terminator (quote[1] is
  # cleared below); presumably the same array is handed back in on the
  # following call, which then closes the literal here.
  unless func then
    self.lineno = nil
    return :tSTRING_END
  end

  awords = (func & STR_FUNC_AWORDS) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  expand = (func & STR_FUNC_EXPAND) != 0

  saw_space = awords && src.scan(/\s+/)

  if nest == 0 && src.scan(/#{term_re}/) then
    if awords then
      quote[1] = nil
      return :tSPACE
    end

    self.yacc_value = regexp ? regx_options : term
    self.lineno = nil
    return regexp ? :tREGEXP_END : :tSTRING_END
  end

  return :tSPACE if saw_space

  self.string_buffer = []

  if expand then
    if src.scan(/#(?=[$@])/) then
      return :tSTRING_DVAR
    elsif src.scan(/#[{]/) then
      return :tSTRING_DBEG
    elsif src.scan(/#/) then
      string_buffer << '#'
    end
  end

  if tokadd_string(func, term, paren) == RubyLexer::EOF then
    rb_compile_error "unterminated string meets end of file"
  end

  self.yacc_value = string_buffer.join

  :tSTRING_CONTENT
end
378
+
379
# Raises SyntaxError with +msg+ followed by the current line number and
# the inspected remainder of the current source line.
def rb_compile_error msg
  where = ". near line #{lineno}: #{src.rest[/^.*/].inspect}"
  raise SyntaxError, msg + where
end
383
+
384
# Reads one escape sequence from src (the leading backslash has already
# been consumed) and returns the character(s) it denotes.
#
# Handles the single-letter escapes, octal and hex constants, and the
# \M- / \C- / \c meta and control forms (which may wrap a further
# escape). An incomplete \M, \C, \c, \x or digit escape, or end of
# input, is reported through rb_compile_error; any other character is
# returned as itself.
def read_escape # 51 lines
  if src.scan(/[\\ntrfvaebs]/) then
    # built per call so callers may safely mutate the returned string
    simple = {
      "\\" => '\\',   # Backslash
      "n"  => "\n",   # newline
      "t"  => "\t",   # horizontal tab
      "r"  => "\r",   # carriage-return
      "f"  => "\f",   # form-feed
      "v"  => "\13",  # vertical tab
      "a"  => "\007", # alarm(bell)
      "e"  => "\033", # escape
      "b"  => "\010", # backspace
      "s"  => " ",    # space
    }
    return simple[src.matched]
  end

  if src.scan(/[0-7]{1,3}/) then            # octal constant
    src.matched.to_i(8).chr
  elsif src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
    src[1].to_i(16).chr
  elsif src.check(/M-\\[\\MCc]/) then
    src.scan(/M-\\/) # eat it
    ch = read_escape
    ch[0] = (ch[0].ord | 0x80).chr
    ch
  elsif src.scan(/M-(.)/) then
    ch = src[1]
    ch[0] = (ch[0].ord | 0x80).chr
    ch
  elsif src.check(/(C-|c)\\[\\MCc]/) then
    src.scan(/(C-|c)\\/) # eat it
    ch = read_escape
    ch[0] = (ch[0].ord & 0x9f).chr
    ch
  elsif src.scan(/C-\?|c\?/) then
    127.chr
  elsif src.scan(/(C-|c)(.)/) then
    ch = src[2]
    ch[0] = (ch[0].ord & 0x9f).chr
    ch
  elsif src.scan(/[McCx0-9]/) || src.eos? then
    rb_compile_error("Invalid escape character syntax")
  else
    src.getch
  end
end
436
+
437
# Scans the run of lowercase letters following a regexp terminator and
# returns the recognised option letters (any of "ixmonesu") as a String.
# Unrecognised letters are reported through rb_compile_error.
def regx_options # 15 lines
  letters = src.scan(/[a-z]+/) ? src.matched.split(//) : []
  good, bad = letters.partition { |s| s =~ /^[ixmonesu]$/ }

  unless bad.empty? then
    plural = bad.size > 1 ? "s" : ""
    rb_compile_error("unknown regexp option%s - %s" % [plural, bad.join.inspect])
  end

  good.join
end
451
+
452
# Returns the lexer to its initial state: no source, no lex_state, no
# pending string terminator, token or yacc_value, and command_start set.
def reset
  @src = @lex_state = nil

  self.yacc_value    = nil
  self.token         = nil
  self.lex_strterm   = nil
  self.command_start = true

  nil
end
461
+
462
# Wraps the String +src+ in an RPStringScanner and makes it the input
# of the lexer. Raises RuntimeError for anything that is not a String.
def src= src
  unless src.kind_of? String then
    raise "bad src: #{src.inspect}"
  end

  @src = RPStringScanner.new src
end
466
+
467
# Consumes one backslash escape from src and appends its raw,
# uninterpreted text to string_buffer. A backslash-newline is dropped.
# \M- / \C- / \c followed by another backslash recurse to pick up the
# nested escape. Malformed escapes are reported through
# rb_compile_error. +term+ is only passed along to the recursive call.
def tokadd_escape term # 20 lines
  if src.scan(/\\\n/) then
    # just ignore
  elsif src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
    string_buffer << src.matched
  elsif src.scan(/\\([MC]-|c)(?=\\)/) then
    string_buffer << src.matched
    tokadd_escape term
  elsif src.scan(/\\([MC]-|c)(.)/) then
    string_buffer << src.matched
  elsif src.scan(/\\[McCx]/) then
    rb_compile_error "Invalid escape character syntax"
  elsif src.scan(/\\(.)/m) then
    string_buffer << src.matched
  else
    rb_compile_error "Invalid escape character syntax"
  end
end
486
+
487
# Appends string content from src to string_buffer until something the
# caller has to handle is reached.
#
# +func+ is the STR_FUNC_* bit mask, +term+ the closing delimiter and
# +paren+ the opening delimiter (nil or "\0" when the literal has no
# nesting delimiter). Scanning stops, with the scan position left on
# the stopping character, at:
#
# * the terminator while nest is 0,
# * whitespace in a word list,
# * a "#" that starts an interpolation in an expanding literal.
#
# Nested opening/closing delimiters adjust nest and are kept as content.
#
# Returns the last piece of text looked at, or RubyLexer::EOF when the
# input is exhausted.
def tokadd_string(func, term, paren) # 105 lines
  awords = (func & STR_FUNC_AWORDS) != 0
  escape = (func & STR_FUNC_ESCAPE) != 0
  expand = (func & STR_FUNC_EXPAND) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  symbol = (func & STR_FUNC_SYMBOL) != 0

  paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
  term_re  = Regexp.new(Regexp.escape(term))

  # Regexp for a run of ordinary characters. It depends only on the
  # arguments, so it is built at most once per call (on first use)
  # instead of once per chunk.
  chunk_re = nil

  until src.eos? do
    c = nil
    handled = true
    case
    when self.nest == 0 && src.scan(term_re) then
      src.pos -= 1 # leave the terminator for the caller
      break
    when paren_re && src.scan(paren_re) then
      self.nest += 1
    when src.scan(term_re) then
      self.nest -= 1
    when awords && src.scan(/\s/) then
      src.pos -= 1
      break
    when expand && src.scan(/#(?=[\$\@\{])/) then
      src.pos -= 1
      break
    when expand && src.scan(/#(?!\n)/) then
      # do nothing
    when src.check(/\\/) then
      case
      when awords && src.scan(/\\\n/) then
        string_buffer << "\n"
        next
      when awords && src.scan(/\\\s/) then
        c = ' '
      when expand && src.scan(/\\\n/) then
        next
      when regexp && src.check(/\\/) then
        self.tokadd_escape term
        next
      when expand && src.scan(/\\/) then
        c = self.read_escape
      when src.scan(/\\\n/) then
        # do nothing
      when src.scan(/\\\\/) then
        string_buffer << '\\' if escape
        c = '\\'
      when src.scan(/\\/) then
        unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
          string_buffer << "\\"
        end
      else
        handled = false
      end
    else
      handled = false
    end # case

    unless handled then
      chunk_re ||= begin
                     t = Regexp.escape term
                     x = Regexp.escape(paren) if paren && paren != "\000"
                     if awords then
                       /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
                     else
                       /[^#{t}#{x}\#\0\\]+|./
                     end
                   end

      src.scan chunk_re
      c = src.matched

      rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
    end # unless handled

    c ||= src.matched
    string_buffer << c
  end # until

  c ||= src.matched
  c = RubyLexer::EOF if src.eos?

  return c
end
572
+
573
# Translates the body of a backslash escape (the text after the
# backslash) into the character it stands for.
#
# Fixed escapes come from a lookup table; octal, hex, meta (M-x) and
# control (C-x / cx) forms are computed. An incomplete M, C, c, x or
# digit escape is reported through rb_compile_error. Anything else is
# returned unchanged.
def unescape s
  fixed = {
    "a"    => "\007",
    "b"    => "\010",
    "e"    => "\033",
    "f"    => "\f",
    "n"    => "\n",
    "r"    => "\r",
    "s"    => " ",
    "t"    => "\t",
    "v"    => "\13",
    "\\"   => '\\',
    "\n"   => "",
    "C-\?" => 127.chr,
    "c\?"  => 127.chr,
  }

  fixed.fetch(s) do
    case s
    when /^[0-7]{1,3}/ then
      $&.to_i(8).chr
    when /^x([0-9a-fA-F]{1,2})/ then
      $1.to_i(16).chr
    when /^M-(.)/ then
      ($1[0].ord | 0x80).chr
    when /^(C-|c)(.)/ then
      ($2[0].ord & 0x9f).chr
    when /^[McCx0-9]/ then
      rb_compile_error("Invalid escape character syntax")
    else
      s
    end
  end
end
608
+
609
# Hook for lexer warnings. Currently ignores +s+ and returns nil.
def warning(s)
  nil # do nothing for now
end
612
+
613
+ ##
614
+ # Returns the next token. Also sets yy_val is needed.
615
+ #
616
+ # @return Description of the Returned Value
617
+
618
+ def yylex # 826 lines
619
+
620
+ c = ''
621
+ space_seen = false
622
+ command_state = false
623
+ src = self.src
624
+
625
+ self.token = nil
626
+ self.yacc_value = nil
627
+
628
+ return yylex_string if lex_strterm
629
+
630
+ command_state = self.command_start
631
+ self.command_start = false
632
+
633
+ last_state = lex_state
634
+
635
+ loop do # START OF CASE
636
+ if src.scan(/\ |\t|\r|\f|\13/) then # white spaces, 13 = '\v
637
+ space_seen = true
638
+ next
639
+ elsif src.check(/[^a-zA-Z]/) then
640
+ if src.scan(/\n|#/) then
641
+ self.lineno = nil
642
+ c = src.matched
643
+ if c == '#' then
644
+ src.unread c # ok
645
+
646
+ while src.scan(/\s*#.*(\n+|\z)/) do
647
+ @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
648
+ end
649
+
650
+ if src.eos? then
651
+ return RubyLexer::EOF
652
+ end
653
+ end
654
+
655
+ # Replace a string of newlines with a single one
656
+ src.scan(/\n+/)
657
+
658
+ if [:expr_beg, :expr_fname,
659
+ :expr_dot, :expr_class].include? lex_state then
660
+ next
661
+ end
662
+
663
+ self.command_start = true
664
+ self.lex_state = :expr_beg
665
+ return :tNL
666
+ elsif src.scan(/[\]\)\}]/) then
667
+ cond.lexpop
668
+ cmdarg.lexpop
669
+ self.lex_state = :expr_end
670
+ self.yacc_value = src.matched
671
+ result = {
672
+ ")" => :tRPAREN,
673
+ "]" => :tRBRACK,
674
+ "}" => :tRCURLY
675
+ }[src.matched]
676
+ return result
677
+ elsif src.check(/\./) then
678
+ if src.scan(/\.\.\./) then
679
+ self.lex_state = :expr_beg
680
+ self.yacc_value = "..."
681
+ return :tDOT3
682
+ elsif src.scan(/\.\./) then
683
+ self.lex_state = :expr_beg
684
+ self.yacc_value = ".."
685
+ return :tDOT2
686
+ elsif src.scan(/\.\d/) then
687
+ rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
688
+ elsif src.scan(/\./) then
689
+ self.lex_state = :expr_dot
690
+ self.yacc_value = "."
691
+ return :tDOT
692
+ end
693
+ elsif src.scan(/\,/) then
694
+ self.lex_state = :expr_beg
695
+ self.yacc_value = ","
696
+ return :tCOMMA
697
+ elsif src.scan(/\(/) then
698
+ result = :tLPAREN2
699
+ self.command_start = true
700
+ if lex_state == :expr_beg || lex_state == :expr_mid then
701
+ result = :tLPAREN
702
+ elsif space_seen then
703
+ if lex_state == :expr_cmdarg then
704
+ result = :tLPAREN_ARG
705
+ elsif lex_state == :expr_arg then
706
+ warning("don't put space before argument parentheses")
707
+ result = :tLPAREN2
708
+ end
709
+ end
710
+
711
+ self.expr_beg_push "("
712
+
713
+ return result
714
+ elsif src.check(/\=/) then
715
+ if src.scan(/\=\=\=/) then
716
+ self.fix_arg_lex_state
717
+ self.yacc_value = "==="
718
+ return :tEQQ
719
+ elsif src.scan(/\=\=/) then
720
+ self.fix_arg_lex_state
721
+ self.yacc_value = "=="
722
+ return :tEQ
723
+ elsif src.scan(/\=~/) then
724
+ self.fix_arg_lex_state
725
+ self.yacc_value = "=~"
726
+ return :tMATCH
727
+ elsif src.scan(/\=>/) then
728
+ self.fix_arg_lex_state
729
+ self.yacc_value = "=>"
730
+ return :tASSOC
731
+ elsif src.scan(/\=/) then
732
+ if src.was_begin_of_line and src.scan(/begin(?=\s)/) then
733
+ @comments << '=' << src.matched
734
+
735
+ unless src.scan(/.*?\n=end( |\t|\f)*[^(\n|\z)]*(\n|\z)/m) then
736
+ @comments.clear
737
+ rb_compile_error("embedded document meets end of file")
738
+ end
739
+
740
+ @comments << src.matched
741
+
742
+ next
743
+ else
744
+ self.fix_arg_lex_state
745
+ self.yacc_value = '='
746
+ return :tEQL
747
+ end
748
+ end
749
+ elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/o) then
750
+ self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
751
+ self.lex_state = :expr_end
752
+ return :tSTRING
753
+ elsif src.scan(/\"/) then # FALLBACK
754
+ self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\0"] # TODO: question this
755
+ self.yacc_value = "\""
756
+ return :tSTRING_BEG
757
+ elsif src.scan(/\@\@?\w*/) then
758
+ self.token = src.matched
759
+
760
+ rb_compile_error "`#{token}` is not allowed as a variable name" if
761
+ token =~ /\@\d/
762
+
763
+ return process_token(command_state)
764
+ elsif src.scan(/\:\:/) then
765
+ if (lex_state == :expr_beg ||
766
+ lex_state == :expr_mid ||
767
+ lex_state == :expr_class ||
768
+ (lex_state.is_argument && space_seen)) then
769
+ self.lex_state = :expr_beg
770
+ self.yacc_value = "::"
771
+ return :tCOLON3
772
+ end
773
+
774
+ self.lex_state = :expr_dot
775
+ self.yacc_value = "::"
776
+ return :tCOLON2
777
+ elsif lex_state != :expr_end && lex_state != :expr_endarg && src.scan(/:([a-zA-Z_]\w*(?:[?!]|=(?!>))?)/) then
778
+ self.yacc_value = src[1]
779
+ self.lex_state = :expr_end
780
+ return :tSYMBOL
781
+ elsif src.scan(/\:/) then
782
+ # ?: / then / when
783
+ if (lex_state == :expr_end || lex_state == :expr_endarg||
784
+ src.check(/\s/)) then
785
+ self.lex_state = :expr_beg
786
+ self.yacc_value = ":"
787
+ return :tCOLON
788
+ end
789
+
790
+ case
791
+ when src.scan(/\'/) then
792
+ self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\0"]
793
+ when src.scan(/\"/) then
794
+ self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\0"]
795
+ end
796
+
797
+ self.lex_state = :expr_fname
798
+ self.yacc_value = ":"
799
+ return :tSYMBEG
800
+ elsif src.check(/[0-9]/) then
801
+ return parse_number
802
+ elsif src.scan(/\[/) then
803
+ result = src.matched
804
+
805
+ if lex_state == :expr_fname || lex_state == :expr_dot then
806
+ self.lex_state = :expr_arg
807
+ case
808
+ when src.scan(/\]\=/) then
809
+ self.yacc_value = "[]="
810
+ return :tASET
811
+ when src.scan(/\]/) then
812
+ self.yacc_value = "[]"
813
+ return :tAREF
814
+ else
815
+ rb_compile_error "unexpected '['"
816
+ end
817
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
818
+ result = :tLBRACK
819
+ elsif lex_state.is_argument && space_seen then
820
+ result = :tLBRACK
821
+ end
822
+
823
+ self.expr_beg_push "["
824
+
825
+ return result
826
+ elsif src.scan(/\'(\\.|[^\'])*\'/) then
827
+ self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
828
+ self.lex_state = :expr_end
829
+ return :tSTRING
830
+ elsif src.check(/\|/) then
831
+ if src.scan(/\|\|\=/) then
832
+ self.lex_state = :expr_beg
833
+ self.yacc_value = "||"
834
+ return :tOP_ASGN
835
+ elsif src.scan(/\|\|/) then
836
+ self.lex_state = :expr_beg
837
+ self.yacc_value = "||"
838
+ return :tOROP
839
+ elsif src.scan(/\|\=/) then
840
+ self.lex_state = :expr_beg
841
+ self.yacc_value = "|"
842
+ return :tOP_ASGN
843
+ elsif src.scan(/\|/) then
844
+ self.fix_arg_lex_state
845
+ self.yacc_value = "|"
846
+ return :tPIPE
847
+ end
848
+ elsif src.scan(/\{/) then
849
+ result = if lex_state.is_argument || lex_state == :expr_end then
850
+ :tLCURLY # block (primary)
851
+ elsif lex_state == :expr_endarg then
852
+ :tLBRACE_ARG # block (expr)
853
+ else
854
+ :tLBRACE # hash
855
+ end
856
+
857
+ self.expr_beg_push "{"
858
+
859
+ return result
860
+ elsif src.scan(/[+-]/) then
861
+ sign = src.matched
862
+ utype, type = if sign == "+" then
863
+ [:tUPLUS, :tPLUS]
864
+ else
865
+ [:tUMINUS, :tMINUS]
866
+ end
867
+
868
+ if lex_state == :expr_fname || lex_state == :expr_dot then
869
+ self.lex_state = :expr_arg
870
+ if src.scan(/@/) then
871
+ self.yacc_value = "#{sign}@"
872
+ return utype
873
+ else
874
+ self.yacc_value = sign
875
+ return type
876
+ end
877
+ end
878
+
879
+ if src.scan(/\=/) then
880
+ self.lex_state = :expr_beg
881
+ self.yacc_value = sign
882
+ return :tOP_ASGN
883
+ end
884
+
885
+ if (lex_state == :expr_beg || lex_state == :expr_mid ||
886
+ (lex_state.is_argument && space_seen && !src.check(/\s/))) then
887
+ if lex_state.is_argument then
888
+ arg_ambiguous
889
+ end
890
+
891
+ self.lex_state = :expr_beg
892
+ self.yacc_value = sign
893
+
894
+ if src.check(/\d/) then
895
+ if utype == :tUPLUS then
896
+ return self.parse_number
897
+ else
898
+ return :tUMINUS_NUM
899
+ end
900
+ end
901
+
902
+ return utype
903
+ end
904
+
905
+ self.lex_state = :expr_beg
906
+ self.yacc_value = sign
907
+ return type
908
+ elsif src.check(/\*/) then
909
+ if src.scan(/\*\*=/) then
910
+ self.lex_state = :expr_beg
911
+ self.yacc_value = "**"
912
+ return :tOP_ASGN
913
+ elsif src.scan(/\*\*/) then
914
+ self.yacc_value = "**"
915
+ self.fix_arg_lex_state
916
+ return :tPOW
917
+ elsif src.scan(/\*\=/) then
918
+ self.lex_state = :expr_beg
919
+ self.yacc_value = "*"
920
+ return :tOP_ASGN
921
+ elsif src.scan(/\*/) then
922
+ result = if lex_state.is_argument && space_seen && src.check(/\S/) then
923
+ warning("`*' interpreted as argument prefix")
924
+ :tSTAR
925
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
926
+ :tSTAR
927
+ else
928
+ :tSTAR2
929
+ end
930
+ self.yacc_value = "*"
931
+ self.fix_arg_lex_state
932
+
933
+ return result
934
+ end
935
+ elsif src.check(/\!/) then
936
+ if src.scan(/\!\=/) then
937
+ self.lex_state = :expr_beg
938
+ self.yacc_value = "!="
939
+ return :tNEQ
940
+ elsif src.scan(/\!~/) then
941
+ self.lex_state = :expr_beg
942
+ self.yacc_value = "!~"
943
+ return :tNMATCH
944
+ elsif src.scan(/\!/) then
945
+ self.lex_state = :expr_beg
946
+ self.yacc_value = "!"
947
+ return :tBANG
948
+ end
949
+ elsif src.check(/\</) then
950
+ if src.scan(/\<\=\>/) then
951
+ self.fix_arg_lex_state
952
+ self.yacc_value = "<=>"
953
+ return :tCMP
954
+ elsif src.scan(/\<\=/) then
955
+ self.fix_arg_lex_state
956
+ self.yacc_value = "<="
957
+ return :tLEQ
958
+ elsif src.scan(/\<\<\=/) then
959
+ self.fix_arg_lex_state
960
+ self.lex_state = :expr_beg
961
+ self.yacc_value = "\<\<"
962
+ return :tOP_ASGN
963
+ elsif src.scan(/\<\</) then
964
+ if (! [:expr_end, :expr_dot,
965
+ :expr_endarg, :expr_class].include?(lex_state) &&
966
+ (!lex_state.is_argument || space_seen)) then
967
+ tok = self.heredoc_identifier
968
+ if tok then
969
+ return tok
970
+ end
971
+ end
972
+
973
+ self.fix_arg_lex_state
974
+ self.yacc_value = "\<\<"
975
+ return :tLSHFT
976
+ elsif src.scan(/\</) then
977
+ self.fix_arg_lex_state
978
+ self.yacc_value = "<"
979
+ return :tLT
980
+ end
981
+ elsif src.check(/\>/) then
982
+ if src.scan(/\>\=/) then
983
+ self.fix_arg_lex_state
984
+ self.yacc_value = ">="
985
+ return :tGEQ
986
+ elsif src.scan(/\>\>=/) then
987
+ self.fix_arg_lex_state
988
+ self.lex_state = :expr_beg
989
+ self.yacc_value = ">>"
990
+ return :tOP_ASGN
991
+ elsif src.scan(/\>\>/) then
992
+ self.fix_arg_lex_state
993
+ self.yacc_value = ">>"
994
+ return :tRSHFT
995
+ elsif src.scan(/\>/) then
996
+ self.fix_arg_lex_state
997
+ self.yacc_value = ">"
998
+ return :tGT
999
+ end
1000
+ elsif src.scan(/\`/) then
1001
+ self.yacc_value = "`"
1002
+ case lex_state
1003
+ when :expr_fname then
1004
+ self.lex_state = :expr_end
1005
+ return :tBACK_REF2
1006
+ when :expr_dot then
1007
+ self.lex_state = if command_state then
1008
+ :expr_cmdarg
1009
+ else
1010
+ :expr_arg
1011
+ end
1012
+ return :tBACK_REF2
1013
+ end
1014
+ self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
1015
+ return :tXSTRING_BEG
1016
+ elsif src.scan(/\?/) then
1017
+ if lex_state == :expr_end || lex_state == :expr_endarg then
1018
+ self.lex_state = :expr_beg
1019
+ self.yacc_value = "?"
1020
+ return :tEH
1021
+ end
1022
+
1023
+ if src.eos? then
1024
+ rb_compile_error "incomplete character syntax"
1025
+ end
1026
+
1027
+ if src.check(/\s|\v/) then
1028
+ unless lex_state.is_argument then
1029
+ c2 = { " " => 's',
1030
+ "\n" => 'n',
1031
+ "\t" => 't',
1032
+ "\v" => 'v',
1033
+ "\r" => 'r',
1034
+ "\f" => 'f' }[src.matched]
1035
+
1036
+ if c2 then
1037
+ warning("invalid character syntax; use ?\\" + c2)
1038
+ end
1039
+ end
1040
+
1041
+ # ternary
1042
+ self.lex_state = :expr_beg
1043
+ self.yacc_value = "?"
1044
+ return :tEH
1045
+ elsif src.check(/\w(?=\w)/) then # ternary, also
1046
+ self.lex_state = :expr_beg
1047
+ self.yacc_value = "?"
1048
+ return :tEH
1049
+ end
1050
+
1051
+ c = if src.scan(/\\/) then
1052
+ self.read_escape
1053
+ else
1054
+ src.getch
1055
+ end
1056
+ self.lex_state = :expr_end
1057
+ self.yacc_value = c[0].ord & 0xff
1058
+ return :tINTEGER
1059
+ elsif src.check(/\&/) then
1060
+ if src.scan(/\&\&\=/) then
1061
+ self.yacc_value = "&&"
1062
+ self.lex_state = :expr_beg
1063
+ return :tOP_ASGN
1064
+ elsif src.scan(/\&\&/) then
1065
+ self.lex_state = :expr_beg
1066
+ self.yacc_value = "&&"
1067
+ return :tANDOP
1068
+ elsif src.scan(/\&\=/) then
1069
+ self.yacc_value = "&"
1070
+ self.lex_state = :expr_beg
1071
+ return :tOP_ASGN
1072
+ elsif src.scan(/&/) then
1073
+ result = if lex_state.is_argument && space_seen &&
1074
+ !src.check(/\s/) then
1075
+ warning("`&' interpreted as argument prefix")
1076
+ :tAMPER
1077
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
1078
+ :tAMPER
1079
+ else
1080
+ :tAMPER2
1081
+ end
1082
+
1083
+ self.fix_arg_lex_state
1084
+ self.yacc_value = "&"
1085
+ return result
1086
+ end
1087
+ elsif src.scan(/\//) then
1088
+ if lex_state == :expr_beg || lex_state == :expr_mid then
1089
+ self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
1090
+ self.yacc_value = "/"
1091
+ return :tREGEXP_BEG
1092
+ end
1093
+
1094
+ if src.scan(/\=/) then
1095
+ self.yacc_value = "/"
1096
+ self.lex_state = :expr_beg
1097
+ return :tOP_ASGN
1098
+ end
1099
+
1100
+ if lex_state.is_argument && space_seen then
1101
+ unless src.scan(/\s/) then
1102
+ arg_ambiguous
1103
+ self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
1104
+ self.yacc_value = "/"
1105
+ return :tREGEXP_BEG
1106
+ end
1107
+ end
1108
+
1109
+ self.fix_arg_lex_state
1110
+ self.yacc_value = "/"
1111
+
1112
+ return :tDIVIDE
1113
+ elsif src.scan(/\^=/) then
1114
+ self.lex_state = :expr_beg
1115
+ self.yacc_value = "^"
1116
+ return :tOP_ASGN
1117
+ elsif src.scan(/\^/) then
1118
+ self.fix_arg_lex_state
1119
+ self.yacc_value = "^"
1120
+ return :tCARET
1121
+ elsif src.scan(/\;/) then
1122
+ self.command_start = true
1123
+ self.lex_state = :expr_beg
1124
+ self.yacc_value = ";"
1125
+ return :tSEMI
1126
+ elsif src.scan(/\~/) then
1127
+ if lex_state == :expr_fname || lex_state == :expr_dot then
1128
+ src.scan(/@/)
1129
+ end
1130
+
1131
+ self.fix_arg_lex_state
1132
+ self.yacc_value = "~"
1133
+
1134
+ return :tTILDE
1135
+ elsif src.scan(/\\/) then
1136
+ if src.scan(/\n/) then
1137
+ self.lineno = nil
1138
+ space_seen = true
1139
+ next
1140
+ end
1141
+ rb_compile_error "bare backslash only allowed before newline"
1142
+ elsif src.scan(/\%/) then
1143
+ if lex_state == :expr_beg || lex_state == :expr_mid then
1144
+ return parse_quote
1145
+ end
1146
+
1147
+ if src.scan(/\=/) then
1148
+ self.lex_state = :expr_beg
1149
+ self.yacc_value = "%"
1150
+ return :tOP_ASGN
1151
+ end
1152
+
1153
+ if lex_state.is_argument && space_seen && ! src.check(/\s/) then
1154
+ return parse_quote
1155
+ end
1156
+
1157
+ self.fix_arg_lex_state
1158
+ self.yacc_value = "%"
1159
+
1160
+ return :tPERCENT
1161
+ elsif src.check(/\$/) then
1162
+ if src.scan(/(\$_)(\w+)/) then
1163
+ self.lex_state = :expr_end
1164
+ self.token = src.matched
1165
+ return process_token(command_state)
1166
+ elsif src.scan(/\$_/) then
1167
+ self.lex_state = :expr_end
1168
+ self.token = src.matched
1169
+ self.yacc_value = src.matched
1170
+ return :tGVAR
1171
+ elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
1172
+ self.lex_state = :expr_end
1173
+ self.yacc_value = src.matched
1174
+ return :tGVAR
1175
+ elsif src.scan(/\$([\&\`\'\+])/) then
1176
+ self.lex_state = :expr_end
1177
+ # Explicit reference to these vars as symbols...
1178
+ if last_state == :expr_fname then
1179
+ self.yacc_value = src.matched
1180
+ return :tGVAR
1181
+ else
1182
+ self.yacc_value = src[1].to_sym
1183
+ return :tBACK_REF
1184
+ end
1185
+ elsif src.scan(/\$([1-9]\d*)/) then
1186
+ self.lex_state = :expr_end
1187
+ if last_state == :expr_fname then
1188
+ self.yacc_value = src.matched
1189
+ return :tGVAR
1190
+ else
1191
+ self.yacc_value = src[1].to_i
1192
+ return :tNTH_REF
1193
+ end
1194
+ elsif src.scan(/\$0/) then
1195
+ self.lex_state = :expr_end
1196
+ self.token = src.matched
1197
+ return process_token(command_state)
1198
+ elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
1199
+ self.lex_state = :expr_end
1200
+ self.yacc_value = "$"
1201
+ return "$"
1202
+ elsif src.scan(/\$\w+/)
1203
+ self.lex_state = :expr_end
1204
+ self.token = src.matched
1205
+ return process_token(command_state)
1206
+ end
1207
+ elsif src.check(/\_/) then
1208
+ if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
1209
+ self.lineno = nil
1210
+ return RubyLexer::EOF
1211
+ elsif src.scan(/\_\w*/) then
1212
+ self.token = src.matched
1213
+ return process_token(command_state)
1214
+ end
1215
+ end
1216
+ end # END OF CASE
1217
+
1218
+ if src.scan(/\004|\032|\000/) || src.eos? then # ^D, ^Z, EOF
1219
+ return RubyLexer::EOF
1220
+ else # alpha check
1221
+ if src.scan(/\W/) then
1222
+ rb_compile_error "Invalid char #{src.matched.inspect} in expression"
1223
+ end
1224
+ end
1225
+
1226
+ self.token = src.matched if self.src.scan(/\w+/)
1227
+
1228
+ return process_token(command_state)
1229
+ end
1230
+ end
1231
+
1232
# Classifies the text currently held in +token+ and returns its token type,
# updating +lex_state+ and +yacc_value+ along the way.
#
# A trailing "!" or "?" (not followed by "=") is first absorbed into a token
# that starts with a word character. The token is then typed as:
#
# * :tGVAR, :tCVAR or :tIVAR when it starts with "$", "@@" or "@"
#   (lex_state becomes :expr_end);
# * :tFID when it ends in "!" or "?";
# * :tIDENTIFIER, with the "=" appended to the token, when lex_state is
#   :expr_fname and a setter-style "=" follows;
# * :tCONSTANT when it starts with an uppercase letter, else :tIDENTIFIER.
#
# Outside :expr_dot, a token recognised by Keyword.keyword returns early
# with one of the keyword's own token types (or a :kDO variant) instead.
#
# command_state:: when truthy, a non-keyword token seen in one of the
#                 beg/mid/dot/arg/cmdarg states moves the lexer to
#                 :expr_cmdarg rather than :expr_arg.
#
# Returns the token type.
def process_token(command_state)
  if token =~ /^\w/ && src.scan(/[\!\?](?!=)/)
    token << src.matched
  end

  # Remember the state on entry; the local-variable check at the bottom
  # must look at it, not at whatever state the branches below select.
  prior_state = lex_state

  kind = case token
         when /^\$/ then :tGVAR
         when /^@@/ then :tCVAR
         when /^@/  then :tIVAR
         end

  if kind
    self.lex_state = :expr_end
  else
    kind =
      if token =~ /[!?]$/
        :tFID
      elsif lex_state == :expr_fname && src.scan(/=(?:(?![~>=])|(?==>))/)
        # ident=, not =~ => == or followed by =>
        # TODO test lexing of a=>b vs a==>b
        token << src.matched
        :tIDENTIFIER
      elsif token =~ /^[A-Z]/
        :tCONSTANT
      else
        :tIDENTIFIER
      end

    # Reserved words are not looked up directly after a dot.
    kw = Keyword.keyword(token) unless lex_state == :expr_dot

    if kw
      state_before_kw = lex_state
      self.lex_state  = kw.state
      self.yacc_value = token

      if kw.id0 == :kDO
        self.command_start = true
        return :kDO_COND if cond.is_in_state
        if (cmdarg.is_in_state && state_before_kw != :expr_cmdarg) ||
           state_before_kw == :expr_endarg
          return :kDO_BLOCK
        end
        return :kDO
      end

      return kw.id0 if state_before_kw == :expr_beg

      # id1 differs from id0 for keywords that have a second token type;
      # in that case the lexer goes back to :expr_beg.
      self.lex_state = :expr_beg unless kw.id0 == kw.id1
      return kw.id1
    end

    self.lex_state =
      case lex_state
      when :expr_beg, :expr_mid, :expr_dot, :expr_arg, :expr_cmdarg
        command_state ? :expr_cmdarg : :expr_arg
      else
        :expr_end
      end
  end

  self.yacc_value = token

  # A name the parser environment records as :lvar ends the expression,
  # unless it was reached through a dot.
  if prior_state != :expr_dot && parser.env[token.to_sym] == :lvar
    self.lex_state = :expr_end
  end

  kind
end
1313
+
1314
# Produces the next token while a string terminator (+lex_strterm+) is
# active.
#
# The work is delegated to +heredoc+ when the first element of lex_strterm
# is :heredoc, and to +parse_string+ otherwise. When the token returned is
# :tSTRING_END or :tREGEXP_END, lineno and lex_strterm are reset to nil and
# lex_state becomes :expr_end.
#
# Returns the token produced by the delegate.
def yylex_string
  kind = lex_strterm[0] == :heredoc ? heredoc(lex_strterm) : parse_string(lex_strterm)

  case kind
  when :tSTRING_END, :tREGEXP_END
    self.lineno      = nil
    self.lex_strterm = nil
    self.lex_state   = :expr_end
  end

  kind
end
1329
+ end