brakeman 0.7.2 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,70 @@
1
+ require 'checks/base_check'
2
+
3
#Check for bypassing mass assignment protection
#with :without_protection => true
#
#Only for Rails 3.1: the check is skipped when mass assignment is
#disabled or the Rails version is between 0.0.0 and 3.0.99.
class CheckWithoutProtection < BaseCheck
  Checks.add self

  #Collect the models descending from ActiveRecord::Base, look for
  #calls on them which accept an attribute hash, and process each hit.
  def run_check
    return if mass_assign_disabled?(tracker) || version_between?("0.0.0", "3.0.99")

    models = []
    tracker.models.each do |name, model|
      models << name if parent?(tracker, model, :"ActiveRecord::Base")
    end

    return if models.empty?

    @results = Set.new

    targets = [:new,
               :attributes=,
               :update_attribute,
               :update_attributes,
               :update_attributes!,
               :create,
               :create!]

    tracker.find_call(models, targets).each do |result|
      process result
    end
  end

  #Warn when the last argument of the matched call is a hash
  #containing :without_protection => true.
  #
  #The call is taken from the last element of +res+. Each call is
  #reported at most once (tracked in @results). Confidence is high
  #when the arguments include user input, medium otherwise.
  #NOTE(review): presumably reached through +process+ dispatching on
  #the result node type - confirm in BaseCheck.
  def process_result res
    call = res[-1]
    args = call[3]
    last_arg = args[-1]

    return res unless hash?(last_arg) && !@results.include?(call)

    hash_iterate(last_arg) do |key, value|
      next unless symbol?(key) && key[1] == :without_protection && value[0] == :true

      @results << call

      confidence = include_user_input?(args) ? CONFIDENCE[:high] : CONFIDENCE[:med]

      warn :result => res,
        :warning_type => "Mass Assignment",
        :message => "Unprotected mass assignment",
        :line => call.line,
        :code => call,
        :confidence => confidence

      #One warning per call is enough
      break
    end

    res
  end
end
@@ -59,7 +59,7 @@ class ControllerProcessor < BaseProcessor
59
59
 
60
60
  #Methods called inside class definition
61
61
  #like attr_* and other settings
62
- if @current_method.nil? and target.nil?
62
+ if @current_method.nil? and target.nil? and @controller
63
63
  if args.length == 1 #actually, empty
64
64
  case method
65
65
  when :private, :protected, :public
@@ -90,9 +90,13 @@ class ControllerProcessor < BaseProcessor
90
90
  #layout :false or layout nil
91
91
  @controller[:layout] = false
92
92
  end
93
+ else
94
+ @controller[:options][method] ||= []
95
+ @controller[:options][method] << exp
93
96
  end
94
97
  end
95
- ignore
98
+
99
+ exp
96
100
  elsif target == nil and method == :render
97
101
  make_render exp
98
102
  elsif exp == FORMAT_HTML and context[1] != :iter
@@ -71,7 +71,7 @@ class RoutesProcessor < BaseProcessor
71
71
  args = exp[3][1..-1]
72
72
 
73
73
  hash_iterate args[0] do |k, v|
74
- if symbol? k and k[1] == :to
74
+ if symbol? k and k[1] == :to and string? v[1]
75
75
  controller, action = extract_action v[1]
76
76
 
77
77
  self.current_controller = controller
@@ -368,6 +368,16 @@ class Report
368
368
  raise "PDF output is not yet supported."
369
369
  end
370
370
 
371
#Rails version to show in report headers: the version recorded in
#the tracker config if there is one, otherwise "3.x" when
#OPTIONS[:rails3] is set, otherwise "Unknown".
def rails_version
  version = tracker.config[:rails_version]
  return version if version

  OPTIONS[:rails3] ? "3.x" : "Unknown"
end
380
+
371
381
  #Return header for HTML output. Uses CSS from OPTIONS[:html_style]
372
382
  def html_header
373
383
  if File.exist? OPTIONS[:html_style]
@@ -398,11 +408,13 @@ class Report
398
408
  <table>
399
409
  <tr>
400
410
  <th>Application Path</th>
411
+ <th>Rails Version</th>
401
412
  <th>Report Generation Time</th>
402
413
  <th>Checks Performed</th>
403
414
  </tr>
404
415
  <tr>
405
416
  <td>#{File.expand_path OPTIONS[:app_path]}</td>
417
+ <td>#{rails_version}</td>
406
418
  <td>#{Time.now}</td>
407
419
  <td>#{checks.checks_run.sort.join(", ")}</td>
408
420
  </tr>
@@ -412,13 +424,13 @@ class Report
412
424
 
413
425
  #Generate header for text output
414
426
  def text_header
415
- "\n+BRAKEMAN REPORT+\n\nApplication path: #{File.expand_path OPTIONS[:app_path]}\nGenerated at #{Time.now}\nChecks run: #{checks.checks_run.sort.join(", ")}\n"
427
+ "\n+BRAKEMAN REPORT+\n\nApplication path: #{File.expand_path OPTIONS[:app_path]}\nRails version: #{rails_version}\nGenerated at #{Time.now}\nChecks run: #{checks.checks_run.sort.join(", ")}\n"
416
428
  end
417
429
 
418
430
  #Generate header for CSV output
419
431
  def csv_header
420
- header = Ruport::Data::Table(["Application Path", "Report Generation Time", "Checks Performed"])
421
- header << [File.expand_path(OPTIONS[:app_path]), Time.now.to_s, checks.checks_run.sort.join(", ")]
432
+ header = Ruport::Data::Table(["Application Path", "Report Generation Time", "Checks Performed", "Rails Version"])
433
+ header << [File.expand_path(OPTIONS[:app_path]), Time.now.to_s, checks.checks_run.sort.join(", "), rails_version]
422
434
  "BRAKEMAN REPORT\n\n" << header.to_csv
423
435
  end
424
436
 
@@ -0,0 +1,1320 @@
1
+ class RubyLexer
2
attr_accessor :command_start, :cmdarg, :cond, :nest

# Matches a backslash escape sequence: octal, hex, meta (M-),
# control (C- / c) or any other single escaped character.
ESC_RE = /\\([0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|[^0-7xMCc])/

# Additional context surrounding tokens that both the lexer and
# grammar use. (The writer is defined separately as lex_state=.)
attr_reader :lex_state

attr_accessor :lex_strterm

attr_accessor :parser # HACK for very end of lexer... *sigh*

# Stream of data that yylex examines.
attr_reader :src

# Last token read via yylex.
attr_accessor :token

attr_accessor :string_buffer

# Value of last token which had a value associated with it.
attr_accessor :yacc_value

# What handles warnings
attr_accessor :warnings

# Token returned once the input is exhausted.
EOF = :eof_haha!

# ruby constants for strings (should this be moved somewhere else?)
# Each STR_FUNC_* value is a single bit so that they can be OR-ed.
STR_FUNC_BORING = 0x00
STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
STR_FUNC_EXPAND = 0x02
STR_FUNC_REGEXP = 0x04
STR_FUNC_AWORDS = 0x08
STR_FUNC_SYMBOL = 0x10
STR_FUNC_INDENT = 0x20 # <<-HEREDOC

# Flag combinations for each kind of string-like literal.
STR_SQUOTE = STR_FUNC_BORING
STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
STR_SSYM   = STR_FUNC_SYMBOL
STR_DSYM   = STR_FUNC_SYMBOL | STR_FUNC_EXPAND

# Punctuation whose token type can be looked up directly from the
# matched text.
TOKENS = {
  "!"   => :tBANG,
  "!="  => :tNEQ,
  "!~"  => :tNMATCH,
  ","   => :tCOMMA,
  ".."  => :tDOT2,
  "..." => :tDOT3,
  "="   => :tEQL,
  "=="  => :tEQ,
  "===" => :tEQQ,
  "=>"  => :tASSOC,
  "=~"  => :tMATCH,
}
62
+
63
# How the parser advances to the next token: reads one token with
# yylex and stores it in +token+.
#
# Raises a RuntimeError when yylex returns nil (or false).
#
# @return true if not at end of file (EOF).

def advance
  current = self.token = yylex

  raise "yylex returned nil" unless current

  !(RubyLexer::EOF == current)
end
75
+
76
# Reports, through +warning+, that the first argument of a call is
# ambiguous.
def arg_ambiguous
  warning "Ambiguous first argument. make sure."
end
79
+
80
# Returns every comment collected so far joined into one string and
# empties the collection, so each comment is handed out only once.
def comments
  collected = @comments.join
  @comments.clear
  collected
end
85
+
86
# Pushes false onto both the cond and cmdarg stacks, switches the
# lexer to :expr_beg and records +val+ as the token value.
#
# Returns +val+.
def expr_beg_push val
  [cond, cmdarg].each { |stack| stack.push false }
  self.lex_state = :expr_beg
  self.yacc_value = val
end
92
+
93
# Moves the lexer to :expr_arg when it is currently in :expr_fname or
# :expr_dot, and to :expr_beg in every other state.
#
# Returns the new state.
def fix_arg_lex_state
  self.lex_state = [:expr_fname, :expr_dot].include?(lex_state) ? :expr_arg : :expr_beg
end
100
+
101
# Lexes the next piece of a here document.
#
# +here+ is the lex_strterm array built by heredoc_identifier:
# [:heredoc, terminator, string flags, rest of the opening line].
#
# Returns :tSTRING_END when the terminator is found at the start of a
# line, :tSTRING_DVAR / :tSTRING_DBEG when an interpolation starts, and
# :tSTRING_CONTENT (with the text in yacc_value) otherwise. Raises via
# rb_compile_error when the input ends before the terminator.
def heredoc here # 63 lines
  _, eos, func, last_line = here

  indent  = (func & STR_FUNC_INDENT) != 0
  expand  = (func & STR_FUNC_EXPAND) != 0

  # The terminator is literal text; a quoted identifier may contain
  # regexp metacharacters, so it must be escaped before interpolation.
  eos_lit = Regexp.escape eos
  eos_re  = indent ? /[ \t]*#{eos_lit}(\r?\n|\z)/ : /#{eos_lit}(\r?\n|\z)/
  err_msg = "can't match #{eos_re.inspect} anywhere in "

  rb_compile_error err_msg if src.eos?

  if src.beginning_of_line? && src.scan(eos_re) then
    src.unread_many last_line # TODO: figure out how to remove this
    self.yacc_value = eos
    return :tSTRING_END
  end

  self.string_buffer = []

  if expand then
    case
    when src.scan(/#[$@]/) then
      src.pos -= 1 # FIX omg stupid
      self.yacc_value = src.matched
      return :tSTRING_DVAR
    when src.scan(/#[{]/) then
      self.yacc_value = src.matched
      return :tSTRING_DBEG
    when src.scan(/#/) then
      string_buffer << '#'
    end

    until src.scan(eos_re) do
      c = tokadd_string func, "\n", nil

      rb_compile_error err_msg if c == RubyLexer::EOF

      if c != "\n" then
        # Stopped before the end of the line (e.g. at an
        # interpolation): hand back what has been read so far.
        self.yacc_value = string_buffer.join.delete("\r")
        return :tSTRING_CONTENT
      else
        string_buffer << src.scan(/\n/)
      end

      rb_compile_error err_msg if src.eos?
    end

    # tack on a NL after the heredoc token - FIX NL should not be needed
    src.unread_many(eos + "\n") # TODO: remove this... stupid stupid stupid
  else
    # No interpolation: copy whole lines until the terminator is next.
    until src.check(eos_re) do
      string_buffer << src.scan(/.*(\n|\z)/)
      rb_compile_error err_msg if src.eos?
    end
  end

  self.lex_strterm = [:heredoc, eos, func, last_line]
  self.yacc_value = string_buffer.join.delete("\r")

  return :tSTRING_CONTENT
end
165
+
166
# Reads the identifier that follows "<<" and, when one is found, sets
# up lex_strterm for the heredoc body.
#
# Three forms are recognised: a quoted identifier ('..', ".." or `..`),
# an unterminated quoted identifier (compile error) and a bare word,
# which is treated as double-quoted. A leading "-" adds
# STR_FUNC_INDENT to the string flags.
#
# The remainder of the opening line is cut out of the source (replaced
# by a single newline) and kept in lex_strterm.
#
# Returns :tXSTRING_BEG for a backtick identifier, :tSTRING_BEG for the
# other forms, or nil when no identifier follows.
def heredoc_identifier # 51 lines
  quote_char, func = nil, STR_FUNC_BORING
  self.string_buffer = []

  if src.scan(/(-?)(['"`])(.*?)\2/) then
    quote_char = src[2]
    func |= STR_FUNC_INDENT unless src[1].empty?
    func |= case quote_char
            when "\'" then STR_SQUOTE
            when '"'  then STR_DQUOTE
            else           STR_XQUOTE
            end
    string_buffer << src[3]
  elsif src.scan(/-?(['"`])(?!\1*\Z)/) then
    rb_compile_error "unterminated here document identifier"
  elsif src.scan(/(-?)(\w+)/) then
    quote_char = '"'
    func |= STR_DQUOTE
    func |= STR_FUNC_INDENT unless src[1].empty?
    string_buffer << src[2]
  else
    return nil
  end

  if src.check(/.*\n/) then
    # TODO: think about storing off the char range instead
    start, length = src.pos, src.matched_size
    line = src.string[start, length]
    src.string[start, length] = "\n"
    src.extra_lines_added += 1
    src.pos += 1
  else
    line = nil
  end

  self.lex_strterm = [:heredoc, string_buffer.join, func, line]

  if quote_char == '`' then
    self.yacc_value = "`"
    :tXSTRING_BEG
  else
    self.yacc_value = "\""
    :tSTRING_BEG
  end
end
218
+
219
# Builds a lexer with an empty comment list, a nesting depth of zero
# and fresh :cond / :cmdarg state stacks, then resets the remaining
# state via +reset+.
def initialize
  @comments   = []
  self.nest   = 0
  self.cond   = RubyParser::StackState.new(:cond)
  self.cmdarg = RubyParser::StackState.new(:cmdarg)

  reset
end
227
+
228
# Converts the text most recently matched by the scanner to an
# integer in the given +base+ and stores it in yacc_value.
#
# A doubled underscore in the matched text is a compile error.
#
# Returns :tINTEGER.
def int_with_base base
  digits = src.matched
  rb_compile_error "Invalid numeric format" if digits =~ /__/
  self.yacc_value = digits.to_i(base)
  :tINTEGER
end
233
+
234
# Sets the lexer state. Only symbols are accepted; anything else
# raises a RuntimeError.
def lex_state= o
  case o
  when Symbol then @lex_state = o
  else raise "wtf\?"
  end
end
238
+
239
+ attr_writer :lineno
240
# Current line number. Once a value is set it is reused; otherwise it
# is read from the scanner and remembered.
def lineno
  @lineno = src.lineno unless @lineno
  @lineno
end
243
+
244
##
# Parse a number from the input stream, starting at the scanner's
# current position, and leave the lexer in :expr_end.
#
# The value is stored in yacc_value. Malformed numbers raise via
# rb_compile_error.
#
# @return :tINTEGER or :tFLOAT.

def parse_number
  self.lex_state = :expr_end

  if src.scan(/[+-]?0[xbd]\b/) then
    # Base prefix with no digits after it
    rb_compile_error "Invalid numeric format"
  elsif src.scan(/[+-]?0x[a-f0-9_]+/i) then
    int_with_base(16)
  elsif src.scan(/[+-]?0b[01_]+/) then
    int_with_base(2)
  elsif src.scan(/[+-]?0d[0-9_]+/) then
    int_with_base(10)
  elsif src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
    rb_compile_error "Illegal octal digit."
  elsif src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
    int_with_base(8)
  elsif src.scan(/[+-]?[\d_]+_(e|\.)/) then
    rb_compile_error "Trailing '_' in number."
  elsif src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
    float_text = src.matched
    rb_compile_error "Invalid numeric format" if float_text =~ /__/
    self.yacc_value = float_text.to_f
    :tFLOAT
  elsif src.scan(/[+-]?0\b/) || src.scan(/[+-]?[\d_]+\b/) then
    int_with_base(10)
  else
    rb_compile_error "Bad number format"
  end
end
283
+
284
# Lexes the opening of a %-literal (the "%" itself has already been
# consumed): reads the optional type letter and the delimiter, then
# sets lex_strterm so that the body is lexed by the string routines.
#
# Bracket-like delimiters get their matching closer; for any other
# delimiter the closer is the delimiter itself and the opener is
# recorded as "\0".
#
# Returns the token type for the literal's opening.
def parse_quote # 58 lines
  if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
    rb_compile_error "unknown type of %string" if src.matched_size == 2
    kind = src.matched
    opener = src.getch
    short_hand = false
  else # Short-hand (e.g. %{, %., %!, etc)
    kind = 'Q'
    opener = src.getch
    short_hand = true
  end

  if src.eos? || kind == RubyLexer::EOF || opener == RubyLexer::EOF then
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
  closer = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[opener]
  closer, opener = opener, "\0" if closer.nil?

  self.yacc_value = "%#{kind}#{opener}"

  token_type, string_type =
    case kind
    when 'Q' then
      shown = short_hand ? closer : kind + opener
      self.yacc_value = "%#{shown}"
      [:tSTRING_BEG, STR_DQUOTE]
    when 'q' then
      [:tSTRING_BEG, STR_SQUOTE]
    when 'W' then
      src.scan(/\s*/)
      [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_AWORDS]
    when 'w' then
      src.scan(/\s*/)
      [:tAWORDS_BEG, STR_SQUOTE | STR_FUNC_AWORDS]
    when 'x' then
      [:tXSTRING_BEG, STR_XQUOTE]
    when 'r' then
      [:tREGEXP_BEG, STR_REGEXP]
    when 's' then
      self.lex_state = :expr_fname
      [:tSYMBEG, STR_SSYM]
    end

  if token_type.nil? then
    rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{kind}'."
  end

  self.lex_strterm = [:strterm, string_type, closer, opener]

  token_type
end
332
+
333
# Lexes the next piece of a string-like literal described by +quote+,
# a lex_strterm array: [:strterm, string flags, terminator, opener].
#
# Returns :tSTRING_END / :tREGEXP_END when the terminator is reached,
# :tSPACE between the words of a word list, :tSTRING_DVAR or
# :tSTRING_DBEG at the start of an interpolation, and :tSTRING_CONTENT
# (text in yacc_value) otherwise.
def parse_string(quote) # 65 lines
  _, func, term, paren = quote

  term_re = Regexp.escape term

  # The flags are nil once a word list has been closed (quote[1] is
  # cleared below), in which case only the end token is left to emit.
  unless func then
    self.lineno = nil
    return :tSTRING_END
  end

  awords = (func & STR_FUNC_AWORDS) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  expand = (func & STR_FUNC_EXPAND) != 0

  # Whitespace only separates elements inside a word list.
  space = awords && src.scan(/\s+/)

  if self.nest == 0 && src.scan(/#{term_re}/) then
    if awords then
      quote[1] = nil
      return :tSPACE
    elsif regexp then
      self.yacc_value = self.regx_options
      self.lineno = nil
      return :tREGEXP_END
    else
      self.yacc_value = term
      self.lineno = nil
      return :tSTRING_END
    end
  end

  return :tSPACE if space

  self.string_buffer = []

  if expand then
    if src.scan(/#(?=[$@])/) then
      return :tSTRING_DVAR
    elsif src.scan(/#[{]/) then
      return :tSTRING_DBEG
    elsif src.scan(/#/) then
      string_buffer << '#'
    end
  end

  if tokadd_string(func, term, paren) == RubyLexer::EOF then
    rb_compile_error "unterminated string meets end of file"
  end

  self.yacc_value = string_buffer.join

  :tSTRING_CONTENT
end
392
+
393
# Raises a SyntaxError whose message is +msg+ followed by the current
# line number and the rest of the line being scanned.
def rb_compile_error msg
  location = ". near line #{self.lineno}: #{src.rest[/^.*/].inspect}"
  raise SyntaxError, msg + location
end
397
+
398
# Reads one escape sequence from the scanner (the leading backslash
# has already been consumed) and returns the character it denotes.
#
# Handles the single-letter escapes, octal and hex constants, and the
# meta (M-) and control (C- / c) forms, including nested ones. An
# incomplete M/C/c/x/digit sequence or end of input is a compile
# error; any other character is returned as it is.
def read_escape # 51 lines
  if src.scan(/[\\ntrfvaebs]/) then
    # Built on every call so that callers may modify the result.
    return {
      '\\' => '\\',   # Backslash
      'n'  => "\n",   # newline
      't'  => "\t",   # horizontal tab
      'r'  => "\r",   # carriage-return
      'f'  => "\f",   # form-feed
      'v'  => "\13",  # vertical tab
      'a'  => "\007", # alarm(bell)
      'e'  => "\033", # escape
      'b'  => "\010", # backspace
      's'  => " ",    # space
    }[src.matched]
  end

  if src.scan(/[0-7]{1,3}/) then # octal constant
    src.matched.to_i(8).chr
  elsif src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
    src[1].to_i(16).chr
  elsif src.check(/M-\\[\\MCc]/) then
    src.scan(/M-\\/) # eat it
    ch = self.read_escape
    ch[0] = (ch[0].ord | 0x80).chr
    ch
  elsif src.scan(/M-(.)/) then
    ch = src[1]
    ch[0] = (ch[0].ord | 0x80).chr
    ch
  elsif src.check(/(C-|c)\\[\\MCc]/) then
    src.scan(/(C-|c)\\/) # eat it
    ch = self.read_escape
    ch[0] = (ch[0].ord & 0x9f).chr
    ch
  elsif src.scan(/C-\?|c\?/) then
    127.chr
  elsif src.scan(/(C-|c)(.)/) then
    ch = src[2]
    ch[0] = (ch[0].ord & 0x9f).chr
    ch
  elsif src.scan(/[McCx0-9]/) || src.eos? then
    rb_compile_error("Invalid escape character syntax")
  else
    src.getch
  end
end
450
+
451
# Reads the option letters that follow a regexp literal.
#
# Returns the recognised options (i, x, m, o, n, e, s, u) as a string;
# any other lowercase letter is a compile error.
def regx_options # 15 lines
  letters = src.scan(/[a-z]+/) ? src.matched.split(//) : []
  good, bad = letters.partition { |letter| letter =~ /^[ixmonesu]$/ }

  unless bad.empty? then
    plural = bad.size > 1 ? "s" : ""
    rb_compile_error("unknown regexp option%s - %s" % [plural, bad.join.inspect])
  end

  good.join
end
465
+
466
# Puts the lexer back in its starting state: no source, no lexer
# state, no pending string terminator, token or token value, and
# command_start set to true.
def reset
  @src = @lex_state = nil

  self.command_start = true
  self.lex_strterm   = nil
  self.token         = nil
  self.yacc_value    = nil
end
475
+
476
# Sets the text to lex, wrapping it in an RPStringScanner. Anything
# other than a String raises a RuntimeError.
def src= src
  case src
  when String then @src = RPStringScanner.new(src)
  else raise "bad src: #{src.inspect}"
  end
end
480
+
481
# Copies one escape sequence, backslash included, from the scanner to
# string_buffer without interpreting it. A backslash-newline pair is
# dropped; a meta/control prefix followed by another backslash is
# copied and the following sequence is handled recursively.
#
# Malformed sequences are compile errors.
def tokadd_escape term # 20 lines
  if src.scan(/\\\n/) then
    # just ignore
  elsif src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
    string_buffer << src.matched
  elsif src.scan(/\\([MC]-|c)(?=\\)/) then
    string_buffer << src.matched
    tokadd_escape term
  elsif src.scan(/\\([MC]-|c)(.)/) then
    string_buffer << src.matched
  elsif src.scan(/\\[McCx]/) then
    rb_compile_error "Invalid escape character syntax"
  elsif src.scan(/\\(.)/m) then
    string_buffer << src.matched
  else
    rb_compile_error "Invalid escape character syntax"
  end
end
500
+
501
# Appends string content to string_buffer until the terminator, the
# start of an interpolation, a word separator (word lists only) or the
# end of input is reached.
#
# +func+ is the STR_FUNC_* bit set for the literal, +term+ its
# terminator and +paren+ its opening delimiter (nil when there is
# none); nested opener/terminator pairs are tracked through +nest+.
#
# Returns the text of the last match, or RubyLexer::EOF when the input
# is exhausted.
def tokadd_string(func, term, paren) # 105 lines
  awords = (func & STR_FUNC_AWORDS) != 0
  escape = (func & STR_FUNC_ESCAPE) != 0
  expand = (func & STR_FUNC_EXPAND) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  symbol = (func & STR_FUNC_SYMBOL) != 0

  paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
  term_re  = Regexp.new(Regexp.escape(term))
  plain_re = nil # built the first time ordinary text has to be scanned

  until src.eos? do
    c = nil
    handled = true

    if self.nest == 0 && src.scan(term_re) then
      # Leave the terminator for the caller
      src.pos -= 1
      break
    elsif paren_re && src.scan(paren_re) then
      self.nest += 1
    elsif src.scan(term_re) then
      self.nest -= 1
    elsif awords && src.scan(/\s/) then
      src.pos -= 1
      break
    elsif expand && src.scan(/#(?=[\$\@\{])/) then
      src.pos -= 1
      break
    elsif expand && src.scan(/#(?!\n)/) then
      # do nothing
    elsif src.check(/\\/) then
      if awords && src.scan(/\\\n/) then
        string_buffer << "\n"
        next
      elsif awords && src.scan(/\\\s/) then
        c = ' '
      elsif expand && src.scan(/\\\n/) then
        next
      elsif regexp && src.check(/\\/) then
        self.tokadd_escape term
        next
      elsif expand && src.scan(/\\/) then
        c = self.read_escape
      elsif src.scan(/\\\n/) then
        # do nothing
      elsif src.scan(/\\\\/) then
        string_buffer << '\\' if escape
        c = '\\'
      elsif src.scan(/\\/) then
        unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
          string_buffer << "\\"
        end
      else
        handled = false
      end
    else
      handled = false
    end

    unless handled then
      plain_re ||= begin
        t = Regexp.escape term
        x = Regexp.escape(paren) if paren && paren != "\000"
        if awords then
          /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
        else
          /[^#{t}#{x}\#\0\\]+|./
        end
      end

      src.scan plain_re
      c = src.matched

      rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
    end # unless handled

    c ||= src.matched
    string_buffer << c
  end # until

  c ||= src.matched
  c = RubyLexer::EOF if src.eos?

  return c
end
586
+
587
# Translates the body of an escape sequence (the text after the
# backslash) into the character it stands for.
#
# Single-letter escapes and the delete forms come from a lookup table;
# octal, hex, meta and control forms are computed. An incomplete
# M/C/c/x/digit sequence is a compile error and anything else is
# returned unchanged.
def unescape s
  literal = {
    "a"    => "\007",
    "b"    => "\010",
    "e"    => "\033",
    "f"    => "\f",
    "n"    => "\n",
    "r"    => "\r",
    "s"    => " ",
    "t"    => "\t",
    "v"    => "\13",
    "\\"   => '\\',
    "\n"   => "",
    "C-\?" => 127.chr,
    "c\?"  => 127.chr,
  }[s]

  return literal if literal

  case s
  when /^[0-7]{1,3}/ then
    Regexp.last_match(0).to_i(8).chr
  when /^x([0-9a-fA-F]{1,2})/ then
    Regexp.last_match(1).to_i(16).chr
  when /^M-(.)/ then
    (Regexp.last_match(1)[0].ord | 0x80).chr
  when /^(C-|c)(.)/ then
    (Regexp.last_match(2)[0].ord & 0x9f).chr
  when /^[McCx0-9]/ then
    rb_compile_error("Invalid escape character syntax")
  else
    s
  end
end
622
+
623
# Hook for lexer warnings. The message +s+ is currently discarded.
def warning(s)
  # do nothing for now
  nil
end
626
+
627
+ ##
628
+ # Returns the next token. Also sets yy_val if needed.
629
+ #
630
+ # @return Description of the Returned Value
631
+
632
+ def yylex # 826 lines
633
+
634
+ c = ''
635
+ space_seen = false
636
+ command_state = false
637
+ src = self.src
638
+
639
+ self.token = nil
640
+ self.yacc_value = nil
641
+
642
+ return yylex_string if lex_strterm
643
+
644
+ command_state = self.command_start
645
+ self.command_start = false
646
+
647
+ last_state = lex_state
648
+
649
+ loop do # START OF CASE
650
+ if src.scan(/[\ \t\r\f\v]/) then # \s - \n + \v
651
+ space_seen = true
652
+ next
653
+ elsif src.check(/[^a-zA-Z]/) then
654
+ if src.scan(/\n|#/) then
655
+ self.lineno = nil
656
+ c = src.matched
657
+ if c == '#' then
658
+ src.pos -= 1
659
+
660
+ while src.scan(/\s*#.*(\n+|\z)/) do
661
+ @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
662
+ end
663
+
664
+ if src.eos? then
665
+ return RubyLexer::EOF
666
+ end
667
+ end
668
+
669
+ # Replace a string of newlines with a single one
670
+ src.scan(/\n+/)
671
+
672
+ if [:expr_beg, :expr_fname,
673
+ :expr_dot, :expr_class].include? lex_state then
674
+ next
675
+ end
676
+
677
+ self.command_start = true
678
+ self.lex_state = :expr_beg
679
+ return :tNL
680
+ elsif src.scan(/[\]\)\}]/) then
681
+ cond.lexpop
682
+ cmdarg.lexpop
683
+ self.lex_state = :expr_end
684
+ self.yacc_value = src.matched
685
+ result = {
686
+ ")" => :tRPAREN,
687
+ "]" => :tRBRACK,
688
+ "}" => :tRCURLY
689
+ }[src.matched]
690
+ return result
691
+ elsif src.scan(/\.\.\.?|,|![=~]?/) then
692
+ self.lex_state = :expr_beg
693
+ tok = self.yacc_value = src.matched
694
+ return TOKENS[tok]
695
+ elsif src.check(/\./) then
696
+ if src.scan(/\.\d/) then
697
+ rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
698
+ elsif src.scan(/\./) then
699
+ self.lex_state = :expr_dot
700
+ self.yacc_value = "."
701
+ return :tDOT
702
+ end
703
+ elsif src.scan(/\(/) then
704
+ result = :tLPAREN2
705
+ self.command_start = true
706
+
707
+ if lex_state == :expr_beg || lex_state == :expr_mid then
708
+ result = :tLPAREN
709
+ elsif space_seen then
710
+ if lex_state == :expr_cmdarg then
711
+ result = :tLPAREN_ARG
712
+ elsif lex_state == :expr_arg then
713
+ warning("don't put space before argument parentheses")
714
+ result = :tLPAREN2
715
+ end
716
+ end
717
+
718
+ self.expr_beg_push "("
719
+
720
+ return result
721
+ elsif src.check(/\=/) then
722
+ if src.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
723
+ self.fix_arg_lex_state
724
+ tok = self.yacc_value = src.matched
725
+ return TOKENS[tok]
726
+ elsif src.scan(/\=begin(?=\s)/) then
727
+ # @comments << '=' << src.matched
728
+ @comments << src.matched
729
+
730
+ unless src.scan(/.*?\n=end( |\t|\f)*[^(\n|\z)]*(\n|\z)/m) then
731
+ @comments.clear
732
+ rb_compile_error("embedded document meets end of file")
733
+ end
734
+
735
+ @comments << src.matched
736
+
737
+ next
738
+ else
739
+ raise "you shouldn't be able to get here"
740
+ end
741
+ elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/o) then
742
+ self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
743
+ self.lex_state = :expr_end
744
+ return :tSTRING
745
+ elsif src.scan(/\"/) then # FALLBACK
746
+ self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\0"] # TODO: question this
747
+ self.yacc_value = "\""
748
+ return :tSTRING_BEG
749
+ elsif src.scan(/\@\@?\w*/) then
750
+ self.token = src.matched
751
+
752
+ rb_compile_error "`#{token}` is not allowed as a variable name" if
753
+ token =~ /\@\d/
754
+
755
+ return process_token(command_state)
756
+ elsif src.scan(/\:\:/) then
757
+ if (lex_state == :expr_beg ||
758
+ lex_state == :expr_mid ||
759
+ lex_state == :expr_class ||
760
+ (lex_state.is_argument && space_seen)) then
761
+ self.lex_state = :expr_beg
762
+ self.yacc_value = "::"
763
+ return :tCOLON3
764
+ end
765
+
766
+ self.lex_state = :expr_dot
767
+ self.yacc_value = "::"
768
+ return :tCOLON2
769
+ elsif lex_state != :expr_end && lex_state != :expr_endarg && src.scan(/:([a-zA-Z_]\w*(?:[?!]|=(?!>))?)/) then
770
+ self.yacc_value = src[1]
771
+ self.lex_state = :expr_end
772
+ return :tSYMBOL
773
+ elsif src.scan(/\:/) then
774
+ # ?: / then / when
775
+ if (lex_state == :expr_end || lex_state == :expr_endarg||
776
+ src.check(/\s/)) then
777
+ self.lex_state = :expr_beg
778
+ self.yacc_value = ":"
779
+ return :tCOLON
780
+ end
781
+
782
+ case
783
+ when src.scan(/\'/) then
784
+ self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\0"]
785
+ when src.scan(/\"/) then
786
+ self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\0"]
787
+ end
788
+
789
+ self.lex_state = :expr_fname
790
+ self.yacc_value = ":"
791
+ return :tSYMBEG
792
+ elsif src.check(/[0-9]/) then
793
+ return parse_number
794
+ elsif src.scan(/\[/) then
795
+ result = src.matched
796
+
797
+ if lex_state == :expr_fname || lex_state == :expr_dot then
798
+ self.lex_state = :expr_arg
799
+ case
800
+ when src.scan(/\]\=/) then
801
+ self.yacc_value = "[]="
802
+ return :tASET
803
+ when src.scan(/\]/) then
804
+ self.yacc_value = "[]"
805
+ return :tAREF
806
+ else
807
+ rb_compile_error "unexpected '['"
808
+ end
809
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
810
+ result = :tLBRACK
811
+ elsif lex_state.is_argument && space_seen then
812
+ result = :tLBRACK
813
+ end
814
+
815
+ self.expr_beg_push "["
816
+
817
+ return result
818
+ elsif src.scan(/\'(\\.|[^\'])*\'/) then
819
+ self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
820
+ self.lex_state = :expr_end
821
+ return :tSTRING
822
+ elsif src.check(/\|/) then
823
+ if src.scan(/\|\|\=/) then
824
+ self.lex_state = :expr_beg
825
+ self.yacc_value = "||"
826
+ return :tOP_ASGN
827
+ elsif src.scan(/\|\|/) then
828
+ self.lex_state = :expr_beg
829
+ self.yacc_value = "||"
830
+ return :tOROP
831
+ elsif src.scan(/\|\=/) then
832
+ self.lex_state = :expr_beg
833
+ self.yacc_value = "|"
834
+ return :tOP_ASGN
835
+ elsif src.scan(/\|/) then
836
+ self.fix_arg_lex_state
837
+ self.yacc_value = "|"
838
+ return :tPIPE
839
+ end
840
+ elsif src.scan(/\{/) then
841
+ result = if lex_state.is_argument || lex_state == :expr_end then
842
+ :tLCURLY # block (primary)
843
+ elsif lex_state == :expr_endarg then
844
+ :tLBRACE_ARG # block (expr)
845
+ else
846
+ :tLBRACE # hash
847
+ end
848
+
849
+ self.expr_beg_push "{"
850
+ self.command_start = true unless result == :tLBRACE
851
+
852
+ return result
853
+ elsif src.scan(/[+-]/) then
854
+ sign = src.matched
855
+ utype, type = if sign == "+" then
856
+ [:tUPLUS, :tPLUS]
857
+ else
858
+ [:tUMINUS, :tMINUS]
859
+ end
860
+
861
+ if lex_state == :expr_fname || lex_state == :expr_dot then
862
+ self.lex_state = :expr_arg
863
+ if src.scan(/@/) then
864
+ self.yacc_value = "#{sign}@"
865
+ return utype
866
+ else
867
+ self.yacc_value = sign
868
+ return type
869
+ end
870
+ end
871
+
872
+ if src.scan(/\=/) then
873
+ self.lex_state = :expr_beg
874
+ self.yacc_value = sign
875
+ return :tOP_ASGN
876
+ end
877
+
878
+ if (lex_state == :expr_beg || lex_state == :expr_mid ||
879
+ (lex_state.is_argument && space_seen && !src.check(/\s/))) then
880
+ if lex_state.is_argument then
881
+ arg_ambiguous
882
+ end
883
+
884
+ self.lex_state = :expr_beg
885
+ self.yacc_value = sign
886
+
887
+ if src.check(/\d/) then
888
+ if utype == :tUPLUS then
889
+ return self.parse_number
890
+ else
891
+ return :tUMINUS_NUM
892
+ end
893
+ end
894
+
895
+ return utype
896
+ end
897
+
898
+ self.lex_state = :expr_beg
899
+ self.yacc_value = sign
900
+ return type
901
+ elsif src.check(/\*/) then
902
+ if src.scan(/\*\*=/) then
903
+ self.lex_state = :expr_beg
904
+ self.yacc_value = "**"
905
+ return :tOP_ASGN
906
+ elsif src.scan(/\*\*/) then
907
+ self.yacc_value = "**"
908
+ self.fix_arg_lex_state
909
+ return :tPOW
910
+ elsif src.scan(/\*\=/) then
911
+ self.lex_state = :expr_beg
912
+ self.yacc_value = "*"
913
+ return :tOP_ASGN
914
+ elsif src.scan(/\*/) then
915
+ result = if lex_state.is_argument && space_seen && src.check(/\S/) then
916
+ warning("`*' interpreted as argument prefix")
917
+ :tSTAR
918
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
919
+ :tSTAR
920
+ else
921
+ :tSTAR2
922
+ end
923
+ self.yacc_value = "*"
924
+ self.fix_arg_lex_state
925
+
926
+ return result
927
+ end
928
+ elsif src.check(/\</) then
929
+ if src.scan(/\<\=\>/) then
930
+ self.fix_arg_lex_state
931
+ self.yacc_value = "<=>"
932
+ return :tCMP
933
+ elsif src.scan(/\<\=/) then
934
+ self.fix_arg_lex_state
935
+ self.yacc_value = "<="
936
+ return :tLEQ
937
+ elsif src.scan(/\<\<\=/) then
938
+ self.fix_arg_lex_state
939
+ self.lex_state = :expr_beg
940
+ self.yacc_value = "\<\<"
941
+ return :tOP_ASGN
942
+ elsif src.scan(/\<\</) then
943
+ if (! [:expr_end, :expr_dot,
944
+ :expr_endarg, :expr_class].include?(lex_state) &&
945
+ (!lex_state.is_argument || space_seen)) then
946
+ tok = self.heredoc_identifier
947
+ if tok then
948
+ return tok
949
+ end
950
+ end
951
+
952
+ self.fix_arg_lex_state
953
+ self.yacc_value = "\<\<"
954
+ return :tLSHFT
955
+ elsif src.scan(/\</) then
956
+ self.fix_arg_lex_state
957
+ self.yacc_value = "<"
958
+ return :tLT
959
+ end
960
+ elsif src.check(/\>/) then
961
+ if src.scan(/\>\=/) then
962
+ self.fix_arg_lex_state
963
+ self.yacc_value = ">="
964
+ return :tGEQ
965
+ elsif src.scan(/\>\>=/) then
966
+ self.fix_arg_lex_state
967
+ self.lex_state = :expr_beg
968
+ self.yacc_value = ">>"
969
+ return :tOP_ASGN
970
+ elsif src.scan(/\>\>/) then
971
+ self.fix_arg_lex_state
972
+ self.yacc_value = ">>"
973
+ return :tRSHFT
974
+ elsif src.scan(/\>/) then
975
+ self.fix_arg_lex_state
976
+ self.yacc_value = ">"
977
+ return :tGT
978
+ end
979
+ elsif src.scan(/\`/) then
980
+ self.yacc_value = "`"
981
+ case lex_state
982
+ when :expr_fname then
983
+ self.lex_state = :expr_end
984
+ return :tBACK_REF2
985
+ when :expr_dot then
986
+ self.lex_state = if command_state then
987
+ :expr_cmdarg
988
+ else
989
+ :expr_arg
990
+ end
991
+ return :tBACK_REF2
992
+ end
993
+ self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
994
+ return :tXSTRING_BEG
995
+ elsif src.scan(/\?/) then
996
+ if lex_state == :expr_end || lex_state == :expr_endarg then
997
+ self.lex_state = :expr_beg
998
+ self.yacc_value = "?"
999
+ return :tEH
1000
+ end
1001
+
1002
+ if src.eos? then
1003
+ rb_compile_error "incomplete character syntax"
1004
+ end
1005
+
1006
+ if src.check(/\s|\v/) then
1007
+ unless lex_state.is_argument then
1008
+ c2 = { " " => 's',
1009
+ "\n" => 'n',
1010
+ "\t" => 't',
1011
+ "\v" => 'v',
1012
+ "\r" => 'r',
1013
+ "\f" => 'f' }[src.matched]
1014
+
1015
+ if c2 then
1016
+ warning("invalid character syntax; use ?\\" + c2)
1017
+ end
1018
+ end
1019
+
1020
+ # ternary
1021
+ self.lex_state = :expr_beg
1022
+ self.yacc_value = "?"
1023
+ return :tEH
1024
+ elsif src.check(/\w(?=\w)/) then # ternary, also
1025
+ self.lex_state = :expr_beg
1026
+ self.yacc_value = "?"
1027
+ return :tEH
1028
+ end
1029
+
1030
+ c = if src.scan(/\\/) then
1031
+ self.read_escape
1032
+ else
1033
+ src.getch
1034
+ end
1035
+ self.lex_state = :expr_end
1036
+ self.yacc_value = c[0].ord & 0xff
1037
+ return :tINTEGER
1038
+ elsif src.check(/\&/) then
1039
+ if src.scan(/\&\&\=/) then
1040
+ self.yacc_value = "&&"
1041
+ self.lex_state = :expr_beg
1042
+ return :tOP_ASGN
1043
+ elsif src.scan(/\&\&/) then
1044
+ self.lex_state = :expr_beg
1045
+ self.yacc_value = "&&"
1046
+ return :tANDOP
1047
+ elsif src.scan(/\&\=/) then
1048
+ self.yacc_value = "&"
1049
+ self.lex_state = :expr_beg
1050
+ return :tOP_ASGN
1051
+ elsif src.scan(/&/) then
1052
+ result = if lex_state.is_argument && space_seen &&
1053
+ !src.check(/\s/) then
1054
+ warning("`&' interpreted as argument prefix")
1055
+ :tAMPER
1056
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
1057
+ :tAMPER
1058
+ else
1059
+ :tAMPER2
1060
+ end
1061
+
1062
+ self.fix_arg_lex_state
1063
+ self.yacc_value = "&"
1064
+ return result
1065
+ end
1066
+ elsif src.scan(/\//) then
1067
+ if lex_state == :expr_beg || lex_state == :expr_mid then
1068
+ self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
1069
+ self.yacc_value = "/"
1070
+ return :tREGEXP_BEG
1071
+ end
1072
+
1073
+ if src.scan(/\=/) then
1074
+ self.yacc_value = "/"
1075
+ self.lex_state = :expr_beg
1076
+ return :tOP_ASGN
1077
+ end
1078
+
1079
+ if lex_state.is_argument && space_seen then
1080
+ unless src.scan(/\s/) then
1081
+ arg_ambiguous
1082
+ self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
1083
+ self.yacc_value = "/"
1084
+ return :tREGEXP_BEG
1085
+ end
1086
+ end
1087
+
1088
+ self.fix_arg_lex_state
1089
+ self.yacc_value = "/"
1090
+
1091
+ return :tDIVIDE
1092
+ elsif src.scan(/\^=/) then
1093
+ self.lex_state = :expr_beg
1094
+ self.yacc_value = "^"
1095
+ return :tOP_ASGN
1096
+ elsif src.scan(/\^/) then
1097
+ self.fix_arg_lex_state
1098
+ self.yacc_value = "^"
1099
+ return :tCARET
1100
+ elsif src.scan(/\;/) then
1101
+ self.command_start = true
1102
+ self.lex_state = :expr_beg
1103
+ self.yacc_value = ";"
1104
+ return :tSEMI
1105
+ elsif src.scan(/\~/) then
1106
+ if lex_state == :expr_fname || lex_state == :expr_dot then
1107
+ src.scan(/@/)
1108
+ end
1109
+
1110
+ self.fix_arg_lex_state
1111
+ self.yacc_value = "~"
1112
+
1113
+ return :tTILDE
1114
+ elsif src.scan(/\\/) then
1115
+ if src.scan(/\n/) then
1116
+ self.lineno = nil
1117
+ space_seen = true
1118
+ next
1119
+ end
1120
+ rb_compile_error "bare backslash only allowed before newline"
1121
+ elsif src.scan(/\%/) then
1122
+ if lex_state == :expr_beg || lex_state == :expr_mid then
1123
+ return parse_quote
1124
+ end
1125
+
1126
+ if src.scan(/\=/) then
1127
+ self.lex_state = :expr_beg
1128
+ self.yacc_value = "%"
1129
+ return :tOP_ASGN
1130
+ end
1131
+
1132
+ if lex_state.is_argument && space_seen && ! src.check(/\s/) then
1133
+ return parse_quote
1134
+ end
1135
+
1136
+ self.fix_arg_lex_state
1137
+ self.yacc_value = "%"
1138
+
1139
+ return :tPERCENT
1140
+ elsif src.check(/\$/) then
1141
+ if src.scan(/(\$_)(\w+)/) then
1142
+ self.lex_state = :expr_end
1143
+ self.token = src.matched
1144
+ return process_token(command_state)
1145
+ elsif src.scan(/\$_/) then
1146
+ self.lex_state = :expr_end
1147
+ self.token = src.matched
1148
+ self.yacc_value = src.matched
1149
+ return :tGVAR
1150
+ elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
1151
+ self.lex_state = :expr_end
1152
+ self.yacc_value = src.matched
1153
+ return :tGVAR
1154
+ elsif src.scan(/\$([\&\`\'\+])/) then
1155
+ self.lex_state = :expr_end
1156
+ # Explicit reference to these vars as symbols...
1157
+ if last_state == :expr_fname then
1158
+ self.yacc_value = src.matched
1159
+ return :tGVAR
1160
+ else
1161
+ self.yacc_value = src[1].to_sym
1162
+ return :tBACK_REF
1163
+ end
1164
+ elsif src.scan(/\$([1-9]\d*)/) then
1165
+ self.lex_state = :expr_end
1166
+ if last_state == :expr_fname then
1167
+ self.yacc_value = src.matched
1168
+ return :tGVAR
1169
+ else
1170
+ self.yacc_value = src[1].to_i
1171
+ return :tNTH_REF
1172
+ end
1173
+ elsif src.scan(/\$0/) then
1174
+ self.lex_state = :expr_end
1175
+ self.token = src.matched
1176
+ return process_token(command_state)
1177
+ elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
1178
+ self.lex_state = :expr_end
1179
+ self.yacc_value = "$"
1180
+ return "$"
1181
+ elsif src.scan(/\$\w+/)
1182
+ self.lex_state = :expr_end
1183
+ self.token = src.matched
1184
+ return process_token(command_state)
1185
+ end
1186
+ elsif src.check(/\_/) then
1187
+ if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
1188
+ self.lineno = nil
1189
+ return RubyLexer::EOF
1190
+ elsif src.scan(/\_\w*/) then
1191
+ self.token = src.matched
1192
+ return process_token(command_state)
1193
+ end
1194
+ end
1195
+ end # END OF CASE
1196
+
1197
+ if src.scan(/\004|\032|\000/) || src.eos? then # ^D, ^Z, EOF
1198
+ return RubyLexer::EOF
1199
+ else # alpha check
1200
+ if src.scan(/\W/) then
1201
+ rb_compile_error "Invalid char #{src.matched.inspect} in expression"
1202
+ end
1203
+ end
1204
+
1205
+ self.token = src.matched if self.src.scan(/\w+/)
1206
+
1207
+ return process_token(command_state)
1208
+ end
1209
+ end
1210
+
1211
# Classify the text currently held in +token+ and return its token type.
#
# command_state:: when truthy, a plain identifier seen in a beginning or
#                 argument state moves the lexer to :expr_cmdarg instead
#                 of :expr_arg.
#
# Returns :tGVAR, :tCVAR, :tIVAR, :tFID, :tHASHKEY, :tCONSTANT,
# :tIDENTIFIER, one of the :kDO variants, or an id taken from the keyword
# table entry. As side effects it may consume a trailing "!", "?", "=" or
# ":" from +src+ (appending it to +token+), and it updates +lex_state+ and
# +yacc_value+.
def process_token(command_state)
  # Pull a trailing "!" or "?" into a word token, unless it starts "!=" / "?=".
  if token =~ /^\w/ && src.scan(/[\!\?](?!=)/)
    token << src.matched
  end

  last_state = lex_state

  # Variables are recognised purely from their leading sigil.
  result = case token
           when /^\$/ then :tGVAR
           when /^@@/ then :tCVAR
           when /^@/  then :tIVAR
           end

  if result
    self.lex_state = :expr_end
  else
    if token =~ /[!?]$/
      result = :tFID
    else
      # ident=, not =~ => == or followed by =>
      # TODO test lexing of a=>b vs a==>b
      if lex_state == :expr_fname && src.scan(/=(?:(?![~>=])|(?==>))/)
        result = :tIDENTIFIER
        token << src.matched
      end

      # A single ":" directly after the word makes it a hash key ("key:").
      if src.scan(/:(?!:)/)
        token << src.matched
        self.yacc_value = token
        return :tHASHKEY
      end

      result ||= (token =~ /^[A-Z]/ ? :tCONSTANT : :tIDENTIFIER)
    end

    # Reserved words are not looked up right after a "." (method call position).
    keyword = lex_state == :expr_dot ? nil : RubyParser::Keyword.keyword(token)

    if keyword
      state = lex_state
      self.lex_state = keyword.state
      self.yacc_value = [token, src.lineno]

      # In method-name position the keyword's name is the value.
      if state == :expr_fname
        self.yacc_value = keyword.name
        return keyword.id0
      end

      if keyword.id0 == :kDO
        self.command_start = true
        return :kDO_COND if cond.is_in_state
        return :kDO_BLOCK if (cmdarg.is_in_state && state != :expr_cmdarg) ||
                             state == :expr_endarg
        return :kDO
      end

      return keyword.id0 if [:expr_beg, :expr_value].include?(state)

      # id0 and id1 differ for keywords that have a second (modifier) form.
      self.lex_state = :expr_beg if keyword.id0 != keyword.id1

      return keyword.id1
    end

    if [:expr_beg, :expr_mid, :expr_dot, :expr_arg, :expr_cmdarg].include?(lex_state)
      self.lex_state = command_state ? :expr_cmdarg : :expr_arg
    else
      self.lex_state = :expr_end
    end
  end

  self.yacc_value = token

  # A known local variable (not used as a method name after ".") ends the expression.
  if last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar
    self.lex_state = :expr_end
  end

  result
end
1304
+
1305
# Lex the next piece of the literal described by +lex_strterm+.
#
# A :heredoc strterm is handed to #heredoc; anything else goes to
# #parse_string. When the piece is the closing token (:tSTRING_END or
# :tREGEXP_END) the string state is torn down: lineno and lex_strterm are
# reset to nil and lex_state becomes :expr_end.
#
# Returns whatever token the delegate produced.
def yylex_string
  piece = lex_strterm[0] == :heredoc ? self.heredoc(lex_strterm) : self.parse_string(lex_strterm)

  # Anything other than a closing token leaves the string state untouched.
  return piece unless piece == :tSTRING_END || piece == :tREGEXP_END

  self.lineno = nil
  self.lex_strterm = nil
  self.lex_state = :expr_end

  piece
end
1320
+ end