brakeman 0.7.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,70 @@
1
+ require 'checks/base_check'
2
+
3
# Warns about calls that bypass mass assignment protection by passing
# :without_protection => true in the trailing options hash.
#
# Only for Rails 3.1 and later: the check returns early when mass
# assignment is disabled or the version is between 0.0.0 and 3.0.99.
class CheckWithoutProtection < BaseCheck
  Checks.add self

  # Collects the names of models whose parent is ActiveRecord::Base, looks up
  # calls to their mass assignment methods and hands each result to +process+.
  def run_check
    return if mass_assign_disabled?(tracker) || version_between?("0.0.0", "3.0.99")

    models = []
    tracker.models.each do |name, model|
      models << name if parent?(tracker, model, :"ActiveRecord::Base")
    end

    return if models.empty?

    # Calls that have already produced a warning.
    @results = Set.new

    assignment_methods = [:new,
                          :attributes=,
                          :update_attribute,
                          :update_attributes,
                          :update_attributes!,
                          :create,
                          :create!]

    tracker.find_call(models, assignment_methods).each do |result|
      process result
    end
  end

  # Inspects one call result. The call is the last element of +res+; when
  # its final argument is a hash containing :without_protection => true, a
  # "Mass Assignment" warning is recorded once for that call. Confidence is
  # high if the arguments include user input, medium otherwise.
  #
  # Always returns +res+.
  def process_result res
    call = res[-1]
    options = call[3][-1]

    if hash?(options) && !@results.include?(call)
      hash_iterate(options) do |key, value|
        if symbol?(key) && key[1] == :without_protection && value[0] == :true
          @results << call

          confidence = include_user_input?(call[3]) ? CONFIDENCE[:high] : CONFIDENCE[:med]

          warn :result => res,
            :warning_type => "Mass Assignment",
            :message => "Unprotected mass assignment",
            :line => call.line,
            :code => call,
            :confidence => confidence

          # One warning per call is enough.
          break
        end
      end
    end

    res
  end
end
@@ -59,7 +59,7 @@ class ControllerProcessor < BaseProcessor
59
59
 
60
60
  #Methods called inside class definition
61
61
  #like attr_* and other settings
62
- if @current_method.nil? and target.nil?
62
+ if @current_method.nil? and target.nil? and @controller
63
63
  if args.length == 1 #actually, empty
64
64
  case method
65
65
  when :private, :protected, :public
@@ -90,9 +90,13 @@ class ControllerProcessor < BaseProcessor
90
90
  #layout :false or layout nil
91
91
  @controller[:layout] = false
92
92
  end
93
+ else
94
+ @controller[:options][method] ||= []
95
+ @controller[:options][method] << exp
93
96
  end
94
97
  end
95
- ignore
98
+
99
+ exp
96
100
  elsif target == nil and method == :render
97
101
  make_render exp
98
102
  elsif exp == FORMAT_HTML and context[1] != :iter
@@ -71,7 +71,7 @@ class RoutesProcessor < BaseProcessor
71
71
  args = exp[3][1..-1]
72
72
 
73
73
  hash_iterate args[0] do |k, v|
74
- if symbol? k and k[1] == :to
74
+ if symbol? k and k[1] == :to and string? v[1]
75
75
  controller, action = extract_action v[1]
76
76
 
77
77
  self.current_controller = controller
@@ -368,6 +368,16 @@ class Report
368
368
  raise "PDF output is not yet supported."
369
369
  end
370
370
 
371
# Rails version string shown in the report headers: the version stored in
# the tracker configuration when there is one, "3.x" when only the :rails3
# option is set, and "Unknown" otherwise.
def rails_version
  detected = tracker.config[:rails_version]
  return detected if detected

  OPTIONS[:rails3] ? "3.x" : "Unknown"
end
380
+
371
381
  #Return header for HTML output. Uses CSS from OPTIONS[:html_style]
372
382
  def html_header
373
383
  if File.exist? OPTIONS[:html_style]
@@ -398,11 +408,13 @@ class Report
398
408
  <table>
399
409
  <tr>
400
410
  <th>Application Path</th>
411
+ <th>Rails Version</th>
401
412
  <th>Report Generation Time</th>
402
413
  <th>Checks Performed</th>
403
414
  </tr>
404
415
  <tr>
405
416
  <td>#{File.expand_path OPTIONS[:app_path]}</td>
417
+ <td>#{rails_version}</td>
406
418
  <td>#{Time.now}</td>
407
419
  <td>#{checks.checks_run.sort.join(", ")}</td>
408
420
  </tr>
@@ -412,13 +424,13 @@ class Report
412
424
 
413
425
  #Generate header for text output
414
426
  def text_header
415
- "\n+BRAKEMAN REPORT+\n\nApplication path: #{File.expand_path OPTIONS[:app_path]}\nGenerated at #{Time.now}\nChecks run: #{checks.checks_run.sort.join(", ")}\n"
427
+ "\n+BRAKEMAN REPORT+\n\nApplication path: #{File.expand_path OPTIONS[:app_path]}\nRails version: #{rails_version}\nGenerated at #{Time.now}\nChecks run: #{checks.checks_run.sort.join(", ")}\n"
416
428
  end
417
429
 
418
430
  #Generate header for CSV output
419
431
  def csv_header
420
- header = Ruport::Data::Table(["Application Path", "Report Generation Time", "Checks Performed"])
421
- header << [File.expand_path(OPTIONS[:app_path]), Time.now.to_s, checks.checks_run.sort.join(", ")]
432
+ header = Ruport::Data::Table(["Application Path", "Report Generation Time", "Checks Performed", "Rails Version"])
433
+ header << [File.expand_path(OPTIONS[:app_path]), Time.now.to_s, checks.checks_run.sort.join(", "), rails_version]
422
434
  "BRAKEMAN REPORT\n\n" << header.to_csv
423
435
  end
424
436
 
@@ -0,0 +1,1320 @@
1
+ class RubyLexer
2
+ attr_accessor :command_start
3
+ attr_accessor :cmdarg
4
+ attr_accessor :cond
5
+ attr_accessor :nest
6
+
7
+ ESC_RE = /\\([0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|[^0-7xMCc])/
8
+
9
+ # Additional context surrounding tokens that both the lexer and
10
+ # grammar use.
11
+ attr_reader :lex_state
12
+
13
+ attr_accessor :lex_strterm
14
+
15
+ attr_accessor :parser # HACK for very end of lexer... *sigh*
16
+
17
+ # Stream of data that yylex examines.
18
+ attr_reader :src
19
+
20
+ # Last token read via yylex.
21
+ attr_accessor :token
22
+
23
+ attr_accessor :string_buffer
24
+
25
+ # Value of last token which had a value associated with it.
26
+ attr_accessor :yacc_value
27
+
28
+ # What handles warnings
29
+ attr_accessor :warnings
30
+
31
+ EOF = :eof_haha!
32
+
33
+ # ruby constants for strings (should this be moved somewhere else?)
34
+ STR_FUNC_BORING = 0x00
35
+ STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
36
+ STR_FUNC_EXPAND = 0x02
37
+ STR_FUNC_REGEXP = 0x04
38
+ STR_FUNC_AWORDS = 0x08
39
+ STR_FUNC_SYMBOL = 0x10
40
+ STR_FUNC_INDENT = 0x20 # <<-HEREDOC
41
+
42
+ STR_SQUOTE = STR_FUNC_BORING
43
+ STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
44
+ STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
45
+ STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
46
+ STR_SSYM = STR_FUNC_SYMBOL
47
+ STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
48
+
49
+ TOKENS = {
50
+ "!" => :tBANG,
51
+ "!=" => :tNEQ,
52
+ "!~" => :tNMATCH,
53
+ "," => :tCOMMA,
54
+ ".." => :tDOT2,
55
+ "..." => :tDOT3,
56
+ "=" => :tEQL,
57
+ "==" => :tEQ,
58
+ "===" => :tEQQ,
59
+ "=>" => :tASSOC,
60
+ "=~" => :tMATCH,
61
+ }
62
+
63
# Moves the lexer to the next token: runs +yylex+ and stores its result in
# +token+.
#
# Raises if +yylex+ returned nothing.
#
# @return true if not at end of file (EOF).

def advance
  next_token = yylex
  self.token = next_token

  raise "yylex returned nil" unless next_token

  RubyLexer::EOF != next_token
end
75
+
76
# Reports an ambiguous first argument through +warning+.
def arg_ambiguous
  warning "Ambiguous first argument. make sure."
end
79
+
80
# Returns every comment collected so far joined into one string, and
# empties the collection so the next call starts fresh.
def comments
  collected = @comments.join
  @comments.clear
  collected
end
85
+
86
# Pushes +false+ onto both the cond and cmdarg stacks, switches the lexer to
# :expr_beg and stores +val+ as the current token value.
def expr_beg_push val
  [cond, cmdarg].each { |stack| stack.push false }
  self.lex_state = :expr_beg
  self.yacc_value = val
end
92
+
93
# Sets the lexer state after an operator token: :expr_arg when the current
# state is :expr_fname or :expr_dot, :expr_beg in every other case.
def fix_arg_lex_state
  after_name = lex_state == :expr_fname || lex_state == :expr_dot
  self.lex_state = after_name ? :expr_arg : :expr_beg
end
100
+
101
# Lexes the next piece of a here document.
#
# +here+ is the heredoc string terminator as stored in +lex_strterm+:
# [:heredoc, terminator, func flags, rest of the line that opened it].
#
# Returns :tSTRING_END once the terminator line is reached, :tSTRING_DVAR or
# :tSTRING_DBEG when an interpolation starts, and :tSTRING_CONTENT (with the
# text in +yacc_value+, carriage returns removed) otherwise. Reports a
# compile error through +rb_compile_error+ when the input ends before the
# terminator is found.
def heredoc here
  _, eos, func, last_line = here

  indent  = (func & STR_FUNC_INDENT) != 0
  expand  = (func & STR_FUNC_EXPAND) != 0

  # The terminator is arbitrary text (quoted identifiers may contain regexp
  # metacharacters), so it must be escaped before being used as a pattern.
  eos_src = Regexp.escape(eos)
  eos_re  = indent ? /[ \t]*#{eos_src}(\r?\n|\z)/ : /#{eos_src}(\r?\n|\z)/
  err_msg = "can't match #{eos_re.inspect} anywhere in "

  rb_compile_error err_msg if src.eos?

  if src.beginning_of_line? && src.scan(eos_re)
    # Put back the remainder of the line that introduced the heredoc.
    src.unread_many last_line
    self.yacc_value = eos
    return :tSTRING_END
  end

  self.string_buffer = []

  if expand
    case
    when src.scan(/#[$@]/)
      # Step back one character so the sigil is still in the stream.
      src.pos -= 1
      self.yacc_value = src.matched
      return :tSTRING_DVAR
    when src.scan(/#[{]/)
      self.yacc_value = src.matched
      return :tSTRING_DBEG
    when src.scan(/#/)
      string_buffer << '#'
    end

    until src.scan(eos_re)
      c = tokadd_string func, "\n", nil

      rb_compile_error err_msg if c == RubyLexer::EOF

      if c != "\n"
        # Stopped before the end of the line: hand back what we have.
        self.yacc_value = string_buffer.join.delete("\r")
        return :tSTRING_CONTENT
      else
        string_buffer << src.scan(/\n/)
      end

      rb_compile_error err_msg if src.eos?
    end

    # The terminator was consumed by the loop condition; push it back
    # (followed by a newline) so the next call sees it and ends the string.
    src.unread_many(eos + "\n")
  else
    # No interpolation: copy whole lines until the terminator is next.
    until src.check(eos_re)
      string_buffer << src.scan(/.*(\n|\z)/)
      rb_compile_error err_msg if src.eos?
    end
  end

  self.lex_strterm = [:heredoc, eos, func, last_line]
  self.yacc_value = string_buffer.join.delete("\r")

  return :tSTRING_CONTENT
end
165
+
166
# Reads the identifier that follows "<<" and, when it is a valid heredoc
# identifier, sets up +lex_strterm+ for the heredoc body.
#
# Handles quoted identifiers (single, double or back quotes) and bare
# words, each optionally preceded by "-" which turns on the indent flag.
# The rest of the current line is cut out of the source (replaced by a
# single newline) and stored in the strterm.
#
# Returns :tXSTRING_BEG for a backquoted identifier, :tSTRING_BEG for the
# others, or nil when no identifier is present.
def heredoc_identifier
  quote = nil
  flags = STR_FUNC_BORING
  self.string_buffer = []

  if src.scan(/(-?)(['"`])(.*?)\2/)
    quote = src[2]
    flags |= STR_FUNC_INDENT unless src[1].empty?
    flags |= case quote
             when "\'" then STR_SQUOTE
             when '"'  then STR_DQUOTE
             else           STR_XQUOTE
             end
    string_buffer << src[3]
  elsif src.scan(/-?(['"`])(?!\1*\Z)/)
    rb_compile_error "unterminated here document identifier"
  elsif src.scan(/(-?)(\w+)/)
    # A bare identifier behaves like a double-quoted one.
    quote = '"'
    flags |= STR_DQUOTE
    flags |= STR_FUNC_INDENT unless src[1].empty?
    string_buffer << src[2]
  else
    return nil
  end

  rest_of_line = nil
  if src.check(/.*\n/)
    # Remove the remainder of the line from the source, leaving only a
    # newline behind, and remember it.
    rest_of_line = src.string[src.pos, src.matched_size]
    src.string[src.pos, src.matched_size] = "\n"
    src.extra_lines_added += 1
    src.pos += 1
  end

  self.lex_strterm = [:heredoc, string_buffer.join, flags, rest_of_line]

  if quote == '`'
    self.yacc_value = "`"
    :tXSTRING_BEG
  else
    self.yacc_value = "\""
    :tSTRING_BEG
  end
end
218
+
219
# Creates the lexer with empty cond/cmdarg state stacks, a nesting depth of
# zero and no collected comments, then calls +reset+.
def initialize
  @comments = []
  self.nest = 0
  self.cond = RubyParser::StackState.new(:cond)
  self.cmdarg = RubyParser::StackState.new(:cmdarg)

  reset
end
227
+
228
# Converts the text last matched by the scanner into an integer in the
# given +base+ and stores it in +yacc_value+. A literal containing two
# consecutive underscores is reported as a compile error.
#
# Returns :tINTEGER.
def int_with_base base
  literal = src.matched
  rb_compile_error "Invalid numeric format" if literal =~ /__/
  self.yacc_value = literal.to_i(base)
  :tINTEGER
end
233
+
234
# Sets the lexer state. Anything other than a Symbol is rejected.
def lex_state= o
  raise "wtf\?" unless o.is_a?(Symbol)
  @lex_state = o
end
238
+
239
# Overrides the cached line number; assigning nil makes the next read ask
# the scanner again.
def lineno=(value)
  @lineno = value
end

# Current line number, taken from the scanner the first time it is needed
# and cached until it is reassigned.
def lineno
  @lineno || (@lineno = src.lineno)
end
243
+
244
##
# Parse a number from the input stream, starting at the scanner's current
# position, and set the lexer state to :expr_end.
#
# Base prefixes are accepted in either case (0x/0X, 0b/0B, 0d/0D, 0o/0O).
# Malformed literals are reported through +rb_compile_error+.
#
# @return [Symbol] :tINTEGER or :tFLOAT; the numeric value is left in
#   +yacc_value+.

def parse_number
  self.lex_state = :expr_end

  case
  when src.scan(/[+-]?0[xbd]\b/i)
    # A base prefix with no digits after it.
    rb_compile_error "Invalid numeric format"
  when src.scan(/[+-]?0x[a-f0-9_]+/i)
    int_with_base(16)
  when src.scan(/[+-]?0b[01_]+/i)
    int_with_base(2)
  when src.scan(/[+-]?0d[0-9_]+/i)
    int_with_base(10)
  when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/)
    rb_compile_error "Illegal octal digit."
  when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/)
    int_with_base(8)
  when src.scan(/[+-]?[\d_]+_(e|\.)/)
    rb_compile_error "Trailing '_' in number."
  when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i)
    number = src.matched
    rb_compile_error "Invalid numeric format" if number =~ /__/
    self.yacc_value = number.to_f
    :tFLOAT
  when src.scan(/[+-]?0\b/)
    int_with_base(10)
  when src.scan(/[+-]?[\d_]+\b/)
    int_with_base(10)
  else
    rb_compile_error "Bad number format"
  end
end
283
+
284
# Lexes the opening of a percent literal (the part after "%"): an optional
# type letter followed by the opening delimiter.
#
# Stores a [:strterm, string type, closing delimiter, opening delimiter]
# entry in +lex_strterm+ and returns the matching *_BEG token type. When
# the delimiter is not one of ( [ { <, the delimiter itself closes the
# literal and "\0" is recorded as the opening delimiter. Unknown type
# letters and a literal cut off by end of input are reported as compile
# errors.
def parse_quote
  if src.scan(/[a-z0-9]{1,2}/i) # Long-hand (e.g. %Q{}).
    rb_compile_error "unknown type of %string" if src.matched_size == 2
    kind = src.matched
    opener = src.getch
    short_hand = false
  else # Short-hand (e.g. %{, %., %!, etc)
    kind = 'Q'
    opener = src.getch
    short_hand = true
  end

  if src.eos? || kind == RubyLexer::EOF || opener == RubyLexer::EOF
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # Bracket-like openers close with their counterpart; anything else closes
  # with the same character.
  closer = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[opener]
  if closer.nil?
    closer = opener
    opener = "\0"
  end

  self.yacc_value = "%#{kind}#{opener}"

  token_type, string_type =
    case kind
    when 'Q'
      ch = short_hand ? closer : kind + opener
      self.yacc_value = "%#{ch}"
      [:tSTRING_BEG, STR_DQUOTE]
    when 'q'
      [:tSTRING_BEG, STR_SQUOTE]
    when 'W'
      src.scan(/\s*/)
      [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_AWORDS]
    when 'w'
      src.scan(/\s*/)
      [:tAWORDS_BEG, STR_SQUOTE | STR_FUNC_AWORDS]
    when 'x'
      [:tXSTRING_BEG, STR_XQUOTE]
    when 'r'
      [:tREGEXP_BEG, STR_REGEXP]
    when 's'
      self.lex_state = :expr_fname
      [:tSYMBEG, STR_SSYM]
    end

  if token_type.nil?
    rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{kind}'."
  end

  self.lex_strterm = [:strterm, string_type, closer, opener]

  token_type
end
332
+
333
# Lexes the next piece of a string-like literal.
#
# +quote+ is the string terminator stored in +lex_strterm+:
# [:strterm, string type flags, closing delimiter, opening delimiter].
#
# Returns :tSTRING_END / :tREGEXP_END when the closing delimiter is found,
# :tSPACE between the elements of a word list, :tSTRING_DVAR or
# :tSTRING_DBEG at the start of an interpolation, and :tSTRING_CONTENT
# (text in +yacc_value+) otherwise. Reports a compile error when the input
# ends inside the literal.
def parse_string(quote)
  _, func, term, paren = quote

  # A nil type marks a word list whose closing delimiter was already
  # consumed (see the :tSPACE branch below, which sets quote[1] to nil).
  # Check it before doing any bit arithmetic on the flags.
  unless func
    self.lineno = nil
    return :tSTRING_END
  end

  term_re = Regexp.escape term

  awords = (func & STR_FUNC_AWORDS) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  expand = (func & STR_FUNC_EXPAND) != 0

  space = awords && !src.scan(/\s+/).nil?

  if self.nest == 0 && src.scan(/#{term_re}/)
    if awords
      # Emit a final separator now; the next call returns :tSTRING_END.
      quote[1] = nil
      return :tSPACE
    elsif regexp
      self.yacc_value = self.regx_options
      self.lineno = nil
      return :tREGEXP_END
    else
      self.yacc_value = term
      self.lineno = nil
      return :tSTRING_END
    end
  end

  return :tSPACE if space

  self.string_buffer = []

  if expand
    case
    when src.scan(/#(?=[$@])/)
      return :tSTRING_DVAR
    when src.scan(/#[{]/)
      return :tSTRING_DBEG
    when src.scan(/#/)
      string_buffer << '#'
    end
  end

  if tokadd_string(func, term, paren) == RubyLexer::EOF
    rb_compile_error "unterminated string meets end of file"
  end

  self.yacc_value = string_buffer.join

  return :tSTRING_CONTENT
end
392
+
393
# Raises a SyntaxError whose message is +msg+ followed by the current line
# number and the remainder of the current source line.
def rb_compile_error msg
  location = ". near line #{self.lineno}: #{src.rest[/^.*/].inspect}"
  raise SyntaxError, msg + location
end
397
+
398
# Reads the escape sequence that follows a backslash (the backslash itself
# has already been consumed) and returns the string it stands for.
#
# Supports the single-letter escapes, octal and hex constants, and the
# meta (M-) and control (C- / c) forms, which may wrap a further escape.
# A malformed sequence, or end of input, is reported as a compile error.
# Any other character is returned unchanged.
def read_escape
  if src.scan(/[\\ntrfvaebs]/)
    # Single-character escapes. The table is built per call so that every
    # caller receives a fresh string it may modify in place.
    return {
      "\\" => '\\',   # backslash
      "n"  => "\n",   # newline
      "t"  => "\t",   # horizontal tab
      "r"  => "\r",   # carriage-return
      "f"  => "\f",   # form-feed
      "v"  => "\13",  # vertical tab
      "a"  => "\007", # alarm(bell)
      "e"  => "\033", # escape
      "b"  => "\010", # backspace
      "s"  => " ",    # space
    }[src.matched]
  end

  case
  when src.scan(/[0-7]{1,3}/) # octal constant
    src.matched.to_i(8).chr
  when src.scan(/x([0-9a-fA-F]{1,2})/) # hex constant
    src[1].to_i(16).chr
  when src.check(/M-\\[\\MCc]/)
    # Meta applied to a nested escape: consume the prefix, read the inner
    # escape, then set the high bit of its first character.
    src.scan(/M-\\/)
    inner = self.read_escape
    inner[0] = (inner[0].ord | 0x80).chr
    inner
  when src.scan(/M-(.)/)
    char = src[1]
    char[0] = (char[0].ord | 0x80).chr
    char
  when src.check(/(C-|c)\\[\\MCc]/)
    # Control applied to a nested escape.
    src.scan(/(C-|c)\\/)
    inner = self.read_escape
    inner[0] = (inner[0].ord & 0x9f).chr
    inner
  when src.scan(/C-\?|c\?/)
    127.chr
  when src.scan(/(C-|c)(.)/)
    char = src[2]
    char[0] = (char[0].ord & 0x9f).chr
    char
  when src.scan(/[McCx0-9]/) || src.eos?
    rb_compile_error("Invalid escape character syntax")
  else
    src.getch
  end
end
450
+
451
# Reads the option letters that follow a regexp literal and returns the
# recognised ones (i, x, m, o, n, e, s, u) joined into a string. Any other
# lowercase letter is reported as a compile error.
def regx_options
  letters = src.scan(/[a-z]+/) ? src.matched.split(//) : []
  known, unknown = letters.partition { |s| s =~ /^[ixmonesu]$/ }

  unless unknown.empty?
    plural = unknown.size > 1 ? "s" : ""
    rb_compile_error("unknown regexp option%s - %s" % [plural, unknown.join.inspect])
  end

  known.join
end
465
+
466
# Puts the lexer back into its starting condition: at the start of a
# command, with no pending string terminator, token, token value, source
# or lexer state.
def reset
  self.command_start = true
  self.lex_strterm   = nil
  self.token         = nil
  self.yacc_value    = nil

  @src = @lex_state = nil
end
475
+
476
# Sets the text to lex. Only Strings are accepted; the text is wrapped in
# an RPStringScanner.
def src= src
  unless src.is_a?(String)
    raise "bad src: #{src.inspect}"
  end

  @src = RPStringScanner.new(src)
end
480
+
481
# Copies one backslash escape verbatim (backslash included) from the source
# into +string_buffer+.
#
# A backslash-newline pair is dropped. Meta/control prefixes that are
# followed by another backslash are copied and the nested escape is then
# handled recursively. Incomplete \M, \c, \C or \x sequences, and input
# that does not start with an escape, are compile errors.
def tokadd_escape term
  if src.scan(/\\\n/)
    # just ignore
  elsif src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/)
    string_buffer << src.matched
  elsif src.scan(/\\([MC]-|c)(?=\\)/)
    string_buffer << src.matched
    tokadd_escape term
  elsif src.scan(/\\([MC]-|c)(.)/)
    string_buffer << src.matched
  elsif src.scan(/\\[McCx]/)
    rb_compile_error "Invalid escape character syntax"
  elsif src.scan(/\\(.)/m)
    string_buffer << src.matched
  else
    rb_compile_error "Invalid escape character syntax"
  end
end
500
+
501
# Appends string content from the source to +string_buffer+ until something
# that needs separate handling is reached.
#
# +func+ holds the STR_FUNC_* flags of the literal, +term+ its closing
# delimiter and +paren+ its opening delimiter (nil when there is none).
# Scanning stops, leaving the stop character unread, at an unnested closing
# delimiter, at whitespace inside a word list, or at the start of an
# interpolation. Nested delimiter pairs are tracked through +nest+.
#
# Returns the last piece handled, or RubyLexer::EOF when the source is
# exhausted.
def tokadd_string(func, term, paren)
  word_list   = (func & STR_FUNC_AWORDS) != 0
  keep_escape = (func & STR_FUNC_ESCAPE) != 0
  interpolate = (func & STR_FUNC_EXPAND) != 0
  in_regexp   = (func & STR_FUNC_REGEXP) != 0
  in_symbol   = (func & STR_FUNC_SYMBOL) != 0

  paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
  term_re  = Regexp.new(Regexp.escape(term))

  until src.eos?
    c = nil
    handled = true

    case
    when self.nest == 0 && src.scan(term_re)
      # Closing delimiter at depth zero: leave it for the caller.
      src.pos -= 1
      break
    when paren_re && src.scan(paren_re)
      self.nest += 1
    when src.scan(term_re)
      self.nest -= 1
    when word_list && src.scan(/\s/)
      src.pos -= 1
      break
    when interpolate && src.scan(/#(?=[\$\@\{])/)
      src.pos -= 1
      break
    when interpolate && src.scan(/#(?!\n)/)
      # a plain '#'; it is appended below via src.matched
    when src.check(/\\/)
      case
      when word_list && src.scan(/\\\n/)
        string_buffer << "\n"
        next
      when word_list && src.scan(/\\\s/)
        c = ' '
      when interpolate && src.scan(/\\\n/)
        next
      when in_regexp && src.check(/\\/)
        self.tokadd_escape term
        next
      when interpolate && src.scan(/\\/)
        c = self.read_escape
      when src.scan(/\\\n/)
        # nothing extra; src.matched is appended below
      when src.scan(/\\\\/)
        string_buffer << '\\' if keep_escape
        c = '\\'
      when src.scan(/\\/)
        # Keep the backslash unless it escapes one of the delimiters.
        unless src.scan(term_re) || paren.nil? || src.scan(paren_re)
          string_buffer << "\\"
        end
      else
        handled = false
      end
    else
      handled = false
    end

    if !handled
      # Ordinary text: take the longest run of characters that cannot be
      # special, or failing that a single character.
      t = Regexp.escape term
      x = Regexp.escape(paren) if paren && paren != "\000"
      plain_re = if word_list
                   /[^#{t}#{x}\#\0\\\n\ ]+|./
                 else
                   /[^#{t}#{x}\#\0\\]+|./
                 end

      src.scan plain_re
      c = src.matched

      rb_compile_error "symbol cannot contain '\\0'" if in_symbol && c =~ /\0/
    end

    c ||= src.matched
    string_buffer << c
  end

  c ||= src.matched
  c = RubyLexer::EOF if src.eos?

  c
end
586
+
587
# Translates the body of an escape sequence (the text after the backslash)
# into the string it stands for.
#
# Fixed escapes come from a lookup table; octal, hex, meta (M-) and control
# (C- / c) forms are computed. An incomplete M, c, C, x or digit sequence is
# a compile error. Anything else is returned unchanged.
def unescape s
  fixed = {
    "a"    => "\007",
    "b"    => "\010",
    "e"    => "\033",
    "f"    => "\f",
    "n"    => "\n",
    "r"    => "\r",
    "s"    => " ",
    "t"    => "\t",
    "v"    => "\13",
    "\\"   => '\\',
    "\n"   => "",
    "C-\?" => 127.chr,
    "c\?"  => 127.chr,
  }[s]

  return fixed if fixed

  case s
  when /^[0-7]{1,3}/
    Regexp.last_match(0).to_i(8).chr
  when /^x([0-9a-fA-F]{1,2})/
    Regexp.last_match(1).to_i(16).chr
  when /^M-(.)/
    (Regexp.last_match(1)[0].ord | 0x80).chr
  when /^(C-|c)(.)/
    (Regexp.last_match(2)[0].ord & 0x9f).chr
  when /^[McCx0-9]/
    rb_compile_error("Invalid escape character syntax")
  else
    s
  end
end
622
+
623
# Hook for lexer warnings. The message +s+ is currently discarded.
def warning(s)
  nil # do nothing for now
end
626
+
627
+ ##
628
+ # Returns the next token. Also sets yacc_value if needed.
629
+ #
630
+ # @return the type of the token that was read, or RubyLexer::EOF at end of input
631
+
632
+ def yylex # 826 lines
633
+
634
+ c = ''
635
+ space_seen = false
636
+ command_state = false
637
+ src = self.src
638
+
639
+ self.token = nil
640
+ self.yacc_value = nil
641
+
642
+ return yylex_string if lex_strterm
643
+
644
+ command_state = self.command_start
645
+ self.command_start = false
646
+
647
+ last_state = lex_state
648
+
649
+ loop do # START OF CASE
650
+ if src.scan(/[\ \t\r\f\v]/) then # \s - \n + \v
651
+ space_seen = true
652
+ next
653
+ elsif src.check(/[^a-zA-Z]/) then
654
+ if src.scan(/\n|#/) then
655
+ self.lineno = nil
656
+ c = src.matched
657
+ if c == '#' then
658
+ src.pos -= 1
659
+
660
+ while src.scan(/\s*#.*(\n+|\z)/) do
661
+ @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
662
+ end
663
+
664
+ if src.eos? then
665
+ return RubyLexer::EOF
666
+ end
667
+ end
668
+
669
+ # Replace a string of newlines with a single one
670
+ src.scan(/\n+/)
671
+
672
+ if [:expr_beg, :expr_fname,
673
+ :expr_dot, :expr_class].include? lex_state then
674
+ next
675
+ end
676
+
677
+ self.command_start = true
678
+ self.lex_state = :expr_beg
679
+ return :tNL
680
+ elsif src.scan(/[\]\)\}]/) then
681
+ cond.lexpop
682
+ cmdarg.lexpop
683
+ self.lex_state = :expr_end
684
+ self.yacc_value = src.matched
685
+ result = {
686
+ ")" => :tRPAREN,
687
+ "]" => :tRBRACK,
688
+ "}" => :tRCURLY
689
+ }[src.matched]
690
+ return result
691
+ elsif src.scan(/\.\.\.?|,|![=~]?/) then
692
+ self.lex_state = :expr_beg
693
+ tok = self.yacc_value = src.matched
694
+ return TOKENS[tok]
695
+ elsif src.check(/\./) then
696
+ if src.scan(/\.\d/) then
697
+ rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
698
+ elsif src.scan(/\./) then
699
+ self.lex_state = :expr_dot
700
+ self.yacc_value = "."
701
+ return :tDOT
702
+ end
703
+ elsif src.scan(/\(/) then
704
+ result = :tLPAREN2
705
+ self.command_start = true
706
+
707
+ if lex_state == :expr_beg || lex_state == :expr_mid then
708
+ result = :tLPAREN
709
+ elsif space_seen then
710
+ if lex_state == :expr_cmdarg then
711
+ result = :tLPAREN_ARG
712
+ elsif lex_state == :expr_arg then
713
+ warning("don't put space before argument parentheses")
714
+ result = :tLPAREN2
715
+ end
716
+ end
717
+
718
+ self.expr_beg_push "("
719
+
720
+ return result
721
+ elsif src.check(/\=/) then
722
+ if src.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
723
+ self.fix_arg_lex_state
724
+ tok = self.yacc_value = src.matched
725
+ return TOKENS[tok]
726
+ elsif src.scan(/\=begin(?=\s)/) then
727
+ # @comments << '=' << src.matched
728
+ @comments << src.matched
729
+
730
+ unless src.scan(/.*?\n=end( |\t|\f)*[^(\n|\z)]*(\n|\z)/m) then
731
+ @comments.clear
732
+ rb_compile_error("embedded document meets end of file")
733
+ end
734
+
735
+ @comments << src.matched
736
+
737
+ next
738
+ else
739
+ raise "you shouldn't be able to get here"
740
+ end
741
+ elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/o) then
742
+ self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
743
+ self.lex_state = :expr_end
744
+ return :tSTRING
745
+ elsif src.scan(/\"/) then # FALLBACK
746
+ self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\0"] # TODO: question this
747
+ self.yacc_value = "\""
748
+ return :tSTRING_BEG
749
+ elsif src.scan(/\@\@?\w*/) then
750
+ self.token = src.matched
751
+
752
+ rb_compile_error "`#{token}` is not allowed as a variable name" if
753
+ token =~ /\@\d/
754
+
755
+ return process_token(command_state)
756
+ elsif src.scan(/\:\:/) then
757
+ if (lex_state == :expr_beg ||
758
+ lex_state == :expr_mid ||
759
+ lex_state == :expr_class ||
760
+ (lex_state.is_argument && space_seen)) then
761
+ self.lex_state = :expr_beg
762
+ self.yacc_value = "::"
763
+ return :tCOLON3
764
+ end
765
+
766
+ self.lex_state = :expr_dot
767
+ self.yacc_value = "::"
768
+ return :tCOLON2
769
+ elsif lex_state != :expr_end && lex_state != :expr_endarg && src.scan(/:([a-zA-Z_]\w*(?:[?!]|=(?!>))?)/) then
770
+ self.yacc_value = src[1]
771
+ self.lex_state = :expr_end
772
+ return :tSYMBOL
773
+ elsif src.scan(/\:/) then
774
+ # ?: / then / when
775
+ if (lex_state == :expr_end || lex_state == :expr_endarg||
776
+ src.check(/\s/)) then
777
+ self.lex_state = :expr_beg
778
+ self.yacc_value = ":"
779
+ return :tCOLON
780
+ end
781
+
782
+ case
783
+ when src.scan(/\'/) then
784
+ self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\0"]
785
+ when src.scan(/\"/) then
786
+ self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\0"]
787
+ end
788
+
789
+ self.lex_state = :expr_fname
790
+ self.yacc_value = ":"
791
+ return :tSYMBEG
792
+ elsif src.check(/[0-9]/) then
793
+ return parse_number
794
+ elsif src.scan(/\[/) then
795
+ result = src.matched
796
+
797
+ if lex_state == :expr_fname || lex_state == :expr_dot then
798
+ self.lex_state = :expr_arg
799
+ case
800
+ when src.scan(/\]\=/) then
801
+ self.yacc_value = "[]="
802
+ return :tASET
803
+ when src.scan(/\]/) then
804
+ self.yacc_value = "[]"
805
+ return :tAREF
806
+ else
807
+ rb_compile_error "unexpected '['"
808
+ end
809
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
810
+ result = :tLBRACK
811
+ elsif lex_state.is_argument && space_seen then
812
+ result = :tLBRACK
813
+ end
814
+
815
+ self.expr_beg_push "["
816
+
817
+ return result
818
+ elsif src.scan(/\'(\\.|[^\'])*\'/) then
819
+ self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
820
+ self.lex_state = :expr_end
821
+ return :tSTRING
822
+ elsif src.check(/\|/) then
823
+ if src.scan(/\|\|\=/) then
824
+ self.lex_state = :expr_beg
825
+ self.yacc_value = "||"
826
+ return :tOP_ASGN
827
+ elsif src.scan(/\|\|/) then
828
+ self.lex_state = :expr_beg
829
+ self.yacc_value = "||"
830
+ return :tOROP
831
+ elsif src.scan(/\|\=/) then
832
+ self.lex_state = :expr_beg
833
+ self.yacc_value = "|"
834
+ return :tOP_ASGN
835
+ elsif src.scan(/\|/) then
836
+ self.fix_arg_lex_state
837
+ self.yacc_value = "|"
838
+ return :tPIPE
839
+ end
840
+ elsif src.scan(/\{/) then
841
+ result = if lex_state.is_argument || lex_state == :expr_end then
842
+ :tLCURLY # block (primary)
843
+ elsif lex_state == :expr_endarg then
844
+ :tLBRACE_ARG # block (expr)
845
+ else
846
+ :tLBRACE # hash
847
+ end
848
+
849
+ self.expr_beg_push "{"
850
+ self.command_start = true unless result == :tLBRACE
851
+
852
+ return result
853
+ elsif src.scan(/[+-]/) then
854
+ sign = src.matched
855
+ utype, type = if sign == "+" then
856
+ [:tUPLUS, :tPLUS]
857
+ else
858
+ [:tUMINUS, :tMINUS]
859
+ end
860
+
861
+ if lex_state == :expr_fname || lex_state == :expr_dot then
862
+ self.lex_state = :expr_arg
863
+ if src.scan(/@/) then
864
+ self.yacc_value = "#{sign}@"
865
+ return utype
866
+ else
867
+ self.yacc_value = sign
868
+ return type
869
+ end
870
+ end
871
+
872
+ if src.scan(/\=/) then
873
+ self.lex_state = :expr_beg
874
+ self.yacc_value = sign
875
+ return :tOP_ASGN
876
+ end
877
+
878
+ if (lex_state == :expr_beg || lex_state == :expr_mid ||
879
+ (lex_state.is_argument && space_seen && !src.check(/\s/))) then
880
+ if lex_state.is_argument then
881
+ arg_ambiguous
882
+ end
883
+
884
+ self.lex_state = :expr_beg
885
+ self.yacc_value = sign
886
+
887
+ if src.check(/\d/) then
888
+ if utype == :tUPLUS then
889
+ return self.parse_number
890
+ else
891
+ return :tUMINUS_NUM
892
+ end
893
+ end
894
+
895
+ return utype
896
+ end
897
+
898
+ self.lex_state = :expr_beg
899
+ self.yacc_value = sign
900
+ return type
901
+ elsif src.check(/\*/) then
902
+ if src.scan(/\*\*=/) then
903
+ self.lex_state = :expr_beg
904
+ self.yacc_value = "**"
905
+ return :tOP_ASGN
906
+ elsif src.scan(/\*\*/) then
907
+ self.yacc_value = "**"
908
+ self.fix_arg_lex_state
909
+ return :tPOW
910
+ elsif src.scan(/\*\=/) then
911
+ self.lex_state = :expr_beg
912
+ self.yacc_value = "*"
913
+ return :tOP_ASGN
914
+ elsif src.scan(/\*/) then
915
+ result = if lex_state.is_argument && space_seen && src.check(/\S/) then
916
+ warning("`*' interpreted as argument prefix")
917
+ :tSTAR
918
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
919
+ :tSTAR
920
+ else
921
+ :tSTAR2
922
+ end
923
+ self.yacc_value = "*"
924
+ self.fix_arg_lex_state
925
+
926
+ return result
927
+ end
928
+ elsif src.check(/\</) then
929
+ if src.scan(/\<\=\>/) then
930
+ self.fix_arg_lex_state
931
+ self.yacc_value = "<=>"
932
+ return :tCMP
933
+ elsif src.scan(/\<\=/) then
934
+ self.fix_arg_lex_state
935
+ self.yacc_value = "<="
936
+ return :tLEQ
937
+ elsif src.scan(/\<\<\=/) then
938
+ self.fix_arg_lex_state
939
+ self.lex_state = :expr_beg
940
+ self.yacc_value = "\<\<"
941
+ return :tOP_ASGN
942
+ elsif src.scan(/\<\</) then
943
+ if (! [:expr_end, :expr_dot,
944
+ :expr_endarg, :expr_class].include?(lex_state) &&
945
+ (!lex_state.is_argument || space_seen)) then
946
+ tok = self.heredoc_identifier
947
+ if tok then
948
+ return tok
949
+ end
950
+ end
951
+
952
+ self.fix_arg_lex_state
953
+ self.yacc_value = "\<\<"
954
+ return :tLSHFT
955
+ elsif src.scan(/\</) then
956
+ self.fix_arg_lex_state
957
+ self.yacc_value = "<"
958
+ return :tLT
959
+ end
960
+ elsif src.check(/\>/) then
961
+ if src.scan(/\>\=/) then
962
+ self.fix_arg_lex_state
963
+ self.yacc_value = ">="
964
+ return :tGEQ
965
+ elsif src.scan(/\>\>=/) then
966
+ self.fix_arg_lex_state
967
+ self.lex_state = :expr_beg
968
+ self.yacc_value = ">>"
969
+ return :tOP_ASGN
970
+ elsif src.scan(/\>\>/) then
971
+ self.fix_arg_lex_state
972
+ self.yacc_value = ">>"
973
+ return :tRSHFT
974
+ elsif src.scan(/\>/) then
975
+ self.fix_arg_lex_state
976
+ self.yacc_value = ">"
977
+ return :tGT
978
+ end
979
+ elsif src.scan(/\`/) then
980
+ self.yacc_value = "`"
981
+ case lex_state
982
+ when :expr_fname then
983
+ self.lex_state = :expr_end
984
+ return :tBACK_REF2
985
+ when :expr_dot then
986
+ self.lex_state = if command_state then
987
+ :expr_cmdarg
988
+ else
989
+ :expr_arg
990
+ end
991
+ return :tBACK_REF2
992
+ end
993
+ self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
994
+ return :tXSTRING_BEG
995
+ elsif src.scan(/\?/) then
996
+ if lex_state == :expr_end || lex_state == :expr_endarg then
997
+ self.lex_state = :expr_beg
998
+ self.yacc_value = "?"
999
+ return :tEH
1000
+ end
1001
+
1002
+ if src.eos? then
1003
+ rb_compile_error "incomplete character syntax"
1004
+ end
1005
+
1006
+ if src.check(/\s|\v/) then
1007
+ unless lex_state.is_argument then
1008
+ c2 = { " " => 's',
1009
+ "\n" => 'n',
1010
+ "\t" => 't',
1011
+ "\v" => 'v',
1012
+ "\r" => 'r',
1013
+ "\f" => 'f' }[src.matched]
1014
+
1015
+ if c2 then
1016
+ warning("invalid character syntax; use ?\\" + c2)
1017
+ end
1018
+ end
1019
+
1020
+ # ternary
1021
+ self.lex_state = :expr_beg
1022
+ self.yacc_value = "?"
1023
+ return :tEH
1024
+ elsif src.check(/\w(?=\w)/) then # ternary, also
1025
+ self.lex_state = :expr_beg
1026
+ self.yacc_value = "?"
1027
+ return :tEH
1028
+ end
1029
+
1030
+ c = if src.scan(/\\/) then
1031
+ self.read_escape
1032
+ else
1033
+ src.getch
1034
+ end
1035
+ self.lex_state = :expr_end
1036
+ self.yacc_value = c[0].ord & 0xff
1037
+ return :tINTEGER
1038
+ elsif src.check(/\&/) then
1039
+ if src.scan(/\&\&\=/) then
1040
+ self.yacc_value = "&&"
1041
+ self.lex_state = :expr_beg
1042
+ return :tOP_ASGN
1043
+ elsif src.scan(/\&\&/) then
1044
+ self.lex_state = :expr_beg
1045
+ self.yacc_value = "&&"
1046
+ return :tANDOP
1047
+ elsif src.scan(/\&\=/) then
1048
+ self.yacc_value = "&"
1049
+ self.lex_state = :expr_beg
1050
+ return :tOP_ASGN
1051
+ elsif src.scan(/&/) then
1052
+ result = if lex_state.is_argument && space_seen &&
1053
+ !src.check(/\s/) then
1054
+ warning("`&' interpreted as argument prefix")
1055
+ :tAMPER
1056
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
1057
+ :tAMPER
1058
+ else
1059
+ :tAMPER2
1060
+ end
1061
+
1062
+ self.fix_arg_lex_state
1063
+ self.yacc_value = "&"
1064
+ return result
1065
+ end
1066
+ elsif src.scan(/\//) then
1067
+ if lex_state == :expr_beg || lex_state == :expr_mid then
1068
+ self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
1069
+ self.yacc_value = "/"
1070
+ return :tREGEXP_BEG
1071
+ end
1072
+
1073
+ if src.scan(/\=/) then
1074
+ self.yacc_value = "/"
1075
+ self.lex_state = :expr_beg
1076
+ return :tOP_ASGN
1077
+ end
1078
+
1079
+ if lex_state.is_argument && space_seen then
1080
+ unless src.scan(/\s/) then
1081
+ arg_ambiguous
1082
+ self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
1083
+ self.yacc_value = "/"
1084
+ return :tREGEXP_BEG
1085
+ end
1086
+ end
1087
+
1088
+ self.fix_arg_lex_state
1089
+ self.yacc_value = "/"
1090
+
1091
+ return :tDIVIDE
1092
+ elsif src.scan(/\^=/) then
1093
+ self.lex_state = :expr_beg
1094
+ self.yacc_value = "^"
1095
+ return :tOP_ASGN
1096
+ elsif src.scan(/\^/) then
1097
+ self.fix_arg_lex_state
1098
+ self.yacc_value = "^"
1099
+ return :tCARET
1100
+ elsif src.scan(/\;/) then
1101
+ self.command_start = true
1102
+ self.lex_state = :expr_beg
1103
+ self.yacc_value = ";"
1104
+ return :tSEMI
1105
+ elsif src.scan(/\~/) then
1106
+ if lex_state == :expr_fname || lex_state == :expr_dot then
1107
+ src.scan(/@/)
1108
+ end
1109
+
1110
+ self.fix_arg_lex_state
1111
+ self.yacc_value = "~"
1112
+
1113
+ return :tTILDE
1114
+ elsif src.scan(/\\/) then
1115
+ if src.scan(/\n/) then
1116
+ self.lineno = nil
1117
+ space_seen = true
1118
+ next
1119
+ end
1120
+ rb_compile_error "bare backslash only allowed before newline"
1121
+ elsif src.scan(/\%/) then
1122
+ if lex_state == :expr_beg || lex_state == :expr_mid then
1123
+ return parse_quote
1124
+ end
1125
+
1126
+ if src.scan(/\=/) then
1127
+ self.lex_state = :expr_beg
1128
+ self.yacc_value = "%"
1129
+ return :tOP_ASGN
1130
+ end
1131
+
1132
+ if lex_state.is_argument && space_seen && ! src.check(/\s/) then
1133
+ return parse_quote
1134
+ end
1135
+
1136
+ self.fix_arg_lex_state
1137
+ self.yacc_value = "%"
1138
+
1139
+ return :tPERCENT
1140
+ elsif src.check(/\$/) then
1141
+ if src.scan(/(\$_)(\w+)/) then
1142
+ self.lex_state = :expr_end
1143
+ self.token = src.matched
1144
+ return process_token(command_state)
1145
+ elsif src.scan(/\$_/) then
1146
+ self.lex_state = :expr_end
1147
+ self.token = src.matched
1148
+ self.yacc_value = src.matched
1149
+ return :tGVAR
1150
+ elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
1151
+ self.lex_state = :expr_end
1152
+ self.yacc_value = src.matched
1153
+ return :tGVAR
1154
+ elsif src.scan(/\$([\&\`\'\+])/) then
1155
+ self.lex_state = :expr_end
1156
+ # Explicit reference to these vars as symbols...
1157
+ if last_state == :expr_fname then
1158
+ self.yacc_value = src.matched
1159
+ return :tGVAR
1160
+ else
1161
+ self.yacc_value = src[1].to_sym
1162
+ return :tBACK_REF
1163
+ end
1164
+ elsif src.scan(/\$([1-9]\d*)/) then
1165
+ self.lex_state = :expr_end
1166
+ if last_state == :expr_fname then
1167
+ self.yacc_value = src.matched
1168
+ return :tGVAR
1169
+ else
1170
+ self.yacc_value = src[1].to_i
1171
+ return :tNTH_REF
1172
+ end
1173
+ elsif src.scan(/\$0/) then
1174
+ self.lex_state = :expr_end
1175
+ self.token = src.matched
1176
+ return process_token(command_state)
1177
+ elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
1178
+ self.lex_state = :expr_end
1179
+ self.yacc_value = "$"
1180
+ return "$"
1181
+ elsif src.scan(/\$\w+/)
1182
+ self.lex_state = :expr_end
1183
+ self.token = src.matched
1184
+ return process_token(command_state)
1185
+ end
1186
+ elsif src.check(/\_/) then
1187
+ if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
1188
+ self.lineno = nil
1189
+ return RubyLexer::EOF
1190
+ elsif src.scan(/\_\w*/) then
1191
+ self.token = src.matched
1192
+ return process_token(command_state)
1193
+ end
1194
+ end
1195
+ end # END OF CASE
1196
+
1197
+ if src.scan(/\004|\032|\000/) || src.eos? then # ^D, ^Z, EOF
1198
+ return RubyLexer::EOF
1199
+ else # alpha check
1200
+ if src.scan(/\W/) then
1201
+ rb_compile_error "Invalid char #{src.matched.inspect} in expression"
1202
+ end
1203
+ end
1204
+
1205
+ self.token = src.matched if self.src.scan(/\w+/)
1206
+
1207
+ return process_token(command_state)
1208
+ end
1209
+ end
1210
+
1211
+ def process_token(command_state) # Classifies the text held in +token+; returns the token type (:tGVAR, :tCVAR, :tIVAR, :tFID, :tHASHKEY, :tCONSTANT, :tIDENTIFIER, or a keyword id) and updates yacc_value / lex_state.
1212
+ # command_state: when truthy, a non-keyword name seen in one of the states listed near the end moves lex_state to :expr_cmdarg instead of :expr_arg.
1213
+ token << src.matched if token =~ /^\w/ && src.scan(/[\!\?](?!=)/) # word token: absorb one trailing ! or ? unless it is followed by =
1214
+
1215
+ result = nil
1216
+ last_state = lex_state # state on entry; consulted by the :lvar check at the bottom
1217
+ # Names starting with $, @@ or @ are variables and always leave lex_state at :expr_end.
1218
+ # Everything else goes through the else branch: method-ish names, hash keys, constants, identifiers and keywords.
1219
+ case token
1220
+ when /^\$/ then
1221
+ self.lex_state, result = :expr_end, :tGVAR
1222
+ when /^@@/ then
1223
+ self.lex_state, result = :expr_end, :tCVAR
1224
+ when /^@/ then
1225
+ self.lex_state, result = :expr_end, :tIVAR
1226
+ else
1227
+ if token =~ /[!?]$/ then # name ends in ! or ?
1228
+ result = :tFID
1229
+ else
1230
+ if lex_state == :expr_fname then
1231
+ # ident=  -- absorb a trailing '=' unless the next char is ~, > or = (i.e. not =~ => ==); an '=' directly followed by '=>' is still absorbed
1232
+ # TODO test lexing of a=>b vs a==>b
1233
+ if src.scan(/=(?:(?![~>=])|(?==>))/) then
1234
+ result = :tIDENTIFIER
1235
+ token << src.matched
1236
+ end
1237
+ end
1238
+ # A single ':' (not '::') directly after the name makes it a hash key: yacc_value is set and we return at once, skipping the keyword lookup and the lex_state update below.
1239
+ if src.scan(/:(?!:)/)
1240
+ result = :tHASHKEY
1241
+ token << src.matched
1242
+ self.yacc_value = token
1243
+ return result
1244
+ end
1245
+ # Unless the setter branch above already chose :tIDENTIFIER, decide by the first letter: A-Z means constant.
1246
+ result ||= if token =~ /^[A-Z]/ then
1247
+ :tCONSTANT
1248
+ else
1249
+ :tIDENTIFIER
1250
+ end
1251
+ end
1252
+ # Keyword lookup is skipped when lex_state is :expr_dot (presumably a method name after '.' -- confirm against the caller).
1253
+ unless lex_state == :expr_dot then
1254
+ # See if it is a reserved word.
1255
+ keyword = RubyParser::Keyword.keyword token
1256
+
1257
+ if keyword then
1258
+ state = lex_state # state before the keyword overrides it on the next line
1259
+ self.lex_state = keyword.state
1260
+ self.yacc_value = [token, src.lineno]
1261
+ # In :expr_fname the keyword is reported with yacc_value = keyword.name and type id0.
1262
+ if state == :expr_fname then
1263
+ self.yacc_value = keyword.name
1264
+ return keyword.id0
1265
+ end
1266
+ # 'do' maps to :kDO_COND, :kDO_BLOCK or :kDO depending on cond, cmdarg and the prior state; the checks are tried in this order.
1267
+ if keyword.id0 == :kDO then
1268
+ self.command_start = true
1269
+ return :kDO_COND if cond.is_in_state
1270
+ return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
1271
+ return :kDO_BLOCK if state == :expr_endarg
1272
+ return :kDO
1273
+ end
1274
+ # id0 when the prior state was :expr_beg or :expr_value; otherwise id1 (presumably the modifier form -- confirm in Keyword).
1275
+ return keyword.id0 if state == :expr_beg or state == :expr_value
1276
+ # When the two ids differ, lex_state is forced to :expr_beg before id1 is returned.
1277
+ self.lex_state = :expr_beg if keyword.id0 != keyword.id1
1278
+
1279
+ return keyword.id1
1280
+ end
1281
+ end
1282
+ # No keyword was returned: choose the next lex_state from the current one and command_state.
1283
+ if (lex_state == :expr_beg || lex_state == :expr_mid ||
1284
+ lex_state == :expr_dot || lex_state == :expr_arg ||
1285
+ lex_state == :expr_cmdarg) then
1286
+ if command_state then
1287
+ self.lex_state = :expr_cmdarg
1288
+ else
1289
+ self.lex_state = :expr_arg
1290
+ end
1291
+ else
1292
+ self.lex_state = :expr_end
1293
+ end
1294
+ end
1295
+ # Reached by variables and non-keyword names only; the hash-key and keyword paths returned earlier with their own yacc_value.
1296
+ self.yacc_value = token
1297
+ # A name that self.parser.env records as :lvar forces :expr_end,
1298
+ # unless the state on entry (last_state) was :expr_dot.
1299
+ self.lex_state = :expr_end if
1300
+ last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar
1301
+
1302
+ return result
1303
+ end
1304
+
1305
+ def yylex_string # 23 lines (original note). Produces the next token from the active lex_strterm and returns it.
1306
+ token = if lex_strterm[0] == :heredoc then # NOTE: this assigns a local variable named token, not self.token
1307
+ self.heredoc lex_strterm
1308
+ else
1309
+ self.parse_string lex_strterm
1310
+ end
1311
+ # On a closing token (:tSTRING_END or :tREGEXP_END) clear lex_strterm and set lex_state to :expr_end; lineno is set to nil (presumably to invalidate a cached line number -- confirm in lineno=).
1312
+ if token == :tSTRING_END || token == :tREGEXP_END then
1313
+ self.lineno = nil
1314
+ self.lex_strterm = nil
1315
+ self.lex_state = :expr_end
1316
+ end
1317
+ # Any other token is passed through with the lexer state left untouched by this method.
1318
+ return token
1319
+ end
1320
+ end