ruby_parser 3.3.0 → 3.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ruby20_parser.rb CHANGED
@@ -4913,13 +4913,15 @@ def _reduce_311(val, _values, result)
4913
4913
  end
4914
4914
 
4915
4915
  def _reduce_312(val, _values, result)
4916
- result = new_case val[1], val[3]
4916
+ (_, line), expr, _, body, _ = val
4917
+ result = new_case expr, body, line
4917
4918
 
4918
4919
  result
4919
4920
  end
4920
4921
 
4921
4922
  def _reduce_313(val, _values, result)
4922
- result = new_case nil, val[2]
4923
+ (_, line), _, body, _ = val
4924
+ result = new_case nil, body, line
4923
4925
 
4924
4926
  result
4925
4927
  end
@@ -5514,13 +5516,23 @@ def _reduce_414(val, _values, result)
5514
5516
  end
5515
5517
 
5516
5518
  def _reduce_415(val, _values, result)
5517
- debug20 21, val, result
5519
+ iter1, _, name, args, iter2 = val
5520
+
5521
+ call = new_call iter1, name.to_sym, args
5522
+ iter2.insert 1, call
5523
+
5524
+ result = iter2
5518
5525
 
5519
5526
  result
5520
5527
  end
5521
5528
 
5522
5529
  def _reduce_416(val, _values, result)
5523
- debug20 22, val, result
5530
+ iter1, _, name, args, iter2 = val
5531
+
5532
+ call = new_call iter1, name.to_sym, args
5533
+ iter2.insert 1, call
5534
+
5535
+ result = iter2
5524
5536
 
5525
5537
  result
5526
5538
  end
@@ -6088,7 +6100,7 @@ def _reduce_510(val, _values, result)
6088
6100
  end
6089
6101
 
6090
6102
  def _reduce_511(val, _values, result)
6091
- result = s(:lit, lexer.src.current_line)
6103
+ result = s(:lit, lexer.lineno)
6092
6104
  result
6093
6105
  end
6094
6106
 
data/lib/ruby20_parser.y CHANGED
@@ -1058,11 +1058,13 @@ rule
1058
1058
  }
1059
1059
  | kCASE expr_value opt_terms case_body kEND
1060
1060
  {
1061
- result = new_case val[1], val[3]
1061
+ (_, line), expr, _, body, _ = val
1062
+ result = new_case expr, body, line
1062
1063
  }
1063
1064
  | kCASE opt_terms case_body kEND
1064
1065
  {
1065
- result = new_case nil, val[2]
1066
+ (_, line), _, body, _ = val
1067
+ result = new_case nil, body, line
1066
1068
  }
1067
1069
  | kFOR for_var kIN
1068
1070
  {
@@ -1501,11 +1503,21 @@ opt_block_args_tail: tCOMMA block_args_tail
1501
1503
  }
1502
1504
  | block_call dot_or_colon operation2 opt_paren_args brace_block
1503
1505
  {
1504
- debug20 21, val, result
1506
+ iter1, _, name, args, iter2 = val
1507
+
1508
+ call = new_call iter1, name.to_sym, args
1509
+ iter2.insert 1, call
1510
+
1511
+ result = iter2
1505
1512
  }
1506
1513
  | block_call dot_or_colon operation2 command_args do_block
1507
1514
  {
1508
- debug20 22, val, result
1515
+ iter1, _, name, args, iter2 = val
1516
+
1517
+ call = new_call iter1, name.to_sym, args
1518
+ iter2.insert 1, call
1519
+
1520
+ result = iter2
1509
1521
  }
1510
1522
 
1511
1523
  method_call: fcall
@@ -1916,7 +1928,7 @@ keyword_variable: kNIL { result = s(:nil) }
1916
1928
  | kTRUE { result = s(:true) }
1917
1929
  | kFALSE { result = s(:false) }
1918
1930
  | k__FILE__ { result = s(:str, self.file) }
1919
- | k__LINE__ { result = s(:lit, lexer.src.current_line) }
1931
+ | k__LINE__ { result = s(:lit, lexer.lineno) }
1920
1932
  | k__ENCODING__
1921
1933
  {
1922
1934
  result =
data/lib/ruby_lexer.rb CHANGED
@@ -11,11 +11,6 @@ class RubyLexer
11
11
  /[\w\x80-\xFF]/n
12
12
  end
13
13
 
14
- IDENT = /^#{IDENT_CHAR}+/o
15
- ESC = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]+|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/u
16
- SIMPLE_STRING = /(#{ESC}|#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*/o
17
- SIMPLE_SSTRING = /(\\.|[^\'])*/
18
-
19
14
  EOF = :eof_haha!
20
15
 
21
16
  # ruby constants for strings (should this be moved somewhere else?)
@@ -75,6 +70,8 @@ class RubyLexer
75
70
  attr_accessor :brace_nest
76
71
  attr_accessor :cmdarg
77
72
  attr_accessor :command_start
73
+ attr_accessor :command_state
74
+ attr_accessor :last_state
78
75
  attr_accessor :cond
79
76
 
80
77
  ##
@@ -91,11 +88,7 @@ class RubyLexer
91
88
  attr_accessor :string_buffer
92
89
  attr_accessor :string_nest
93
90
 
94
- # Stream of data that yylex examines.
95
- attr_reader :src
96
- alias :ss :src
97
-
98
- # Last token read via yylex.
91
+ # Last token read via next_token.
99
92
  attr_accessor :token
100
93
 
101
94
  ##
@@ -104,11 +97,6 @@ class RubyLexer
104
97
 
105
98
  attr_accessor :version
106
99
 
107
- # Value of last token which had a value associated with it.
108
- attr_accessor :yacc_value
109
-
110
- attr_writer :lineno # reader is lazy initalizer
111
-
112
100
  attr_writer :comments
113
101
 
114
102
  def initialize v = 18
@@ -117,20 +105,6 @@ class RubyLexer
117
105
  reset
118
106
  end
119
107
 
120
- ##
121
- # How the parser advances to the next token.
122
- #
123
- # @return true if not at end of file (EOF).
124
-
125
- def advance
126
- r = yylex
127
- self.token = r
128
-
129
- raise "yylex returned nil, near #{ss.rest[0,10].inspect}" unless r
130
-
131
- return RubyLexer::EOF != r
132
- end
133
-
134
108
  def arg_ambiguous
135
109
  self.warning("Ambiguous first argument. make sure.")
136
110
  end
@@ -142,6 +116,7 @@ class RubyLexer
142
116
  def beginning_of_line?
143
117
  ss.bol?
144
118
  end
119
+ alias :bol? :beginning_of_line? # to make .rex file more readable
145
120
 
146
121
  def check re
147
122
  ss.check re
@@ -174,9 +149,9 @@ class RubyLexer
174
149
  rb_compile_error err_msg if end_of_stream?
175
150
 
176
151
  if beginning_of_line? && scan(eos_re) then
152
+ self.lineno += 1
177
153
  ss.unread_many last_line # TODO: figure out how to remove this
178
- self.yacc_value = eos
179
- return :tSTRING_END
154
+ return :tSTRING_END, eos
180
155
  end
181
156
 
182
157
  self.string_buffer = []
@@ -185,11 +160,9 @@ class RubyLexer
185
160
  case
186
161
  when scan(/#[$@]/) then
187
162
  ss.pos -= 1 # FIX omg stupid
188
- self.yacc_value = matched
189
- return :tSTRING_DVAR
163
+ return :tSTRING_DVAR, matched
190
164
  when scan(/#[{]/) then
191
- self.yacc_value = matched
192
- return :tSTRING_DBEG
165
+ return :tSTRING_DBEG, matched
193
166
  when scan(/#/) then
194
167
  string_buffer << '#'
195
168
  end
@@ -201,9 +174,9 @@ class RubyLexer
201
174
  c == RubyLexer::EOF
202
175
 
203
176
  if c != "\n" then
204
- self.yacc_value = string_buffer.join.delete("\r")
205
- return :tSTRING_CONTENT
177
+ return :tSTRING_CONTENT, string_buffer.join.delete("\r")
206
178
  else
179
+ self.lineno += 1
207
180
  string_buffer << scan(/\n/)
208
181
  end
209
182
 
@@ -218,8 +191,7 @@ class RubyLexer
218
191
 
219
192
  self.lex_strterm = [:heredoc, eos, func, last_line]
220
193
 
221
- self.yacc_value = string_buffer.join.delete("\r")
222
- return :tSTRING_CONTENT
194
+ return :tSTRING_CONTENT, string_buffer.join.delete("\r")
223
195
  end
224
196
 
225
197
  def heredoc_identifier # TODO: remove / rewrite
@@ -255,7 +227,6 @@ class RubyLexer
255
227
  if scan(/.*\n/) then
256
228
  # TODO: think about storing off the char range instead
257
229
  line = matched
258
- ss.extra_lines_added += 1 # FIX: ugh
259
230
  else
260
231
  line = nil
261
232
  end
@@ -263,14 +234,16 @@ class RubyLexer
263
234
  self.lex_strterm = [:heredoc, string_buffer.join, func, line]
264
235
 
265
236
  if term == '`' then
266
- self.yacc_value = "`"
267
- return :tXSTRING_BEG
237
+ result nil, :tXSTRING_BEG, "`"
268
238
  else
269
- self.yacc_value = "\""
270
- return :tSTRING_BEG
239
+ result nil, :tSTRING_BEG, "\""
271
240
  end
272
241
  end
273
242
 
243
+ def in_fname?
244
+ in_lex_state? :expr_fname
245
+ end
246
+
274
247
  def in_arg_state? # TODO: rename is_after_operator?
275
248
  in_lex_state? :expr_fname, :expr_dot
276
249
  end
@@ -281,9 +254,7 @@ class RubyLexer
281
254
 
282
255
  def int_with_base base
283
256
  rb_compile_error "Invalid numeric format" if matched =~ /__/
284
-
285
- self.yacc_value = matched.to_i(base)
286
- return :tINTEGER
257
+ return result(:expr_end, :tINTEGER, matched.to_i(base))
287
258
  end
288
259
 
289
260
  def is_arg?
@@ -298,7 +269,7 @@ class RubyLexer
298
269
  in_lex_state? :expr_end, :expr_endarg, :expr_endfn
299
270
  end
300
271
 
301
- def is_label_possible? command_state
272
+ def is_label_possible?
302
273
  (in_lex_state?(:expr_beg, :expr_endfn) && !command_state) || is_arg?
303
274
  end
304
275
 
@@ -306,170 +277,396 @@ class RubyLexer
306
277
  is_arg? and space_seen and c !~ /\s/
307
278
  end
308
279
 
309
- def lineno
310
- @lineno ||= ss.lineno
311
- end
312
-
313
280
  def matched
314
281
  ss.matched
315
282
  end
316
283
 
317
- ##
318
- # Parse a number from the input stream.
319
- #
320
- # @param c The first character of the number.
321
- # @return A int constant wich represents a token.
284
+ def not_end?
285
+ not is_end?
286
+ end
322
287
 
323
- def parse_number
324
- self.lex_state = :expr_end
288
+ def process_amper text
289
+ token = if is_arg? && space_seen && !check(/\s/) then
290
+ warning("`&' interpreted as argument prefix")
291
+ :tAMPER
292
+ elsif in_lex_state? :expr_beg, :expr_mid then
293
+ :tAMPER
294
+ else
295
+ :tAMPER2
296
+ end
297
+
298
+ return result(:arg_state, token, "&")
299
+ end
300
+
301
+ def process_backref text
302
+ token = ss[1].to_sym
303
+ # TODO: can't do lineno hack w/ symbol
304
+ result :expr_end, :tBACK_REF, token
305
+ end
306
+
307
+ def process_backtick text
308
+ case lex_state
309
+ when :expr_fname then
310
+ result :expr_end, :tBACK_REF2, "`"
311
+ when :expr_dot then
312
+ result((command_state ? :expr_cmdarg : :expr_arg), :tBACK_REF2, "`")
313
+ else
314
+ string STR_XQUOTE
315
+ result nil, :tXSTRING_BEG, "`"
316
+ end
317
+ end
318
+
319
+ def process_bang text
320
+ if in_arg_state? then
321
+ return result(:expr_arg, :tUBANG, "!@") if scan(/@/)
322
+ end
323
+
324
+ text = scan(/[=~]/) ? "!#{matched}" : "!"
325
+
326
+ return result(arg_state, TOKENS[text], text)
327
+ end
328
+
329
+ def process_begin text
330
+ @comments << matched
331
+
332
+ unless scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
333
+ @comments.clear
334
+ rb_compile_error("embedded document meets end of file")
335
+ end
336
+
337
+ @comments << matched
338
+
339
+ nil # TODO
340
+ end
341
+
342
+ def process_bracing text
343
+ cond.lexpop
344
+ cmdarg.lexpop
345
+
346
+ case matched
347
+ when "}" then
348
+ self.brace_nest -= 1
349
+ self.lex_state = :expr_endarg
350
+ return :tRCURLY, matched
351
+ when "]" then
352
+ self.paren_nest -= 1
353
+ self.lex_state = :expr_endarg
354
+ return :tRBRACK, matched
355
+ when ")" then
356
+ self.paren_nest -= 1
357
+ self.lex_state = :expr_endfn
358
+ return :tRPAREN, matched
359
+ else
360
+ raise "Unknown bracing: #{matched.inspect}"
361
+ end
362
+ end
363
+
364
+ def process_colon1 text
365
+ # ?: / then / when
366
+ if is_end? || check(/\s/) then
367
+ return result :expr_beg, :tCOLON, text
368
+ end
325
369
 
326
370
  case
327
- when scan(/[+-]?0[xXbBdD]\b/) then
328
- rb_compile_error "Invalid numeric format"
329
- when scan(/[+-]?(?:(?:[1-9][\d_]*|0)(?!\.\d)\b|0[Dd][0-9_]+)/) then
330
- int_with_base(10)
331
- when scan(/[+-]?0x[a-f0-9_]+/i) then
332
- int_with_base(16)
333
- when scan(/[+-]?0[Bb][01_]+/) then
334
- int_with_base(2)
335
- when scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
336
- rb_compile_error "Illegal octal digit."
337
- when scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
338
- int_with_base(8)
339
- when scan(/[+-]?[\d_]+_(e|\.)/) then
340
- rb_compile_error "Trailing '_' in number."
341
- when scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
342
- number = matched
343
- if number =~ /__/ then
344
- rb_compile_error "Invalid numeric format"
345
- end
346
- self.yacc_value = number.to_f
347
- :tFLOAT
348
- when scan(/[+-]?[0-9_]+(?![e])/) then
349
- int_with_base(10)
371
+ when scan(/\'/) then
372
+ string STR_SSYM
373
+ when scan(/\"/) then
374
+ string STR_DSYM
375
+ end
376
+
377
+ result :expr_fname, :tSYMBEG, text
378
+ end
379
+
380
+ def process_colon2 text
381
+ if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
382
+ result :expr_beg, :tCOLON3, text
350
383
  else
351
- rb_compile_error "Bad number format"
384
+ result :expr_dot, :tCOLON2, text
352
385
  end
353
386
  end
354
387
 
355
- def parse_quote # TODO: remove / rewrite
356
- beg, nnd, short_hand, c = nil, nil, false, nil
388
+ def process_curly_brace text
389
+ self.brace_nest += 1
390
+ if lpar_beg && lpar_beg == paren_nest then
391
+ self.lpar_beg = nil
392
+ self.paren_nest -= 1
357
393
 
358
- if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
359
- rb_compile_error "unknown type of %string" if ss.matched_size == 2
360
- c, beg, short_hand = matched, ss.getch, false
361
- else # Short-hand (e.g. %{, %., %!, etc)
362
- c, beg, short_hand = 'Q', ss.getch, true
394
+ return expr_result(:tLAMBEG, "{")
363
395
  end
364
396
 
365
- if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
366
- rb_compile_error "unterminated quoted string meets end of file"
397
+ token = if is_arg? || in_lex_state?(:expr_end, :expr_endfn) then
398
+ :tLCURLY # block (primary)
399
+ elsif in_lex_state?(:expr_endarg) then
400
+ :tLBRACE_ARG # block (expr)
401
+ else
402
+ :tLBRACE # hash
403
+ end
404
+
405
+ self.command_start = true unless token == :tLBRACE
406
+
407
+ return expr_result(token, "{")
408
+ end
409
+
410
+ def process_float text
411
+ rb_compile_error "Invalid numeric format" if text =~ /__/
412
+ return result(:expr_end, :tFLOAT, text.to_f)
413
+ end
414
+
415
+ def process_gvar text
416
+ text.lineno = self.lineno
417
+ result(:expr_end, :tGVAR, text)
418
+ end
419
+
420
+ def process_gvar_oddity text
421
+ result :expr_end, "$", "$" # TODO: wtf is this?
422
+ end
423
+
424
+ def process_ivar text
425
+ tok_id = text =~ /^@@/ ? :tCVAR : :tIVAR
426
+ text.lineno = self.lineno
427
+ return result(:expr_end, tok_id, text)
428
+ end
429
+
430
+ def process_lchevron text
431
+ if (!in_lex_state?(:expr_dot, :expr_class) &&
432
+ !is_end? &&
433
+ (!is_arg? || space_seen)) then
434
+ tok = self.heredoc_identifier
435
+ return tok if tok
367
436
  end
368
437
 
369
- # Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
370
- nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
371
- nnd, beg = beg, "\0" if nnd.nil?
438
+ return result(:arg_state, :tLSHFT, "\<\<")
439
+ end
372
440
 
373
- token_type, text = nil, "%#{c}#{beg}"
374
- token_type, string_type = case c
375
- when 'Q' then
376
- ch = short_hand ? nnd : c + beg
377
- text = "%#{ch}"
378
- [:tSTRING_BEG, STR_DQUOTE]
379
- when 'q' then
380
- [:tSTRING_BEG, STR_SQUOTE]
381
- when 'W' then
382
- scan(/\s*/)
383
- [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
384
- when 'w' then
385
- scan(/\s*/)
386
- [:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
387
- when 'x' then
388
- [:tXSTRING_BEG, STR_XQUOTE]
389
- when 'r' then
390
- [:tREGEXP_BEG, STR_REGEXP]
391
- when 's' then
392
- self.lex_state = :expr_fname
393
- [:tSYMBEG, STR_SSYM]
394
- when 'I' then
395
- src.scan(/\s*/)
396
- [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
397
- when 'i' then
398
- src.scan(/\s*/)
399
- [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
400
- end
441
+ def process_newline_or_comment text
442
+ c = matched
443
+ hit = false
401
444
 
402
- rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
403
- token_type.nil?
445
+ if c == '#' then
446
+ ss.pos -= 1
404
447
 
405
- raise "huh" unless string_type
448
+ while scan(/\s*\#.*(\n+|\z)/) do
449
+ hit = true
450
+ self.lineno += matched.lines.to_a.size
451
+ @comments << matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
452
+ end
406
453
 
407
- string string_type, nnd, beg
454
+ return nil if end_of_stream?
455
+ end
456
+
457
+ self.lineno += 1 unless hit
458
+
459
+ # Replace a string of newlines with a single one
460
+ self.lineno += matched.lines.to_a.size if scan(/\n+/)
461
+
462
+ return if in_lex_state?(:expr_beg, :expr_value, :expr_class,
463
+ :expr_fname, :expr_dot)
464
+
465
+ if scan(/([\ \t\r\f\v]*)\./) then
466
+ self.space_seen = true unless ss[1].empty?
467
+
468
+ ss.pos -= 1
469
+ return unless check(/\.\./)
470
+ end
471
+
472
+ self.command_start = true
408
473
 
409
- self.yacc_value = text
410
- return token_type
474
+ return result(:expr_beg, :tNL, nil)
411
475
  end
412
476
 
413
- def parse_string quote # TODO: rewrite / remove
414
- _, string_type, term, open = quote
477
+ def process_nthref text
478
+ # TODO: can't do lineno hack w/ number
479
+ result :expr_end, :tNTH_REF, ss[1].to_i
480
+ end
415
481
 
416
- space = false # FIX: remove these
417
- func = string_type
418
- paren = open
419
- term_re = @@regexp_cache[term]
482
+ def process_paren text
483
+ token = if ruby18 then
484
+ process_paren18
485
+ else
486
+ process_paren19
487
+ end
420
488
 
421
- qwords = (func & STR_FUNC_QWORDS) != 0
422
- regexp = (func & STR_FUNC_REGEXP) != 0
423
- expand = (func & STR_FUNC_EXPAND) != 0
489
+ self.paren_nest += 1
424
490
 
425
- unless func then # nil'ed from qwords below. *sigh*
426
- self.lineno = nil
427
- return :tSTRING_END
491
+ return expr_result(token, "(")
492
+ end
493
+
494
+ def process_paren18
495
+ self.command_start = true
496
+ token = :tLPAREN2
497
+
498
+ if in_lex_state? :expr_beg, :expr_mid then
499
+ token = :tLPAREN
500
+ elsif space_seen then
501
+ if in_lex_state? :expr_cmdarg then
502
+ token = :tLPAREN_ARG
503
+ elsif in_lex_state? :expr_arg then
504
+ warning "don't put space before argument parentheses"
505
+ end
506
+ else
507
+ # not a ternary -- do nothing?
428
508
  end
429
509
 
430
- space = true if qwords and scan(/\s+/)
510
+ token
511
+ end
431
512
 
432
- if self.string_nest == 0 && scan(/#{term_re}/) then
433
- if qwords then
434
- quote[1] = nil
435
- return :tSPACE
436
- elsif regexp then
437
- self.lineno = nil
438
- self.yacc_value = self.regx_options
439
- return :tREGEXP_END
513
+ def process_paren19
514
+ if is_beg? then
515
+ :tLPAREN
516
+ elsif is_space_arg? then
517
+ :tLPAREN_ARG
518
+ else
519
+ :tLPAREN2 # plain '(' in parse.y
520
+ end
521
+ end
522
+
523
+ def process_percent text
524
+ return parse_quote if is_beg?
525
+
526
+ return result(:expr_beg, :tOP_ASGN, "%") if scan(/\=/)
527
+
528
+ return parse_quote if is_arg? && space_seen && ! check(/\s/)
529
+
530
+ return result(:arg_state, :tPERCENT, "%")
531
+ end
532
+
533
+ def process_plus_minus text
534
+ sign = matched
535
+ utype, type = if sign == "+" then
536
+ [:tUPLUS, :tPLUS]
537
+ else
538
+ [:tUMINUS, :tMINUS]
539
+ end
540
+
541
+ if in_arg_state? then
542
+ if scan(/@/) then
543
+ return result(:expr_arg, utype, "#{sign}@")
440
544
  else
441
- self.lineno = nil
442
- self.yacc_value = term
443
- return :tSTRING_END
545
+ return result(:expr_arg, type, sign)
444
546
  end
445
547
  end
446
548
 
447
- return :tSPACE if space
549
+ return result(:expr_beg, :tOP_ASGN, sign) if scan(/\=/)
448
550
 
449
- self.string_buffer = []
551
+ if (is_beg? || (is_arg? && space_seen && !check(/\s/))) then
552
+ arg_ambiguous if is_arg?
450
553
 
451
- if expand
452
- case
453
- when scan(/#(?=[$@])/) then
454
- return :tSTRING_DVAR
455
- when scan(/#[{]/) then
456
- return :tSTRING_DBEG
457
- when scan(/#/) then
458
- string_buffer << '#'
554
+ if check(/\d/) then
555
+ return nil if utype == :tUPLUS
556
+ return result(:expr_beg, :tUMINUS_NUM, sign)
459
557
  end
558
+
559
+ return result(:expr_beg, utype, sign)
460
560
  end
461
561
 
462
- if tokadd_string(func, term, paren) == RubyLexer::EOF then
463
- rb_compile_error "unterminated string meets end of file"
562
+ return result(:expr_beg, type, sign)
563
+ end
564
+
565
+ def process_questionmark text
566
+ if is_end? then
567
+ state = ruby18 ? :expr_beg : :expr_value # HACK?
568
+ return result(state, :tEH, "?")
569
+ end
570
+
571
+ if end_of_stream? then
572
+ rb_compile_error "incomplete character syntax: parsed #{text.inspect}"
464
573
  end
465
574
 
466
- self.yacc_value = string_buffer.join
575
+ if check(/\s|\v/) then
576
+ unless is_arg? then
577
+ c2 = { " " => 's',
578
+ "\n" => 'n',
579
+ "\t" => 't',
580
+ "\v" => 'v',
581
+ "\r" => 'r',
582
+ "\f" => 'f' }[matched]
583
+
584
+ if c2 then
585
+ warning("invalid character syntax; use ?\\" + c2)
586
+ end
587
+ end
467
588
 
468
- return :tSTRING_CONTENT
589
+ # ternary
590
+ state = ruby18 ? :expr_beg : :expr_value # HACK?
591
+ return result(state, :tEH, "?")
592
+ elsif check(/\w(?=\w)/) then # ternary, also
593
+ return result(:expr_beg, :tEH, "?")
594
+ end
595
+
596
+ c = if scan(/\\/) then
597
+ self.read_escape
598
+ else
599
+ ss.getch
600
+ end
601
+
602
+ if version == 18 then
603
+ return result(:expr_end, :tINTEGER, c[0].ord & 0xff)
604
+ else
605
+ return result(:expr_end, :tSTRING, c)
606
+ end
469
607
  end
470
608
 
471
- def process_token command_state, last_state
472
- token = self.token
609
+ def process_slash text
610
+ if is_beg? then
611
+ string STR_REGEXP
612
+
613
+ return result(nil, :tREGEXP_BEG, "/")
614
+ end
615
+
616
+ if scan(/\=/) then
617
+ return result(:expr_beg, :tOP_ASGN, "/")
618
+ end
619
+
620
+ if is_arg? && space_seen then
621
+ unless scan(/\s/) then
622
+ arg_ambiguous
623
+ string STR_REGEXP, "/"
624
+ return result(nil, :tREGEXP_BEG, "/")
625
+ end
626
+ end
627
+
628
+ return result(:arg_state, :tDIVIDE, "/")
629
+ end
630
+
631
+ def process_square_bracket text
632
+ self.paren_nest += 1
633
+
634
+ token = nil
635
+
636
+ if in_arg_state? then
637
+ case
638
+ when scan(/\]\=/) then
639
+ self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
640
+ return result(:expr_arg, :tASET, "[]=")
641
+ when scan(/\]/) then
642
+ self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
643
+ return result(:expr_arg, :tAREF, "[]")
644
+ else
645
+ rb_compile_error "unexpected '['"
646
+ end
647
+ elsif is_beg? then
648
+ token = :tLBRACK
649
+ elsif is_arg? && space_seen then
650
+ token = :tLBRACK
651
+ else
652
+ token = :tLBRACK2
653
+ end
654
+
655
+ return expr_result(token, "[")
656
+ end
657
+
658
+ def process_symbol text
659
+ symbol = match[1].gsub(ESC) { unescape $1 }
660
+
661
+ rb_compile_error "symbol cannot contain '\\0'" if
662
+ ruby18 && symbol =~ /\0/
663
+
664
+ return result(:expr_end, :tSYMBOL, symbol)
665
+ end
666
+
667
+ def process_token text
668
+ # TODO: make this always return [token, lineno]
669
+ token = self.token = text
473
670
  token << matched if scan(/[\!\?](?!=)/)
474
671
 
475
672
  tok_id =
@@ -487,8 +684,8 @@ class RubyLexer
487
684
  :tIDENTIFIER
488
685
  end
489
686
 
490
- if !ruby18 and is_label_possible?(command_state) and scan(/:(?!:)/) then
491
- return result(:expr_beg, :tLABEL, [token, ss.lineno]) # HACK: array? TODO: self.lineno
687
+ if !ruby18 and is_label_possible? and scan(/:(?!:)/) then
688
+ return result(:expr_beg, :tLABEL, [token, self.lineno])
492
689
  end
493
690
 
494
691
  unless in_lex_state? :expr_dot then
@@ -518,12 +715,15 @@ class RubyLexer
518
715
  state = :expr_end
519
716
  end
520
717
 
718
+ token.lineno = self.lineno # yes, on a string. I know... I know...
719
+
521
720
  return result(state, tok_id, token)
522
721
  end
523
722
 
524
723
  def process_token_keyword keyword
525
724
  state = keyword.state
526
- value = [token, ss.lineno] # TODO: use self.lineno ?
725
+
726
+ value = [token, self.lineno]
527
727
 
528
728
  self.command_start = true if state == :expr_beg and lex_state != :expr_fname
529
729
 
@@ -554,6 +754,16 @@ class RubyLexer
554
754
  end
555
755
  end
556
756
 
757
+ def process_underscore text
758
+ ss.unscan # put back "_"
759
+
760
+ if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
761
+ return [RubyLexer::EOF, RubyLexer::EOF]
762
+ elsif scan(/\_\w*/) then
763
+ return process_token matched
764
+ end
765
+ end
766
+
557
767
  def rb_compile_error msg
558
768
  msg += ". near line #{self.lineno}: #{ss.rest[/^.*/].inspect}"
559
769
  raise RubyParser::SyntaxError, msg
@@ -607,7 +817,7 @@ class RubyLexer
607
817
  c
608
818
  when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
609
819
  matched
610
- when scan(/u([0-9a-fA-F]+|\{[0-9a-fA-F]+\})/) then
820
+ when scan(/u([0-9a-fA-F]{2,4}|\{[0-9a-fA-F]{2,6}\})/) then
611
821
  [ss[1].delete("{}").to_i(16)].pack("U")
612
822
  when scan(/[McCx0-9]/) || end_of_stream? then
613
823
  rb_compile_error("Invalid escape character syntax")
@@ -643,19 +853,15 @@ class RubyLexer
643
853
  self.space_seen = false
644
854
  self.string_nest = 0
645
855
  self.token = nil
646
- self.yacc_value = nil
647
856
 
648
857
  self.cmdarg = RubyParserStuff::StackState.new(:cmdarg)
649
858
  self.cond = RubyParserStuff::StackState.new(:cond)
650
-
651
- @src = nil
652
859
  end
653
860
 
654
861
  def result lex_state, token, text # :nodoc:
655
862
  lex_state = self.arg_state if lex_state == :arg_state
656
863
  self.lex_state = lex_state if lex_state
657
- self.yacc_value = text
658
- token
864
+ [token, text]
659
865
  end
660
866
 
661
867
  def ruby18
@@ -670,6 +876,10 @@ class RubyLexer
670
876
  ss.scan re
671
877
  end
672
878
 
879
+ def scanner_class # TODO: design this out of oedipus_lex. or something.
880
+ RPStringScanner
881
+ end
882
+
673
883
  def space_vs_beginning space_type, beg_type, fallback
674
884
  if is_space_arg? check(/./m) then
675
885
  warning "`**' interpreted as argument prefix"
@@ -686,10 +896,11 @@ class RubyLexer
686
896
  self.lex_strterm = [:strterm, type, beg, nnd]
687
897
  end
688
898
 
689
- def src= src
690
- raise "bad src: #{src.inspect}" unless String === src
691
- @src = RPStringScanner.new(src)
692
- end
899
+ # TODO: consider
900
+ # def src= src
901
+ # raise "bad src: #{src.inspect}" unless String === src
902
+ # @src = RPStringScanner.new(src)
903
+ # end
693
904
 
694
905
  def tokadd_escape term # TODO: rewrite / remove
695
906
  case
@@ -820,7 +1031,7 @@ class RubyLexer
820
1031
  s
821
1032
  when /^[McCx0-9]/ then
822
1033
  rb_compile_error("Invalid escape character syntax")
823
- when /u([0-9a-fA-F]+|\{[0-9a-fA-F]+\})/ then
1034
+ when /u([0-9a-fA-F]{2,4}|\{[0-9a-fA-F]{2,6}\})/ then
824
1035
  [$1.delete("{}").to_i(16)].pack("U")
825
1036
  else
826
1037
  s
@@ -833,519 +1044,146 @@ class RubyLexer
833
1044
  # do nothing for now
834
1045
  end
835
1046
 
836
- ##
837
- # Returns the next token. Also sets yy_val is needed.
838
- #
839
- # @return Description of the Returned Value
840
-
841
- def yylex # 461 lines
842
- c = ''
843
- self.space_seen = false
844
- command_state = false
845
- ss = self.src
846
-
847
- self.token = nil
848
- self.yacc_value = nil
849
-
850
- return yylex_string if lex_strterm
851
-
852
- command_state = self.command_start
853
- self.command_start = false
854
-
855
- last_state = lex_state
856
-
857
- loop do # START OF CASE
858
- if scan(/[\ \t\r\f\v]/) then # \s - \n + \v
859
- self.space_seen = true
860
- next
861
- elsif check(/[^a-zA-Z]/) then
862
- if scan(/\n|\#/) then
863
- self.lineno = nil
864
- c = matched
865
- if c == '#' then
866
- ss.pos -= 1
867
-
868
- while scan(/\s*#.*(\n+|\z)/) do
869
- # TODO: self.lineno += matched.lines.to_a.size
870
- @comments << matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
871
- end
872
-
873
- return RubyLexer::EOF if end_of_stream?
874
- end
875
-
876
- # Replace a string of newlines with a single one
877
- scan(/\n+/)
878
-
879
- next if in_lex_state?(:expr_beg, :expr_value, :expr_class,
880
- :expr_fname, :expr_dot)
881
-
882
- if scan(/([\ \t\r\f\v]*)\./) then
883
- self.space_seen = true unless ss[1].empty?
884
-
885
- ss.pos -= 1
886
- next unless check(/\.\./)
887
- end
888
-
889
- self.command_start = true
890
-
891
- return result(:expr_beg, :tNL, nil)
892
- elsif scan(/[\]\)\}]/) then
893
- if matched == "}" then
894
- self.brace_nest -= 1
895
- else
896
- self.paren_nest -= 1
897
- end
898
-
899
- cond.lexpop
900
- cmdarg.lexpop
901
-
902
- text = matched
903
- state = text == ")" ? :expr_endfn : :expr_endarg
904
- token = {
905
- ")" => :tRPAREN,
906
- "]" => :tRBRACK,
907
- "}" => :tRCURLY
908
- }[text]
909
-
910
- return result(state, token, text)
911
- elsif scan(/\!/) then
912
- if in_arg_state? then
913
- return result(:expr_arg, :tUBANG, "!@") if scan(/@/)
914
- end
915
-
916
- text = scan(/[=~]/) ? "!#{matched}" : "!"
917
-
918
- return result(arg_state, TOKENS[text], text)
919
- elsif scan(/\.\.\.?|,|![=~]?/) then
920
- return result(:expr_beg, TOKENS[matched], matched)
921
- elsif check(/\./) then
922
- if scan(/\.\d/) then
923
- rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
924
- elsif scan(/\./) then
925
- return result(:expr_dot, :tDOT, ".")
926
- end
927
- elsif scan(/\(/) then
928
- token = if ruby18 then
929
- yylex_paren18
930
- else
931
- yylex_paren19
932
- end
933
-
934
- self.paren_nest += 1
935
-
936
- return expr_result(token, "(")
937
- elsif check(/\=/) then
938
- if scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
939
- tok = matched
940
- return result(:arg_state, TOKENS[tok], tok)
941
- elsif beginning_of_line? and scan(/\=begin(?=\s)/) then
942
- @comments << matched
943
-
944
- unless scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
945
- @comments.clear
946
- rb_compile_error("embedded document meets end of file")
947
- end
948
-
949
- @comments << matched
950
-
951
- next
952
- elsif scan(/\=(?=begin\b)/) then # h[k]=begin ... end
953
- tok = matched
954
- return result(:arg_state, TOKENS[tok], tok)
955
- else
956
- raise "you shouldn't be able to get here"
957
- end
958
- elsif scan(/\"(#{SIMPLE_STRING})\"/o) then
959
- string = matched[1..-2].gsub(ESC) { unescape $1 }
960
- return result(:expr_end, :tSTRING, string)
961
- elsif scan(/\"/) then # FALLBACK
962
- string STR_DQUOTE, '"' # TODO: question this
963
- return result(nil, :tSTRING_BEG, '"')
964
- elsif scan(/\@\@?#{IDENT_CHAR}+/o) then
965
- self.token = matched
966
-
967
- rb_compile_error "`#{self.token}` is not allowed as a variable name" if
968
- self.token =~ /\@\d/
969
-
970
- tok_id = matched =~ /^@@/ ? :tCVAR : :tIVAR
971
- return result(:expr_end, tok_id, self.token)
972
- elsif scan(/\:\:/) then
973
- if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
974
- return result(:expr_beg, :tCOLON3, "::")
975
- end
976
-
977
- return result(:expr_dot, :tCOLON2, "::")
978
- elsif ! is_end? && scan(/:([a-zA-Z_]#{IDENT_CHAR}*(?:[?!]|=(?==>)|=(?![=>]))?)/) then
979
- # scanning shortcut to symbols
980
- return result(:expr_end, :tSYMBOL, ss[1])
981
- elsif ! is_end? && (scan(/\:\"(#{SIMPLE_STRING})\"/) ||
982
- scan(/\:\'(#{SIMPLE_SSTRING})\'/)) then
983
- symbol = ss[1].gsub(ESC) { unescape $1 }
984
-
985
- rb_compile_error "symbol cannot contain '\\0'" if
986
- ruby18 && symbol =~ /\0/
987
-
988
- return result(:expr_end, :tSYMBOL, symbol)
989
- elsif scan(/\:/) then
990
- # ?: / then / when
991
- if is_end? || check(/\s/) then
992
- # TODO warn_balanced(":", "symbol literal");
993
- return result(:expr_beg, :tCOLON, ":")
994
- end
995
-
996
- case
997
- when scan(/\'/) then
998
- string STR_SSYM, matched
999
- when scan(/\"/) then
1000
- string STR_DSYM, matched
1001
- end
1002
-
1003
- return result(:expr_fname, :tSYMBEG, ":")
1004
- elsif check(/[0-9]/) then
1005
- return parse_number
1006
- elsif scan(/\[/) then
1007
- self.paren_nest += 1
1008
-
1009
- token = nil
1010
-
1011
- if in_lex_state? :expr_fname, :expr_dot then
1012
- case
1013
- when scan(/\]\=/) then
1014
- self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
1015
- return result(:expr_arg, :tASET, "[]=")
1016
- when scan(/\]/) then
1017
- self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
1018
- return result(:expr_arg, :tAREF, "[]")
1047
+ def process_string # TODO: rewrite / remove
1048
+ token = if lex_strterm[0] == :heredoc then
1049
+ self.heredoc lex_strterm
1019
1050
  else
1020
- rb_compile_error "unexpected '['"
1051
+ self.parse_string lex_strterm
1021
1052
  end
1022
- elsif is_beg? then
1023
- token = :tLBRACK
1024
- elsif is_arg? && space_seen then
1025
- token = :tLBRACK
1026
- else
1027
- token = :tLBRACK2
1028
- end
1029
1053
 
1030
- return expr_result(token, "[")
1031
- elsif scan(/\'#{SIMPLE_SSTRING}\'/) then
1032
- text = matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'") # "
1033
- return result(:expr_end, :tSTRING, text)
1034
- elsif check(/\|/) then
1035
- if scan(/\|\|\=/) then
1036
- return result(:expr_beg, :tOP_ASGN, "||")
1037
- elsif scan(/\|\|/) then
1038
- return result(:expr_beg, :tOROP, "||")
1039
- elsif scan(/\|\=/) then
1040
- return result(:expr_beg, :tOP_ASGN, "|")
1041
- elsif scan(/\|/) then
1042
- return result(:arg_state, :tPIPE, "|")
1043
- end
1044
- elsif scan(/\{/) then
1045
- self.brace_nest += 1
1046
- if lpar_beg && lpar_beg == paren_nest then
1047
- self.lpar_beg = nil
1048
- self.paren_nest -= 1
1054
+ token_type, _ = token
1049
1055
 
1050
- return expr_result(:tLAMBEG, "{")
1051
- end
1052
-
1053
- token = if is_arg? || in_lex_state?(:expr_end, :expr_endfn) then
1054
- :tLCURLY # block (primary)
1055
- elsif in_lex_state?(:expr_endarg) then
1056
- :tLBRACE_ARG # block (expr)
1057
- else
1058
- :tLBRACE # hash
1059
- end
1060
-
1061
- self.command_start = true unless token == :tLBRACE
1062
-
1063
- return expr_result(token, "{")
1064
- elsif scan(/->/) then
1065
- return result(:expr_endfn, :tLAMBDA, nil)
1066
- elsif scan(/[+-]/) then
1067
- sign = matched
1068
- utype, type = if sign == "+" then
1069
- [:tUPLUS, :tPLUS]
1070
- else
1071
- [:tUMINUS, :tMINUS]
1072
- end
1073
-
1074
- if in_arg_state? then
1075
- if scan(/@/) then
1076
- return result(:expr_arg, utype, "#{sign}@")
1077
- else
1078
- return result(:expr_arg, type, sign)
1079
- end
1080
- end
1081
-
1082
- return result(:expr_beg, :tOP_ASGN, sign) if scan(/\=/)
1056
+ if token_type == :tSTRING_END || token_type == :tREGEXP_END then
1057
+ self.lex_strterm = nil
1058
+ self.lex_state = :expr_end
1059
+ end
1083
1060
 
1084
- if (is_beg? || (is_arg? && space_seen && !check(/\s/))) then
1085
- arg_ambiguous if is_arg?
1061
+ return token
1062
+ end
1086
1063
 
1087
- if check(/\d/) then
1088
- return self.parse_number if utype == :tUPLUS
1089
- return result(:expr_beg, :tUMINUS_NUM, sign)
1090
- end
1064
+ def parse_quote # TODO: remove / rewrite
1065
+ beg, nnd, short_hand, c = nil, nil, false, nil
1091
1066
 
1092
- return result(:expr_beg, utype, sign)
1093
- end
1067
+ if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
1068
+ rb_compile_error "unknown type of %string" if ss.matched_size == 2
1069
+ c, beg, short_hand = matched, ss.getch, false
1070
+ else # Short-hand (e.g. %{, %., %!, etc)
1071
+ c, beg, short_hand = 'Q', ss.getch, true
1072
+ end
1094
1073
 
1095
- return result(:expr_beg, type, sign)
1096
- elsif check(/\*/) then
1097
- if scan(/\*\*=/) then
1098
- return result(:expr_beg, :tOP_ASGN, "**")
1099
- elsif scan(/\*\*/) then
1100
- token = space_vs_beginning :tDSTAR, :tDSTAR, :tPOW
1074
+ if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
1075
+ rb_compile_error "unterminated quoted string meets end of file"
1076
+ end
1101
1077
 
1102
- return result(:arg_state, token, "**")
1103
- elsif scan(/\*\=/) then
1104
- return result(:expr_beg, :tOP_ASGN, "*")
1105
- elsif scan(/\*/) then
1106
- token = space_vs_beginning :tSTAR, :tSTAR, :tSTAR2
1078
+ # Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
1079
+ nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
1080
+ nnd, beg = beg, "\0" if nnd.nil?
1107
1081
 
1108
- return result(:arg_state, token, "*")
1109
- end
1110
- elsif check(/\</) then
1111
- if scan(/\<\=\>/) then
1112
- return result(:arg_state, :tCMP, "<=>")
1113
- elsif scan(/\<\=/) then
1114
- return result(:arg_state, :tLEQ, "<=")
1115
- elsif scan(/\<\<\=/) then
1116
- return result(:arg_state, :tOP_ASGN, "<<")
1117
- elsif scan(/\<\</) then
1118
- if (!in_lex_state?(:expr_dot, :expr_class) &&
1119
- !is_end? &&
1120
- (!is_arg? || space_seen)) then
1121
- tok = self.heredoc_identifier
1122
- return tok if tok
1123
- end
1082
+ token_type, text = nil, "%#{c}#{beg}"
1083
+ token_type, string_type = case c
1084
+ when 'Q' then
1085
+ ch = short_hand ? nnd : c + beg
1086
+ text = "%#{ch}"
1087
+ [:tSTRING_BEG, STR_DQUOTE]
1088
+ when 'q' then
1089
+ [:tSTRING_BEG, STR_SQUOTE]
1090
+ when 'W' then
1091
+ scan(/\s*/)
1092
+ [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
1093
+ when 'w' then
1094
+ scan(/\s*/)
1095
+ [:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
1096
+ when 'x' then
1097
+ [:tXSTRING_BEG, STR_XQUOTE]
1098
+ when 'r' then
1099
+ [:tREGEXP_BEG, STR_REGEXP]
1100
+ when 's' then
1101
+ self.lex_state = :expr_fname
1102
+ [:tSYMBEG, STR_SSYM]
1103
+ when 'I' then
1104
+ scan(/\s*/)
1105
+ [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
1106
+ when 'i' then
1107
+ scan(/\s*/)
1108
+ [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
1109
+ end
1124
1110
 
1125
- return result(:arg_state, :tLSHFT, "\<\<")
1126
- elsif scan(/\</) then
1127
- return result(:arg_state, :tLT, "<")
1128
- end
1129
- elsif check(/\>/) then
1130
- if scan(/\>\=/) then
1131
- return result(:arg_state, :tGEQ, ">=")
1132
- elsif scan(/\>\>=/) then
1133
- return result(:arg_state, :tOP_ASGN, ">>")
1134
- elsif scan(/\>\>/) then
1135
- return result(:arg_state, :tRSHFT, ">>")
1136
- elsif scan(/\>/) then
1137
- return result(:arg_state, :tGT, ">")
1138
- end
1139
- elsif scan(/\`/) then
1140
- case lex_state
1141
- when :expr_fname then
1142
- return result(:expr_end, :tBACK_REF2, "`")
1143
- when :expr_dot then
1144
- state = command_state ? :expr_cmdarg : :expr_arg
1145
- return result(state, :tBACK_REF2, "`")
1146
- else
1147
- string STR_XQUOTE, '`'
1148
- return result(nil, :tXSTRING_BEG, "`")
1149
- end
1150
- elsif scan(/\?/) then
1151
- if is_end? then
1152
- state = ruby18 ? :expr_beg : :expr_value # HACK?
1153
- return result(state, :tEH, "?")
1154
- end
1111
+ rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
1112
+ token_type.nil?
1155
1113
 
1156
- if end_of_stream? then
1157
- rb_compile_error "incomplete character syntax"
1158
- end
1114
+ raise "huh" unless string_type
1159
1115
 
1160
- if check(/\s|\v/) then
1161
- unless is_arg? then
1162
- c2 = { " " => 's',
1163
- "\n" => 'n',
1164
- "\t" => 't',
1165
- "\v" => 'v',
1166
- "\r" => 'r',
1167
- "\f" => 'f' }[matched]
1168
-
1169
- if c2 then
1170
- warning("invalid character syntax; use ?\\" + c2)
1171
- end
1172
- end
1116
+ string string_type, nnd, beg
1173
1117
 
1174
- # ternary
1175
- state = ruby18 ? :expr_beg : :expr_value # HACK?
1176
- return result(state, :tEH, "?")
1177
- elsif check(/\w(?=\w)/) then # ternary, also
1178
- return result(:expr_beg, :tEH, "?")
1179
- end
1118
+ return token_type, text
1119
+ end
1180
1120
 
1181
- c = if scan(/\\/) then
1182
- self.read_escape
1183
- else
1184
- ss.getch
1185
- end
1121
+ def parse_string quote # TODO: rewrite / remove
1122
+ _, string_type, term, open = quote
1186
1123
 
1187
- if version == 18 then
1188
- return result(:expr_end, :tINTEGER, c[0].ord & 0xff)
1189
- else
1190
- return result(:expr_end, :tSTRING, c)
1191
- end
1192
- elsif check(/\&/) then
1193
- if scan(/\&\&\=/) then
1194
- return result(:expr_beg, :tOP_ASGN, "&&")
1195
- elsif scan(/\&\&/) then
1196
- return result(:expr_beg, :tANDOP, "&&")
1197
- elsif scan(/\&\=/) then
1198
- return result(:expr_beg, :tOP_ASGN, "&")
1199
- elsif scan(/&/) then
1200
- token = if is_arg? && space_seen && !check(/\s/) then
1201
- warning("`&' interpreted as argument prefix")
1202
- :tAMPER
1203
- elsif in_lex_state? :expr_beg, :expr_mid then
1204
- :tAMPER
1205
- else
1206
- :tAMPER2
1207
- end
1208
-
1209
- return result(:arg_state, token, "&")
1210
- end
1211
- elsif scan(/\//) then
1212
- if is_beg? then
1213
- string STR_REGEXP, '/'
1214
- return result(nil, :tREGEXP_BEG, "/")
1215
- end
1124
+ space = false # FIX: remove these
1125
+ func = string_type
1126
+ paren = open
1127
+ term_re = @@regexp_cache[term]
1216
1128
 
1217
- if scan(/\=/) then
1218
- return result(:expr_beg, :tOP_ASGN, "/")
1219
- end
1129
+ qwords = (func & STR_FUNC_QWORDS) != 0
1130
+ regexp = (func & STR_FUNC_REGEXP) != 0
1131
+ expand = (func & STR_FUNC_EXPAND) != 0
1220
1132
 
1221
- if is_arg? && space_seen then
1222
- unless scan(/\s/) then
1223
- arg_ambiguous
1224
- string STR_REGEXP, '/'
1225
- return result(nil, :tREGEXP_BEG, "/")
1226
- end
1227
- end
1133
+ unless func then # nil'ed from qwords below. *sigh*
1134
+ return :tSTRING_END, nil
1135
+ end
1228
1136
 
1229
- return result(:arg_state, :tDIVIDE, "/")
1230
- elsif scan(/\^=/) then
1231
- return result(:expr_beg, :tOP_ASGN, "^")
1232
- elsif scan(/\^/) then
1233
- return result(:arg_state, :tCARET, "^")
1234
- elsif scan(/\;/) then
1235
- self.command_start = true
1236
- return result(:expr_beg, :tSEMI, ";")
1237
- elsif scan(/\~/) then
1238
- scan(/@/) if in_lex_state? :expr_fname, :expr_dot
1239
- return result(:arg_state, :tTILDE, "~")
1240
- elsif scan(/\\/) then
1241
- if scan(/\r?\n/) then
1242
- self.lineno = nil
1243
- self.space_seen = true
1244
- next
1245
- end
1246
- rb_compile_error "bare backslash only allowed before newline"
1247
- elsif scan(/\%/) then
1248
- return parse_quote if is_beg?
1249
-
1250
- return result(:expr_beg, :tOP_ASGN, "%") if scan(/\=/)
1251
-
1252
- return parse_quote if is_arg? && space_seen && ! check(/\s/)
1253
-
1254
- return result(:arg_state, :tPERCENT, "%")
1255
- elsif check(/\$/) then
1256
- if scan(/(\$_)(\w+)/) then
1257
- self.token = matched
1258
- return result(:expr_end, :tGVAR, matched)
1259
- elsif scan(/\$_/) then
1260
- return result(:expr_end, :tGVAR, matched)
1261
- elsif scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
1262
- return result(:expr_end, :tGVAR, matched)
1263
- elsif scan(/\$([\&\`\'\+])/) then
1264
- # Explicit reference to these vars as symbols...
1265
- if lex_state == :expr_fname then
1266
- return result(:expr_end, :tGVAR, matched)
1267
- else
1268
- return result(:expr_end, :tBACK_REF, ss[1].to_sym)
1269
- end
1270
- elsif scan(/\$([1-9]\d*)/) then
1271
- if lex_state == :expr_fname then
1272
- return result(:expr_end, :tGVAR, matched)
1273
- else
1274
- return result(:expr_end, :tNTH_REF, ss[1].to_i)
1275
- end
1276
- elsif scan(/\$0/) then
1277
- return result(:expr_end, :tGVAR, matched)
1278
- elsif scan(/\$\W|\$\z/) then # TODO: remove?
1279
- return result(:expr_end, "$", "$") # FIX: "$"??
1280
- elsif scan(/\$\w+/)
1281
- return result(:expr_end, :tGVAR, matched)
1282
- end
1283
- elsif check(/\_/) then
1284
- if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
1285
- self.lineno = nil
1286
- return RubyLexer::EOF
1287
- elsif scan(/\_\w*/) then
1288
- self.token = matched
1289
- return process_token command_state, last_state
1290
- end
1291
- end
1292
- end # END OF CASE
1137
+ space = true if qwords and scan(/\s+/)
1293
1138
 
1294
- if scan(/\004|\032|\000/) || end_of_stream? then # ^D, ^Z, EOF
1295
- return RubyLexer::EOF
1296
- else # alpha check
1297
- rb_compile_error "Invalid char #{ss.rest[0].chr} in expression" unless
1298
- check IDENT
1139
+ if self.string_nest == 0 && scan(/#{term_re}/) then
1140
+ if qwords then
1141
+ quote[1] = nil
1142
+ return :tSPACE, nil
1143
+ elsif regexp then
1144
+ return :tREGEXP_END, self.regx_options
1145
+ else
1146
+ return :tSTRING_END, term
1299
1147
  end
1300
-
1301
- self.token = matched if self.scan IDENT
1302
-
1303
- return process_token command_state, last_state
1304
1148
  end
1305
- end
1306
1149
 
1307
- def yylex_paren18
1308
- self.command_start = true
1309
- token = :tLPAREN2
1150
+ return :tSPACE, nil if space
1310
1151
 
1311
- if in_lex_state? :expr_beg, :expr_mid then
1312
- token = :tLPAREN
1313
- elsif space_seen then
1314
- if in_lex_state? :expr_cmdarg then
1315
- token = :tLPAREN_ARG
1316
- elsif in_lex_state? :expr_arg then
1317
- warning "don't put space before argument parentheses"
1152
+ self.string_buffer = []
1153
+
1154
+ if expand
1155
+ case
1156
+ when scan(/#(?=[$@])/) then
1157
+ return :tSTRING_DVAR, nil
1158
+ when scan(/#[{]/) then
1159
+ return :tSTRING_DBEG, nil
1160
+ when scan(/#/) then
1161
+ string_buffer << '#'
1318
1162
  end
1319
- else
1320
- # not a ternary -- do nothing?
1321
1163
  end
1322
1164
 
1323
- token
1324
- end
1325
-
1326
- def yylex_paren19
1327
- if is_beg? then
1328
- :tLPAREN
1329
- elsif is_space_arg? then
1330
- :tLPAREN_ARG
1331
- else
1332
- :tLPAREN2 # plain '(' in parse.y
1165
+ if tokadd_string(func, term, paren) == RubyLexer::EOF then
1166
+ rb_compile_error "unterminated string meets end of file"
1333
1167
  end
1168
+
1169
+ return :tSTRING_CONTENT, string_buffer.join
1334
1170
  end
1171
+ end
1335
1172
 
1336
- def yylex_string # TODO: rewrite / remove
1337
- token = if lex_strterm[0] == :heredoc then
1338
- self.heredoc lex_strterm
1339
- else
1340
- self.parse_string lex_strterm
1341
- end
1173
+ require "ruby_lexer.rex"
1342
1174
 
1343
- if token == :tSTRING_END || token == :tREGEXP_END then
1344
- self.lineno = nil
1345
- self.lex_strterm = nil
1346
- self.lex_state = :expr_end
1175
+ if ENV["DEBUG"] then
1176
+ class RubyLexer
1177
+ alias :old_lineno= :lineno=
1178
+
1179
+ def d o
1180
+ $stderr.puts o.inspect
1347
1181
  end
1348
1182
 
1349
- return token
1183
+ def lineno= n
1184
+ self.old_lineno= n
1185
+ where = caller.first.split(/:/).first(2).join(":")
1186
+ d :lineno => [n, where, ss && ss.rest[0,40]]
1187
+ end
1350
1188
  end
1351
1189
  end