ruby_parser 3.3.0 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ruby20_parser.rb CHANGED
@@ -4913,13 +4913,15 @@ def _reduce_311(val, _values, result)
4913
4913
  end
4914
4914
 
4915
4915
  def _reduce_312(val, _values, result)
4916
- result = new_case val[1], val[3]
4916
+ (_, line), expr, _, body, _ = val
4917
+ result = new_case expr, body, line
4917
4918
 
4918
4919
  result
4919
4920
  end
4920
4921
 
4921
4922
  def _reduce_313(val, _values, result)
4922
- result = new_case nil, val[2]
4923
+ (_, line), _, body, _ = val
4924
+ result = new_case nil, body, line
4923
4925
 
4924
4926
  result
4925
4927
  end
@@ -5514,13 +5516,23 @@ def _reduce_414(val, _values, result)
5514
5516
  end
5515
5517
 
5516
5518
  def _reduce_415(val, _values, result)
5517
- debug20 21, val, result
5519
+ iter1, _, name, args, iter2 = val
5520
+
5521
+ call = new_call iter1, name.to_sym, args
5522
+ iter2.insert 1, call
5523
+
5524
+ result = iter2
5518
5525
 
5519
5526
  result
5520
5527
  end
5521
5528
 
5522
5529
  def _reduce_416(val, _values, result)
5523
- debug20 22, val, result
5530
+ iter1, _, name, args, iter2 = val
5531
+
5532
+ call = new_call iter1, name.to_sym, args
5533
+ iter2.insert 1, call
5534
+
5535
+ result = iter2
5524
5536
 
5525
5537
  result
5526
5538
  end
@@ -6088,7 +6100,7 @@ def _reduce_510(val, _values, result)
6088
6100
  end
6089
6101
 
6090
6102
  def _reduce_511(val, _values, result)
6091
- result = s(:lit, lexer.src.current_line)
6103
+ result = s(:lit, lexer.lineno)
6092
6104
  result
6093
6105
  end
6094
6106
 
data/lib/ruby20_parser.y CHANGED
@@ -1058,11 +1058,13 @@ rule
1058
1058
  }
1059
1059
  | kCASE expr_value opt_terms case_body kEND
1060
1060
  {
1061
- result = new_case val[1], val[3]
1061
+ (_, line), expr, _, body, _ = val
1062
+ result = new_case expr, body, line
1062
1063
  }
1063
1064
  | kCASE opt_terms case_body kEND
1064
1065
  {
1065
- result = new_case nil, val[2]
1066
+ (_, line), _, body, _ = val
1067
+ result = new_case nil, body, line
1066
1068
  }
1067
1069
  | kFOR for_var kIN
1068
1070
  {
@@ -1501,11 +1503,21 @@ opt_block_args_tail: tCOMMA block_args_tail
1501
1503
  }
1502
1504
  | block_call dot_or_colon operation2 opt_paren_args brace_block
1503
1505
  {
1504
- debug20 21, val, result
1506
+ iter1, _, name, args, iter2 = val
1507
+
1508
+ call = new_call iter1, name.to_sym, args
1509
+ iter2.insert 1, call
1510
+
1511
+ result = iter2
1505
1512
  }
1506
1513
  | block_call dot_or_colon operation2 command_args do_block
1507
1514
  {
1508
- debug20 22, val, result
1515
+ iter1, _, name, args, iter2 = val
1516
+
1517
+ call = new_call iter1, name.to_sym, args
1518
+ iter2.insert 1, call
1519
+
1520
+ result = iter2
1509
1521
  }
1510
1522
 
1511
1523
  method_call: fcall
@@ -1916,7 +1928,7 @@ keyword_variable: kNIL { result = s(:nil) }
1916
1928
  | kTRUE { result = s(:true) }
1917
1929
  | kFALSE { result = s(:false) }
1918
1930
  | k__FILE__ { result = s(:str, self.file) }
1919
- | k__LINE__ { result = s(:lit, lexer.src.current_line) }
1931
+ | k__LINE__ { result = s(:lit, lexer.lineno) }
1920
1932
  | k__ENCODING__
1921
1933
  {
1922
1934
  result =
data/lib/ruby_lexer.rb CHANGED
@@ -11,11 +11,6 @@ class RubyLexer
11
11
  /[\w\x80-\xFF]/n
12
12
  end
13
13
 
14
- IDENT = /^#{IDENT_CHAR}+/o
15
- ESC = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]+|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/u
16
- SIMPLE_STRING = /(#{ESC}|#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*/o
17
- SIMPLE_SSTRING = /(\\.|[^\'])*/
18
-
19
14
  EOF = :eof_haha!
20
15
 
21
16
  # ruby constants for strings (should this be moved somewhere else?)
@@ -75,6 +70,8 @@ class RubyLexer
75
70
  attr_accessor :brace_nest
76
71
  attr_accessor :cmdarg
77
72
  attr_accessor :command_start
73
+ attr_accessor :command_state
74
+ attr_accessor :last_state
78
75
  attr_accessor :cond
79
76
 
80
77
  ##
@@ -91,11 +88,7 @@ class RubyLexer
91
88
  attr_accessor :string_buffer
92
89
  attr_accessor :string_nest
93
90
 
94
- # Stream of data that yylex examines.
95
- attr_reader :src
96
- alias :ss :src
97
-
98
- # Last token read via yylex.
91
+ # Last token read via next_token.
99
92
  attr_accessor :token
100
93
 
101
94
  ##
@@ -104,11 +97,6 @@ class RubyLexer
104
97
 
105
98
  attr_accessor :version
106
99
 
107
- # Value of last token which had a value associated with it.
108
- attr_accessor :yacc_value
109
-
110
- attr_writer :lineno # reader is lazy initalizer
111
-
112
100
  attr_writer :comments
113
101
 
114
102
  def initialize v = 18
@@ -117,20 +105,6 @@ class RubyLexer
117
105
  reset
118
106
  end
119
107
 
120
- ##
121
- # How the parser advances to the next token.
122
- #
123
- # @return true if not at end of file (EOF).
124
-
125
- def advance
126
- r = yylex
127
- self.token = r
128
-
129
- raise "yylex returned nil, near #{ss.rest[0,10].inspect}" unless r
130
-
131
- return RubyLexer::EOF != r
132
- end
133
-
134
108
  def arg_ambiguous
135
109
  self.warning("Ambiguous first argument. make sure.")
136
110
  end
@@ -142,6 +116,7 @@ class RubyLexer
142
116
  def beginning_of_line?
143
117
  ss.bol?
144
118
  end
119
+ alias :bol? :beginning_of_line? # to make .rex file more readable
145
120
 
146
121
  def check re
147
122
  ss.check re
@@ -174,9 +149,9 @@ class RubyLexer
174
149
  rb_compile_error err_msg if end_of_stream?
175
150
 
176
151
  if beginning_of_line? && scan(eos_re) then
152
+ self.lineno += 1
177
153
  ss.unread_many last_line # TODO: figure out how to remove this
178
- self.yacc_value = eos
179
- return :tSTRING_END
154
+ return :tSTRING_END, eos
180
155
  end
181
156
 
182
157
  self.string_buffer = []
@@ -185,11 +160,9 @@ class RubyLexer
185
160
  case
186
161
  when scan(/#[$@]/) then
187
162
  ss.pos -= 1 # FIX omg stupid
188
- self.yacc_value = matched
189
- return :tSTRING_DVAR
163
+ return :tSTRING_DVAR, matched
190
164
  when scan(/#[{]/) then
191
- self.yacc_value = matched
192
- return :tSTRING_DBEG
165
+ return :tSTRING_DBEG, matched
193
166
  when scan(/#/) then
194
167
  string_buffer << '#'
195
168
  end
@@ -201,9 +174,9 @@ class RubyLexer
201
174
  c == RubyLexer::EOF
202
175
 
203
176
  if c != "\n" then
204
- self.yacc_value = string_buffer.join.delete("\r")
205
- return :tSTRING_CONTENT
177
+ return :tSTRING_CONTENT, string_buffer.join.delete("\r")
206
178
  else
179
+ self.lineno += 1
207
180
  string_buffer << scan(/\n/)
208
181
  end
209
182
 
@@ -218,8 +191,7 @@ class RubyLexer
218
191
 
219
192
  self.lex_strterm = [:heredoc, eos, func, last_line]
220
193
 
221
- self.yacc_value = string_buffer.join.delete("\r")
222
- return :tSTRING_CONTENT
194
+ return :tSTRING_CONTENT, string_buffer.join.delete("\r")
223
195
  end
224
196
 
225
197
  def heredoc_identifier # TODO: remove / rewrite
@@ -255,7 +227,6 @@ class RubyLexer
255
227
  if scan(/.*\n/) then
256
228
  # TODO: think about storing off the char range instead
257
229
  line = matched
258
- ss.extra_lines_added += 1 # FIX: ugh
259
230
  else
260
231
  line = nil
261
232
  end
@@ -263,14 +234,16 @@ class RubyLexer
263
234
  self.lex_strterm = [:heredoc, string_buffer.join, func, line]
264
235
 
265
236
  if term == '`' then
266
- self.yacc_value = "`"
267
- return :tXSTRING_BEG
237
+ result nil, :tXSTRING_BEG, "`"
268
238
  else
269
- self.yacc_value = "\""
270
- return :tSTRING_BEG
239
+ result nil, :tSTRING_BEG, "\""
271
240
  end
272
241
  end
273
242
 
243
+ def in_fname?
244
+ in_lex_state? :expr_fname
245
+ end
246
+
274
247
  def in_arg_state? # TODO: rename is_after_operator?
275
248
  in_lex_state? :expr_fname, :expr_dot
276
249
  end
@@ -281,9 +254,7 @@ class RubyLexer
281
254
 
282
255
  def int_with_base base
283
256
  rb_compile_error "Invalid numeric format" if matched =~ /__/
284
-
285
- self.yacc_value = matched.to_i(base)
286
- return :tINTEGER
257
+ return result(:expr_end, :tINTEGER, matched.to_i(base))
287
258
  end
288
259
 
289
260
  def is_arg?
@@ -298,7 +269,7 @@ class RubyLexer
298
269
  in_lex_state? :expr_end, :expr_endarg, :expr_endfn
299
270
  end
300
271
 
301
- def is_label_possible? command_state
272
+ def is_label_possible?
302
273
  (in_lex_state?(:expr_beg, :expr_endfn) && !command_state) || is_arg?
303
274
  end
304
275
 
@@ -306,170 +277,396 @@ class RubyLexer
306
277
  is_arg? and space_seen and c !~ /\s/
307
278
  end
308
279
 
309
- def lineno
310
- @lineno ||= ss.lineno
311
- end
312
-
313
280
  def matched
314
281
  ss.matched
315
282
  end
316
283
 
317
- ##
318
- # Parse a number from the input stream.
319
- #
320
- # @param c The first character of the number.
321
- # @return A int constant wich represents a token.
284
+ def not_end?
285
+ not is_end?
286
+ end
322
287
 
323
- def parse_number
324
- self.lex_state = :expr_end
288
+ def process_amper text
289
+ token = if is_arg? && space_seen && !check(/\s/) then
290
+ warning("`&' interpreted as argument prefix")
291
+ :tAMPER
292
+ elsif in_lex_state? :expr_beg, :expr_mid then
293
+ :tAMPER
294
+ else
295
+ :tAMPER2
296
+ end
297
+
298
+ return result(:arg_state, token, "&")
299
+ end
300
+
301
+ def process_backref text
302
+ token = ss[1].to_sym
303
+ # TODO: can't do lineno hack w/ symbol
304
+ result :expr_end, :tBACK_REF, token
305
+ end
306
+
307
+ def process_backtick text
308
+ case lex_state
309
+ when :expr_fname then
310
+ result :expr_end, :tBACK_REF2, "`"
311
+ when :expr_dot then
312
+ result((command_state ? :expr_cmdarg : :expr_arg), :tBACK_REF2, "`")
313
+ else
314
+ string STR_XQUOTE
315
+ result nil, :tXSTRING_BEG, "`"
316
+ end
317
+ end
318
+
319
+ def process_bang text
320
+ if in_arg_state? then
321
+ return result(:expr_arg, :tUBANG, "!@") if scan(/@/)
322
+ end
323
+
324
+ text = scan(/[=~]/) ? "!#{matched}" : "!"
325
+
326
+ return result(arg_state, TOKENS[text], text)
327
+ end
328
+
329
+ def process_begin text
330
+ @comments << matched
331
+
332
+ unless scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
333
+ @comments.clear
334
+ rb_compile_error("embedded document meets end of file")
335
+ end
336
+
337
+ @comments << matched
338
+
339
+ nil # TODO
340
+ end
341
+
342
+ def process_bracing text
343
+ cond.lexpop
344
+ cmdarg.lexpop
345
+
346
+ case matched
347
+ when "}" then
348
+ self.brace_nest -= 1
349
+ self.lex_state = :expr_endarg
350
+ return :tRCURLY, matched
351
+ when "]" then
352
+ self.paren_nest -= 1
353
+ self.lex_state = :expr_endarg
354
+ return :tRBRACK, matched
355
+ when ")" then
356
+ self.paren_nest -= 1
357
+ self.lex_state = :expr_endfn
358
+ return :tRPAREN, matched
359
+ else
360
+ raise "Unknown bracing: #{matched.inspect}"
361
+ end
362
+ end
363
+
364
+ def process_colon1 text
365
+ # ?: / then / when
366
+ if is_end? || check(/\s/) then
367
+ return result :expr_beg, :tCOLON, text
368
+ end
325
369
 
326
370
  case
327
- when scan(/[+-]?0[xXbBdD]\b/) then
328
- rb_compile_error "Invalid numeric format"
329
- when scan(/[+-]?(?:(?:[1-9][\d_]*|0)(?!\.\d)\b|0[Dd][0-9_]+)/) then
330
- int_with_base(10)
331
- when scan(/[+-]?0x[a-f0-9_]+/i) then
332
- int_with_base(16)
333
- when scan(/[+-]?0[Bb][01_]+/) then
334
- int_with_base(2)
335
- when scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
336
- rb_compile_error "Illegal octal digit."
337
- when scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
338
- int_with_base(8)
339
- when scan(/[+-]?[\d_]+_(e|\.)/) then
340
- rb_compile_error "Trailing '_' in number."
341
- when scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
342
- number = matched
343
- if number =~ /__/ then
344
- rb_compile_error "Invalid numeric format"
345
- end
346
- self.yacc_value = number.to_f
347
- :tFLOAT
348
- when scan(/[+-]?[0-9_]+(?![e])/) then
349
- int_with_base(10)
371
+ when scan(/\'/) then
372
+ string STR_SSYM
373
+ when scan(/\"/) then
374
+ string STR_DSYM
375
+ end
376
+
377
+ result :expr_fname, :tSYMBEG, text
378
+ end
379
+
380
+ def process_colon2 text
381
+ if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
382
+ result :expr_beg, :tCOLON3, text
350
383
  else
351
- rb_compile_error "Bad number format"
384
+ result :expr_dot, :tCOLON2, text
352
385
  end
353
386
  end
354
387
 
355
- def parse_quote # TODO: remove / rewrite
356
- beg, nnd, short_hand, c = nil, nil, false, nil
388
+ def process_curly_brace text
389
+ self.brace_nest += 1
390
+ if lpar_beg && lpar_beg == paren_nest then
391
+ self.lpar_beg = nil
392
+ self.paren_nest -= 1
357
393
 
358
- if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
359
- rb_compile_error "unknown type of %string" if ss.matched_size == 2
360
- c, beg, short_hand = matched, ss.getch, false
361
- else # Short-hand (e.g. %{, %., %!, etc)
362
- c, beg, short_hand = 'Q', ss.getch, true
394
+ return expr_result(:tLAMBEG, "{")
363
395
  end
364
396
 
365
- if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
366
- rb_compile_error "unterminated quoted string meets end of file"
397
+ token = if is_arg? || in_lex_state?(:expr_end, :expr_endfn) then
398
+ :tLCURLY # block (primary)
399
+ elsif in_lex_state?(:expr_endarg) then
400
+ :tLBRACE_ARG # block (expr)
401
+ else
402
+ :tLBRACE # hash
403
+ end
404
+
405
+ self.command_start = true unless token == :tLBRACE
406
+
407
+ return expr_result(token, "{")
408
+ end
409
+
410
+ def process_float text
411
+ rb_compile_error "Invalid numeric format" if text =~ /__/
412
+ return result(:expr_end, :tFLOAT, text.to_f)
413
+ end
414
+
415
+ def process_gvar text
416
+ text.lineno = self.lineno
417
+ result(:expr_end, :tGVAR, text)
418
+ end
419
+
420
+ def process_gvar_oddity text
421
+ result :expr_end, "$", "$" # TODO: wtf is this?
422
+ end
423
+
424
+ def process_ivar text
425
+ tok_id = text =~ /^@@/ ? :tCVAR : :tIVAR
426
+ text.lineno = self.lineno
427
+ return result(:expr_end, tok_id, text)
428
+ end
429
+
430
+ def process_lchevron text
431
+ if (!in_lex_state?(:expr_dot, :expr_class) &&
432
+ !is_end? &&
433
+ (!is_arg? || space_seen)) then
434
+ tok = self.heredoc_identifier
435
+ return tok if tok
367
436
  end
368
437
 
369
- # Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
370
- nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
371
- nnd, beg = beg, "\0" if nnd.nil?
438
+ return result(:arg_state, :tLSHFT, "\<\<")
439
+ end
372
440
 
373
- token_type, text = nil, "%#{c}#{beg}"
374
- token_type, string_type = case c
375
- when 'Q' then
376
- ch = short_hand ? nnd : c + beg
377
- text = "%#{ch}"
378
- [:tSTRING_BEG, STR_DQUOTE]
379
- when 'q' then
380
- [:tSTRING_BEG, STR_SQUOTE]
381
- when 'W' then
382
- scan(/\s*/)
383
- [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
384
- when 'w' then
385
- scan(/\s*/)
386
- [:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
387
- when 'x' then
388
- [:tXSTRING_BEG, STR_XQUOTE]
389
- when 'r' then
390
- [:tREGEXP_BEG, STR_REGEXP]
391
- when 's' then
392
- self.lex_state = :expr_fname
393
- [:tSYMBEG, STR_SSYM]
394
- when 'I' then
395
- src.scan(/\s*/)
396
- [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
397
- when 'i' then
398
- src.scan(/\s*/)
399
- [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
400
- end
441
+ def process_newline_or_comment text
442
+ c = matched
443
+ hit = false
401
444
 
402
- rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
403
- token_type.nil?
445
+ if c == '#' then
446
+ ss.pos -= 1
404
447
 
405
- raise "huh" unless string_type
448
+ while scan(/\s*\#.*(\n+|\z)/) do
449
+ hit = true
450
+ self.lineno += matched.lines.to_a.size
451
+ @comments << matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
452
+ end
406
453
 
407
- string string_type, nnd, beg
454
+ return nil if end_of_stream?
455
+ end
456
+
457
+ self.lineno += 1 unless hit
458
+
459
+ # Replace a string of newlines with a single one
460
+ self.lineno += matched.lines.to_a.size if scan(/\n+/)
461
+
462
+ return if in_lex_state?(:expr_beg, :expr_value, :expr_class,
463
+ :expr_fname, :expr_dot)
464
+
465
+ if scan(/([\ \t\r\f\v]*)\./) then
466
+ self.space_seen = true unless ss[1].empty?
467
+
468
+ ss.pos -= 1
469
+ return unless check(/\.\./)
470
+ end
471
+
472
+ self.command_start = true
408
473
 
409
- self.yacc_value = text
410
- return token_type
474
+ return result(:expr_beg, :tNL, nil)
411
475
  end
412
476
 
413
- def parse_string quote # TODO: rewrite / remove
414
- _, string_type, term, open = quote
477
+ def process_nthref text
478
+ # TODO: can't do lineno hack w/ number
479
+ result :expr_end, :tNTH_REF, ss[1].to_i
480
+ end
415
481
 
416
- space = false # FIX: remove these
417
- func = string_type
418
- paren = open
419
- term_re = @@regexp_cache[term]
482
+ def process_paren text
483
+ token = if ruby18 then
484
+ process_paren18
485
+ else
486
+ process_paren19
487
+ end
420
488
 
421
- qwords = (func & STR_FUNC_QWORDS) != 0
422
- regexp = (func & STR_FUNC_REGEXP) != 0
423
- expand = (func & STR_FUNC_EXPAND) != 0
489
+ self.paren_nest += 1
424
490
 
425
- unless func then # nil'ed from qwords below. *sigh*
426
- self.lineno = nil
427
- return :tSTRING_END
491
+ return expr_result(token, "(")
492
+ end
493
+
494
+ def process_paren18
495
+ self.command_start = true
496
+ token = :tLPAREN2
497
+
498
+ if in_lex_state? :expr_beg, :expr_mid then
499
+ token = :tLPAREN
500
+ elsif space_seen then
501
+ if in_lex_state? :expr_cmdarg then
502
+ token = :tLPAREN_ARG
503
+ elsif in_lex_state? :expr_arg then
504
+ warning "don't put space before argument parentheses"
505
+ end
506
+ else
507
+ # not a ternary -- do nothing?
428
508
  end
429
509
 
430
- space = true if qwords and scan(/\s+/)
510
+ token
511
+ end
431
512
 
432
- if self.string_nest == 0 && scan(/#{term_re}/) then
433
- if qwords then
434
- quote[1] = nil
435
- return :tSPACE
436
- elsif regexp then
437
- self.lineno = nil
438
- self.yacc_value = self.regx_options
439
- return :tREGEXP_END
513
+ def process_paren19
514
+ if is_beg? then
515
+ :tLPAREN
516
+ elsif is_space_arg? then
517
+ :tLPAREN_ARG
518
+ else
519
+ :tLPAREN2 # plain '(' in parse.y
520
+ end
521
+ end
522
+
523
+ def process_percent text
524
+ return parse_quote if is_beg?
525
+
526
+ return result(:expr_beg, :tOP_ASGN, "%") if scan(/\=/)
527
+
528
+ return parse_quote if is_arg? && space_seen && ! check(/\s/)
529
+
530
+ return result(:arg_state, :tPERCENT, "%")
531
+ end
532
+
533
+ def process_plus_minus text
534
+ sign = matched
535
+ utype, type = if sign == "+" then
536
+ [:tUPLUS, :tPLUS]
537
+ else
538
+ [:tUMINUS, :tMINUS]
539
+ end
540
+
541
+ if in_arg_state? then
542
+ if scan(/@/) then
543
+ return result(:expr_arg, utype, "#{sign}@")
440
544
  else
441
- self.lineno = nil
442
- self.yacc_value = term
443
- return :tSTRING_END
545
+ return result(:expr_arg, type, sign)
444
546
  end
445
547
  end
446
548
 
447
- return :tSPACE if space
549
+ return result(:expr_beg, :tOP_ASGN, sign) if scan(/\=/)
448
550
 
449
- self.string_buffer = []
551
+ if (is_beg? || (is_arg? && space_seen && !check(/\s/))) then
552
+ arg_ambiguous if is_arg?
450
553
 
451
- if expand
452
- case
453
- when scan(/#(?=[$@])/) then
454
- return :tSTRING_DVAR
455
- when scan(/#[{]/) then
456
- return :tSTRING_DBEG
457
- when scan(/#/) then
458
- string_buffer << '#'
554
+ if check(/\d/) then
555
+ return nil if utype == :tUPLUS
556
+ return result(:expr_beg, :tUMINUS_NUM, sign)
459
557
  end
558
+
559
+ return result(:expr_beg, utype, sign)
460
560
  end
461
561
 
462
- if tokadd_string(func, term, paren) == RubyLexer::EOF then
463
- rb_compile_error "unterminated string meets end of file"
562
+ return result(:expr_beg, type, sign)
563
+ end
564
+
565
+ def process_questionmark text
566
+ if is_end? then
567
+ state = ruby18 ? :expr_beg : :expr_value # HACK?
568
+ return result(state, :tEH, "?")
569
+ end
570
+
571
+ if end_of_stream? then
572
+ rb_compile_error "incomplete character syntax: parsed #{text.inspect}"
464
573
  end
465
574
 
466
- self.yacc_value = string_buffer.join
575
+ if check(/\s|\v/) then
576
+ unless is_arg? then
577
+ c2 = { " " => 's',
578
+ "\n" => 'n',
579
+ "\t" => 't',
580
+ "\v" => 'v',
581
+ "\r" => 'r',
582
+ "\f" => 'f' }[matched]
583
+
584
+ if c2 then
585
+ warning("invalid character syntax; use ?\\" + c2)
586
+ end
587
+ end
467
588
 
468
- return :tSTRING_CONTENT
589
+ # ternary
590
+ state = ruby18 ? :expr_beg : :expr_value # HACK?
591
+ return result(state, :tEH, "?")
592
+ elsif check(/\w(?=\w)/) then # ternary, also
593
+ return result(:expr_beg, :tEH, "?")
594
+ end
595
+
596
+ c = if scan(/\\/) then
597
+ self.read_escape
598
+ else
599
+ ss.getch
600
+ end
601
+
602
+ if version == 18 then
603
+ return result(:expr_end, :tINTEGER, c[0].ord & 0xff)
604
+ else
605
+ return result(:expr_end, :tSTRING, c)
606
+ end
469
607
  end
470
608
 
471
- def process_token command_state, last_state
472
- token = self.token
609
+ def process_slash text
610
+ if is_beg? then
611
+ string STR_REGEXP
612
+
613
+ return result(nil, :tREGEXP_BEG, "/")
614
+ end
615
+
616
+ if scan(/\=/) then
617
+ return result(:expr_beg, :tOP_ASGN, "/")
618
+ end
619
+
620
+ if is_arg? && space_seen then
621
+ unless scan(/\s/) then
622
+ arg_ambiguous
623
+ string STR_REGEXP, "/"
624
+ return result(nil, :tREGEXP_BEG, "/")
625
+ end
626
+ end
627
+
628
+ return result(:arg_state, :tDIVIDE, "/")
629
+ end
630
+
631
+ def process_square_bracket text
632
+ self.paren_nest += 1
633
+
634
+ token = nil
635
+
636
+ if in_arg_state? then
637
+ case
638
+ when scan(/\]\=/) then
639
+ self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
640
+ return result(:expr_arg, :tASET, "[]=")
641
+ when scan(/\]/) then
642
+ self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
643
+ return result(:expr_arg, :tAREF, "[]")
644
+ else
645
+ rb_compile_error "unexpected '['"
646
+ end
647
+ elsif is_beg? then
648
+ token = :tLBRACK
649
+ elsif is_arg? && space_seen then
650
+ token = :tLBRACK
651
+ else
652
+ token = :tLBRACK2
653
+ end
654
+
655
+ return expr_result(token, "[")
656
+ end
657
+
658
+ def process_symbol text
659
+ symbol = match[1].gsub(ESC) { unescape $1 }
660
+
661
+ rb_compile_error "symbol cannot contain '\\0'" if
662
+ ruby18 && symbol =~ /\0/
663
+
664
+ return result(:expr_end, :tSYMBOL, symbol)
665
+ end
666
+
667
+ def process_token text
668
+ # TODO: make this always return [token, lineno]
669
+ token = self.token = text
473
670
  token << matched if scan(/[\!\?](?!=)/)
474
671
 
475
672
  tok_id =
@@ -487,8 +684,8 @@ class RubyLexer
487
684
  :tIDENTIFIER
488
685
  end
489
686
 
490
- if !ruby18 and is_label_possible?(command_state) and scan(/:(?!:)/) then
491
- return result(:expr_beg, :tLABEL, [token, ss.lineno]) # HACK: array? TODO: self.lineno
687
+ if !ruby18 and is_label_possible? and scan(/:(?!:)/) then
688
+ return result(:expr_beg, :tLABEL, [token, self.lineno])
492
689
  end
493
690
 
494
691
  unless in_lex_state? :expr_dot then
@@ -518,12 +715,15 @@ class RubyLexer
518
715
  state = :expr_end
519
716
  end
520
717
 
718
+ token.lineno = self.lineno # yes, on a string. I know... I know...
719
+
521
720
  return result(state, tok_id, token)
522
721
  end
523
722
 
524
723
  def process_token_keyword keyword
525
724
  state = keyword.state
526
- value = [token, ss.lineno] # TODO: use self.lineno ?
725
+
726
+ value = [token, self.lineno]
527
727
 
528
728
  self.command_start = true if state == :expr_beg and lex_state != :expr_fname
529
729
 
@@ -554,6 +754,16 @@ class RubyLexer
554
754
  end
555
755
  end
556
756
 
757
+ def process_underscore text
758
+ ss.unscan # put back "_"
759
+
760
+ if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
761
+ return [RubyLexer::EOF, RubyLexer::EOF]
762
+ elsif scan(/\_\w*/) then
763
+ return process_token matched
764
+ end
765
+ end
766
+
557
767
  def rb_compile_error msg
558
768
  msg += ". near line #{self.lineno}: #{ss.rest[/^.*/].inspect}"
559
769
  raise RubyParser::SyntaxError, msg
@@ -607,7 +817,7 @@ class RubyLexer
607
817
  c
608
818
  when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
609
819
  matched
610
- when scan(/u([0-9a-fA-F]+|\{[0-9a-fA-F]+\})/) then
820
+ when scan(/u([0-9a-fA-F]{2,4}|\{[0-9a-fA-F]{2,6}\})/) then
611
821
  [ss[1].delete("{}").to_i(16)].pack("U")
612
822
  when scan(/[McCx0-9]/) || end_of_stream? then
613
823
  rb_compile_error("Invalid escape character syntax")
@@ -643,19 +853,15 @@ class RubyLexer
643
853
  self.space_seen = false
644
854
  self.string_nest = 0
645
855
  self.token = nil
646
- self.yacc_value = nil
647
856
 
648
857
  self.cmdarg = RubyParserStuff::StackState.new(:cmdarg)
649
858
  self.cond = RubyParserStuff::StackState.new(:cond)
650
-
651
- @src = nil
652
859
  end
653
860
 
654
861
  def result lex_state, token, text # :nodoc:
655
862
  lex_state = self.arg_state if lex_state == :arg_state
656
863
  self.lex_state = lex_state if lex_state
657
- self.yacc_value = text
658
- token
864
+ [token, text]
659
865
  end
660
866
 
661
867
  def ruby18
@@ -670,6 +876,10 @@ class RubyLexer
670
876
  ss.scan re
671
877
  end
672
878
 
879
+ def scanner_class # TODO: design this out of oedipus_lex. or something.
880
+ RPStringScanner
881
+ end
882
+
673
883
  def space_vs_beginning space_type, beg_type, fallback
674
884
  if is_space_arg? check(/./m) then
675
885
  warning "`**' interpreted as argument prefix"
@@ -686,10 +896,11 @@ class RubyLexer
686
896
  self.lex_strterm = [:strterm, type, beg, nnd]
687
897
  end
688
898
 
689
- def src= src
690
- raise "bad src: #{src.inspect}" unless String === src
691
- @src = RPStringScanner.new(src)
692
- end
899
+ # TODO: consider
900
+ # def src= src
901
+ # raise "bad src: #{src.inspect}" unless String === src
902
+ # @src = RPStringScanner.new(src)
903
+ # end
693
904
 
694
905
  def tokadd_escape term # TODO: rewrite / remove
695
906
  case
@@ -820,7 +1031,7 @@ class RubyLexer
820
1031
  s
821
1032
  when /^[McCx0-9]/ then
822
1033
  rb_compile_error("Invalid escape character syntax")
823
- when /u([0-9a-fA-F]+|\{[0-9a-fA-F]+\})/ then
1034
+ when /u([0-9a-fA-F]{2,4}|\{[0-9a-fA-F]{2,6}\})/ then
824
1035
  [$1.delete("{}").to_i(16)].pack("U")
825
1036
  else
826
1037
  s
@@ -833,519 +1044,146 @@ class RubyLexer
833
1044
  # do nothing for now
834
1045
  end
835
1046
 
836
- ##
837
- # Returns the next token. Also sets yy_val is needed.
838
- #
839
- # @return Description of the Returned Value
840
-
841
- def yylex # 461 lines
842
- c = ''
843
- self.space_seen = false
844
- command_state = false
845
- ss = self.src
846
-
847
- self.token = nil
848
- self.yacc_value = nil
849
-
850
- return yylex_string if lex_strterm
851
-
852
- command_state = self.command_start
853
- self.command_start = false
854
-
855
- last_state = lex_state
856
-
857
- loop do # START OF CASE
858
- if scan(/[\ \t\r\f\v]/) then # \s - \n + \v
859
- self.space_seen = true
860
- next
861
- elsif check(/[^a-zA-Z]/) then
862
- if scan(/\n|\#/) then
863
- self.lineno = nil
864
- c = matched
865
- if c == '#' then
866
- ss.pos -= 1
867
-
868
- while scan(/\s*#.*(\n+|\z)/) do
869
- # TODO: self.lineno += matched.lines.to_a.size
870
- @comments << matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
871
- end
872
-
873
- return RubyLexer::EOF if end_of_stream?
874
- end
875
-
876
- # Replace a string of newlines with a single one
877
- scan(/\n+/)
878
-
879
- next if in_lex_state?(:expr_beg, :expr_value, :expr_class,
880
- :expr_fname, :expr_dot)
881
-
882
- if scan(/([\ \t\r\f\v]*)\./) then
883
- self.space_seen = true unless ss[1].empty?
884
-
885
- ss.pos -= 1
886
- next unless check(/\.\./)
887
- end
888
-
889
- self.command_start = true
890
-
891
- return result(:expr_beg, :tNL, nil)
892
- elsif scan(/[\]\)\}]/) then
893
- if matched == "}" then
894
- self.brace_nest -= 1
895
- else
896
- self.paren_nest -= 1
897
- end
898
-
899
- cond.lexpop
900
- cmdarg.lexpop
901
-
902
- text = matched
903
- state = text == ")" ? :expr_endfn : :expr_endarg
904
- token = {
905
- ")" => :tRPAREN,
906
- "]" => :tRBRACK,
907
- "}" => :tRCURLY
908
- }[text]
909
-
910
- return result(state, token, text)
911
- elsif scan(/\!/) then
912
- if in_arg_state? then
913
- return result(:expr_arg, :tUBANG, "!@") if scan(/@/)
914
- end
915
-
916
- text = scan(/[=~]/) ? "!#{matched}" : "!"
917
-
918
- return result(arg_state, TOKENS[text], text)
919
- elsif scan(/\.\.\.?|,|![=~]?/) then
920
- return result(:expr_beg, TOKENS[matched], matched)
921
- elsif check(/\./) then
922
- if scan(/\.\d/) then
923
- rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
924
- elsif scan(/\./) then
925
- return result(:expr_dot, :tDOT, ".")
926
- end
927
- elsif scan(/\(/) then
928
- token = if ruby18 then
929
- yylex_paren18
930
- else
931
- yylex_paren19
932
- end
933
-
934
- self.paren_nest += 1
935
-
936
- return expr_result(token, "(")
937
- elsif check(/\=/) then
938
- if scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
939
- tok = matched
940
- return result(:arg_state, TOKENS[tok], tok)
941
- elsif beginning_of_line? and scan(/\=begin(?=\s)/) then
942
- @comments << matched
943
-
944
- unless scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
945
- @comments.clear
946
- rb_compile_error("embedded document meets end of file")
947
- end
948
-
949
- @comments << matched
950
-
951
- next
952
- elsif scan(/\=(?=begin\b)/) then # h[k]=begin ... end
953
- tok = matched
954
- return result(:arg_state, TOKENS[tok], tok)
955
- else
956
- raise "you shouldn't be able to get here"
957
- end
958
- elsif scan(/\"(#{SIMPLE_STRING})\"/o) then
959
- string = matched[1..-2].gsub(ESC) { unescape $1 }
960
- return result(:expr_end, :tSTRING, string)
961
- elsif scan(/\"/) then # FALLBACK
962
- string STR_DQUOTE, '"' # TODO: question this
963
- return result(nil, :tSTRING_BEG, '"')
964
- elsif scan(/\@\@?#{IDENT_CHAR}+/o) then
965
- self.token = matched
966
-
967
- rb_compile_error "`#{self.token}` is not allowed as a variable name" if
968
- self.token =~ /\@\d/
969
-
970
- tok_id = matched =~ /^@@/ ? :tCVAR : :tIVAR
971
- return result(:expr_end, tok_id, self.token)
972
- elsif scan(/\:\:/) then
973
- if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
974
- return result(:expr_beg, :tCOLON3, "::")
975
- end
976
-
977
- return result(:expr_dot, :tCOLON2, "::")
978
- elsif ! is_end? && scan(/:([a-zA-Z_]#{IDENT_CHAR}*(?:[?!]|=(?==>)|=(?![=>]))?)/) then
979
- # scanning shortcut to symbols
980
- return result(:expr_end, :tSYMBOL, ss[1])
981
- elsif ! is_end? && (scan(/\:\"(#{SIMPLE_STRING})\"/) ||
982
- scan(/\:\'(#{SIMPLE_SSTRING})\'/)) then
983
- symbol = ss[1].gsub(ESC) { unescape $1 }
984
-
985
- rb_compile_error "symbol cannot contain '\\0'" if
986
- ruby18 && symbol =~ /\0/
987
-
988
- return result(:expr_end, :tSYMBOL, symbol)
989
- elsif scan(/\:/) then
990
- # ?: / then / when
991
- if is_end? || check(/\s/) then
992
- # TODO warn_balanced(":", "symbol literal");
993
- return result(:expr_beg, :tCOLON, ":")
994
- end
995
-
996
- case
997
- when scan(/\'/) then
998
- string STR_SSYM, matched
999
- when scan(/\"/) then
1000
- string STR_DSYM, matched
1001
- end
1002
-
1003
- return result(:expr_fname, :tSYMBEG, ":")
1004
- elsif check(/[0-9]/) then
1005
- return parse_number
1006
- elsif scan(/\[/) then
1007
- self.paren_nest += 1
1008
-
1009
- token = nil
1010
-
1011
- if in_lex_state? :expr_fname, :expr_dot then
1012
- case
1013
- when scan(/\]\=/) then
1014
- self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
1015
- return result(:expr_arg, :tASET, "[]=")
1016
- when scan(/\]/) then
1017
- self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
1018
- return result(:expr_arg, :tAREF, "[]")
1047
+ def process_string # TODO: rewrite / remove
1048
+ token = if lex_strterm[0] == :heredoc then
1049
+ self.heredoc lex_strterm
1019
1050
  else
1020
- rb_compile_error "unexpected '['"
1051
+ self.parse_string lex_strterm
1021
1052
  end
1022
- elsif is_beg? then
1023
- token = :tLBRACK
1024
- elsif is_arg? && space_seen then
1025
- token = :tLBRACK
1026
- else
1027
- token = :tLBRACK2
1028
- end
1029
1053
 
1030
- return expr_result(token, "[")
1031
- elsif scan(/\'#{SIMPLE_SSTRING}\'/) then
1032
- text = matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'") # "
1033
- return result(:expr_end, :tSTRING, text)
1034
- elsif check(/\|/) then
1035
- if scan(/\|\|\=/) then
1036
- return result(:expr_beg, :tOP_ASGN, "||")
1037
- elsif scan(/\|\|/) then
1038
- return result(:expr_beg, :tOROP, "||")
1039
- elsif scan(/\|\=/) then
1040
- return result(:expr_beg, :tOP_ASGN, "|")
1041
- elsif scan(/\|/) then
1042
- return result(:arg_state, :tPIPE, "|")
1043
- end
1044
- elsif scan(/\{/) then
1045
- self.brace_nest += 1
1046
- if lpar_beg && lpar_beg == paren_nest then
1047
- self.lpar_beg = nil
1048
- self.paren_nest -= 1
1054
+ token_type, _ = token
1049
1055
 
1050
- return expr_result(:tLAMBEG, "{")
1051
- end
1052
-
1053
- token = if is_arg? || in_lex_state?(:expr_end, :expr_endfn) then
1054
- :tLCURLY # block (primary)
1055
- elsif in_lex_state?(:expr_endarg) then
1056
- :tLBRACE_ARG # block (expr)
1057
- else
1058
- :tLBRACE # hash
1059
- end
1060
-
1061
- self.command_start = true unless token == :tLBRACE
1062
-
1063
- return expr_result(token, "{")
1064
- elsif scan(/->/) then
1065
- return result(:expr_endfn, :tLAMBDA, nil)
1066
- elsif scan(/[+-]/) then
1067
- sign = matched
1068
- utype, type = if sign == "+" then
1069
- [:tUPLUS, :tPLUS]
1070
- else
1071
- [:tUMINUS, :tMINUS]
1072
- end
1073
-
1074
- if in_arg_state? then
1075
- if scan(/@/) then
1076
- return result(:expr_arg, utype, "#{sign}@")
1077
- else
1078
- return result(:expr_arg, type, sign)
1079
- end
1080
- end
1081
-
1082
- return result(:expr_beg, :tOP_ASGN, sign) if scan(/\=/)
1056
+ if token_type == :tSTRING_END || token_type == :tREGEXP_END then
1057
+ self.lex_strterm = nil
1058
+ self.lex_state = :expr_end
1059
+ end
1083
1060
 
1084
- if (is_beg? || (is_arg? && space_seen && !check(/\s/))) then
1085
- arg_ambiguous if is_arg?
1061
+ return token
1062
+ end
1086
1063
 
1087
- if check(/\d/) then
1088
- return self.parse_number if utype == :tUPLUS
1089
- return result(:expr_beg, :tUMINUS_NUM, sign)
1090
- end
1064
+ def parse_quote # TODO: remove / rewrite
1065
+ beg, nnd, short_hand, c = nil, nil, false, nil
1091
1066
 
1092
- return result(:expr_beg, utype, sign)
1093
- end
1067
+ if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
1068
+ rb_compile_error "unknown type of %string" if ss.matched_size == 2
1069
+ c, beg, short_hand = matched, ss.getch, false
1070
+ else # Short-hand (e.g. %{, %., %!, etc)
1071
+ c, beg, short_hand = 'Q', ss.getch, true
1072
+ end
1094
1073
 
1095
- return result(:expr_beg, type, sign)
1096
- elsif check(/\*/) then
1097
- if scan(/\*\*=/) then
1098
- return result(:expr_beg, :tOP_ASGN, "**")
1099
- elsif scan(/\*\*/) then
1100
- token = space_vs_beginning :tDSTAR, :tDSTAR, :tPOW
1074
+ if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
1075
+ rb_compile_error "unterminated quoted string meets end of file"
1076
+ end
1101
1077
 
1102
- return result(:arg_state, token, "**")
1103
- elsif scan(/\*\=/) then
1104
- return result(:expr_beg, :tOP_ASGN, "*")
1105
- elsif scan(/\*/) then
1106
- token = space_vs_beginning :tSTAR, :tSTAR, :tSTAR2
1078
+ # Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
1079
+ nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
1080
+ nnd, beg = beg, "\0" if nnd.nil?
1107
1081
 
1108
- return result(:arg_state, token, "*")
1109
- end
1110
- elsif check(/\</) then
1111
- if scan(/\<\=\>/) then
1112
- return result(:arg_state, :tCMP, "<=>")
1113
- elsif scan(/\<\=/) then
1114
- return result(:arg_state, :tLEQ, "<=")
1115
- elsif scan(/\<\<\=/) then
1116
- return result(:arg_state, :tOP_ASGN, "<<")
1117
- elsif scan(/\<\</) then
1118
- if (!in_lex_state?(:expr_dot, :expr_class) &&
1119
- !is_end? &&
1120
- (!is_arg? || space_seen)) then
1121
- tok = self.heredoc_identifier
1122
- return tok if tok
1123
- end
1082
+ token_type, text = nil, "%#{c}#{beg}"
1083
+ token_type, string_type = case c
1084
+ when 'Q' then
1085
+ ch = short_hand ? nnd : c + beg
1086
+ text = "%#{ch}"
1087
+ [:tSTRING_BEG, STR_DQUOTE]
1088
+ when 'q' then
1089
+ [:tSTRING_BEG, STR_SQUOTE]
1090
+ when 'W' then
1091
+ scan(/\s*/)
1092
+ [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
1093
+ when 'w' then
1094
+ scan(/\s*/)
1095
+ [:tQWORDS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
1096
+ when 'x' then
1097
+ [:tXSTRING_BEG, STR_XQUOTE]
1098
+ when 'r' then
1099
+ [:tREGEXP_BEG, STR_REGEXP]
1100
+ when 's' then
1101
+ self.lex_state = :expr_fname
1102
+ [:tSYMBEG, STR_SSYM]
1103
+ when 'I' then
1104
+ scan(/\s*/)
1105
+ [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
1106
+ when 'i' then
1107
+ scan(/\s*/)
1108
+ [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
1109
+ end
1124
1110
 
1125
- return result(:arg_state, :tLSHFT, "\<\<")
1126
- elsif scan(/\</) then
1127
- return result(:arg_state, :tLT, "<")
1128
- end
1129
- elsif check(/\>/) then
1130
- if scan(/\>\=/) then
1131
- return result(:arg_state, :tGEQ, ">=")
1132
- elsif scan(/\>\>=/) then
1133
- return result(:arg_state, :tOP_ASGN, ">>")
1134
- elsif scan(/\>\>/) then
1135
- return result(:arg_state, :tRSHFT, ">>")
1136
- elsif scan(/\>/) then
1137
- return result(:arg_state, :tGT, ">")
1138
- end
1139
- elsif scan(/\`/) then
1140
- case lex_state
1141
- when :expr_fname then
1142
- return result(:expr_end, :tBACK_REF2, "`")
1143
- when :expr_dot then
1144
- state = command_state ? :expr_cmdarg : :expr_arg
1145
- return result(state, :tBACK_REF2, "`")
1146
- else
1147
- string STR_XQUOTE, '`'
1148
- return result(nil, :tXSTRING_BEG, "`")
1149
- end
1150
- elsif scan(/\?/) then
1151
- if is_end? then
1152
- state = ruby18 ? :expr_beg : :expr_value # HACK?
1153
- return result(state, :tEH, "?")
1154
- end
1111
+ rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
1112
+ token_type.nil?
1155
1113
 
1156
- if end_of_stream? then
1157
- rb_compile_error "incomplete character syntax"
1158
- end
1114
+ raise "huh" unless string_type
1159
1115
 
1160
- if check(/\s|\v/) then
1161
- unless is_arg? then
1162
- c2 = { " " => 's',
1163
- "\n" => 'n',
1164
- "\t" => 't',
1165
- "\v" => 'v',
1166
- "\r" => 'r',
1167
- "\f" => 'f' }[matched]
1168
-
1169
- if c2 then
1170
- warning("invalid character syntax; use ?\\" + c2)
1171
- end
1172
- end
1116
+ string string_type, nnd, beg
1173
1117
 
1174
- # ternary
1175
- state = ruby18 ? :expr_beg : :expr_value # HACK?
1176
- return result(state, :tEH, "?")
1177
- elsif check(/\w(?=\w)/) then # ternary, also
1178
- return result(:expr_beg, :tEH, "?")
1179
- end
1118
+ return token_type, text
1119
+ end
1180
1120
 
1181
- c = if scan(/\\/) then
1182
- self.read_escape
1183
- else
1184
- ss.getch
1185
- end
1121
+ def parse_string quote # TODO: rewrite / remove
1122
+ _, string_type, term, open = quote
1186
1123
 
1187
- if version == 18 then
1188
- return result(:expr_end, :tINTEGER, c[0].ord & 0xff)
1189
- else
1190
- return result(:expr_end, :tSTRING, c)
1191
- end
1192
- elsif check(/\&/) then
1193
- if scan(/\&\&\=/) then
1194
- return result(:expr_beg, :tOP_ASGN, "&&")
1195
- elsif scan(/\&\&/) then
1196
- return result(:expr_beg, :tANDOP, "&&")
1197
- elsif scan(/\&\=/) then
1198
- return result(:expr_beg, :tOP_ASGN, "&")
1199
- elsif scan(/&/) then
1200
- token = if is_arg? && space_seen && !check(/\s/) then
1201
- warning("`&' interpreted as argument prefix")
1202
- :tAMPER
1203
- elsif in_lex_state? :expr_beg, :expr_mid then
1204
- :tAMPER
1205
- else
1206
- :tAMPER2
1207
- end
1208
-
1209
- return result(:arg_state, token, "&")
1210
- end
1211
- elsif scan(/\//) then
1212
- if is_beg? then
1213
- string STR_REGEXP, '/'
1214
- return result(nil, :tREGEXP_BEG, "/")
1215
- end
1124
+ space = false # FIX: remove these
1125
+ func = string_type
1126
+ paren = open
1127
+ term_re = @@regexp_cache[term]
1216
1128
 
1217
- if scan(/\=/) then
1218
- return result(:expr_beg, :tOP_ASGN, "/")
1219
- end
1129
+ qwords = (func & STR_FUNC_QWORDS) != 0
1130
+ regexp = (func & STR_FUNC_REGEXP) != 0
1131
+ expand = (func & STR_FUNC_EXPAND) != 0
1220
1132
 
1221
- if is_arg? && space_seen then
1222
- unless scan(/\s/) then
1223
- arg_ambiguous
1224
- string STR_REGEXP, '/'
1225
- return result(nil, :tREGEXP_BEG, "/")
1226
- end
1227
- end
1133
+ unless func then # nil'ed from qwords below. *sigh*
1134
+ return :tSTRING_END, nil
1135
+ end
1228
1136
 
1229
- return result(:arg_state, :tDIVIDE, "/")
1230
- elsif scan(/\^=/) then
1231
- return result(:expr_beg, :tOP_ASGN, "^")
1232
- elsif scan(/\^/) then
1233
- return result(:arg_state, :tCARET, "^")
1234
- elsif scan(/\;/) then
1235
- self.command_start = true
1236
- return result(:expr_beg, :tSEMI, ";")
1237
- elsif scan(/\~/) then
1238
- scan(/@/) if in_lex_state? :expr_fname, :expr_dot
1239
- return result(:arg_state, :tTILDE, "~")
1240
- elsif scan(/\\/) then
1241
- if scan(/\r?\n/) then
1242
- self.lineno = nil
1243
- self.space_seen = true
1244
- next
1245
- end
1246
- rb_compile_error "bare backslash only allowed before newline"
1247
- elsif scan(/\%/) then
1248
- return parse_quote if is_beg?
1249
-
1250
- return result(:expr_beg, :tOP_ASGN, "%") if scan(/\=/)
1251
-
1252
- return parse_quote if is_arg? && space_seen && ! check(/\s/)
1253
-
1254
- return result(:arg_state, :tPERCENT, "%")
1255
- elsif check(/\$/) then
1256
- if scan(/(\$_)(\w+)/) then
1257
- self.token = matched
1258
- return result(:expr_end, :tGVAR, matched)
1259
- elsif scan(/\$_/) then
1260
- return result(:expr_end, :tGVAR, matched)
1261
- elsif scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
1262
- return result(:expr_end, :tGVAR, matched)
1263
- elsif scan(/\$([\&\`\'\+])/) then
1264
- # Explicit reference to these vars as symbols...
1265
- if lex_state == :expr_fname then
1266
- return result(:expr_end, :tGVAR, matched)
1267
- else
1268
- return result(:expr_end, :tBACK_REF, ss[1].to_sym)
1269
- end
1270
- elsif scan(/\$([1-9]\d*)/) then
1271
- if lex_state == :expr_fname then
1272
- return result(:expr_end, :tGVAR, matched)
1273
- else
1274
- return result(:expr_end, :tNTH_REF, ss[1].to_i)
1275
- end
1276
- elsif scan(/\$0/) then
1277
- return result(:expr_end, :tGVAR, matched)
1278
- elsif scan(/\$\W|\$\z/) then # TODO: remove?
1279
- return result(:expr_end, "$", "$") # FIX: "$"??
1280
- elsif scan(/\$\w+/)
1281
- return result(:expr_end, :tGVAR, matched)
1282
- end
1283
- elsif check(/\_/) then
1284
- if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
1285
- self.lineno = nil
1286
- return RubyLexer::EOF
1287
- elsif scan(/\_\w*/) then
1288
- self.token = matched
1289
- return process_token command_state, last_state
1290
- end
1291
- end
1292
- end # END OF CASE
1137
+ space = true if qwords and scan(/\s+/)
1293
1138
 
1294
- if scan(/\004|\032|\000/) || end_of_stream? then # ^D, ^Z, EOF
1295
- return RubyLexer::EOF
1296
- else # alpha check
1297
- rb_compile_error "Invalid char #{ss.rest[0].chr} in expression" unless
1298
- check IDENT
1139
+ if self.string_nest == 0 && scan(/#{term_re}/) then
1140
+ if qwords then
1141
+ quote[1] = nil
1142
+ return :tSPACE, nil
1143
+ elsif regexp then
1144
+ return :tREGEXP_END, self.regx_options
1145
+ else
1146
+ return :tSTRING_END, term
1299
1147
  end
1300
-
1301
- self.token = matched if self.scan IDENT
1302
-
1303
- return process_token command_state, last_state
1304
1148
  end
1305
- end
1306
1149
 
1307
- def yylex_paren18
1308
- self.command_start = true
1309
- token = :tLPAREN2
1150
+ return :tSPACE, nil if space
1310
1151
 
1311
- if in_lex_state? :expr_beg, :expr_mid then
1312
- token = :tLPAREN
1313
- elsif space_seen then
1314
- if in_lex_state? :expr_cmdarg then
1315
- token = :tLPAREN_ARG
1316
- elsif in_lex_state? :expr_arg then
1317
- warning "don't put space before argument parentheses"
1152
+ self.string_buffer = []
1153
+
1154
+ if expand
1155
+ case
1156
+ when scan(/#(?=[$@])/) then
1157
+ return :tSTRING_DVAR, nil
1158
+ when scan(/#[{]/) then
1159
+ return :tSTRING_DBEG, nil
1160
+ when scan(/#/) then
1161
+ string_buffer << '#'
1318
1162
  end
1319
- else
1320
- # not a ternary -- do nothing?
1321
1163
  end
1322
1164
 
1323
- token
1324
- end
1325
-
1326
- def yylex_paren19
1327
- if is_beg? then
1328
- :tLPAREN
1329
- elsif is_space_arg? then
1330
- :tLPAREN_ARG
1331
- else
1332
- :tLPAREN2 # plain '(' in parse.y
1165
+ if tokadd_string(func, term, paren) == RubyLexer::EOF then
1166
+ rb_compile_error "unterminated string meets end of file"
1333
1167
  end
1168
+
1169
+ return :tSTRING_CONTENT, string_buffer.join
1334
1170
  end
1171
+ end
1335
1172
 
1336
- def yylex_string # TODO: rewrite / remove
1337
- token = if lex_strterm[0] == :heredoc then
1338
- self.heredoc lex_strterm
1339
- else
1340
- self.parse_string lex_strterm
1341
- end
1173
+ require "ruby_lexer.rex"
1342
1174
 
1343
- if token == :tSTRING_END || token == :tREGEXP_END then
1344
- self.lineno = nil
1345
- self.lex_strterm = nil
1346
- self.lex_state = :expr_end
1175
+ if ENV["DEBUG"] then
1176
+ class RubyLexer
1177
+ alias :old_lineno= :lineno=
1178
+
1179
+ def d o
1180
+ $stderr.puts o.inspect
1347
1181
  end
1348
1182
 
1349
- return token
1183
+ def lineno= n
1184
+ self.old_lineno= n
1185
+ where = caller.first.split(/:/).first(2).join(":")
1186
+ d :lineno => [n, where, ss && ss.rest[0,40]]
1187
+ end
1350
1188
  end
1351
1189
  end