ruby_parser 3.13.0 → 3.13.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -51,7 +51,7 @@ preclow
51
51
  rule
52
52
 
53
53
  program: {
54
- self.lexer.lex_state = :expr_beg
54
+ self.lexer.lex_state = EXPR_BEG
55
55
  }
56
56
  top_compstmt
57
57
  {
@@ -153,7 +153,7 @@ rule
153
153
 
154
154
  stmt: kALIAS fitem
155
155
  {
156
- lexer.lex_state = :expr_fname
156
+ lexer.lex_state = EXPR_FNAME
157
157
  result = self.lexer.lineno
158
158
  }
159
159
  fitem
@@ -613,14 +613,14 @@ rule
613
613
  fname: tIDENTIFIER | tCONSTANT | tFID
614
614
  | op
615
615
  {
616
- lexer.lex_state = :expr_end
616
+ lexer.lex_state = EXPR_END
617
617
  result = val[0]
618
618
  }
619
619
 
620
620
  | reswords
621
621
  {
622
622
  (sym, _line), = val
623
- lexer.lex_state = :expr_end
623
+ lexer.lex_state = EXPR_END
624
624
  result = sym
625
625
  }
626
626
 
@@ -639,7 +639,7 @@ rule
639
639
  |
640
640
  undef_list tCOMMA
641
641
  {
642
- lexer.lex_state = :expr_fname
642
+ lexer.lex_state = EXPR_FNAME
643
643
  }
644
644
  fitem
645
645
  {
@@ -690,13 +690,21 @@ rule
690
690
  }
691
691
  | primary_value tCOLON2 tCONSTANT tOP_ASGN arg_rhs
692
692
  {
693
- # TODO: assignment
694
- raise "not yet: %p" % [val]
693
+ lhs1, _, lhs2, op, rhs = val
694
+
695
+ lhs = s(:colon2, lhs1, lhs2.to_sym).line lhs1.line
696
+ result = new_const_op_asgn [lhs, op, rhs]
697
+ }
698
+ | tCOLON3 tCONSTANT
699
+ {
700
+ result = self.lexer.lineno
695
701
  }
696
- | tCOLON3 tCONSTANT tOP_ASGN arg_rhs
702
+ tOP_ASGN arg_rhs
697
703
  {
698
- # TODO: assignment
699
- raise "not yet: %p" % [val]
704
+ _, lhs, line, op, rhs = val
705
+
706
+ lhs = s(:colon3, lhs.to_sym).line line
707
+ result = new_const_op_asgn [lhs, op, rhs]
700
708
  }
701
709
  | backref tOP_ASGN arg_rhs
702
710
  {
@@ -1037,7 +1045,7 @@ rule
1037
1045
  }
1038
1046
  | tLPAREN_ARG rparen
1039
1047
  {
1040
- # TODO: lex_state = :expr_endarg in between
1048
+ # TODO: lex_state = EXPR_ENDARG in between
1041
1049
  debug20 13, val, result
1042
1050
  }
1043
1051
  | tLPAREN_ARG
@@ -1048,7 +1056,7 @@ rule
1048
1056
  }
1049
1057
  stmt
1050
1058
  {
1051
- lexer.lex_state = :expr_endarg
1059
+ lexer.lex_state = EXPR_ENDARG
1052
1060
  }
1053
1061
  rparen
1054
1062
  {
@@ -1248,13 +1256,13 @@ rule
1248
1256
  | k_def singleton dot_or_colon
1249
1257
  {
1250
1258
  self.comments.push self.lexer.comments
1251
- lexer.lex_state = :expr_fname
1259
+ lexer.lex_state = EXPR_FNAME
1252
1260
  }
1253
1261
  fname
1254
1262
  {
1255
1263
  self.in_single += 1
1256
1264
  self.env.extend
1257
- lexer.lex_state = :expr_endfn # force for args
1265
+ lexer.lex_state = EXPR_ENDFN # force for args
1258
1266
  result = [lexer.lineno, self.lexer.cmdarg.stack.dup]
1259
1267
  lexer.cmdarg.stack.replace [false]
1260
1268
  }
@@ -1924,7 +1932,7 @@ regexp_contents: none
1924
1932
  result = lexer.lex_strterm
1925
1933
 
1926
1934
  lexer.lex_strterm = nil
1927
- lexer.lex_state = :expr_beg
1935
+ lexer.lex_state = EXPR_BEG
1928
1936
  }
1929
1937
  string_dvar
1930
1938
  {
@@ -1945,7 +1953,7 @@ regexp_contents: none
1945
1953
  lexer.brace_nest = 0
1946
1954
  lexer.string_nest = 0
1947
1955
 
1948
- lexer.lex_state = :expr_beg
1956
+ lexer.lex_state = EXPR_BEG
1949
1957
  }
1950
1958
  compstmt
1951
1959
  tSTRING_DEND
@@ -1986,7 +1994,7 @@ regexp_contents: none
1986
1994
 
1987
1995
  symbol: tSYMBEG sym
1988
1996
  {
1989
- lexer.lex_state = :expr_end
1997
+ lexer.lex_state = EXPR_END
1990
1998
  result = val[1].to_sym
1991
1999
  }
1992
2000
  | tSYMBOL
@@ -1998,7 +2006,7 @@ regexp_contents: none
1998
2006
 
1999
2007
  dsym: tSYMBEG xstring_contents tSTRING_END
2000
2008
  {
2001
- lexer.lex_state = :expr_end
2009
+ lexer.lex_state = EXPR_END
2002
2010
  result = val[1]
2003
2011
 
2004
2012
  result ||= s(:str, "")
@@ -2074,7 +2082,7 @@ keyword_variable: kNIL { result = s(:nil) }
2074
2082
 
2075
2083
  superclass: tLT
2076
2084
  {
2077
- lexer.lex_state = :expr_beg
2085
+ lexer.lex_state = EXPR_BEG
2078
2086
  lexer.command_start = true
2079
2087
  }
2080
2088
  expr_value term
@@ -2089,13 +2097,13 @@ keyword_variable: kNIL { result = s(:nil) }
2089
2097
  f_arglist: tLPAREN2 f_args rparen
2090
2098
  {
2091
2099
  result = val[1]
2092
- self.lexer.lex_state = :expr_beg
2100
+ self.lexer.lex_state = EXPR_BEG
2093
2101
  self.lexer.command_start = true
2094
2102
  }
2095
2103
  | {
2096
2104
  result = self.in_kwarg
2097
2105
  self.in_kwarg = true
2098
- # TODO: self.lexer.lex_state |= :expr_label
2106
+ self.lexer.lex_state |= EXPR_LABEL
2099
2107
  }
2100
2108
  f_args term
2101
2109
  {
@@ -2103,7 +2111,7 @@ keyword_variable: kNIL { result = s(:nil) }
2103
2111
 
2104
2112
  self.in_kwarg = kwarg
2105
2113
  result = args
2106
- lexer.lex_state = :expr_beg
2114
+ lexer.lex_state = EXPR_BEG
2107
2115
  lexer.command_start = true
2108
2116
  }
2109
2117
 
@@ -2384,7 +2392,7 @@ keyword_variable: kNIL { result = s(:nil) }
2384
2392
  singleton: var_ref
2385
2393
  | tLPAREN2
2386
2394
  {
2387
- lexer.lex_state = :expr_beg
2395
+ lexer.lex_state = EXPR_BEG
2388
2396
  }
2389
2397
  expr rparen
2390
2398
  {
@@ -2461,6 +2469,7 @@ end
2461
2469
 
2462
2470
  require "ruby_lexer"
2463
2471
  require "ruby_parser_extras"
2472
+ include RubyLexer::State::Values
2464
2473
 
2465
2474
  # :stopdoc:
2466
2475
 
@@ -34,9 +34,104 @@ class RubyLexer
34
34
  STR_SSYM = STR_FUNC_SYMBOL
35
35
  STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
36
36
 
37
- EXPR_BEG_ANY = [:expr_beg, :expr_mid, :expr_class ]
38
- EXPR_ARG_ANY = [:expr_arg, :expr_cmdarg, ]
39
- EXPR_END_ANY = [:expr_end, :expr_endarg, :expr_endfn]
37
+ class State
38
+ attr_accessor :n
39
+
40
+ def initialize o
41
+ raise ArgumentError, "bad state: %p" % [o] unless Integer === o # TODO: remove
42
+
43
+ self.n = o
44
+ end
45
+
46
+ def == o
47
+ o.class == self.class && o.n == self.n
48
+ end
49
+
50
+ def =~ v
51
+ (self.n & v.n) != 0
52
+ end
53
+
54
+ def | v
55
+ self.class.new(self.n | v.n)
56
+ end
57
+
58
+ def inspect
59
+ return "EXPR_NONE" if n.zero?
60
+ NAMES.map { |v,k| k if self =~ v }.compact.join "|"
61
+ end
62
+
63
+ module Values
64
+ EXPR_NONE = State.new 0x0
65
+ EXPR_BEG = State.new 0x1
66
+ EXPR_END = State.new 0x2
67
+ EXPR_ENDARG = State.new 0x4
68
+ EXPR_ENDFN = State.new 0x8
69
+ EXPR_ARG = State.new 0x10
70
+ EXPR_CMDARG = State.new 0x20
71
+ EXPR_MID = State.new 0x40
72
+ EXPR_FNAME = State.new 0x80
73
+ EXPR_DOT = State.new 0x100
74
+ EXPR_CLASS = State.new 0x200
75
+ EXPR_LABEL = State.new 0x400
76
+ EXPR_LABELED = State.new 0x800
77
+ EXPR_FITEM = State.new 0x1000
78
+
79
+ EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
80
+ EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
81
+ EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
82
+
83
+ # extra fake lex_state names to make things a bit cleaner
84
+
85
+ EXPR_LAB = EXPR_ARG|EXPR_LABELED
86
+ EXPR_NUM = EXPR_END|EXPR_ENDARG
87
+ EXPR_PAR = EXPR_BEG|EXPR_LABEL
88
+ EXPR_PAD = EXPR_BEG|EXPR_LABELED
89
+ end
90
+
91
+ include Values
92
+
93
+ NAMES = {
94
+ EXPR_NONE => "EXPR_NONE",
95
+ EXPR_BEG => "EXPR_BEG",
96
+ EXPR_END => "EXPR_END",
97
+ EXPR_ENDARG => "EXPR_ENDARG",
98
+ EXPR_ENDFN => "EXPR_ENDFN",
99
+ EXPR_ARG => "EXPR_ARG",
100
+ EXPR_CMDARG => "EXPR_CMDARG",
101
+ EXPR_MID => "EXPR_MID",
102
+ EXPR_FNAME => "EXPR_FNAME",
103
+ EXPR_DOT => "EXPR_DOT",
104
+ EXPR_CLASS => "EXPR_CLASS",
105
+ EXPR_LABEL => "EXPR_LABEL",
106
+ EXPR_LABELED => "EXPR_LABELED",
107
+ EXPR_FITEM => "EXPR_FITEM",
108
+ }
109
+ end
110
+
111
+ include State::Values
112
+
113
+ if $DEBUG then
114
+ def lex_state= o
115
+ return if @lex_state == o
116
+ raise ArgumentError, "bad state: %p" % [o] unless State === o
117
+ if ENV["V"] then
118
+ c = caller[0]
119
+ c = caller[1] if c =~ /\b(expr_)?result\b/
120
+ c = caller[2] if c =~ /\b(expr_)?result\b/
121
+ warn "lex_state: %p -> %p from %s" % [lex_state, o, c.clean_caller]
122
+ else
123
+ warn "lex_state: %p -> %p" % [lex_state, o]
124
+ end
125
+ @lex_state = o
126
+ end
127
+ else
128
+ def lex_state= o
129
+ raise ArgumentError, "bad state: %p" % [o] unless State === o
130
+ @lex_state = o
131
+ end
132
+ end
133
+
134
+ attr_reader :lex_state
40
135
 
41
136
  ESCAPES = {
42
137
  "a" => "\007",
@@ -90,7 +185,6 @@ class RubyLexer
90
185
  # Additional context surrounding tokens that both the lexer and
91
186
  # grammar use.
92
187
 
93
- attr_accessor :lex_state
94
188
  attr_accessor :lex_strterm
95
189
  attr_accessor :lpar_beg
96
190
  attr_accessor :paren_nest
@@ -99,24 +193,14 @@ class RubyLexer
99
193
  attr_accessor :string_buffer
100
194
  attr_accessor :string_nest
101
195
 
102
- if $DEBUG then
103
- alias lex_state= lex_state=
104
- def lex_state=o
105
- return if @lex_state == o
106
- c = caller.first
107
- c = caller[1] if c =~ /\bresult\b/
108
- warn "lex_state: %p -> %p from %s" % [@lex_state, o, c.clean_caller]
109
- @lex_state = o
110
- end
111
- end
112
-
113
196
  # Last token read via next_token.
114
197
  attr_accessor :token
115
198
 
116
199
  attr_writer :comments
117
200
 
118
201
  def initialize _ = nil
119
- @lex_state = :expr_none
202
+ @lex_state = nil # remove one warning under $DEBUG
203
+ self.lex_state = EXPR_NONE
120
204
 
121
205
  self.cond = RubyParserStuff::StackState.new(:cond, $DEBUG)
122
206
  self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
@@ -129,7 +213,7 @@ class RubyLexer
129
213
  end
130
214
 
131
215
  def arg_state
132
- in_arg_state? ? :expr_arg : :expr_beg
216
+ is_after_operator? ? EXPR_ARG : EXPR_BEG
133
217
  end
134
218
 
135
219
  def beginning_of_line?
@@ -148,17 +232,17 @@ class RubyLexer
148
232
  end
149
233
 
150
234
  def expr_dot?
151
- lex_state == :expr_dot
235
+ lex_state =~ EXPR_DOT
152
236
  end
153
237
 
154
- def expr_fname?
155
- lex_state == :expr_fname
238
+ def expr_fname? # REFACTOR
239
+ lex_state =~ EXPR_FNAME
156
240
  end
157
241
 
158
242
  def expr_result token, text
159
243
  cond.push false
160
244
  cmdarg.push false
161
- result :expr_beg, token, text
245
+ result EXPR_BEG, token, text
162
246
  end
163
247
 
164
248
  def heredoc here # TODO: rewrite / remove
@@ -214,7 +298,12 @@ class RubyLexer
214
298
 
215
299
  self.lex_strterm = [:heredoc, eos, func, last_line]
216
300
 
217
- string_content = string_buffer.join.delete("\r")
301
+ string_content = begin
302
+ s = string_buffer.join
303
+ s.delete "\r"
304
+ rescue ArgumentError
305
+ s.b.delete("\r").force_encoding Encoding::UTF_8
306
+ end
218
307
 
219
308
  string_content = heredoc_dedent(string_content) if content_indent && ruby23plus?
220
309
 
@@ -311,16 +400,12 @@ class RubyLexer
311
400
  end
312
401
  end
313
402
 
314
- def in_fname?
315
- in_lex_state? :expr_fname
316
- end
317
-
318
- def in_arg_state? # TODO: rename is_after_operator?
319
- in_lex_state? :expr_fname, :expr_dot
403
+ def in_fname? # REFACTOR
404
+ lex_state =~ EXPR_FNAME
320
405
  end
321
406
 
322
- def in_lex_state?(*states)
323
- states.include? lex_state
407
+ def is_after_operator?
408
+ lex_state =~ EXPR_FNAME|EXPR_DOT
324
409
  end
325
410
 
326
411
  def int_with_base base
@@ -329,27 +414,26 @@ class RubyLexer
329
414
  text = matched
330
415
  case
331
416
  when text.end_with?('ri')
332
- return result(:expr_end, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base))))
417
+ return result(EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop.to_i(base))))
333
418
  when text.end_with?('r')
334
- return result(:expr_end, :tRATIONAL, Rational(text.chop.to_i(base)))
419
+ return result(EXPR_NUM, :tRATIONAL, Rational(text.chop.to_i(base)))
335
420
  when text.end_with?('i')
336
- return result(:expr_end, :tIMAGINARY, Complex(0, text.chop.to_i(base)))
421
+ return result(EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_i(base)))
337
422
  else
338
- return result(:expr_end, :tINTEGER, text.to_i(base))
423
+ return result(EXPR_NUM, :tINTEGER, text.to_i(base))
339
424
  end
340
425
  end
341
426
 
342
427
  def is_arg?
343
- in_lex_state?(*EXPR_ARG_ANY)
428
+ lex_state =~ EXPR_ARG_ANY
344
429
  end
345
430
 
346
431
  def is_beg?
347
- # TODO: in_lex_state?(*EXPR_BEG_ANY) || lex_state == [:expr_arg, :expr_labeled]
348
- in_lex_state?(*EXPR_BEG_ANY, :expr_value, :expr_labeled)
432
+ lex_state =~ EXPR_BEG_ANY || lex_state == EXPR_LAB # yes, == EXPR_LAB
349
433
  end
350
434
 
351
435
  def is_end?
352
- in_lex_state?(*EXPR_END_ANY)
436
+ lex_state =~ EXPR_END_ANY
353
437
  end
354
438
 
355
439
  def lvar_defined? id
@@ -357,13 +441,12 @@ class RubyLexer
357
441
  self.parser.env[id.to_sym] == :lvar
358
442
  end
359
443
 
360
-
361
444
  def ruby22_label?
362
445
  ruby22plus? and is_label_possible?
363
446
  end
364
447
 
365
448
  def is_label_possible?
366
- (in_lex_state?(:expr_beg, :expr_endfn) && !cmd_state) || is_arg?
449
+ (lex_state =~ EXPR_LABEL|EXPR_ENDFN && !cmd_state) || is_arg?
367
450
  end
368
451
 
369
452
  def is_label_suffix?
@@ -390,7 +473,7 @@ class RubyLexer
390
473
  token = if is_arg? && space_seen && !check(/\s/) then
391
474
  warning("`&' interpreted as argument prefix")
392
475
  :tAMPER
393
- elsif in_lex_state? :expr_beg, :expr_mid then
476
+ elsif lex_state =~ EXPR_BEG|EXPR_MID then
394
477
  :tAMPER
395
478
  else
396
479
  :tAMPER2
@@ -402,7 +485,7 @@ class RubyLexer
402
485
  def process_backref text
403
486
  token = ss[1].to_sym
404
487
  # TODO: can't do lineno hack w/ symbol
405
- result :expr_end, :tBACK_REF, token
488
+ result EXPR_END, :tBACK_REF, token
406
489
  end
407
490
 
408
491
  def process_begin text
@@ -427,17 +510,17 @@ class RubyLexer
427
510
  case matched
428
511
  when "}" then
429
512
  self.brace_nest -= 1
430
- self.lex_state = :expr_endarg # TODO: :expr_end ? Look at 2.6
513
+ self.lex_state = EXPR_ENDARG # TODO: EXPR_END ? Look at 2.6
431
514
 
432
515
  return :tSTRING_DEND, matched if brace_nest < 0
433
516
  return :tRCURLY, matched
434
517
  when "]" then
435
518
  self.paren_nest -= 1
436
- self.lex_state = :expr_endarg
519
+ self.lex_state = EXPR_ENDARG
437
520
  return :tRBRACK, matched
438
521
  when ")" then
439
522
  self.paren_nest -= 1
440
- self.lex_state = :expr_endfn
523
+ self.lex_state = EXPR_ENDFN
441
524
  return :tRPAREN, matched
442
525
  else
443
526
  raise "Unknown bracing: #{matched.inspect}"
@@ -447,7 +530,7 @@ class RubyLexer
447
530
  def process_colon1 text
448
531
  # ?: / then / when
449
532
  if is_end? || check(/\s/) then
450
- return result :expr_beg, :tCOLON, text
533
+ return result EXPR_BEG, :tCOLON, text
451
534
  end
452
535
 
453
536
  case
@@ -457,14 +540,14 @@ class RubyLexer
457
540
  string STR_DSYM
458
541
  end
459
542
 
460
- result :expr_fname, :tSYMBEG, text
543
+ result EXPR_FNAME, :tSYMBEG, text
461
544
  end
462
545
 
463
546
  def process_colon2 text
464
- if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
465
- result :expr_beg, :tCOLON3, text
547
+ if is_beg? || lex_state =~ EXPR_CLASS || is_space_arg? then
548
+ result EXPR_BEG, :tCOLON3, text
466
549
  else
467
- result :expr_dot, :tCOLON2, text
550
+ result EXPR_DOT, :tCOLON2, text
468
551
  end
469
552
  end
470
553
 
@@ -479,21 +562,23 @@ class RubyLexer
479
562
  return expr_result(:tLAMBEG, "{")
480
563
  end
481
564
 
482
- token = case lex_state
483
- when :expr_labeled then
565
+ token = case
566
+ when lex_state =~ EXPR_LABELED then
484
567
  :tLBRACE # hash
485
- when *EXPR_ARG_ANY, :expr_end, :expr_endfn then
486
- :tLCURLY # block (primary)
487
- when :expr_endarg
568
+ when lex_state =~ EXPR_ARG_ANY|EXPR_END|EXPR_ENDFN then
569
+ :tLCURLY # block (primary) '{' in parse.y
570
+ when lex_state =~ EXPR_ENDARG then
488
571
  :tLBRACE_ARG # block (expr)
489
572
  else
490
573
  :tLBRACE # hash
491
574
  end
492
575
 
493
- # TODO: self.lex_state |= :expr_label if token != :tLBRACE_ARG
576
+ state = token == :tLBRACE_ARG ? EXPR_BEG : EXPR_PAR
494
577
  self.command_start = true if token != :tLBRACE
495
578
 
496
- return expr_result(token, "{")
579
+ cond.push false
580
+ cmdarg.push false
581
+ result state, token, text
497
582
  end
498
583
 
499
584
  def process_float text
@@ -501,45 +586,45 @@ class RubyLexer
501
586
 
502
587
  case
503
588
  when text.end_with?('ri')
504
- return result(:expr_end, :tIMAGINARY, Complex(0, Rational(text.chop.chop)))
505
- when text.end_with?('r')
506
- return result(:expr_end, :tRATIONAL, Rational(text.chop))
589
+ return result EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text.chop.chop))
507
590
  when text.end_with?('i')
508
- return result(:expr_end, :tIMAGINARY, Complex(0, text.chop.to_f))
591
+ return result EXPR_NUM, :tIMAGINARY, Complex(0, text.chop.to_f)
592
+ when text.end_with?('r')
593
+ return result EXPR_NUM, :tRATIONAL, Rational(text.chop)
509
594
  else
510
- return result(:expr_end, :tFLOAT, text.to_f)
595
+ return result EXPR_NUM, :tFLOAT, text.to_f
511
596
  end
512
597
  end
513
598
 
514
599
  def process_gvar text
515
600
  text.lineno = self.lineno
516
- result(:expr_end, :tGVAR, text)
601
+ result EXPR_END, :tGVAR, text
517
602
  end
518
603
 
519
604
  def process_gvar_oddity text
520
- return result :expr_end, "$", "$" if text == "$" # TODO: wtf is this?
605
+ return result EXPR_END, "$", "$" if text == "$" # TODO: wtf is this?
521
606
  rb_compile_error "#{text.inspect} is not allowed as a global variable name"
522
607
  end
523
608
 
524
609
  def process_ivar text
525
610
  tok_id = text =~ /^@@/ ? :tCVAR : :tIVAR
526
611
  text.lineno = self.lineno
527
- return result(:expr_end, tok_id, text)
612
+ result EXPR_END, tok_id, text
528
613
  end
529
614
 
530
615
  def process_lchevron text
531
- if (!in_lex_state?(:expr_dot, :expr_class) &&
616
+ if (lex_state !~ EXPR_DOT|EXPR_CLASS &&
532
617
  !is_end? &&
533
- (!is_arg? || space_seen)) then # TODO: || in_state(:expr_labeled)
618
+ (!is_arg? || lex_state =~ EXPR_LABELED || space_seen)) then
534
619
  tok = self.heredoc_identifier
535
620
  return tok if tok
536
621
  end
537
622
 
538
- if in_arg_state? then
539
- self.lex_state = :expr_arg
623
+ if is_after_operator? then
624
+ self.lex_state = EXPR_ARG
540
625
  else
541
- self.command_start = true if lex_state == :expr_class
542
- self.lex_state = :expr_beg
626
+ self.command_start = true if lex_state =~ EXPR_CLASS
627
+ self.lex_state = EXPR_BEG
543
628
  end
544
629
 
545
630
  return result(lex_state, :tLSHFT, "\<\<")
@@ -567,17 +652,15 @@ class RubyLexer
567
652
  # Replace a string of newlines with a single one
568
653
  self.lineno += matched.lines.to_a.size if scan(/\n+/)
569
654
 
570
- # TODO: remove :expr_value -- audit all uses of it
571
- c = in_lex_state?(:expr_beg, :expr_value, :expr_class,
572
- :expr_fname, :expr_dot) && !in_lex_state?(:expr_labeled)
573
-
655
+ c = (lex_state =~ EXPR_BEG|EXPR_CLASS|EXPR_FNAME|EXPR_DOT &&
656
+ lex_state !~ EXPR_LABELED)
574
657
  # TODO: figure out what token_seen is for
575
- # TODO: if c || self.lex_state == [:expr_beg, :expr_labeled] then
576
- if c || self.lex_state == :expr_labeled then
658
+ if c || self.lex_state == EXPR_LAB then # yes, == EXPR_LAB
577
659
  # ignore if !fallthrough?
578
660
  if !c && parser.in_kwarg then
579
661
  # normal newline
580
- return result(:expr_beg, :tNL, nil)
662
+ self.command_start = true
663
+ return result EXPR_BEG, :tNL, nil
581
664
  else
582
665
  return # skip
583
666
  end
@@ -592,41 +675,46 @@ class RubyLexer
592
675
 
593
676
  self.command_start = true
594
677
 
595
- return result(:expr_beg, :tNL, nil)
678
+ return result(EXPR_BEG, :tNL, nil)
596
679
  end
597
680
 
598
681
  def process_nthref text
599
682
  # TODO: can't do lineno hack w/ number
600
- result :expr_end, :tNTH_REF, ss[1].to_i
683
+ result EXPR_END, :tNTH_REF, ss[1].to_i
601
684
  end
602
685
 
603
686
  def process_paren text
604
- token = process_paren19
687
+ token = if is_beg? then
688
+ :tLPAREN
689
+ elsif !space_seen then
690
+ # foo( ... ) => method call, no ambiguity
691
+ :tLPAREN2
692
+ elsif is_space_arg? then
693
+ :tLPAREN_ARG
694
+ elsif lex_state =~ EXPR_ENDFN && !lambda_beginning? then
695
+ # TODO:
696
+ # warn("parentheses after method name is interpreted as " \
697
+ # "an argument list, not a decomposed argument")
698
+ :tLPAREN2
699
+ else
700
+ :tLPAREN2 # plain '(' in parse.y
701
+ end
605
702
 
606
703
  self.paren_nest += 1
607
704
 
608
- # TODO: add :expr_label to :expr_beg (set in expr_result below)
609
- return expr_result(token, "(")
610
- end
611
-
612
- def process_paren19
613
- if is_beg? then
614
- :tLPAREN
615
- elsif is_space_arg? then
616
- :tLPAREN_ARG
617
- else
618
- :tLPAREN2 # plain '(' in parse.y
619
- end
705
+ cond.push false
706
+ cmdarg.push false
707
+ result EXPR_PAR, token, text
620
708
  end
621
709
 
622
710
  def process_percent text
623
711
  return parse_quote if is_beg?
624
712
 
625
- return result(:expr_beg, :tOP_ASGN, "%") if scan(/\=/)
713
+ return result EXPR_BEG, :tOP_ASGN, "%" if scan(/\=/)
626
714
 
627
- return parse_quote if is_arg? && space_seen && ! check(/\s/)
715
+ return parse_quote if is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
628
716
 
629
- return result(:arg_state, :tPERCENT, "%")
717
+ return result :arg_state, :tPERCENT, "%"
630
718
  end
631
719
 
632
720
  def process_plus_minus text
@@ -637,33 +725,33 @@ class RubyLexer
637
725
  [:tUMINUS, :tMINUS]
638
726
  end
639
727
 
640
- if in_arg_state? then
728
+ if is_after_operator? then
641
729
  if scan(/@/) then
642
- return result(:expr_arg, utype, "#{sign}@")
730
+ return result(EXPR_ARG, utype, "#{sign}@")
643
731
  else
644
- return result(:expr_arg, type, sign)
732
+ return result(EXPR_ARG, type, sign)
645
733
  end
646
734
  end
647
735
 
648
- return result(:expr_beg, :tOP_ASGN, sign) if scan(/\=/)
736
+ return result(EXPR_BEG, :tOP_ASGN, sign) if scan(/\=/)
649
737
 
650
738
  if (is_beg? || (is_arg? && space_seen && !check(/\s/))) then
651
739
  arg_ambiguous if is_arg?
652
740
 
653
741
  if check(/\d/) then
654
742
  return nil if utype == :tUPLUS
655
- return result(:expr_beg, :tUMINUS_NUM, sign)
743
+ return result EXPR_BEG, :tUMINUS_NUM, sign
656
744
  end
657
745
 
658
- return result(:expr_beg, utype, sign)
746
+ return result EXPR_BEG, utype, sign
659
747
  end
660
748
 
661
- return result(:expr_beg, type, sign)
749
+ result EXPR_BEG, type, sign
662
750
  end
663
751
 
664
752
  def process_questionmark text
665
753
  if is_end? then
666
- return result(:expr_value, :tEH, "?")
754
+ return result EXPR_BEG, :tEH, "?"
667
755
  end
668
756
 
669
757
  if end_of_stream? then
@@ -685,9 +773,9 @@ class RubyLexer
685
773
  end
686
774
 
687
775
  # ternary
688
- return result(:expr_value, :tEH, "?")
776
+ return result EXPR_BEG, :tEH, "?"
689
777
  elsif check(/\w(?=\w)/) then # ternary, also
690
- return result(:expr_beg, :tEH, "?")
778
+ return result EXPR_BEG, :tEH, "?"
691
779
  end
692
780
 
693
781
  c = if scan(/\\/) then
@@ -696,7 +784,7 @@ class RubyLexer
696
784
  ss.getch
697
785
  end
698
786
 
699
- return result(:expr_end, :tSTRING, c)
787
+ result EXPR_END, :tSTRING, c
700
788
  end
701
789
 
702
790
  def process_slash text
@@ -707,7 +795,7 @@ class RubyLexer
707
795
  end
708
796
 
709
797
  if scan(/\=/) then
710
- return result(:expr_beg, :tOP_ASGN, "/")
798
+ return result(EXPR_BEG, :tOP_ASGN, "/")
711
799
  end
712
800
 
713
801
  if is_arg? && space_seen then
@@ -726,28 +814,28 @@ class RubyLexer
726
814
 
727
815
  token = nil
728
816
 
729
- if in_arg_state? then
817
+ if is_after_operator? then
730
818
  case
731
819
  when scan(/\]\=/) then
732
820
  self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
733
- return result(:expr_arg, :tASET, "[]=")
821
+ return result EXPR_ARG, :tASET, "[]="
734
822
  when scan(/\]/) then
735
823
  self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
736
- return result(:expr_arg, :tAREF, "[]")
824
+ return result EXPR_ARG, :tAREF, "[]"
737
825
  else
738
826
  rb_compile_error "unexpected '['"
739
827
  end
740
828
  elsif is_beg? then
741
829
  token = :tLBRACK
742
- elsif is_arg? && space_seen then
830
+ elsif is_arg? && (space_seen || lex_state =~ EXPR_LABELED) then
743
831
  token = :tLBRACK
744
832
  else
745
833
  token = :tLBRACK2
746
834
  end
747
835
 
748
- # TODO: this is done by expr_result except "|EXPR_LABEL")
749
- # SET_LEX_STATE(EXPR_BEG|EXPR_LABEL);
750
- expr_result token, "["
836
+ cond.push false
837
+ cmdarg.push false
838
+ result EXPR_PAR, token, text
751
839
  end
752
840
 
753
841
  def possibly_escape_string text, check
@@ -763,7 +851,7 @@ class RubyLexer
763
851
  def process_symbol text
764
852
  symbol = possibly_escape_string text, /^:"/
765
853
 
766
- return result(:expr_end, :tSYMBOL, symbol)
854
+ result EXPR_END, :tSYMBOL, symbol
767
855
  end
768
856
 
769
857
  def was_label?
@@ -780,19 +868,19 @@ class RubyLexer
780
868
  text = text[0..-2]
781
869
  end
782
870
 
783
- result :expr_end, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
871
+ result EXPR_END, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
784
872
  end
785
873
 
786
874
  def process_label text
787
875
  symbol = possibly_escape_string text, /^"/
788
876
 
789
- result(:expr_labeled, :tLABEL, [symbol, self.lineno]) # TODO: expr_arg|expr_labeled
877
+ result EXPR_LAB, :tLABEL, [symbol, self.lineno]
790
878
  end
791
879
 
792
880
  def process_token text
793
881
  # matching: parse_ident in compare/parse23.y:7989
794
882
  # TODO: make this always return [token, lineno]
795
- self.last_state = lex_state
883
+ # FIX: remove: self.last_state = lex_state
796
884
 
797
885
  token = self.token = text
798
886
  token << matched if scan(/[\!\?](?!=)/)
@@ -801,7 +889,7 @@ class RubyLexer
801
889
  case
802
890
  when token =~ /[!?]$/ then
803
891
  :tFID
804
- when in_lex_state?(:expr_fname) && scan(/=(?:(?![~>=])|(?==>))/) then
892
+ when lex_state =~ EXPR_FNAME && scan(/=(?:(?![~>=])|(?==>))/) then
805
893
  # ident=, not =~ => == or followed by =>
806
894
  # TODO test lexing of a=>b vs a==>b
807
895
  token << matched
@@ -814,31 +902,30 @@ class RubyLexer
814
902
 
815
903
  if is_label_possible? and is_label_suffix? then
816
904
  scan(/:/)
817
- # TODO: :expr_arg|:expr_labeled
818
- return result :expr_labeled, :tLABEL, [token, self.lineno]
905
+ return result EXPR_LAB, :tLABEL, [token, self.lineno]
819
906
  end
820
907
 
821
- # TODO: mb == ENC_CODERANGE_7BIT && !in_lex_state?(:expr_dot)
822
- unless in_lex_state? :expr_dot then
908
+ # TODO: mb == ENC_CODERANGE_7BIT && lex_state !~ EXPR_DOT
909
+ if lex_state !~ EXPR_DOT then
823
910
  # See if it is a reserved word.
824
911
  keyword = RubyParserStuff::Keyword.keyword token
825
912
 
826
913
  return process_token_keyword keyword if keyword
827
- end # unless in_lex_state? :expr_dot
914
+ end
828
915
 
829
916
  # matching: compare/parse23.y:8079
830
- state = if is_beg? or is_arg? or in_lex_state? :expr_dot then
831
- cmd_state ? :expr_cmdarg : :expr_arg
832
- elsif in_lex_state? :expr_fname then
833
- :expr_endfn
917
+ state = if is_beg? or is_arg? or lex_state =~ EXPR_DOT then
918
+ cmd_state ? EXPR_CMDARG : EXPR_ARG
919
+ elsif lex_state =~ EXPR_FNAME then
920
+ EXPR_ENDFN
834
921
  else
835
- :expr_end
922
+ EXPR_END
836
923
  end
837
924
 
838
- if not [:expr_dot, :expr_fname].include? last_state and
839
- (tok_id == :tIDENTIFIER) and # not :expr_fname, not attrasgn
925
+ if last_state !~ EXPR_DOT|EXPR_FNAME and
926
+ (tok_id == :tIDENTIFIER) and # not EXPR_FNAME, not attrasgn
840
927
  lvar_defined?(token) then
841
- state = :expr_end # TODO: EXPR_END|EXPR_LABEL
928
+ state = EXPR_END|EXPR_LABEL
842
929
  end
843
930
 
844
931
  token.lineno = self.lineno # yes, on a string. I know... I know...
@@ -853,9 +940,9 @@ class RubyLexer
853
940
 
854
941
  value = [token, self.lineno]
855
942
 
856
- return result(lex_state, keyword.id0, value) if state == :expr_fname
943
+ return result(lex_state, keyword.id0, value) if state =~ EXPR_FNAME
857
944
 
858
- self.command_start = true if lex_state == :expr_beg
945
+ self.command_start = true if lex_state =~ EXPR_BEG
859
946
 
860
947
  case
861
948
  when keyword.id0 == :kDO then
@@ -863,22 +950,22 @@ class RubyLexer
863
950
  when lambda_beginning? then
864
951
  self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
865
952
  self.paren_nest -= 1
866
- result(lex_state, :kDO_LAMBDA, value)
953
+ result lex_state, :kDO_LAMBDA, value
867
954
  when cond.is_in_state then
868
- result(lex_state, :kDO_COND, value)
869
- when cmdarg.is_in_state && state != :expr_cmdarg then
870
- result(lex_state, :kDO_BLOCK, value)
871
- when [:expr_beg, :expr_endarg].include?(state) then
872
- result(lex_state, :kDO_BLOCK, value)
955
+ result lex_state, :kDO_COND, value
956
+ when cmdarg.is_in_state && state != EXPR_CMDARG then
957
+ result lex_state, :kDO_BLOCK, value
958
+ when state =~ EXPR_BEG|EXPR_ENDARG then
959
+ result lex_state, :kDO_BLOCK, value
873
960
  else
874
- result(lex_state, :kDO, value)
961
+ result lex_state, :kDO, value
875
962
  end
876
- when [:expr_beg, :expr_labeled].include?(state) then
877
- result(lex_state, keyword.id0, value)
963
+ when state =~ EXPR_PAD then
964
+ result lex_state, keyword.id0, value
878
965
  when keyword.id0 != keyword.id1 then
879
- result(:expr_beg, keyword.id1, value) # TODO: :expr_beg|:expr_label
966
+ result EXPR_PAR, keyword.id1, value
880
967
  else
881
- result(lex_state, keyword.id1, value)
968
+ result lex_state, keyword.id1, value
882
969
  end
883
970
  end
884
971
 
@@ -921,9 +1008,10 @@ class RubyLexer
921
1008
  when scan(/s/) then # space
922
1009
  " "
923
1010
  when scan(/[0-7]{1,3}/) then # octal constant
924
- (matched.to_i(8) & 0xFF).chr
1011
+ (matched.to_i(8) & 0xFF).chr.force_encoding Encoding::UTF_8
925
1012
  when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
926
- ss[1].to_i(16).chr
1013
+ # TODO: force encode everything to UTF-8?
1014
+ ss[1].to_i(16).chr.force_encoding Encoding::UTF_8
927
1015
  when check(/M-\\[\\MCc]/) then
928
1016
  scan(/M-\\/) # eat it
929
1017
  c = self.read_escape
@@ -946,8 +1034,10 @@ class RubyLexer
946
1034
  c
947
1035
  when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
948
1036
  matched
949
- when scan(/u([0-9a-fA-F]{2,4}|\{[0-9a-fA-F]{2,6}\})/) then
1037
+ when scan(/u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{2,6}\})/) then
950
1038
  [ss[1].delete("{}").to_i(16)].pack("U")
1039
+ when scan(/u([0-9a-fA-F]{1,3})/) then
1040
+ rb_compile_error "Invalid escape character syntax"
951
1041
  when scan(/[McCx0-9]/) || end_of_stream? then
952
1042
  rb_compile_error("Invalid escape character syntax")
953
1043
  else
@@ -974,7 +1064,7 @@ class RubyLexer
974
1064
  self.brace_nest = 0
975
1065
  self.command_start = true
976
1066
  self.comments = []
977
- self.lex_state = :expr_none
1067
+ self.lex_state = EXPR_NONE
978
1068
  self.lex_strterm = nil
979
1069
  self.lineno = 1
980
1070
  self.lpar_beg = nil
@@ -988,9 +1078,9 @@ class RubyLexer
988
1078
  self.cmdarg.reset
989
1079
  end
990
1080
 
991
- def result lex_state, token, text # :nodoc:
992
- lex_state = self.arg_state if lex_state == :arg_state
993
- self.lex_state = lex_state if lex_state
1081
+ def result new_state, token, text # :nodoc:
1082
+ new_state = self.arg_state if new_state == :arg_state
1083
+ self.lex_state = new_state if new_state
994
1084
  [token, text]
995
1085
  end
996
1086
 
@@ -1057,8 +1147,10 @@ class RubyLexer
1057
1147
  prev = self.string_buffer.last
1058
1148
  if term == chr && prev && prev.end_with?("(?") then
1059
1149
  self.string_buffer << chr
1150
+ elsif term == chr || chr.ascii_only? then
1151
+ self.string_buffer << matched # dunno why we keep them for ascii
1060
1152
  else
1061
- self.string_buffer << matched
1153
+ self.string_buffer << chr # HACK? this is such a rat's nest
1062
1154
  end
1063
1155
  else
1064
1156
  rb_compile_error "Invalid escape character syntax"
@@ -1089,7 +1181,7 @@ class RubyLexer
1089
1181
  else
1090
1182
  self.string_nest -= 1
1091
1183
  end
1092
- when expand && scan(/#(?=[\$\@\{])/) then
1184
+ when expand && scan(/#(?=[\$\@\{])/) then # TODO: this seems wrong
1093
1185
  ss.pos -= 1
1094
1186
  break
1095
1187
  when qwords && scan(/\s/) then
@@ -1173,12 +1265,13 @@ class RubyLexer
1173
1265
  s
1174
1266
  when /^[McCx0-9]/ then
1175
1267
  rb_compile_error("Invalid escape character syntax")
1176
- when /u([0-9a-fA-F]{2,4}|\{[0-9a-fA-F]{2,6}\})/ then
1268
+ when /u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{2,6}\})/ then
1177
1269
  [$1.delete("{}").to_i(16)].pack("U")
1270
+ when /u([0-9a-fA-F]{1,3})/ then
1271
+ rb_compile_error("Invalid escape character syntax")
1178
1272
  else
1179
1273
  s
1180
1274
  end
1181
- x.force_encoding "UTF-8" if HAS_ENC
1182
1275
  x
1183
1276
  end
1184
1277
 
@@ -1206,7 +1299,7 @@ class RubyLexer
1206
1299
 
1207
1300
  # matches parser_string_term
1208
1301
  if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then
1209
- if (([:expr_beg, :expr_endfn].include?(lex_state) &&
1302
+ if ((lex_state =~ EXPR_BEG|EXPR_ENDFN &&
1210
1303
  !cond.is_in_state) || is_arg?) &&
1211
1304
  is_label_suffix? then
1212
1305
  scan(/:/)
@@ -1216,8 +1309,7 @@ class RubyLexer
1216
1309
 
1217
1310
  if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
1218
1311
  self.lex_strterm = nil
1219
- # TODO: :expr_beg|:expr_label
1220
- self.lex_state = (token_type == :tLABEL_END) ? :expr_label : :expr_end
1312
+ self.lex_state = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_END
1221
1313
  end
1222
1314
 
1223
1315
  return token
@@ -1260,7 +1352,7 @@ class RubyLexer
1260
1352
  when 'r' then
1261
1353
  [:tREGEXP_BEG, STR_REGEXP]
1262
1354
  when 's' then
1263
- self.lex_state = :expr_fname
1355
+ self.lex_state = EXPR_FNAME
1264
1356
  [:tSYMBEG, STR_SSYM]
1265
1357
  when 'I' then
1266
1358
  eat_whitespace