ruby_parser 3.12.0 → 3.13.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -34,6 +34,10 @@ class RubyLexer
34
34
  STR_SSYM = STR_FUNC_SYMBOL
35
35
  STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
36
36
 
37
+ EXPR_BEG_ANY = [:expr_beg, :expr_mid, :expr_class ]
38
+ EXPR_ARG_ANY = [:expr_arg, :expr_cmdarg, ]
39
+ EXPR_END_ANY = [:expr_end, :expr_endarg, :expr_endfn]
40
+
37
41
  ESCAPES = {
38
42
  "a" => "\007",
39
43
  "b" => "\010",
@@ -77,7 +81,7 @@ class RubyLexer
77
81
  attr_accessor :brace_nest
78
82
  attr_accessor :cmdarg
79
83
  attr_accessor :command_start
80
- attr_accessor :command_state
84
+ attr_accessor :cmd_state # temporary--ivar to avoid passing everywhere
81
85
  attr_accessor :last_state
82
86
  attr_accessor :cond
83
87
  attr_accessor :extra_lineno
@@ -109,20 +113,13 @@ class RubyLexer
109
113
  # Last token read via next_token.
110
114
  attr_accessor :token
111
115
 
112
- ##
113
- # What version of ruby to parse. 18 and 19 are the only valid values
114
- # currently supported.
115
-
116
- attr_accessor :version
117
-
118
116
  attr_writer :comments
119
117
 
120
- def initialize v = 18
121
- self.version = v
118
+ def initialize _ = nil
122
119
  @lex_state = :expr_none
123
120
 
124
- self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
125
121
  self.cond = RubyParserStuff::StackState.new(:cond, $DEBUG)
122
+ self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
126
123
 
127
124
  reset
128
125
  end
@@ -343,23 +340,30 @@ class RubyLexer
343
340
  end
344
341
 
345
342
  def is_arg?
346
- in_lex_state? :expr_arg, :expr_cmdarg
343
+ in_lex_state?(*EXPR_ARG_ANY)
347
344
  end
348
345
 
349
346
  def is_beg?
350
- in_lex_state? :expr_beg, :expr_value, :expr_mid, :expr_class, :expr_labelarg
347
+ # TODO: in_lex_state?(*EXPR_BEG_ANY) || lex_state == [:expr_arg, :expr_labeled]
348
+ in_lex_state?(*EXPR_BEG_ANY, :expr_value, :expr_labeled)
351
349
  end
352
350
 
353
351
  def is_end?
354
- in_lex_state? :expr_end, :expr_endarg, :expr_endfn
352
+ in_lex_state?(*EXPR_END_ANY)
355
353
  end
356
354
 
355
+ def lvar_defined? id
356
+ # TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
357
+ self.parser.env[id.to_sym] == :lvar
358
+ end
359
+
360
+
357
361
  def ruby22_label?
358
362
  ruby22plus? and is_label_possible?
359
363
  end
360
364
 
361
365
  def is_label_possible?
362
- (in_lex_state?(:expr_beg, :expr_endfn) && !command_state) || is_arg?
366
+ (in_lex_state?(:expr_beg, :expr_endfn) && !cmd_state) || is_arg?
363
367
  end
364
368
 
365
369
  def is_label_suffix?
@@ -370,6 +374,10 @@ class RubyLexer
370
374
  is_arg? and space_seen and c !~ /\s/
371
375
  end
372
376
 
377
+ def lambda_beginning?
378
+ lpar_beg && lpar_beg == paren_nest
379
+ end
380
+
373
381
  def matched
374
382
  ss.matched
375
383
  end
@@ -411,20 +419,17 @@ class RubyLexer
411
419
  nil # TODO
412
420
  end
413
421
 
414
- def process_bracing text
422
+ def process_brace_close text
423
+ # matching compare/parse23.y:8561
415
424
  cond.lexpop
416
425
  cmdarg.lexpop
417
426
 
418
427
  case matched
419
428
  when "}" then
420
429
  self.brace_nest -= 1
421
- self.lex_state = :expr_endarg
422
-
423
- # TODO
424
- # if (c == '}') {
425
- # if (!brace_nest--) c = tSTRING_DEND;
426
- # }
430
+ self.lex_state = :expr_endarg # TODO: :expr_end ? Look at 2.6
427
431
 
432
+ return :tSTRING_DEND, matched if brace_nest < 0
428
433
  return :tRCURLY, matched
429
434
  when "]" then
430
435
  self.paren_nest -= 1
@@ -463,24 +468,30 @@ class RubyLexer
463
468
  end
464
469
  end
465
470
 
466
- def process_curly_brace text
471
+ def process_brace_open text
472
+ # matching compare/parse23.y:8694
467
473
  self.brace_nest += 1
468
- if lpar_beg && lpar_beg == paren_nest then
474
+
475
+ if lambda_beginning? then
469
476
  self.lpar_beg = nil
470
- self.paren_nest -= 1
477
+ self.paren_nest -= 1 # close arg list when lambda opens body
471
478
 
472
479
  return expr_result(:tLAMBEG, "{")
473
480
  end
474
481
 
475
- token = if is_arg? || in_lex_state?(:expr_end, :expr_endfn) then
476
- :tLCURLY # block (primary)
477
- elsif in_lex_state?(:expr_endarg) then
478
- :tLBRACE_ARG # block (expr)
479
- else
480
- :tLBRACE # hash
481
- end
482
+ token = case lex_state
483
+ when :expr_labeled then
484
+ :tLBRACE # hash
485
+ when *EXPR_ARG_ANY, :expr_end, :expr_endfn then
486
+ :tLCURLY # block (primary)
487
+ when :expr_endarg
488
+ :tLBRACE_ARG # block (expr)
489
+ else
490
+ :tLBRACE # hash
491
+ end
482
492
 
483
- self.command_start = true unless token == :tLBRACE
493
+ # TODO: self.lex_state |= :expr_label if token != :tLBRACE_ARG
494
+ self.command_start = true if token != :tLBRACE
484
495
 
485
496
  return expr_result(token, "{")
486
497
  end
@@ -519,12 +530,19 @@ class RubyLexer
519
530
  def process_lchevron text
520
531
  if (!in_lex_state?(:expr_dot, :expr_class) &&
521
532
  !is_end? &&
522
- (!is_arg? || space_seen)) then
533
+ (!is_arg? || space_seen)) then # TODO: || in_state(:expr_labeled)
523
534
  tok = self.heredoc_identifier
524
535
  return tok if tok
525
536
  end
526
537
 
527
- return result(:arg_state, :tLSHFT, "\<\<")
538
+ if in_arg_state? then
539
+ self.lex_state = :expr_arg
540
+ else
541
+ self.command_start = true if lex_state == :expr_class
542
+ self.lex_state = :expr_beg
543
+ end
544
+
545
+ return result(lex_state, :tLSHFT, "\<\<")
528
546
  end
529
547
 
530
548
  def process_newline_or_comment text
@@ -534,6 +552,7 @@ class RubyLexer
534
552
  if c == '#' then
535
553
  ss.pos -= 1
536
554
 
555
+ # TODO: handle magic comments
537
556
  while scan(/\s*\#.*(\n+|\z)/) do
538
557
  hit = true
539
558
  self.lineno += matched.lines.to_a.size
@@ -548,8 +567,21 @@ class RubyLexer
548
567
  # Replace a string of newlines with a single one
549
568
  self.lineno += matched.lines.to_a.size if scan(/\n+/)
550
569
 
551
- return if in_lex_state?(:expr_beg, :expr_value, :expr_class,
552
- :expr_fname, :expr_dot)
570
+ # TODO: remove :expr_value -- audit all uses of it
571
+ c = in_lex_state?(:expr_beg, :expr_value, :expr_class,
572
+ :expr_fname, :expr_dot) && !in_lex_state?(:expr_labeled)
573
+
574
+ # TODO: figure out what token_seen is for
575
+ # TODO: if c || self.lex_state == [:expr_beg, :expr_labeled] then
576
+ if c || self.lex_state == :expr_labeled then
577
+ # ignore if !fallthrough?
578
+ if !c && parser.in_kwarg then
579
+ # normal newline
580
+ return result(:expr_beg, :tNL, nil)
581
+ else
582
+ return # skip
583
+ end
584
+ end
553
585
 
554
586
  if scan(/([\ \t\r\f\v]*)(\.|&)/) then
555
587
  self.space_seen = true unless ss[1].empty?
@@ -569,11 +601,7 @@ class RubyLexer
569
601
  end
570
602
 
571
603
  def process_paren text
572
- token = if ruby18 then
573
- process_paren18
574
- else
575
- process_paren19
576
- end
604
+ token = process_paren19
577
605
 
578
606
  self.paren_nest += 1
579
607
 
@@ -581,25 +609,6 @@ class RubyLexer
581
609
  return expr_result(token, "(")
582
610
  end
583
611
 
584
- def process_paren18
585
- self.command_start = true
586
- token = :tLPAREN2
587
-
588
- if in_lex_state? :expr_beg, :expr_mid then
589
- token = :tLPAREN
590
- elsif space_seen then
591
- if in_lex_state? :expr_cmdarg then
592
- token = :tLPAREN_ARG
593
- elsif in_lex_state? :expr_arg then
594
- warning "don't put space before argument parentheses"
595
- end
596
- else
597
- # not a ternary -- do nothing?
598
- end
599
-
600
- token
601
- end
602
-
603
612
  def process_paren19
604
613
  if is_beg? then
605
614
  :tLPAREN
@@ -654,8 +663,7 @@ class RubyLexer
654
663
 
655
664
  def process_questionmark text
656
665
  if is_end? then
657
- state = ruby18 ? :expr_beg : :expr_value # HACK?
658
- return result(state, :tEH, "?")
666
+ return result(:expr_value, :tEH, "?")
659
667
  end
660
668
 
661
669
  if end_of_stream? then
@@ -677,8 +685,7 @@ class RubyLexer
677
685
  end
678
686
 
679
687
  # ternary
680
- state = ruby18 ? :expr_beg : :expr_value # HACK?
681
- return result(state, :tEH, "?")
688
+ return result(:expr_value, :tEH, "?")
682
689
  elsif check(/\w(?=\w)/) then # ternary, also
683
690
  return result(:expr_beg, :tEH, "?")
684
691
  end
@@ -689,11 +696,7 @@ class RubyLexer
689
696
  ss.getch
690
697
  end
691
698
 
692
- if version == 18 then
693
- return result(:expr_end, :tINTEGER, c[0].ord & 0xff)
694
- else
695
- return result(:expr_end, :tSTRING, c)
696
- end
699
+ return result(:expr_end, :tSTRING, c)
697
700
  end
698
701
 
699
702
  def process_slash text
@@ -742,7 +745,9 @@ class RubyLexer
742
745
  token = :tLBRACK2
743
746
  end
744
747
 
745
- return expr_result(token, "[")
748
+ # TODO: this is done by expr_result except "|EXPR_LABEL")
749
+ # SET_LEX_STATE(EXPR_BEG|EXPR_LABEL);
750
+ expr_result token, "["
746
751
  end
747
752
 
748
753
  def possibly_escape_string text, check
@@ -758,9 +763,6 @@ class RubyLexer
758
763
  def process_symbol text
759
764
  symbol = possibly_escape_string text, /^:"/
760
765
 
761
- rb_compile_error "symbol cannot contain '\\0'" if
762
- ruby18 && symbol =~ /\0/
763
-
764
766
  return result(:expr_end, :tSYMBOL, symbol)
765
767
  end
766
768
 
@@ -784,11 +786,14 @@ class RubyLexer
784
786
  def process_label text
785
787
  symbol = possibly_escape_string text, /^"/
786
788
 
787
- result(:expr_labelarg, :tLABEL, [symbol, self.lineno])
789
+ result(:expr_labeled, :tLABEL, [symbol, self.lineno]) # TODO: expr_arg|expr_labeled
788
790
  end
789
791
 
790
792
  def process_token text
793
+ # matching: parse_ident in compare/parse23.y:7989
791
794
  # TODO: make this always return [token, lineno]
795
+ self.last_state = lex_state
796
+
792
797
  token = self.token = text
793
798
  token << matched if scan(/[\!\?](?!=)/)
794
799
 
@@ -807,36 +812,33 @@ class RubyLexer
807
812
  :tIDENTIFIER
808
813
  end
809
814
 
810
- if !ruby18 and is_label_possible? and is_label_suffix? then
815
+ if is_label_possible? and is_label_suffix? then
811
816
  scan(/:/)
812
- return result(:expr_labelarg, :tLABEL, [token, self.lineno])
817
+ # TODO: :expr_arg|:expr_labeled
818
+ return result :expr_labeled, :tLABEL, [token, self.lineno]
813
819
  end
814
820
 
821
+ # TODO: mb == ENC_CODERANGE_7BIT && !in_lex_state?(:expr_dot)
815
822
  unless in_lex_state? :expr_dot then
816
823
  # See if it is a reserved word.
817
- keyword = if ruby18 then # REFACTOR need 18/19 lexer subclasses
818
- RubyParserStuff::Keyword.keyword18 token
819
- else
820
- RubyParserStuff::Keyword.keyword19 token
821
- end
824
+ keyword = RubyParserStuff::Keyword.keyword token
822
825
 
823
826
  return process_token_keyword keyword if keyword
824
827
  end # unless in_lex_state? :expr_dot
825
828
 
826
- # TODO:
827
- # if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
828
-
829
+ # matching: compare/parse23.y:8079
829
830
  state = if is_beg? or is_arg? or in_lex_state? :expr_dot then
830
- command_state ? :expr_cmdarg : :expr_arg
831
- elsif not ruby18 and in_lex_state? :expr_fname then
831
+ cmd_state ? :expr_cmdarg : :expr_arg
832
+ elsif in_lex_state? :expr_fname then
832
833
  :expr_endfn
833
834
  else
834
835
  :expr_end
835
836
  end
836
837
 
837
838
  if not [:expr_dot, :expr_fname].include? last_state and
838
- self.parser.env[token.to_sym] == :lvar then
839
- state = :expr_end
839
+ (tok_id == :tIDENTIFIER) and # not :expr_fname, not attrasgn
840
+ lvar_defined?(token) then
841
+ state = :expr_end # TODO: EXPR_END|EXPR_LABEL
840
842
  end
841
843
 
842
844
  token.lineno = self.lineno # yes, on a string. I know... I know...
@@ -845,38 +847,38 @@ class RubyLexer
845
847
  end
846
848
 
847
849
  def process_token_keyword keyword
848
- state = keyword.state
850
+ # matching MIDDLE of parse_ident in compare/parse23.y:8046
851
+ state = lex_state
852
+ self.lex_state = keyword.state
849
853
 
850
854
  value = [token, self.lineno]
851
855
 
852
- self.command_start = true if state == :expr_beg and lex_state != :expr_fname
856
+ return result(lex_state, keyword.id0, value) if state == :expr_fname
857
+
858
+ self.command_start = true if lex_state == :expr_beg
853
859
 
854
860
  case
855
- when lex_state == :expr_fname then
856
- result(state, keyword.id0, keyword.name)
857
861
  when keyword.id0 == :kDO then
858
862
  case
859
- when lpar_beg && lpar_beg == paren_nest then
860
- self.lpar_beg = nil
863
+ when lambda_beginning? then
864
+ self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
861
865
  self.paren_nest -= 1
862
- expr_result(:kDO_LAMBDA, value)
866
+ result(lex_state, :kDO_LAMBDA, value)
863
867
  when cond.is_in_state then
864
- result(state, :kDO_COND, value)
865
- when cmdarg.is_in_state && lex_state != :expr_cmdarg then
866
- result(state, :kDO_BLOCK, value)
867
- when in_lex_state?(:expr_beg, :expr_endarg) then
868
- result(state, :kDO_BLOCK, value)
869
- when lex_state == :expr_end # eg: a -> do end do end
870
- result(state, :kDO_BLOCK, value)
868
+ result(lex_state, :kDO_COND, value)
869
+ when cmdarg.is_in_state && state != :expr_cmdarg then
870
+ result(lex_state, :kDO_BLOCK, value)
871
+ when [:expr_beg, :expr_endarg].include?(state) then
872
+ result(lex_state, :kDO_BLOCK, value)
871
873
  else
872
- result(state, :kDO, value)
874
+ result(lex_state, :kDO, value)
873
875
  end
874
- when in_lex_state?(:expr_beg, :expr_value, :expr_labelarg) then
875
- result(state, keyword.id0, value)
876
+ when [:expr_beg, :expr_labeled].include?(state) then
877
+ result(lex_state, keyword.id0, value)
876
878
  when keyword.id0 != keyword.id1 then
877
- result(:expr_beg, keyword.id1, value)
879
+ result(:expr_beg, keyword.id1, value) # TODO: :expr_beg|:expr_label
878
880
  else
879
- result(state, keyword.id1, value)
881
+ result(lex_state, keyword.id1, value)
880
882
  end
881
883
  end
882
884
 
@@ -982,8 +984,8 @@ class RubyLexer
982
984
  self.token = nil
983
985
  self.extra_lineno = 0
984
986
 
985
- self.cmdarg.reset
986
987
  self.cond.reset
988
+ self.cmdarg.reset
987
989
  end
988
990
 
989
991
  def result lex_state, token, text # :nodoc:
@@ -992,10 +994,6 @@ class RubyLexer
992
994
  [token, text]
993
995
  end
994
996
 
995
- def ruby18
996
- RubyParser::V18 === parser
997
- end
998
-
999
997
  def scan re
1000
998
  ss.scan re
1001
999
  end
@@ -1133,11 +1131,7 @@ class RubyLexer
1133
1131
  t = Regexp.escape term
1134
1132
  x = Regexp.escape(paren) if paren && paren != "\000"
1135
1133
  re = if qwords then
1136
- if HAS_ENC then
1137
- /[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
1138
- else
1139
- /[^#{t}#{x}\#\0\\\s\v]+|./ # argh. 1.8's \s doesn't pick up \v
1140
- end
1134
+ /[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
1141
1135
  else
1142
1136
  /[^#{t}#{x}\#\0\\]+|./
1143
1137
  end
@@ -1201,6 +1195,7 @@ class RubyLexer
1201
1195
  end
1202
1196
 
1203
1197
  def process_string # TODO: rewrite / remove
1198
+ # matches top of parser_yylex in compare/parse23.y:8113
1204
1199
  token = if lex_strterm[0] == :heredoc then
1205
1200
  self.heredoc lex_strterm
1206
1201
  else
@@ -1209,6 +1204,7 @@ class RubyLexer
1209
1204
 
1210
1205
  token_type, c = token
1211
1206
 
1207
+ # matches parser_string_term
1212
1208
  if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) then
1213
1209
  if (([:expr_beg, :expr_endfn].include?(lex_state) &&
1214
1210
  !cond.is_in_state) || is_arg?) &&
@@ -1220,7 +1216,8 @@ class RubyLexer
1220
1216
 
1221
1217
  if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
1222
1218
  self.lex_strterm = nil
1223
- self.lex_state = (token_type == :tLABEL_END) ? :expr_labelarg : :expr_end
1219
+ # TODO: :expr_beg|:expr_label
1220
+ self.lex_state = (token_type == :tLABEL_END) ? :expr_label : :expr_end
1224
1221
  end
1225
1222
 
1226
1223
  return token
@@ -1326,6 +1323,7 @@ class RubyLexer
1326
1323
  # TODO: !ISASCII
1327
1324
  return :tSTRING_DVAR, nil
1328
1325
  when scan(/#[{]/) then
1326
+ self.command_start = true
1329
1327
  return :tSTRING_DBEG, nil
1330
1328
  when scan(/#/) then
1331
1329
  string_buffer << '#'