parser 2.6.5.0 → 2.7.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Parser
4
+ # Stack that holds names of current arguments,
5
+ # e.g. while parsing
6
+ # def m1(a = (def m2(b = def m3(c = 1); end); end)); end
7
+ # ^
8
+ # stack is [:a, :b, :c]
9
+ #
10
+ # Emulates `p->cur_arg` in MRI's parse.y
11
+ #
12
+ # @api private
13
+ #
14
+ class CurrentArgStack
15
+ attr_reader :stack
16
+
17
+ def initialize
18
+ @stack = []
19
+ freeze
20
+ end
21
+
22
+ def push(value)
23
+ @stack << value
24
+ end
25
+
26
+ def set(value)
27
+ pop
28
+ push(value)
29
+ end
30
+
31
+ def pop
32
+ @stack.pop
33
+ end
34
+
35
+ def reset
36
+ @stack.clear
37
+ end
38
+
39
+ def top
40
+ @stack.last
41
+ end
42
+ end
43
+ end
@@ -89,16 +89,13 @@ class Parser::Lexer
89
89
 
90
90
  REGEXP_META_CHARACTERS = Regexp.union(*"\\$()*+.<>?[]^{|}".chars).freeze
91
91
 
92
- NUMPARAM_MAX = 9
93
-
94
92
  attr_reader :source_buffer
95
- attr_reader :max_numparam_stack
96
93
 
97
94
  attr_accessor :diagnostics
98
95
  attr_accessor :static_env
99
96
  attr_accessor :force_utf32
100
97
 
101
- attr_accessor :cond, :cmdarg, :in_kwarg, :context
98
+ attr_accessor :cond, :cmdarg, :in_kwarg, :context, :command_start
102
99
 
103
100
  attr_accessor :tokens, :comments
104
101
 
@@ -179,9 +176,6 @@ class Parser::Lexer
179
176
 
180
177
  # State before =begin / =end block comment
181
178
  @cs_before_block_comment = self.class.lex_en_line_begin
182
-
183
- # Maximum numbered parameters stack
184
- @max_numparam_stack = MaxNumparamStack.new
185
179
  end
186
180
 
187
181
  def source_buffer=(source_buffer)
@@ -255,10 +249,6 @@ class Parser::Lexer
255
249
  @cond = @cond_stack.pop
256
250
  end
257
251
 
258
- def max_numparam
259
- @max_numparam_stack.top
260
- end
261
-
262
252
  def dedent_level
263
253
  # We erase @dedent_level as a precaution to avoid accidentally
264
254
  # using a stale value.
@@ -457,7 +447,7 @@ class Parser::Lexer
457
447
  '=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
458
448
  '<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
459
449
  '{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
460
- '!@' => :tBANG, '&.' => :tANDDOT, '.:' => :tMETHREF
450
+ '!@' => :tBANG, '&.' => :tANDDOT,
461
451
  }
462
452
 
463
453
  PUNCTUATION_BEGIN = {
@@ -1029,6 +1019,20 @@ class Parser::Lexer
1029
1019
  fcall expr_variable;
1030
1020
  }
1031
1021
 
1022
+ # Special case for Ruby >= 2.7
1023
+ # If interpolated instance/class variable starts with a digit we parse it as a plain substring
1024
+ # However, "#$1" is still a regular interpolation
1025
+ interp_digit_var = '#' ('@' | '@@') digit c_alpha*;
1026
+
1027
+ action extend_interp_digit_var {
1028
+ if @version >= 27
1029
+ literal.extend_string(tok, @ts, @te)
1030
+ else
1031
+ message = tok.start_with?('#@@') ? :cvar_name : :ivar_name
1032
+ diagnostic :error, message, { :name => tok(@ts + 1, @te) }, range(@ts + 1, @te)
1033
+ end
1034
+ }
1035
+
1032
1036
  # Interpolations with code blocks must match nested curly braces, as
1033
1037
  # interpolation ending is ambiguous with a block ending. So, every
1034
1038
  # opening and closing brace should be matched with e_[lr]brace rules,
@@ -1074,6 +1078,8 @@ class Parser::Lexer
1074
1078
  fbreak;
1075
1079
  end
1076
1080
  end
1081
+
1082
+ @paren_nest -= 1
1077
1083
  };
1078
1084
 
1079
1085
  action extend_interp_code {
@@ -1098,60 +1104,64 @@ class Parser::Lexer
1098
1104
  # above.
1099
1105
 
1100
1106
  interp_words := |*
1101
- interp_code => extend_interp_code;
1102
- interp_var => extend_interp_var;
1103
- e_bs escape => extend_string_escaped;
1104
- c_space+ => extend_string_space;
1105
- c_eol => extend_string_eol;
1106
- c_any => extend_string;
1107
+ interp_code => extend_interp_code;
1108
+ interp_digit_var => extend_interp_digit_var;
1109
+ interp_var => extend_interp_var;
1110
+ e_bs escape => extend_string_escaped;
1111
+ c_space+ => extend_string_space;
1112
+ c_eol => extend_string_eol;
1113
+ c_any => extend_string;
1107
1114
  *|;
1108
1115
 
1109
1116
  interp_string := |*
1110
- interp_code => extend_interp_code;
1111
- interp_var => extend_interp_var;
1112
- e_bs escape => extend_string_escaped;
1113
- c_eol => extend_string_eol;
1114
- c_any => extend_string;
1117
+ interp_code => extend_interp_code;
1118
+ interp_digit_var => extend_interp_digit_var;
1119
+ interp_var => extend_interp_var;
1120
+ e_bs escape => extend_string_escaped;
1121
+ c_eol => extend_string_eol;
1122
+ c_any => extend_string;
1115
1123
  *|;
1116
1124
 
1117
1125
  plain_words := |*
1118
- e_bs c_any => extend_string_escaped;
1119
- c_space+ => extend_string_space;
1120
- c_eol => extend_string_eol;
1121
- c_any => extend_string;
1126
+ e_bs c_any => extend_string_escaped;
1127
+ c_space+ => extend_string_space;
1128
+ c_eol => extend_string_eol;
1129
+ c_any => extend_string;
1122
1130
  *|;
1123
1131
 
1124
1132
  plain_string := |*
1125
- '\\' c_nl => extend_string_eol;
1126
- e_bs c_any => extend_string_escaped;
1127
- c_eol => extend_string_eol;
1128
- c_any => extend_string;
1133
+ '\\' c_nl => extend_string_eol;
1134
+ e_bs c_any => extend_string_escaped;
1135
+ c_eol => extend_string_eol;
1136
+ c_any => extend_string;
1129
1137
  *|;
1130
1138
 
1131
1139
  interp_backslash_delimited := |*
1132
- interp_code => extend_interp_code;
1133
- interp_var => extend_interp_var;
1134
- c_eol => extend_string_eol;
1135
- c_any => extend_string;
1140
+ interp_code => extend_interp_code;
1141
+ interp_digit_var => extend_interp_digit_var;
1142
+ interp_var => extend_interp_var;
1143
+ c_eol => extend_string_eol;
1144
+ c_any => extend_string;
1136
1145
  *|;
1137
1146
 
1138
1147
  plain_backslash_delimited := |*
1139
- c_eol => extend_string_eol;
1140
- c_any => extend_string;
1148
+ c_eol => extend_string_eol;
1149
+ c_any => extend_string;
1141
1150
  *|;
1142
1151
 
1143
1152
  interp_backslash_delimited_words := |*
1144
- interp_code => extend_interp_code;
1145
- interp_var => extend_interp_var;
1146
- c_space+ => extend_string_space;
1147
- c_eol => extend_string_eol;
1148
- c_any => extend_string;
1153
+ interp_code => extend_interp_code;
1154
+ interp_digit_var => extend_interp_digit_var;
1155
+ interp_var => extend_interp_var;
1156
+ c_space+ => extend_string_space;
1157
+ c_eol => extend_string_eol;
1158
+ c_any => extend_string;
1149
1159
  *|;
1150
1160
 
1151
1161
  plain_backslash_delimited_words := |*
1152
- c_space+ => extend_string_space;
1153
- c_eol => extend_string_eol;
1154
- c_any => extend_string;
1162
+ c_space+ => extend_string_space;
1163
+ c_eol => extend_string_eol;
1164
+ c_any => extend_string;
1155
1165
  *|;
1156
1166
 
1157
1167
  regexp_modifiers := |*
@@ -1267,6 +1277,12 @@ class Parser::Lexer
1267
1277
 
1268
1278
  e_lbrack = '[' % {
1269
1279
  @cond.push(false); @cmdarg.push(false)
1280
+
1281
+ @paren_nest += 1
1282
+ };
1283
+
1284
+ e_rbrack = ']' % {
1285
+ @paren_nest -= 1
1270
1286
  };
1271
1287
 
1272
1288
  # Ruby 1.9 lambdas require parentheses counting in order to
@@ -1324,36 +1340,6 @@ class Parser::Lexer
1324
1340
  fnext *stack_pop; fbreak;
1325
1341
  };
1326
1342
 
1327
- '@' [0-9]+
1328
- => {
1329
- if @version < 27
1330
- diagnostic :error, :ivar_name, { :name => tok }
1331
- end
1332
-
1333
- value = tok[1..-1]
1334
-
1335
- if value[0] == '0'
1336
- diagnostic :error, :leading_zero_in_numparam, nil, range(@ts, @te)
1337
- end
1338
-
1339
- if value.to_i > NUMPARAM_MAX
1340
- diagnostic :error, :too_large_numparam, nil, range(@ts, @te)
1341
- end
1342
-
1343
- if !@context.in_block? && !@context.in_lambda?
1344
- diagnostic :error, :numparam_outside_block, nil, range(@ts, @te)
1345
- end
1346
-
1347
- if !@max_numparam_stack.can_have_numparams?
1348
- diagnostic :error, :ordinary_param_defined, nil, range(@ts, @te)
1349
- end
1350
-
1351
- @max_numparam_stack.register(value.to_i)
1352
-
1353
- emit(:tNUMPARAM, tok[1..-1])
1354
- fnext *stack_pop; fbreak;
1355
- };
1356
-
1357
1343
  instance_var_v
1358
1344
  => {
1359
1345
  if tok =~ /^@[0-9]/
@@ -1519,6 +1505,7 @@ class Parser::Lexer
1519
1505
  emit(:tLCURLY, '{'.freeze, @te - 1, @te)
1520
1506
  end
1521
1507
  @command_start = true
1508
+ @paren_nest += 1
1522
1509
  fnext expr_value; fbreak;
1523
1510
  };
1524
1511
 
@@ -1679,6 +1666,7 @@ class Parser::Lexer
1679
1666
  else
1680
1667
  emit(:tLBRACE_ARG, '{'.freeze)
1681
1668
  end
1669
+ @paren_nest += 1
1682
1670
  @command_start = true
1683
1671
  fnext expr_value; fbreak;
1684
1672
  };
@@ -1927,6 +1915,24 @@ class Parser::Lexer
1927
1915
  fgoto expr_end;
1928
1916
  };
1929
1917
 
1918
+ #
1919
+ # AMBIGUOUS EMPTY BLOCK ARGUMENTS
1920
+ #
1921
+
1922
+ # Ruby >= 2.7 emits it as two tPIPE terminals
1923
+ # while Ruby < 2.7 as a single tOROP (like in `a || b`)
1924
+ '||'
1925
+ => {
1926
+ if @version >= 27
1927
+ emit(:tPIPE, tok(@ts, @ts + 1), @ts, @ts + 1)
1928
+ fhold;
1929
+ fnext expr_beg; fbreak;
1930
+ else
1931
+ p -= 2
1932
+ fgoto expr_end;
1933
+ end
1934
+ };
1935
+
1930
1936
  #
1931
1937
  # KEYWORDS AND PUNCTUATION
1932
1938
  #
@@ -1941,6 +1947,7 @@ class Parser::Lexer
1941
1947
  else
1942
1948
  emit(:tLBRACE, '{'.freeze)
1943
1949
  end
1950
+ @paren_nest += 1
1944
1951
  fbreak;
1945
1952
  };
1946
1953
 
@@ -2159,6 +2166,9 @@ class Parser::Lexer
2159
2166
  emit_do
2160
2167
  end
2161
2168
  end
2169
+ if tok == '{'.freeze
2170
+ @paren_nest += 1
2171
+ end
2162
2172
  @command_start = true
2163
2173
 
2164
2174
  fnext expr_value; fbreak;
@@ -2333,24 +2343,6 @@ class Parser::Lexer
2333
2343
  # METHOD CALLS
2334
2344
  #
2335
2345
 
2336
- '.:' w_space+
2337
- => { emit(:tDOT, '.', @ts, @ts + 1)
2338
- emit(:tCOLON, ':', @ts + 1, @ts + 2)
2339
- p = p - tok.length + 2
2340
- fnext expr_dot; fbreak; };
2341
-
2342
- '.:'
2343
- => {
2344
- if @version >= 27
2345
- emit_table(PUNCTUATION)
2346
- else
2347
- emit(:tDOT, tok(@ts, @ts + 1), @ts, @ts + 1)
2348
- fhold;
2349
- end
2350
-
2351
- fnext expr_dot; fbreak;
2352
- };
2353
-
2354
2346
  '.' | '&.' | '::'
2355
2347
  => { emit_table(PUNCTUATION)
2356
2348
  fnext expr_dot; fbreak; };
@@ -2394,7 +2386,7 @@ class Parser::Lexer
2394
2386
  => { emit_table(PUNCTUATION)
2395
2387
  fnext expr_beg; fbreak; };
2396
2388
 
2397
- e_rbrace | e_rparen | ']'
2389
+ e_rbrace | e_rparen | e_rbrack
2398
2390
  => {
2399
2391
  emit_table(PUNCTUATION)
2400
2392
 
@@ -2431,6 +2423,17 @@ class Parser::Lexer
2431
2423
  => { emit(:tLBRACK2, '['.freeze)
2432
2424
  fnext expr_beg; fbreak; };
2433
2425
 
2426
+ '...' c_nl
2427
+ => {
2428
+ if @paren_nest == 0
2429
+ diagnostic :warning, :triple_dot_at_eol, nil, range(@ts, @te - 1)
2430
+ end
2431
+
2432
+ emit(:tDOT3, '...'.freeze, @ts, @te - 1)
2433
+ fhold;
2434
+ fnext expr_beg; fbreak;
2435
+ };
2436
+
2434
2437
  punctuation_end
2435
2438
  => { emit_table(PUNCTUATION)
2436
2439
  fnext expr_beg; fbreak; };
@@ -2470,7 +2473,7 @@ class Parser::Lexer
2470
2473
 
2471
2474
  # Here we use '\n' instead of w_newline to not modify @newline_s
2472
2475
  # and eventually properly emit tNL
2473
- (w_space_comment '\n')+
2476
+ (c_space* w_space_comment '\n')+
2474
2477
  => {
2475
2478
  if @version < 27
2476
2479
  # Ruby before 2.7 doesn't support comments before leading dot.
@@ -3,72 +3,75 @@
3
3
  module Parser
4
4
 
5
5
  class Lexer::Dedenter
6
+ # Tab (\t) counts as 8 spaces
7
+ TAB_WIDTH = 8
8
+
6
9
  def initialize(dedent_level)
7
10
  @dedent_level = dedent_level
8
11
  @at_line_begin = true
9
12
  @indent_level = 0
10
13
  end
11
14
 
15
+ # For a heredoc like
16
+ # <<-HERE
17
+ # a
18
+ # b
19
+ # HERE
20
+ # this method gets called with " a\n" and " b\n"
21
+ #
22
+ # However, the following heredoc:
23
+ #
24
+ # <<-HERE
25
+ # a\
26
+ # b
27
+ # HERE
28
+ # calls this method only once with a string " a\\\n b\n"
29
+ #
30
+ # This is important because technically it's a single line,
31
+ # but it has to be concatenated __after__ dedenting.
32
+ #
33
+ # It has no effect for non-squiggly heredocs, i.e. it simply removes "\\\n"
34
+ # Of course, lexer could do it but once again: it's all because of dedenting.
35
+ #
12
36
  def dedent(string)
13
- space_begin = space_end = offset = 0
14
- last_index = string.length - 1
15
- escape = false
16
- _at_line_begin = nil
37
+ original_encoding = string.encoding
38
+ # Prevent the following error when processing binary encoded source.
39
+ # "\xC0".split # => ArgumentError (invalid byte sequence in UTF-8)
40
+ lines = string.force_encoding(Encoding::BINARY).split("\\\n")
41
+ lines.map! {|s| s.force_encoding(original_encoding) }
17
42
 
18
- string.chars.each_with_index do |char, index|
19
- if char == '\\'
20
- # entering escape mode
21
- escape = true
22
- string.slice!(index - offset)
23
- offset += 1
24
- _at_line_begin = @at_line_begin
25
- @at_line_begin = false
26
- elsif escape
27
- if char == ?\n
28
- # trimming \n, starting a new line
29
- string.slice!(index - offset)
30
- offset += 1
31
- @at_line_begin = true
32
- space_begin = space_end = index - offset
33
- @indent_level = 0
34
- elsif char == ?n
35
- # replacing \\n to \n
36
- string.slice!(index - offset)
37
- string.insert(index - offset, ?\n)
38
- else
39
- # exiting escape mode as it's not an escape sequence
40
- @at_line_begin = _at_line_begin
41
- escape = false
42
- redo
43
- end
44
- escape = false
45
- elsif @at_line_begin
46
- if char == ?\n || @indent_level >= @dedent_level
47
- string.slice!(space_begin...space_end)
48
- offset += space_end - space_begin
49
- @at_line_begin = false
50
- end
43
+ if @at_line_begin
44
+ lines_to_dedent = lines
45
+ else
46
+ _first, *lines_to_dedent = lines
47
+ end
51
48
 
49
+ lines_to_dedent.each do |line|
50
+ left_to_remove = @dedent_level
51
+ remove = 0
52
+
53
+ line.each_char do |char|
54
+ break if left_to_remove <= 0
52
55
  case char
53
56
  when ?\s
54
- @indent_level += 1
55
- space_end += 1
57
+ remove += 1
58
+ left_to_remove -= 1
56
59
  when ?\t
57
- @indent_level += 8 - @indent_level % 8
58
- space_end += 1
60
+ break if TAB_WIDTH * (remove / TAB_WIDTH + 1) > @dedent_level
61
+ remove += 1
62
+ left_to_remove -= TAB_WIDTH
63
+ else
64
+ # no more spaces or tabs
65
+ break
59
66
  end
60
- elsif char == ?\n && index == last_index
61
- @at_line_begin = true
62
- @indent_level = 0
63
- space_begin = space_end = index - offset + 1
64
67
  end
65
- end
66
68
 
67
- if @at_line_begin
68
- string.slice!(space_begin..space_end)
69
+ line.slice!(0, remove)
69
70
  end
70
71
 
71
- nil
72
+ string.replace(lines.join)
73
+
74
+ @at_line_begin = string.end_with?("\n")
72
75
  end
73
76
 
74
77
  def interrupt