parser 2.6.5.0 → 2.7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -89,16 +89,13 @@ class Parser::Lexer
89
89
 
90
90
  REGEXP_META_CHARACTERS = Regexp.union(*"\\$()*+.<>?[]^{|}".chars).freeze
91
91
 
92
- NUMPARAM_MAX = 9
93
-
94
92
  attr_reader :source_buffer
95
- attr_reader :max_numparam_stack
96
93
 
97
94
  attr_accessor :diagnostics
98
95
  attr_accessor :static_env
99
96
  attr_accessor :force_utf32
100
97
 
101
- attr_accessor :cond, :cmdarg, :in_kwarg, :context
98
+ attr_accessor :cond, :cmdarg, :in_kwarg, :context, :command_start
102
99
 
103
100
  attr_accessor :tokens, :comments
104
101
 
@@ -179,9 +176,6 @@ class Parser::Lexer
179
176
 
180
177
  # State before =begin / =end block comment
181
178
  @cs_before_block_comment = self.class.lex_en_line_begin
182
-
183
- # Maximum numbered parameters stack
184
- @max_numparam_stack = MaxNumparamStack.new
185
179
  end
186
180
 
187
181
  def source_buffer=(source_buffer)
@@ -255,10 +249,6 @@ class Parser::Lexer
255
249
  @cond = @cond_stack.pop
256
250
  end
257
251
 
258
- def max_numparam
259
- @max_numparam_stack.top
260
- end
261
-
262
252
  def dedent_level
263
253
  # We erase @dedent_level as a precaution to avoid accidentally
264
254
  # using a stale value.
@@ -457,7 +447,7 @@ class Parser::Lexer
457
447
  '=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
458
448
  '<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
459
449
  '{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
460
- '!@' => :tBANG, '&.' => :tANDDOT, '.:' => :tMETHREF
450
+ '!@' => :tBANG, '&.' => :tANDDOT,
461
451
  }
462
452
 
463
453
  PUNCTUATION_BEGIN = {
@@ -1029,6 +1019,20 @@ class Parser::Lexer
1029
1019
  fcall expr_variable;
1030
1020
  }
1031
1021
 
1022
+ # Special case for Ruby >= 2.7
1023
+ # If an interpolated instance/class variable name starts with a digit, we parse it as a plain substring.
1024
+ # However, "#$1" is still a regular interpolation
1025
+ interp_digit_var = '#' ('@' | '@@') digit c_alpha*;
1026
+
1027
+ action extend_interp_digit_var {
1028
+ if @version >= 27
1029
+ literal.extend_string(tok, @ts, @te)
1030
+ else
1031
+ message = tok.start_with?('#@@') ? :cvar_name : :ivar_name
1032
+ diagnostic :error, message, { :name => tok(@ts + 1, @te) }, range(@ts + 1, @te)
1033
+ end
1034
+ }
1035
+
1032
1036
  # Interpolations with code blocks must match nested curly braces, as
1033
1037
  # interpolation ending is ambiguous with a block ending. So, every
1034
1038
  # opening and closing brace should be matched with e_[lr]brace rules,
@@ -1060,6 +1064,7 @@ class Parser::Lexer
1060
1064
  emit(:tRCURLY, '}'.freeze, p - 1, p)
1061
1065
  @cond.lexpop
1062
1066
  @cmdarg.lexpop
1067
+ @paren_nest -= 1
1063
1068
  else
1064
1069
  emit(:tSTRING_DEND, '}'.freeze, p - 1, p)
1065
1070
  end
@@ -1098,60 +1103,64 @@ class Parser::Lexer
1098
1103
  # above.
1099
1104
 
1100
1105
  interp_words := |*
1101
- interp_code => extend_interp_code;
1102
- interp_var => extend_interp_var;
1103
- e_bs escape => extend_string_escaped;
1104
- c_space+ => extend_string_space;
1105
- c_eol => extend_string_eol;
1106
- c_any => extend_string;
1106
+ interp_code => extend_interp_code;
1107
+ interp_digit_var => extend_interp_digit_var;
1108
+ interp_var => extend_interp_var;
1109
+ e_bs escape => extend_string_escaped;
1110
+ c_space+ => extend_string_space;
1111
+ c_eol => extend_string_eol;
1112
+ c_any => extend_string;
1107
1113
  *|;
1108
1114
 
1109
1115
  interp_string := |*
1110
- interp_code => extend_interp_code;
1111
- interp_var => extend_interp_var;
1112
- e_bs escape => extend_string_escaped;
1113
- c_eol => extend_string_eol;
1114
- c_any => extend_string;
1116
+ interp_code => extend_interp_code;
1117
+ interp_digit_var => extend_interp_digit_var;
1118
+ interp_var => extend_interp_var;
1119
+ e_bs escape => extend_string_escaped;
1120
+ c_eol => extend_string_eol;
1121
+ c_any => extend_string;
1115
1122
  *|;
1116
1123
 
1117
1124
  plain_words := |*
1118
- e_bs c_any => extend_string_escaped;
1119
- c_space+ => extend_string_space;
1120
- c_eol => extend_string_eol;
1121
- c_any => extend_string;
1125
+ e_bs c_any => extend_string_escaped;
1126
+ c_space+ => extend_string_space;
1127
+ c_eol => extend_string_eol;
1128
+ c_any => extend_string;
1122
1129
  *|;
1123
1130
 
1124
1131
  plain_string := |*
1125
- '\\' c_nl => extend_string_eol;
1126
- e_bs c_any => extend_string_escaped;
1127
- c_eol => extend_string_eol;
1128
- c_any => extend_string;
1132
+ '\\' c_nl => extend_string_eol;
1133
+ e_bs c_any => extend_string_escaped;
1134
+ c_eol => extend_string_eol;
1135
+ c_any => extend_string;
1129
1136
  *|;
1130
1137
 
1131
1138
  interp_backslash_delimited := |*
1132
- interp_code => extend_interp_code;
1133
- interp_var => extend_interp_var;
1134
- c_eol => extend_string_eol;
1135
- c_any => extend_string;
1139
+ interp_code => extend_interp_code;
1140
+ interp_digit_var => extend_interp_digit_var;
1141
+ interp_var => extend_interp_var;
1142
+ c_eol => extend_string_eol;
1143
+ c_any => extend_string;
1136
1144
  *|;
1137
1145
 
1138
1146
  plain_backslash_delimited := |*
1139
- c_eol => extend_string_eol;
1140
- c_any => extend_string;
1147
+ c_eol => extend_string_eol;
1148
+ c_any => extend_string;
1141
1149
  *|;
1142
1150
 
1143
1151
  interp_backslash_delimited_words := |*
1144
- interp_code => extend_interp_code;
1145
- interp_var => extend_interp_var;
1146
- c_space+ => extend_string_space;
1147
- c_eol => extend_string_eol;
1148
- c_any => extend_string;
1152
+ interp_code => extend_interp_code;
1153
+ interp_digit_var => extend_interp_digit_var;
1154
+ interp_var => extend_interp_var;
1155
+ c_space+ => extend_string_space;
1156
+ c_eol => extend_string_eol;
1157
+ c_any => extend_string;
1149
1158
  *|;
1150
1159
 
1151
1160
  plain_backslash_delimited_words := |*
1152
- c_space+ => extend_string_space;
1153
- c_eol => extend_string_eol;
1154
- c_any => extend_string;
1161
+ c_space+ => extend_string_space;
1162
+ c_eol => extend_string_eol;
1163
+ c_any => extend_string;
1155
1164
  *|;
1156
1165
 
1157
1166
  regexp_modifiers := |*
@@ -1267,6 +1276,12 @@ class Parser::Lexer
1267
1276
 
1268
1277
  e_lbrack = '[' % {
1269
1278
  @cond.push(false); @cmdarg.push(false)
1279
+
1280
+ @paren_nest += 1
1281
+ };
1282
+
1283
+ e_rbrack = ']' % {
1284
+ @paren_nest -= 1
1270
1285
  };
1271
1286
 
1272
1287
  # Ruby 1.9 lambdas require parentheses counting in order to
@@ -1324,36 +1339,6 @@ class Parser::Lexer
1324
1339
  fnext *stack_pop; fbreak;
1325
1340
  };
1326
1341
 
1327
- '@' [0-9]+
1328
- => {
1329
- if @version < 27
1330
- diagnostic :error, :ivar_name, { :name => tok }
1331
- end
1332
-
1333
- value = tok[1..-1]
1334
-
1335
- if value[0] == '0'
1336
- diagnostic :error, :leading_zero_in_numparam, nil, range(@ts, @te)
1337
- end
1338
-
1339
- if value.to_i > NUMPARAM_MAX
1340
- diagnostic :error, :too_large_numparam, nil, range(@ts, @te)
1341
- end
1342
-
1343
- if !@context.in_block? && !@context.in_lambda?
1344
- diagnostic :error, :numparam_outside_block, nil, range(@ts, @te)
1345
- end
1346
-
1347
- if !@max_numparam_stack.can_have_numparams?
1348
- diagnostic :error, :ordinary_param_defined, nil, range(@ts, @te)
1349
- end
1350
-
1351
- @max_numparam_stack.register(value.to_i)
1352
-
1353
- emit(:tNUMPARAM, tok[1..-1])
1354
- fnext *stack_pop; fbreak;
1355
- };
1356
-
1357
1342
  instance_var_v
1358
1343
  => {
1359
1344
  if tok =~ /^@[0-9]/
@@ -1519,6 +1504,7 @@ class Parser::Lexer
1519
1504
  emit(:tLCURLY, '{'.freeze, @te - 1, @te)
1520
1505
  end
1521
1506
  @command_start = true
1507
+ @paren_nest += 1
1522
1508
  fnext expr_value; fbreak;
1523
1509
  };
1524
1510
 
@@ -1679,6 +1665,7 @@ class Parser::Lexer
1679
1665
  else
1680
1666
  emit(:tLBRACE_ARG, '{'.freeze)
1681
1667
  end
1668
+ @paren_nest += 1
1682
1669
  @command_start = true
1683
1670
  fnext expr_value; fbreak;
1684
1671
  };
@@ -1927,6 +1914,24 @@ class Parser::Lexer
1927
1914
  fgoto expr_end;
1928
1915
  };
1929
1916
 
1917
+ #
1918
+ # AMBIGUOUS EMPTY BLOCK ARGUMENTS
1919
+ #
1920
+
1921
+ # Ruby >= 2.7 emits it as two tPIPE terminals
1922
+ # while Ruby < 2.7 as a single tOROP (like in `a || b`)
1923
+ '||'
1924
+ => {
1925
+ if @version >= 27
1926
+ emit(:tPIPE, tok(@ts, @ts + 1), @ts, @ts + 1)
1927
+ fhold;
1928
+ fnext expr_beg; fbreak;
1929
+ else
1930
+ p -= 2
1931
+ fgoto expr_end;
1932
+ end
1933
+ };
1934
+
1930
1935
  #
1931
1936
  # KEYWORDS AND PUNCTUATION
1932
1937
  #
@@ -1941,6 +1946,7 @@ class Parser::Lexer
1941
1946
  else
1942
1947
  emit(:tLBRACE, '{'.freeze)
1943
1948
  end
1949
+ @paren_nest += 1
1944
1950
  fbreak;
1945
1951
  };
1946
1952
 
@@ -2333,24 +2339,6 @@ class Parser::Lexer
2333
2339
  # METHOD CALLS
2334
2340
  #
2335
2341
 
2336
- '.:' w_space+
2337
- => { emit(:tDOT, '.', @ts, @ts + 1)
2338
- emit(:tCOLON, ':', @ts + 1, @ts + 2)
2339
- p = p - tok.length + 2
2340
- fnext expr_dot; fbreak; };
2341
-
2342
- '.:'
2343
- => {
2344
- if @version >= 27
2345
- emit_table(PUNCTUATION)
2346
- else
2347
- emit(:tDOT, tok(@ts, @ts + 1), @ts, @ts + 1)
2348
- fhold;
2349
- end
2350
-
2351
- fnext expr_dot; fbreak;
2352
- };
2353
-
2354
2342
  '.' | '&.' | '::'
2355
2343
  => { emit_table(PUNCTUATION)
2356
2344
  fnext expr_dot; fbreak; };
@@ -2394,7 +2382,7 @@ class Parser::Lexer
2394
2382
  => { emit_table(PUNCTUATION)
2395
2383
  fnext expr_beg; fbreak; };
2396
2384
 
2397
- e_rbrace | e_rparen | ']'
2385
+ e_rbrace | e_rparen | e_rbrack
2398
2386
  => {
2399
2387
  emit_table(PUNCTUATION)
2400
2388
 
@@ -2431,6 +2419,17 @@ class Parser::Lexer
2431
2419
  => { emit(:tLBRACK2, '['.freeze)
2432
2420
  fnext expr_beg; fbreak; };
2433
2421
 
2422
+ '...' c_nl
2423
+ => {
2424
+ if @paren_nest == 0
2425
+ diagnostic :warning, :triple_dot_at_eol, nil, range(@ts, @te - 1)
2426
+ end
2427
+
2428
+ emit(:tDOT3, '...'.freeze, @ts, @te - 1)
2429
+ fhold;
2430
+ fnext expr_beg; fbreak;
2431
+ };
2432
+
2434
2433
  punctuation_end
2435
2434
  => { emit_table(PUNCTUATION)
2436
2435
  fnext expr_beg; fbreak; };
@@ -3,72 +3,71 @@
3
3
  module Parser
4
4
 
5
5
  class Lexer::Dedenter
6
+ # Tab (\t) counts as 8 spaces
7
+ TAB_WIDTH = 8
8
+
6
9
  def initialize(dedent_level)
7
10
  @dedent_level = dedent_level
8
11
  @at_line_begin = true
9
12
  @indent_level = 0
10
13
  end
11
14
 
15
+ # For a heredoc like
16
+ # <<-HERE
17
+ # a
18
+ # b
19
+ # HERE
20
+ # this method gets called with " a\n" and " b\n"
21
+ #
22
+ # However, the following heredoc:
23
+ #
24
+ # <<-HERE
25
+ # a\
26
+ # b
27
+ # HERE
28
+ # calls this method only once with a string " a\\\n b\n"
29
+ #
30
+ # This is important because technically it's a single line,
31
+ # but it has to be concatenated __after__ dedenting.
32
+ #
33
+ # It has no effect for non-squiggly heredocs, i.e. it simply removes "\\\n".
34
+ # Of course, the lexer could do it, but once again: it's all because of dedenting.
35
+ #
12
36
  def dedent(string)
13
- space_begin = space_end = offset = 0
14
- last_index = string.length - 1
15
- escape = false
16
- _at_line_begin = nil
37
+ lines = string.split("\\\n")
17
38
 
18
- string.chars.each_with_index do |char, index|
19
- if char == '\\'
20
- # entering escape mode
21
- escape = true
22
- string.slice!(index - offset)
23
- offset += 1
24
- _at_line_begin = @at_line_begin
25
- @at_line_begin = false
26
- elsif escape
27
- if char == ?\n
28
- # trimming \n, starting a new line
29
- string.slice!(index - offset)
30
- offset += 1
31
- @at_line_begin = true
32
- space_begin = space_end = index - offset
33
- @indent_level = 0
34
- elsif char == ?n
35
- # replacing \\n to \n
36
- string.slice!(index - offset)
37
- string.insert(index - offset, ?\n)
38
- else
39
- # exiting escape mode as it's not an escape sequence
40
- @at_line_begin = _at_line_begin
41
- escape = false
42
- redo
43
- end
44
- escape = false
45
- elsif @at_line_begin
46
- if char == ?\n || @indent_level >= @dedent_level
47
- string.slice!(space_begin...space_end)
48
- offset += space_end - space_begin
49
- @at_line_begin = false
50
- end
39
+ if @at_line_begin
40
+ lines_to_dedent = lines
41
+ else
42
+ _first, *lines_to_dedent = lines
43
+ end
51
44
 
45
+ lines_to_dedent.each do |line|
46
+ left_to_remove = @dedent_level
47
+ remove = 0
48
+
49
+ line.each_char do |char|
50
+ break if left_to_remove <= 0
52
51
  case char
53
52
  when ?\s
54
- @indent_level += 1
55
- space_end += 1
53
+ remove += 1
54
+ left_to_remove -= 1
56
55
  when ?\t
57
- @indent_level += 8 - @indent_level % 8
58
- space_end += 1
56
+ break if TAB_WIDTH * (remove / TAB_WIDTH + 1) > @dedent_level
57
+ remove += 1
58
+ left_to_remove -= TAB_WIDTH
59
+ else
60
+ # no more spaces or tabs
61
+ break
59
62
  end
60
- elsif char == ?\n && index == last_index
61
- @at_line_begin = true
62
- @indent_level = 0
63
- space_begin = space_end = index - offset + 1
64
63
  end
65
- end
66
64
 
67
- if @at_line_begin
68
- string.slice!(space_begin..space_end)
65
+ line.slice!(0, remove)
69
66
  end
70
67
 
71
- nil
68
+ string.replace(lines.join)
69
+
70
+ @at_line_begin = string.end_with?("\n")
72
71
  end
73
72
 
74
73
  def interrupt
@@ -2,17 +2,23 @@
2
2
 
3
3
  module Parser
4
4
 
5
- class Lexer::MaxNumparamStack
5
+ class MaxNumparamStack
6
+ attr_reader :stack
7
+
6
8
  def initialize
7
9
  @stack = []
8
10
  end
9
11
 
10
- def cant_have_numparams!
12
+ def has_ordinary_params!
11
13
  set(-1)
12
14
  end
13
15
 
14
- def can_have_numparams?
15
- top >= 0
16
+ def has_ordinary_params?
17
+ top < 0
18
+ end
19
+
20
+ def has_numparams?
21
+ top > 0
16
22
  end
17
23
 
18
24
  def register(numparam)