parser 2.6.5.0 → 2.7.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -89,16 +89,13 @@ class Parser::Lexer
89
89
 
90
90
  REGEXP_META_CHARACTERS = Regexp.union(*"\\$()*+.<>?[]^{|}".chars).freeze
91
91
 
92
- NUMPARAM_MAX = 9
93
-
94
92
  attr_reader :source_buffer
95
- attr_reader :max_numparam_stack
96
93
 
97
94
  attr_accessor :diagnostics
98
95
  attr_accessor :static_env
99
96
  attr_accessor :force_utf32
100
97
 
101
- attr_accessor :cond, :cmdarg, :in_kwarg, :context
98
+ attr_accessor :cond, :cmdarg, :in_kwarg, :context, :command_start
102
99
 
103
100
  attr_accessor :tokens, :comments
104
101
 
@@ -179,9 +176,6 @@ class Parser::Lexer
179
176
 
180
177
  # State before =begin / =end block comment
181
178
  @cs_before_block_comment = self.class.lex_en_line_begin
182
-
183
- # Maximum numbered parameters stack
184
- @max_numparam_stack = MaxNumparamStack.new
185
179
  end
186
180
 
187
181
  def source_buffer=(source_buffer)
@@ -255,10 +249,6 @@ class Parser::Lexer
255
249
  @cond = @cond_stack.pop
256
250
  end
257
251
 
258
- def max_numparam
259
- @max_numparam_stack.top
260
- end
261
-
262
252
  def dedent_level
263
253
  # We erase @dedent_level as a precaution to avoid accidentally
264
254
  # using a stale value.
@@ -457,7 +447,7 @@ class Parser::Lexer
457
447
  '=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
458
448
  '<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
459
449
  '{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
460
- '!@' => :tBANG, '&.' => :tANDDOT, '.:' => :tMETHREF
450
+ '!@' => :tBANG, '&.' => :tANDDOT,
461
451
  }
462
452
 
463
453
  PUNCTUATION_BEGIN = {
@@ -1029,6 +1019,20 @@ class Parser::Lexer
1029
1019
  fcall expr_variable;
1030
1020
  }
1031
1021
 
1022
+ # Special case for Ruby >= 2.7
1023
+ # If interpolated instance/class variable starts with a digit we parse it as a plain substring
1024
+ # However, "#$1" is still a regular interpolation
1025
+ interp_digit_var = '#' ('@' | '@@') digit c_alpha*;
1026
+
1027
+ action extend_interp_digit_var {
1028
+ if @version >= 27
1029
+ literal.extend_string(tok, @ts, @te)
1030
+ else
1031
+ message = tok.start_with?('#@@') ? :cvar_name : :ivar_name
1032
+ diagnostic :error, message, { :name => tok(@ts + 1, @te) }, range(@ts + 1, @te)
1033
+ end
1034
+ }
1035
+
1032
1036
  # Interpolations with code blocks must match nested curly braces, as
1033
1037
  # interpolation ending is ambiguous with a block ending. So, every
1034
1038
  # opening and closing brace should be matched with e_[lr]brace rules,
@@ -1060,6 +1064,7 @@ class Parser::Lexer
1060
1064
  emit(:tRCURLY, '}'.freeze, p - 1, p)
1061
1065
  @cond.lexpop
1062
1066
  @cmdarg.lexpop
1067
+ @paren_nest -= 1
1063
1068
  else
1064
1069
  emit(:tSTRING_DEND, '}'.freeze, p - 1, p)
1065
1070
  end
@@ -1098,60 +1103,64 @@ class Parser::Lexer
1098
1103
  # above.
1099
1104
 
1100
1105
  interp_words := |*
1101
- interp_code => extend_interp_code;
1102
- interp_var => extend_interp_var;
1103
- e_bs escape => extend_string_escaped;
1104
- c_space+ => extend_string_space;
1105
- c_eol => extend_string_eol;
1106
- c_any => extend_string;
1106
+ interp_code => extend_interp_code;
1107
+ interp_digit_var => extend_interp_digit_var;
1108
+ interp_var => extend_interp_var;
1109
+ e_bs escape => extend_string_escaped;
1110
+ c_space+ => extend_string_space;
1111
+ c_eol => extend_string_eol;
1112
+ c_any => extend_string;
1107
1113
  *|;
1108
1114
 
1109
1115
  interp_string := |*
1110
- interp_code => extend_interp_code;
1111
- interp_var => extend_interp_var;
1112
- e_bs escape => extend_string_escaped;
1113
- c_eol => extend_string_eol;
1114
- c_any => extend_string;
1116
+ interp_code => extend_interp_code;
1117
+ interp_digit_var => extend_interp_digit_var;
1118
+ interp_var => extend_interp_var;
1119
+ e_bs escape => extend_string_escaped;
1120
+ c_eol => extend_string_eol;
1121
+ c_any => extend_string;
1115
1122
  *|;
1116
1123
 
1117
1124
  plain_words := |*
1118
- e_bs c_any => extend_string_escaped;
1119
- c_space+ => extend_string_space;
1120
- c_eol => extend_string_eol;
1121
- c_any => extend_string;
1125
+ e_bs c_any => extend_string_escaped;
1126
+ c_space+ => extend_string_space;
1127
+ c_eol => extend_string_eol;
1128
+ c_any => extend_string;
1122
1129
  *|;
1123
1130
 
1124
1131
  plain_string := |*
1125
- '\\' c_nl => extend_string_eol;
1126
- e_bs c_any => extend_string_escaped;
1127
- c_eol => extend_string_eol;
1128
- c_any => extend_string;
1132
+ '\\' c_nl => extend_string_eol;
1133
+ e_bs c_any => extend_string_escaped;
1134
+ c_eol => extend_string_eol;
1135
+ c_any => extend_string;
1129
1136
  *|;
1130
1137
 
1131
1138
  interp_backslash_delimited := |*
1132
- interp_code => extend_interp_code;
1133
- interp_var => extend_interp_var;
1134
- c_eol => extend_string_eol;
1135
- c_any => extend_string;
1139
+ interp_code => extend_interp_code;
1140
+ interp_digit_var => extend_interp_digit_var;
1141
+ interp_var => extend_interp_var;
1142
+ c_eol => extend_string_eol;
1143
+ c_any => extend_string;
1136
1144
  *|;
1137
1145
 
1138
1146
  plain_backslash_delimited := |*
1139
- c_eol => extend_string_eol;
1140
- c_any => extend_string;
1147
+ c_eol => extend_string_eol;
1148
+ c_any => extend_string;
1141
1149
  *|;
1142
1150
 
1143
1151
  interp_backslash_delimited_words := |*
1144
- interp_code => extend_interp_code;
1145
- interp_var => extend_interp_var;
1146
- c_space+ => extend_string_space;
1147
- c_eol => extend_string_eol;
1148
- c_any => extend_string;
1152
+ interp_code => extend_interp_code;
1153
+ interp_digit_var => extend_interp_digit_var;
1154
+ interp_var => extend_interp_var;
1155
+ c_space+ => extend_string_space;
1156
+ c_eol => extend_string_eol;
1157
+ c_any => extend_string;
1149
1158
  *|;
1150
1159
 
1151
1160
  plain_backslash_delimited_words := |*
1152
- c_space+ => extend_string_space;
1153
- c_eol => extend_string_eol;
1154
- c_any => extend_string;
1161
+ c_space+ => extend_string_space;
1162
+ c_eol => extend_string_eol;
1163
+ c_any => extend_string;
1155
1164
  *|;
1156
1165
 
1157
1166
  regexp_modifiers := |*
@@ -1267,6 +1276,12 @@ class Parser::Lexer
1267
1276
 
1268
1277
  e_lbrack = '[' % {
1269
1278
  @cond.push(false); @cmdarg.push(false)
1279
+
1280
+ @paren_nest += 1
1281
+ };
1282
+
1283
+ e_rbrack = ']' % {
1284
+ @paren_nest -= 1
1270
1285
  };
1271
1286
 
1272
1287
  # Ruby 1.9 lambdas require parentheses counting in order to
@@ -1324,36 +1339,6 @@ class Parser::Lexer
1324
1339
  fnext *stack_pop; fbreak;
1325
1340
  };
1326
1341
 
1327
- '@' [0-9]+
1328
- => {
1329
- if @version < 27
1330
- diagnostic :error, :ivar_name, { :name => tok }
1331
- end
1332
-
1333
- value = tok[1..-1]
1334
-
1335
- if value[0] == '0'
1336
- diagnostic :error, :leading_zero_in_numparam, nil, range(@ts, @te)
1337
- end
1338
-
1339
- if value.to_i > NUMPARAM_MAX
1340
- diagnostic :error, :too_large_numparam, nil, range(@ts, @te)
1341
- end
1342
-
1343
- if !@context.in_block? && !@context.in_lambda?
1344
- diagnostic :error, :numparam_outside_block, nil, range(@ts, @te)
1345
- end
1346
-
1347
- if !@max_numparam_stack.can_have_numparams?
1348
- diagnostic :error, :ordinary_param_defined, nil, range(@ts, @te)
1349
- end
1350
-
1351
- @max_numparam_stack.register(value.to_i)
1352
-
1353
- emit(:tNUMPARAM, tok[1..-1])
1354
- fnext *stack_pop; fbreak;
1355
- };
1356
-
1357
1342
  instance_var_v
1358
1343
  => {
1359
1344
  if tok =~ /^@[0-9]/
@@ -1519,6 +1504,7 @@ class Parser::Lexer
1519
1504
  emit(:tLCURLY, '{'.freeze, @te - 1, @te)
1520
1505
  end
1521
1506
  @command_start = true
1507
+ @paren_nest += 1
1522
1508
  fnext expr_value; fbreak;
1523
1509
  };
1524
1510
 
@@ -1679,6 +1665,7 @@ class Parser::Lexer
1679
1665
  else
1680
1666
  emit(:tLBRACE_ARG, '{'.freeze)
1681
1667
  end
1668
+ @paren_nest += 1
1682
1669
  @command_start = true
1683
1670
  fnext expr_value; fbreak;
1684
1671
  };
@@ -1927,6 +1914,24 @@ class Parser::Lexer
1927
1914
  fgoto expr_end;
1928
1915
  };
1929
1916
 
1917
+ #
1918
+ # AMBIGUOUS EMPTY BLOCK ARGUMENTS
1919
+ #
1920
+
1921
+ # Ruby >= 2.7 emits it as two tPIPE terminals
1922
+ # while Ruby < 2.7 as a single tOROP (like in `a || b`)
1923
+ '||'
1924
+ => {
1925
+ if @version >= 27
1926
+ emit(:tPIPE, tok(@ts, @ts + 1), @ts, @ts + 1)
1927
+ fhold;
1928
+ fnext expr_beg; fbreak;
1929
+ else
1930
+ p -= 2
1931
+ fgoto expr_end;
1932
+ end
1933
+ };
1934
+
1930
1935
  #
1931
1936
  # KEYWORDS AND PUNCTUATION
1932
1937
  #
@@ -1941,6 +1946,7 @@ class Parser::Lexer
1941
1946
  else
1942
1947
  emit(:tLBRACE, '{'.freeze)
1943
1948
  end
1949
+ @paren_nest += 1
1944
1950
  fbreak;
1945
1951
  };
1946
1952
 
@@ -2333,24 +2339,6 @@ class Parser::Lexer
2333
2339
  # METHOD CALLS
2334
2340
  #
2335
2341
 
2336
- '.:' w_space+
2337
- => { emit(:tDOT, '.', @ts, @ts + 1)
2338
- emit(:tCOLON, ':', @ts + 1, @ts + 2)
2339
- p = p - tok.length + 2
2340
- fnext expr_dot; fbreak; };
2341
-
2342
- '.:'
2343
- => {
2344
- if @version >= 27
2345
- emit_table(PUNCTUATION)
2346
- else
2347
- emit(:tDOT, tok(@ts, @ts + 1), @ts, @ts + 1)
2348
- fhold;
2349
- end
2350
-
2351
- fnext expr_dot; fbreak;
2352
- };
2353
-
2354
2342
  '.' | '&.' | '::'
2355
2343
  => { emit_table(PUNCTUATION)
2356
2344
  fnext expr_dot; fbreak; };
@@ -2394,7 +2382,7 @@ class Parser::Lexer
2394
2382
  => { emit_table(PUNCTUATION)
2395
2383
  fnext expr_beg; fbreak; };
2396
2384
 
2397
- e_rbrace | e_rparen | ']'
2385
+ e_rbrace | e_rparen | e_rbrack
2398
2386
  => {
2399
2387
  emit_table(PUNCTUATION)
2400
2388
 
@@ -2431,6 +2419,17 @@ class Parser::Lexer
2431
2419
  => { emit(:tLBRACK2, '['.freeze)
2432
2420
  fnext expr_beg; fbreak; };
2433
2421
 
2422
+ '...' c_nl
2423
+ => {
2424
+ if @paren_nest == 0
2425
+ diagnostic :warning, :triple_dot_at_eol, nil, range(@ts, @te - 1)
2426
+ end
2427
+
2428
+ emit(:tDOT3, '...'.freeze, @ts, @te - 1)
2429
+ fhold;
2430
+ fnext expr_beg; fbreak;
2431
+ };
2432
+
2434
2433
  punctuation_end
2435
2434
  => { emit_table(PUNCTUATION)
2436
2435
  fnext expr_beg; fbreak; };
@@ -3,72 +3,71 @@
3
3
  module Parser
4
4
 
5
5
  class Lexer::Dedenter
6
+ # Tab (\t) counts as 8 spaces
7
+ TAB_WIDTH = 8
8
+
6
9
  def initialize(dedent_level)
7
10
  @dedent_level = dedent_level
8
11
  @at_line_begin = true
9
12
  @indent_level = 0
10
13
  end
11
14
 
15
+ # For a heredoc like
16
+ # <<-HERE
17
+ # a
18
+ # b
19
+ # HERE
20
+ # this method gets called with " a\n" and " b\n"
21
+ #
22
+ # However, the following heredoc:
23
+ #
24
+ # <<-HERE
25
+ # a\
26
+ # b
27
+ # HERE
28
+ # calls this method only once with a string " a\\\n b\n"
29
+ #
30
+ # This is important because technically it's a single line,
31
+ # but it has to be concatenated __after__ dedenting.
32
+ #
33
+ # It has no effect for non-squiggly heredocs, i.e. it simply removes "\\\n"
34
+ # Of course, the lexer could do it, but once again: it's all because of dedenting.
35
+ #
12
36
  def dedent(string)
13
- space_begin = space_end = offset = 0
14
- last_index = string.length - 1
15
- escape = false
16
- _at_line_begin = nil
37
+ lines = string.split("\\\n")
17
38
 
18
- string.chars.each_with_index do |char, index|
19
- if char == '\\'
20
- # entering escape mode
21
- escape = true
22
- string.slice!(index - offset)
23
- offset += 1
24
- _at_line_begin = @at_line_begin
25
- @at_line_begin = false
26
- elsif escape
27
- if char == ?\n
28
- # trimming \n, starting a new line
29
- string.slice!(index - offset)
30
- offset += 1
31
- @at_line_begin = true
32
- space_begin = space_end = index - offset
33
- @indent_level = 0
34
- elsif char == ?n
35
- # replacing \\n to \n
36
- string.slice!(index - offset)
37
- string.insert(index - offset, ?\n)
38
- else
39
- # exiting escape mode as it's not an escape sequence
40
- @at_line_begin = _at_line_begin
41
- escape = false
42
- redo
43
- end
44
- escape = false
45
- elsif @at_line_begin
46
- if char == ?\n || @indent_level >= @dedent_level
47
- string.slice!(space_begin...space_end)
48
- offset += space_end - space_begin
49
- @at_line_begin = false
50
- end
39
+ if @at_line_begin
40
+ lines_to_dedent = lines
41
+ else
42
+ _first, *lines_to_dedent = lines
43
+ end
51
44
 
45
+ lines_to_dedent.each do |line|
46
+ left_to_remove = @dedent_level
47
+ remove = 0
48
+
49
+ line.each_char do |char|
50
+ break if left_to_remove <= 0
52
51
  case char
53
52
  when ?\s
54
- @indent_level += 1
55
- space_end += 1
53
+ remove += 1
54
+ left_to_remove -= 1
56
55
  when ?\t
57
- @indent_level += 8 - @indent_level % 8
58
- space_end += 1
56
+ break if TAB_WIDTH * (remove / TAB_WIDTH + 1) > @dedent_level
57
+ remove += 1
58
+ left_to_remove -= TAB_WIDTH
59
+ else
60
+ # no more spaces or tabs
61
+ break
59
62
  end
60
- elsif char == ?\n && index == last_index
61
- @at_line_begin = true
62
- @indent_level = 0
63
- space_begin = space_end = index - offset + 1
64
63
  end
65
- end
66
64
 
67
- if @at_line_begin
68
- string.slice!(space_begin..space_end)
65
+ line.slice!(0, remove)
69
66
  end
70
67
 
71
- nil
68
+ string.replace(lines.join)
69
+
70
+ @at_line_begin = string.end_with?("\n")
72
71
  end
73
72
 
74
73
  def interrupt
@@ -2,17 +2,23 @@
2
2
 
3
3
  module Parser
4
4
 
5
- class Lexer::MaxNumparamStack
5
+ class MaxNumparamStack
6
+ attr_reader :stack
7
+
6
8
  def initialize
7
9
  @stack = []
8
10
  end
9
11
 
10
- def cant_have_numparams!
12
+ def has_ordinary_params!
11
13
  set(-1)
12
14
  end
13
15
 
14
- def can_have_numparams?
15
- top >= 0
16
+ def has_ordinary_params?
17
+ top < 0
18
+ end
19
+
20
+ def has_numparams?
21
+ top > 0
16
22
  end
17
23
 
18
24
  def register(numparam)