parser 2.6.5.0 → 2.7.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/parser/lexer.rl CHANGED
@@ -89,16 +89,13 @@ class Parser::Lexer
89
89
 
90
90
  REGEXP_META_CHARACTERS = Regexp.union(*"\\$()*+.<>?[]^{|}".chars).freeze
91
91
 
92
- NUMPARAM_MAX = 9
93
-
94
92
  attr_reader :source_buffer
95
- attr_reader :max_numparam_stack
96
93
 
97
94
  attr_accessor :diagnostics
98
95
  attr_accessor :static_env
99
96
  attr_accessor :force_utf32
100
97
 
101
- attr_accessor :cond, :cmdarg, :in_kwarg, :context
98
+ attr_accessor :cond, :cmdarg, :in_kwarg, :context, :command_start
102
99
 
103
100
  attr_accessor :tokens, :comments
104
101
 
@@ -179,9 +176,6 @@ class Parser::Lexer
179
176
 
180
177
  # State before =begin / =end block comment
181
178
  @cs_before_block_comment = self.class.lex_en_line_begin
182
-
183
- # Maximum numbered parameters stack
184
- @max_numparam_stack = MaxNumparamStack.new
185
179
  end
186
180
 
187
181
  def source_buffer=(source_buffer)
@@ -255,10 +249,6 @@ class Parser::Lexer
255
249
  @cond = @cond_stack.pop
256
250
  end
257
251
 
258
- def max_numparam
259
- @max_numparam_stack.top
260
- end
261
-
262
252
  def dedent_level
263
253
  # We erase @dedent_level as a precaution to avoid accidentally
264
254
  # using a stale value.
@@ -457,7 +447,7 @@ class Parser::Lexer
457
447
  '=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
458
448
  '<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
459
449
  '{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
460
- '!@' => :tBANG, '&.' => :tANDDOT, '.:' => :tMETHREF
450
+ '!@' => :tBANG, '&.' => :tANDDOT,
461
451
  }
462
452
 
463
453
  PUNCTUATION_BEGIN = {
@@ -1029,6 +1019,20 @@ class Parser::Lexer
1029
1019
  fcall expr_variable;
1030
1020
  }
1031
1021
 
1022
+ # Special case for Ruby >= 2.7
1023
+ # If an interpolated instance/class variable name starts with a digit, we parse it as a plain substring
1024
+ # However, "#$1" is still a regular interpolation
1025
+ interp_digit_var = '#' ('@' | '@@') digit c_alpha*;
1026
+
1027
+ action extend_interp_digit_var {
1028
+ if @version >= 27
1029
+ literal.extend_string(tok, @ts, @te)
1030
+ else
1031
+ message = tok.start_with?('#@@') ? :cvar_name : :ivar_name
1032
+ diagnostic :error, message, { :name => tok(@ts + 1, @te) }, range(@ts + 1, @te)
1033
+ end
1034
+ }
1035
+
1032
1036
  # Interpolations with code blocks must match nested curly braces, as
1033
1037
  # interpolation ending is ambiguous with a block ending. So, every
1034
1038
  # opening and closing brace should be matched with e_[lr]brace rules,
@@ -1074,6 +1078,8 @@ class Parser::Lexer
1074
1078
  fbreak;
1075
1079
  end
1076
1080
  end
1081
+
1082
+ @paren_nest -= 1
1077
1083
  };
1078
1084
 
1079
1085
  action extend_interp_code {
@@ -1098,60 +1104,64 @@ class Parser::Lexer
1098
1104
  # above.
1099
1105
 
1100
1106
  interp_words := |*
1101
- interp_code => extend_interp_code;
1102
- interp_var => extend_interp_var;
1103
- e_bs escape => extend_string_escaped;
1104
- c_space+ => extend_string_space;
1105
- c_eol => extend_string_eol;
1106
- c_any => extend_string;
1107
+ interp_code => extend_interp_code;
1108
+ interp_digit_var => extend_interp_digit_var;
1109
+ interp_var => extend_interp_var;
1110
+ e_bs escape => extend_string_escaped;
1111
+ c_space+ => extend_string_space;
1112
+ c_eol => extend_string_eol;
1113
+ c_any => extend_string;
1107
1114
  *|;
1108
1115
 
1109
1116
  interp_string := |*
1110
- interp_code => extend_interp_code;
1111
- interp_var => extend_interp_var;
1112
- e_bs escape => extend_string_escaped;
1113
- c_eol => extend_string_eol;
1114
- c_any => extend_string;
1117
+ interp_code => extend_interp_code;
1118
+ interp_digit_var => extend_interp_digit_var;
1119
+ interp_var => extend_interp_var;
1120
+ e_bs escape => extend_string_escaped;
1121
+ c_eol => extend_string_eol;
1122
+ c_any => extend_string;
1115
1123
  *|;
1116
1124
 
1117
1125
  plain_words := |*
1118
- e_bs c_any => extend_string_escaped;
1119
- c_space+ => extend_string_space;
1120
- c_eol => extend_string_eol;
1121
- c_any => extend_string;
1126
+ e_bs c_any => extend_string_escaped;
1127
+ c_space+ => extend_string_space;
1128
+ c_eol => extend_string_eol;
1129
+ c_any => extend_string;
1122
1130
  *|;
1123
1131
 
1124
1132
  plain_string := |*
1125
- '\\' c_nl => extend_string_eol;
1126
- e_bs c_any => extend_string_escaped;
1127
- c_eol => extend_string_eol;
1128
- c_any => extend_string;
1133
+ '\\' c_nl => extend_string_eol;
1134
+ e_bs c_any => extend_string_escaped;
1135
+ c_eol => extend_string_eol;
1136
+ c_any => extend_string;
1129
1137
  *|;
1130
1138
 
1131
1139
  interp_backslash_delimited := |*
1132
- interp_code => extend_interp_code;
1133
- interp_var => extend_interp_var;
1134
- c_eol => extend_string_eol;
1135
- c_any => extend_string;
1140
+ interp_code => extend_interp_code;
1141
+ interp_digit_var => extend_interp_digit_var;
1142
+ interp_var => extend_interp_var;
1143
+ c_eol => extend_string_eol;
1144
+ c_any => extend_string;
1136
1145
  *|;
1137
1146
 
1138
1147
  plain_backslash_delimited := |*
1139
- c_eol => extend_string_eol;
1140
- c_any => extend_string;
1148
+ c_eol => extend_string_eol;
1149
+ c_any => extend_string;
1141
1150
  *|;
1142
1151
 
1143
1152
  interp_backslash_delimited_words := |*
1144
- interp_code => extend_interp_code;
1145
- interp_var => extend_interp_var;
1146
- c_space+ => extend_string_space;
1147
- c_eol => extend_string_eol;
1148
- c_any => extend_string;
1153
+ interp_code => extend_interp_code;
1154
+ interp_digit_var => extend_interp_digit_var;
1155
+ interp_var => extend_interp_var;
1156
+ c_space+ => extend_string_space;
1157
+ c_eol => extend_string_eol;
1158
+ c_any => extend_string;
1149
1159
  *|;
1150
1160
 
1151
1161
  plain_backslash_delimited_words := |*
1152
- c_space+ => extend_string_space;
1153
- c_eol => extend_string_eol;
1154
- c_any => extend_string;
1162
+ c_space+ => extend_string_space;
1163
+ c_eol => extend_string_eol;
1164
+ c_any => extend_string;
1155
1165
  *|;
1156
1166
 
1157
1167
  regexp_modifiers := |*
@@ -1267,6 +1277,12 @@ class Parser::Lexer
1267
1277
 
1268
1278
  e_lbrack = '[' % {
1269
1279
  @cond.push(false); @cmdarg.push(false)
1280
+
1281
+ @paren_nest += 1
1282
+ };
1283
+
1284
+ e_rbrack = ']' % {
1285
+ @paren_nest -= 1
1270
1286
  };
1271
1287
 
1272
1288
  # Ruby 1.9 lambdas require parentheses counting in order to
@@ -1324,36 +1340,6 @@ class Parser::Lexer
1324
1340
  fnext *stack_pop; fbreak;
1325
1341
  };
1326
1342
 
1327
- '@' [0-9]+
1328
- => {
1329
- if @version < 27
1330
- diagnostic :error, :ivar_name, { :name => tok }
1331
- end
1332
-
1333
- value = tok[1..-1]
1334
-
1335
- if value[0] == '0'
1336
- diagnostic :error, :leading_zero_in_numparam, nil, range(@ts, @te)
1337
- end
1338
-
1339
- if value.to_i > NUMPARAM_MAX
1340
- diagnostic :error, :too_large_numparam, nil, range(@ts, @te)
1341
- end
1342
-
1343
- if !@context.in_block? && !@context.in_lambda?
1344
- diagnostic :error, :numparam_outside_block, nil, range(@ts, @te)
1345
- end
1346
-
1347
- if !@max_numparam_stack.can_have_numparams?
1348
- diagnostic :error, :ordinary_param_defined, nil, range(@ts, @te)
1349
- end
1350
-
1351
- @max_numparam_stack.register(value.to_i)
1352
-
1353
- emit(:tNUMPARAM, tok[1..-1])
1354
- fnext *stack_pop; fbreak;
1355
- };
1356
-
1357
1343
  instance_var_v
1358
1344
  => {
1359
1345
  if tok =~ /^@[0-9]/
@@ -1519,6 +1505,7 @@ class Parser::Lexer
1519
1505
  emit(:tLCURLY, '{'.freeze, @te - 1, @te)
1520
1506
  end
1521
1507
  @command_start = true
1508
+ @paren_nest += 1
1522
1509
  fnext expr_value; fbreak;
1523
1510
  };
1524
1511
 
@@ -1679,6 +1666,7 @@ class Parser::Lexer
1679
1666
  else
1680
1667
  emit(:tLBRACE_ARG, '{'.freeze)
1681
1668
  end
1669
+ @paren_nest += 1
1682
1670
  @command_start = true
1683
1671
  fnext expr_value; fbreak;
1684
1672
  };
@@ -1927,6 +1915,24 @@ class Parser::Lexer
1927
1915
  fgoto expr_end;
1928
1916
  };
1929
1917
 
1918
+ #
1919
+ # AMBIGUOUS EMPTY BLOCK ARGUMENTS
1920
+ #
1921
+
1922
+ # Ruby >= 2.7 emits it as two tPIPE terminals
1923
+ # while Ruby < 2.7 as a single tOROP (like in `a || b`)
1924
+ '||'
1925
+ => {
1926
+ if @version >= 27
1927
+ emit(:tPIPE, tok(@ts, @ts + 1), @ts, @ts + 1)
1928
+ fhold;
1929
+ fnext expr_beg; fbreak;
1930
+ else
1931
+ p -= 2
1932
+ fgoto expr_end;
1933
+ end
1934
+ };
1935
+
1930
1936
  #
1931
1937
  # KEYWORDS AND PUNCTUATION
1932
1938
  #
@@ -1941,6 +1947,7 @@ class Parser::Lexer
1941
1947
  else
1942
1948
  emit(:tLBRACE, '{'.freeze)
1943
1949
  end
1950
+ @paren_nest += 1
1944
1951
  fbreak;
1945
1952
  };
1946
1953
 
@@ -2159,6 +2166,9 @@ class Parser::Lexer
2159
2166
  emit_do
2160
2167
  end
2161
2168
  end
2169
+ if tok == '{'.freeze
2170
+ @paren_nest += 1
2171
+ end
2162
2172
  @command_start = true
2163
2173
 
2164
2174
  fnext expr_value; fbreak;
@@ -2333,24 +2343,6 @@ class Parser::Lexer
2333
2343
  # METHOD CALLS
2334
2344
  #
2335
2345
 
2336
- '.:' w_space+
2337
- => { emit(:tDOT, '.', @ts, @ts + 1)
2338
- emit(:tCOLON, ':', @ts + 1, @ts + 2)
2339
- p = p - tok.length + 2
2340
- fnext expr_dot; fbreak; };
2341
-
2342
- '.:'
2343
- => {
2344
- if @version >= 27
2345
- emit_table(PUNCTUATION)
2346
- else
2347
- emit(:tDOT, tok(@ts, @ts + 1), @ts, @ts + 1)
2348
- fhold;
2349
- end
2350
-
2351
- fnext expr_dot; fbreak;
2352
- };
2353
-
2354
2346
  '.' | '&.' | '::'
2355
2347
  => { emit_table(PUNCTUATION)
2356
2348
  fnext expr_dot; fbreak; };
@@ -2394,7 +2386,7 @@ class Parser::Lexer
2394
2386
  => { emit_table(PUNCTUATION)
2395
2387
  fnext expr_beg; fbreak; };
2396
2388
 
2397
- e_rbrace | e_rparen | ']'
2389
+ e_rbrace | e_rparen | e_rbrack
2398
2390
  => {
2399
2391
  emit_table(PUNCTUATION)
2400
2392
 
@@ -2431,6 +2423,17 @@ class Parser::Lexer
2431
2423
  => { emit(:tLBRACK2, '['.freeze)
2432
2424
  fnext expr_beg; fbreak; };
2433
2425
 
2426
+ '...' c_nl
2427
+ => {
2428
+ if @paren_nest == 0
2429
+ diagnostic :warning, :triple_dot_at_eol, nil, range(@ts, @te - 1)
2430
+ end
2431
+
2432
+ emit(:tDOT3, '...'.freeze, @ts, @te - 1)
2433
+ fhold;
2434
+ fnext expr_beg; fbreak;
2435
+ };
2436
+
2434
2437
  punctuation_end
2435
2438
  => { emit_table(PUNCTUATION)
2436
2439
  fnext expr_beg; fbreak; };
@@ -3,72 +3,75 @@
3
3
  module Parser
4
4
 
5
5
  class Lexer::Dedenter
6
+ # Tab (\t) counts as 8 spaces
7
+ TAB_WIDTH = 8
8
+
6
9
  def initialize(dedent_level)
7
10
  @dedent_level = dedent_level
8
11
  @at_line_begin = true
9
12
  @indent_level = 0
10
13
  end
11
14
 
15
+ # For a heredoc like
16
+ # <<-HERE
17
+ # a
18
+ # b
19
+ # HERE
20
+ # this method gets called with " a\n" and " b\n"
21
+ #
22
+ # However, the following heredoc:
23
+ #
24
+ # <<-HERE
25
+ # a\
26
+ # b
27
+ # HERE
28
+ # calls this method only once with a string " a\\\n b\n"
29
+ #
30
+ # This is important because technically it's a single line,
31
+ # but it has to be concatenated __after__ dedenting.
32
+ #
33
+ # It has no effect for non-squiggly heredocs, i.e. it simply removes "\\\n"
34
+ # Of course, lexer could do it but once again: it's all because of dedenting.
35
+ #
12
36
  def dedent(string)
13
- space_begin = space_end = offset = 0
14
- last_index = string.length - 1
15
- escape = false
16
- _at_line_begin = nil
37
+ original_encoding = string.encoding
38
+ # Prevent the following error when processing binary encoded source.
39
+ # "\xC0".split # => ArgumentError (invalid byte sequence in UTF-8)
40
+ lines = string.force_encoding(Encoding::BINARY).split("\\\n")
41
+ lines.map! {|s| s.force_encoding(original_encoding) }
17
42
 
18
- string.chars.each_with_index do |char, index|
19
- if char == '\\'
20
- # entering escape mode
21
- escape = true
22
- string.slice!(index - offset)
23
- offset += 1
24
- _at_line_begin = @at_line_begin
25
- @at_line_begin = false
26
- elsif escape
27
- if char == ?\n
28
- # trimming \n, starting a new line
29
- string.slice!(index - offset)
30
- offset += 1
31
- @at_line_begin = true
32
- space_begin = space_end = index - offset
33
- @indent_level = 0
34
- elsif char == ?n
35
- # replacing \\n to \n
36
- string.slice!(index - offset)
37
- string.insert(index - offset, ?\n)
38
- else
39
- # exiting escape mode as it's not an escape sequence
40
- @at_line_begin = _at_line_begin
41
- escape = false
42
- redo
43
- end
44
- escape = false
45
- elsif @at_line_begin
46
- if char == ?\n || @indent_level >= @dedent_level
47
- string.slice!(space_begin...space_end)
48
- offset += space_end - space_begin
49
- @at_line_begin = false
50
- end
43
+ if @at_line_begin
44
+ lines_to_dedent = lines
45
+ else
46
+ _first, *lines_to_dedent = lines
47
+ end
51
48
 
49
+ lines_to_dedent.each do |line|
50
+ left_to_remove = @dedent_level
51
+ remove = 0
52
+
53
+ line.each_char do |char|
54
+ break if left_to_remove <= 0
52
55
  case char
53
56
  when ?\s
54
- @indent_level += 1
55
- space_end += 1
57
+ remove += 1
58
+ left_to_remove -= 1
56
59
  when ?\t
57
- @indent_level += 8 - @indent_level % 8
58
- space_end += 1
60
+ break if TAB_WIDTH * (remove / TAB_WIDTH + 1) > @dedent_level
61
+ remove += 1
62
+ left_to_remove -= TAB_WIDTH
63
+ else
64
+ # no more spaces or tabs
65
+ break
59
66
  end
60
- elsif char == ?\n && index == last_index
61
- @at_line_begin = true
62
- @indent_level = 0
63
- space_begin = space_end = index - offset + 1
64
67
  end
65
- end
66
68
 
67
- if @at_line_begin
68
- string.slice!(space_begin..space_end)
69
+ line.slice!(0, remove)
69
70
  end
70
71
 
71
- nil
72
+ string.replace(lines.join)
73
+
74
+ @at_line_begin = string.end_with?("\n")
72
75
  end
73
76
 
74
77
  def interrupt