parser 2.6.5.0 → 2.7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -0
- data/README.md +7 -0
- data/doc/AST_FORMAT.md +346 -20
- data/lib/parser.rb +3 -1
- data/lib/parser/ast/processor.rb +15 -0
- data/lib/parser/base.rb +19 -0
- data/lib/parser/builders/default.rb +245 -12
- data/lib/parser/context.rb +4 -0
- data/lib/parser/current.rb +4 -4
- data/lib/parser/current_arg_stack.rb +43 -0
- data/lib/parser/lexer.rl +93 -94
- data/lib/parser/lexer/dedenter.rb +48 -49
- data/lib/parser/{lexer/max_numparam_stack.rb → max_numparam_stack.rb} +10 -4
- data/lib/parser/messages.rb +34 -29
- data/lib/parser/meta.rb +6 -2
- data/lib/parser/ruby27.y +488 -35
- data/lib/parser/static_environment.rb +10 -0
- data/lib/parser/variables_stack.rb +32 -0
- data/lib/parser/version.rb +1 -1
- data/test/helper.rb +1 -0
- data/test/test_lexer.rb +7 -66
- data/test/test_parser.rb +1776 -123
- metadata +5 -3
data/lib/parser/lexer.rl
CHANGED
@@ -89,16 +89,13 @@ class Parser::Lexer
|
|
89
89
|
|
90
90
|
REGEXP_META_CHARACTERS = Regexp.union(*"\\$()*+.<>?[]^{|}".chars).freeze
|
91
91
|
|
92
|
-
NUMPARAM_MAX = 9
|
93
|
-
|
94
92
|
attr_reader :source_buffer
|
95
|
-
attr_reader :max_numparam_stack
|
96
93
|
|
97
94
|
attr_accessor :diagnostics
|
98
95
|
attr_accessor :static_env
|
99
96
|
attr_accessor :force_utf32
|
100
97
|
|
101
|
-
attr_accessor :cond, :cmdarg, :in_kwarg, :context
|
98
|
+
attr_accessor :cond, :cmdarg, :in_kwarg, :context, :command_start
|
102
99
|
|
103
100
|
attr_accessor :tokens, :comments
|
104
101
|
|
@@ -179,9 +176,6 @@ class Parser::Lexer
|
|
179
176
|
|
180
177
|
# State before =begin / =end block comment
|
181
178
|
@cs_before_block_comment = self.class.lex_en_line_begin
|
182
|
-
|
183
|
-
# Maximum numbered parameters stack
|
184
|
-
@max_numparam_stack = MaxNumparamStack.new
|
185
179
|
end
|
186
180
|
|
187
181
|
def source_buffer=(source_buffer)
|
@@ -255,10 +249,6 @@ class Parser::Lexer
|
|
255
249
|
@cond = @cond_stack.pop
|
256
250
|
end
|
257
251
|
|
258
|
-
def max_numparam
|
259
|
-
@max_numparam_stack.top
|
260
|
-
end
|
261
|
-
|
262
252
|
def dedent_level
|
263
253
|
# We erase @dedent_level as a precaution to avoid accidentally
|
264
254
|
# using a stale value.
|
@@ -457,7 +447,7 @@ class Parser::Lexer
|
|
457
447
|
'=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
|
458
448
|
'<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
|
459
449
|
'{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
|
460
|
-
'!@' => :tBANG, '&.' => :tANDDOT,
|
450
|
+
'!@' => :tBANG, '&.' => :tANDDOT,
|
461
451
|
}
|
462
452
|
|
463
453
|
PUNCTUATION_BEGIN = {
|
@@ -1029,6 +1019,20 @@ class Parser::Lexer
|
|
1029
1019
|
fcall expr_variable;
|
1030
1020
|
}
|
1031
1021
|
|
1022
|
+
# Special case for Ruby >= 2.7
|
1023
|
+
# If interpolated instance/class variable starts with a digit we parse it as a plain substring
|
1024
|
+
# However, "#$1" is still a regular interpolation
|
1025
|
+
interp_digit_var = '#' ('@' | '@@') digit c_alpha*;
|
1026
|
+
|
1027
|
+
action extend_interp_digit_var {
|
1028
|
+
if @version >= 27
|
1029
|
+
literal.extend_string(tok, @ts, @te)
|
1030
|
+
else
|
1031
|
+
message = tok.start_with?('#@@') ? :cvar_name : :ivar_name
|
1032
|
+
diagnostic :error, message, { :name => tok(@ts + 1, @te) }, range(@ts + 1, @te)
|
1033
|
+
end
|
1034
|
+
}
|
1035
|
+
|
1032
1036
|
# Interpolations with code blocks must match nested curly braces, as
|
1033
1037
|
# interpolation ending is ambiguous with a block ending. So, every
|
1034
1038
|
# opening and closing brace should be matched with e_[lr]brace rules,
|
@@ -1060,6 +1064,7 @@ class Parser::Lexer
|
|
1060
1064
|
emit(:tRCURLY, '}'.freeze, p - 1, p)
|
1061
1065
|
@cond.lexpop
|
1062
1066
|
@cmdarg.lexpop
|
1067
|
+
@paren_nest -= 1
|
1063
1068
|
else
|
1064
1069
|
emit(:tSTRING_DEND, '}'.freeze, p - 1, p)
|
1065
1070
|
end
|
@@ -1098,60 +1103,64 @@ class Parser::Lexer
|
|
1098
1103
|
# above.
|
1099
1104
|
|
1100
1105
|
interp_words := |*
|
1101
|
-
interp_code
|
1102
|
-
|
1103
|
-
|
1104
|
-
|
1105
|
-
|
1106
|
-
|
1106
|
+
interp_code => extend_interp_code;
|
1107
|
+
interp_digit_var => extend_interp_digit_var;
|
1108
|
+
interp_var => extend_interp_var;
|
1109
|
+
e_bs escape => extend_string_escaped;
|
1110
|
+
c_space+ => extend_string_space;
|
1111
|
+
c_eol => extend_string_eol;
|
1112
|
+
c_any => extend_string;
|
1107
1113
|
*|;
|
1108
1114
|
|
1109
1115
|
interp_string := |*
|
1110
|
-
interp_code
|
1111
|
-
|
1112
|
-
|
1113
|
-
|
1114
|
-
|
1116
|
+
interp_code => extend_interp_code;
|
1117
|
+
interp_digit_var => extend_interp_digit_var;
|
1118
|
+
interp_var => extend_interp_var;
|
1119
|
+
e_bs escape => extend_string_escaped;
|
1120
|
+
c_eol => extend_string_eol;
|
1121
|
+
c_any => extend_string;
|
1115
1122
|
*|;
|
1116
1123
|
|
1117
1124
|
plain_words := |*
|
1118
|
-
e_bs c_any
|
1119
|
-
c_space+
|
1120
|
-
c_eol
|
1121
|
-
c_any
|
1125
|
+
e_bs c_any => extend_string_escaped;
|
1126
|
+
c_space+ => extend_string_space;
|
1127
|
+
c_eol => extend_string_eol;
|
1128
|
+
c_any => extend_string;
|
1122
1129
|
*|;
|
1123
1130
|
|
1124
1131
|
plain_string := |*
|
1125
|
-
'\\' c_nl
|
1126
|
-
e_bs c_any
|
1127
|
-
c_eol
|
1128
|
-
c_any
|
1132
|
+
'\\' c_nl => extend_string_eol;
|
1133
|
+
e_bs c_any => extend_string_escaped;
|
1134
|
+
c_eol => extend_string_eol;
|
1135
|
+
c_any => extend_string;
|
1129
1136
|
*|;
|
1130
1137
|
|
1131
1138
|
interp_backslash_delimited := |*
|
1132
|
-
interp_code
|
1133
|
-
|
1134
|
-
|
1135
|
-
|
1139
|
+
interp_code => extend_interp_code;
|
1140
|
+
interp_digit_var => extend_interp_digit_var;
|
1141
|
+
interp_var => extend_interp_var;
|
1142
|
+
c_eol => extend_string_eol;
|
1143
|
+
c_any => extend_string;
|
1136
1144
|
*|;
|
1137
1145
|
|
1138
1146
|
plain_backslash_delimited := |*
|
1139
|
-
c_eol
|
1140
|
-
c_any
|
1147
|
+
c_eol => extend_string_eol;
|
1148
|
+
c_any => extend_string;
|
1141
1149
|
*|;
|
1142
1150
|
|
1143
1151
|
interp_backslash_delimited_words := |*
|
1144
|
-
interp_code
|
1145
|
-
|
1146
|
-
|
1147
|
-
|
1148
|
-
|
1152
|
+
interp_code => extend_interp_code;
|
1153
|
+
interp_digit_var => extend_interp_digit_var;
|
1154
|
+
interp_var => extend_interp_var;
|
1155
|
+
c_space+ => extend_string_space;
|
1156
|
+
c_eol => extend_string_eol;
|
1157
|
+
c_any => extend_string;
|
1149
1158
|
*|;
|
1150
1159
|
|
1151
1160
|
plain_backslash_delimited_words := |*
|
1152
|
-
c_space+
|
1153
|
-
c_eol
|
1154
|
-
c_any
|
1161
|
+
c_space+ => extend_string_space;
|
1162
|
+
c_eol => extend_string_eol;
|
1163
|
+
c_any => extend_string;
|
1155
1164
|
*|;
|
1156
1165
|
|
1157
1166
|
regexp_modifiers := |*
|
@@ -1267,6 +1276,12 @@ class Parser::Lexer
|
|
1267
1276
|
|
1268
1277
|
e_lbrack = '[' % {
|
1269
1278
|
@cond.push(false); @cmdarg.push(false)
|
1279
|
+
|
1280
|
+
@paren_nest += 1
|
1281
|
+
};
|
1282
|
+
|
1283
|
+
e_rbrack = ']' % {
|
1284
|
+
@paren_nest -= 1
|
1270
1285
|
};
|
1271
1286
|
|
1272
1287
|
# Ruby 1.9 lambdas require parentheses counting in order to
|
@@ -1324,36 +1339,6 @@ class Parser::Lexer
|
|
1324
1339
|
fnext *stack_pop; fbreak;
|
1325
1340
|
};
|
1326
1341
|
|
1327
|
-
'@' [0-9]+
|
1328
|
-
=> {
|
1329
|
-
if @version < 27
|
1330
|
-
diagnostic :error, :ivar_name, { :name => tok }
|
1331
|
-
end
|
1332
|
-
|
1333
|
-
value = tok[1..-1]
|
1334
|
-
|
1335
|
-
if value[0] == '0'
|
1336
|
-
diagnostic :error, :leading_zero_in_numparam, nil, range(@ts, @te)
|
1337
|
-
end
|
1338
|
-
|
1339
|
-
if value.to_i > NUMPARAM_MAX
|
1340
|
-
diagnostic :error, :too_large_numparam, nil, range(@ts, @te)
|
1341
|
-
end
|
1342
|
-
|
1343
|
-
if !@context.in_block? && !@context.in_lambda?
|
1344
|
-
diagnostic :error, :numparam_outside_block, nil, range(@ts, @te)
|
1345
|
-
end
|
1346
|
-
|
1347
|
-
if !@max_numparam_stack.can_have_numparams?
|
1348
|
-
diagnostic :error, :ordinary_param_defined, nil, range(@ts, @te)
|
1349
|
-
end
|
1350
|
-
|
1351
|
-
@max_numparam_stack.register(value.to_i)
|
1352
|
-
|
1353
|
-
emit(:tNUMPARAM, tok[1..-1])
|
1354
|
-
fnext *stack_pop; fbreak;
|
1355
|
-
};
|
1356
|
-
|
1357
1342
|
instance_var_v
|
1358
1343
|
=> {
|
1359
1344
|
if tok =~ /^@[0-9]/
|
@@ -1519,6 +1504,7 @@ class Parser::Lexer
|
|
1519
1504
|
emit(:tLCURLY, '{'.freeze, @te - 1, @te)
|
1520
1505
|
end
|
1521
1506
|
@command_start = true
|
1507
|
+
@paren_nest += 1
|
1522
1508
|
fnext expr_value; fbreak;
|
1523
1509
|
};
|
1524
1510
|
|
@@ -1679,6 +1665,7 @@ class Parser::Lexer
|
|
1679
1665
|
else
|
1680
1666
|
emit(:tLBRACE_ARG, '{'.freeze)
|
1681
1667
|
end
|
1668
|
+
@paren_nest += 1
|
1682
1669
|
@command_start = true
|
1683
1670
|
fnext expr_value; fbreak;
|
1684
1671
|
};
|
@@ -1927,6 +1914,24 @@ class Parser::Lexer
|
|
1927
1914
|
fgoto expr_end;
|
1928
1915
|
};
|
1929
1916
|
|
1917
|
+
#
|
1918
|
+
# AMBIGUOUS EMPTY BLOCK ARGUMENTS
|
1919
|
+
#
|
1920
|
+
|
1921
|
+
# Ruby >= 2.7 emits it as two tPIPE terminals
|
1922
|
+
# while Ruby < 2.7 as a single tOROP (like in `a || b`)
|
1923
|
+
'||'
|
1924
|
+
=> {
|
1925
|
+
if @version >= 27
|
1926
|
+
emit(:tPIPE, tok(@ts, @ts + 1), @ts, @ts + 1)
|
1927
|
+
fhold;
|
1928
|
+
fnext expr_beg; fbreak;
|
1929
|
+
else
|
1930
|
+
p -= 2
|
1931
|
+
fgoto expr_end;
|
1932
|
+
end
|
1933
|
+
};
|
1934
|
+
|
1930
1935
|
#
|
1931
1936
|
# KEYWORDS AND PUNCTUATION
|
1932
1937
|
#
|
@@ -1941,6 +1946,7 @@ class Parser::Lexer
|
|
1941
1946
|
else
|
1942
1947
|
emit(:tLBRACE, '{'.freeze)
|
1943
1948
|
end
|
1949
|
+
@paren_nest += 1
|
1944
1950
|
fbreak;
|
1945
1951
|
};
|
1946
1952
|
|
@@ -2333,24 +2339,6 @@ class Parser::Lexer
|
|
2333
2339
|
# METHOD CALLS
|
2334
2340
|
#
|
2335
2341
|
|
2336
|
-
'.:' w_space+
|
2337
|
-
=> { emit(:tDOT, '.', @ts, @ts + 1)
|
2338
|
-
emit(:tCOLON, ':', @ts + 1, @ts + 2)
|
2339
|
-
p = p - tok.length + 2
|
2340
|
-
fnext expr_dot; fbreak; };
|
2341
|
-
|
2342
|
-
'.:'
|
2343
|
-
=> {
|
2344
|
-
if @version >= 27
|
2345
|
-
emit_table(PUNCTUATION)
|
2346
|
-
else
|
2347
|
-
emit(:tDOT, tok(@ts, @ts + 1), @ts, @ts + 1)
|
2348
|
-
fhold;
|
2349
|
-
end
|
2350
|
-
|
2351
|
-
fnext expr_dot; fbreak;
|
2352
|
-
};
|
2353
|
-
|
2354
2342
|
'.' | '&.' | '::'
|
2355
2343
|
=> { emit_table(PUNCTUATION)
|
2356
2344
|
fnext expr_dot; fbreak; };
|
@@ -2394,7 +2382,7 @@ class Parser::Lexer
|
|
2394
2382
|
=> { emit_table(PUNCTUATION)
|
2395
2383
|
fnext expr_beg; fbreak; };
|
2396
2384
|
|
2397
|
-
e_rbrace | e_rparen |
|
2385
|
+
e_rbrace | e_rparen | e_rbrack
|
2398
2386
|
=> {
|
2399
2387
|
emit_table(PUNCTUATION)
|
2400
2388
|
|
@@ -2431,6 +2419,17 @@ class Parser::Lexer
|
|
2431
2419
|
=> { emit(:tLBRACK2, '['.freeze)
|
2432
2420
|
fnext expr_beg; fbreak; };
|
2433
2421
|
|
2422
|
+
'...' c_nl
|
2423
|
+
=> {
|
2424
|
+
if @paren_nest == 0
|
2425
|
+
diagnostic :warning, :triple_dot_at_eol, nil, range(@ts, @te - 1)
|
2426
|
+
end
|
2427
|
+
|
2428
|
+
emit(:tDOT3, '...'.freeze, @ts, @te - 1)
|
2429
|
+
fhold;
|
2430
|
+
fnext expr_beg; fbreak;
|
2431
|
+
};
|
2432
|
+
|
2434
2433
|
punctuation_end
|
2435
2434
|
=> { emit_table(PUNCTUATION)
|
2436
2435
|
fnext expr_beg; fbreak; };
|
@@ -3,72 +3,71 @@
|
|
3
3
|
module Parser
|
4
4
|
|
5
5
|
class Lexer::Dedenter
|
6
|
+
# Tab (\t) counts as 8 spaces
|
7
|
+
TAB_WIDTH = 8
|
8
|
+
|
6
9
|
def initialize(dedent_level)
|
7
10
|
@dedent_level = dedent_level
|
8
11
|
@at_line_begin = true
|
9
12
|
@indent_level = 0
|
10
13
|
end
|
11
14
|
|
15
|
+
# For a heredoc like
|
16
|
+
# <<-HERE
|
17
|
+
# a
|
18
|
+
# b
|
19
|
+
# HERE
|
20
|
+
# this method gets called with " a\n" and " b\n"
|
21
|
+
#
|
22
|
+
# However, the following heredoc:
|
23
|
+
#
|
24
|
+
# <<-HERE
|
25
|
+
# a\
|
26
|
+
# b
|
27
|
+
# HERE
|
28
|
+
# calls this method only once with a string " a\\\n b\n"
|
29
|
+
#
|
30
|
+
# This is important because technically it's a single line,
|
31
|
+
# but it has to be concatenated __after__ dedenting.
|
32
|
+
#
|
33
|
+
# It has no effect for non-squiggly heredocs, i.e. it simply removes "\\\n"
|
34
|
+
# Of course, lexer could do it but once again: it's all because of dedenting.
|
35
|
+
#
|
12
36
|
def dedent(string)
|
13
|
-
|
14
|
-
last_index = string.length - 1
|
15
|
-
escape = false
|
16
|
-
_at_line_begin = nil
|
37
|
+
lines = string.split("\\\n")
|
17
38
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
offset += 1
|
24
|
-
_at_line_begin = @at_line_begin
|
25
|
-
@at_line_begin = false
|
26
|
-
elsif escape
|
27
|
-
if char == ?\n
|
28
|
-
# trimming \n, starting a new line
|
29
|
-
string.slice!(index - offset)
|
30
|
-
offset += 1
|
31
|
-
@at_line_begin = true
|
32
|
-
space_begin = space_end = index - offset
|
33
|
-
@indent_level = 0
|
34
|
-
elsif char == ?n
|
35
|
-
# replacing \\n to \n
|
36
|
-
string.slice!(index - offset)
|
37
|
-
string.insert(index - offset, ?\n)
|
38
|
-
else
|
39
|
-
# exiting escape mode as it's not an escape sequence
|
40
|
-
@at_line_begin = _at_line_begin
|
41
|
-
escape = false
|
42
|
-
redo
|
43
|
-
end
|
44
|
-
escape = false
|
45
|
-
elsif @at_line_begin
|
46
|
-
if char == ?\n || @indent_level >= @dedent_level
|
47
|
-
string.slice!(space_begin...space_end)
|
48
|
-
offset += space_end - space_begin
|
49
|
-
@at_line_begin = false
|
50
|
-
end
|
39
|
+
if @at_line_begin
|
40
|
+
lines_to_dedent = lines
|
41
|
+
else
|
42
|
+
_first, *lines_to_dedent = lines
|
43
|
+
end
|
51
44
|
|
45
|
+
lines_to_dedent.each do |line|
|
46
|
+
left_to_remove = @dedent_level
|
47
|
+
remove = 0
|
48
|
+
|
49
|
+
line.each_char do |char|
|
50
|
+
break if left_to_remove <= 0
|
52
51
|
case char
|
53
52
|
when ?\s
|
54
|
-
|
55
|
-
|
53
|
+
remove += 1
|
54
|
+
left_to_remove -= 1
|
56
55
|
when ?\t
|
57
|
-
|
58
|
-
|
56
|
+
break if TAB_WIDTH * (remove / TAB_WIDTH + 1) > @dedent_level
|
57
|
+
remove += 1
|
58
|
+
left_to_remove -= TAB_WIDTH
|
59
|
+
else
|
60
|
+
# no more spaces or tabs
|
61
|
+
break
|
59
62
|
end
|
60
|
-
elsif char == ?\n && index == last_index
|
61
|
-
@at_line_begin = true
|
62
|
-
@indent_level = 0
|
63
|
-
space_begin = space_end = index - offset + 1
|
64
63
|
end
|
65
|
-
end
|
66
64
|
|
67
|
-
|
68
|
-
string.slice!(space_begin..space_end)
|
65
|
+
line.slice!(0, remove)
|
69
66
|
end
|
70
67
|
|
71
|
-
|
68
|
+
string.replace(lines.join)
|
69
|
+
|
70
|
+
@at_line_begin = string.end_with?("\n")
|
72
71
|
end
|
73
72
|
|
74
73
|
def interrupt
|
@@ -2,17 +2,23 @@
|
|
2
2
|
|
3
3
|
module Parser
|
4
4
|
|
5
|
-
class
|
5
|
+
class MaxNumparamStack
|
6
|
+
attr_reader :stack
|
7
|
+
|
6
8
|
def initialize
|
7
9
|
@stack = []
|
8
10
|
end
|
9
11
|
|
10
|
-
def
|
12
|
+
def has_ordinary_params!
|
11
13
|
set(-1)
|
12
14
|
end
|
13
15
|
|
14
|
-
def
|
15
|
-
top
|
16
|
+
def has_ordinary_params?
|
17
|
+
top < 0
|
18
|
+
end
|
19
|
+
|
20
|
+
def has_numparams?
|
21
|
+
top > 0
|
16
22
|
end
|
17
23
|
|
18
24
|
def register(numparam)
|