parser 2.6.5.0 → 2.7.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -0
- data/README.md +7 -0
- data/doc/AST_FORMAT.md +346 -20
- data/lib/parser.rb +3 -1
- data/lib/parser/ast/processor.rb +15 -0
- data/lib/parser/base.rb +19 -0
- data/lib/parser/builders/default.rb +245 -12
- data/lib/parser/context.rb +4 -0
- data/lib/parser/current.rb +4 -4
- data/lib/parser/current_arg_stack.rb +43 -0
- data/lib/parser/lexer.rl +93 -94
- data/lib/parser/lexer/dedenter.rb +48 -49
- data/lib/parser/{lexer/max_numparam_stack.rb → max_numparam_stack.rb} +10 -4
- data/lib/parser/messages.rb +34 -29
- data/lib/parser/meta.rb +6 -2
- data/lib/parser/ruby27.y +488 -35
- data/lib/parser/static_environment.rb +10 -0
- data/lib/parser/variables_stack.rb +32 -0
- data/lib/parser/version.rb +1 -1
- data/test/helper.rb +1 -0
- data/test/test_lexer.rb +7 -66
- data/test/test_parser.rb +1776 -123
- metadata +5 -3
data/lib/parser/lexer.rl
CHANGED
@@ -89,16 +89,13 @@ class Parser::Lexer
|
|
89
89
|
|
90
90
|
REGEXP_META_CHARACTERS = Regexp.union(*"\\$()*+.<>?[]^{|}".chars).freeze
|
91
91
|
|
92
|
-
NUMPARAM_MAX = 9
|
93
|
-
|
94
92
|
attr_reader :source_buffer
|
95
|
-
attr_reader :max_numparam_stack
|
96
93
|
|
97
94
|
attr_accessor :diagnostics
|
98
95
|
attr_accessor :static_env
|
99
96
|
attr_accessor :force_utf32
|
100
97
|
|
101
|
-
attr_accessor :cond, :cmdarg, :in_kwarg, :context
|
98
|
+
attr_accessor :cond, :cmdarg, :in_kwarg, :context, :command_start
|
102
99
|
|
103
100
|
attr_accessor :tokens, :comments
|
104
101
|
|
@@ -179,9 +176,6 @@ class Parser::Lexer
|
|
179
176
|
|
180
177
|
# State before =begin / =end block comment
|
181
178
|
@cs_before_block_comment = self.class.lex_en_line_begin
|
182
|
-
|
183
|
-
# Maximum numbered parameters stack
|
184
|
-
@max_numparam_stack = MaxNumparamStack.new
|
185
179
|
end
|
186
180
|
|
187
181
|
def source_buffer=(source_buffer)
|
@@ -255,10 +249,6 @@ class Parser::Lexer
|
|
255
249
|
@cond = @cond_stack.pop
|
256
250
|
end
|
257
251
|
|
258
|
-
def max_numparam
|
259
|
-
@max_numparam_stack.top
|
260
|
-
end
|
261
|
-
|
262
252
|
def dedent_level
|
263
253
|
# We erase @dedent_level as a precaution to avoid accidentally
|
264
254
|
# using a stale value.
|
@@ -457,7 +447,7 @@ class Parser::Lexer
|
|
457
447
|
'=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
|
458
448
|
'<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
|
459
449
|
'{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
|
460
|
-
'!@' => :tBANG, '&.' => :tANDDOT,
|
450
|
+
'!@' => :tBANG, '&.' => :tANDDOT,
|
461
451
|
}
|
462
452
|
|
463
453
|
PUNCTUATION_BEGIN = {
|
@@ -1029,6 +1019,20 @@ class Parser::Lexer
|
|
1029
1019
|
fcall expr_variable;
|
1030
1020
|
}
|
1031
1021
|
|
1022
|
+
# Special case for Ruby >= 2.7
|
1023
|
+
# If an interpolated instance/class variable name starts with a digit, we parse it as a plain substring
|
1024
|
+
# However, "#$1" is still a regular interpolation
|
1025
|
+
interp_digit_var = '#' ('@' | '@@') digit c_alpha*;
|
1026
|
+
|
1027
|
+
action extend_interp_digit_var {
|
1028
|
+
if @version >= 27
|
1029
|
+
literal.extend_string(tok, @ts, @te)
|
1030
|
+
else
|
1031
|
+
message = tok.start_with?('#@@') ? :cvar_name : :ivar_name
|
1032
|
+
diagnostic :error, message, { :name => tok(@ts + 1, @te) }, range(@ts + 1, @te)
|
1033
|
+
end
|
1034
|
+
}
|
1035
|
+
|
1032
1036
|
# Interpolations with code blocks must match nested curly braces, as
|
1033
1037
|
# interpolation ending is ambiguous with a block ending. So, every
|
1034
1038
|
# opening and closing brace should be matched with e_[lr]brace rules,
|
@@ -1060,6 +1064,7 @@ class Parser::Lexer
|
|
1060
1064
|
emit(:tRCURLY, '}'.freeze, p - 1, p)
|
1061
1065
|
@cond.lexpop
|
1062
1066
|
@cmdarg.lexpop
|
1067
|
+
@paren_nest -= 1
|
1063
1068
|
else
|
1064
1069
|
emit(:tSTRING_DEND, '}'.freeze, p - 1, p)
|
1065
1070
|
end
|
@@ -1098,60 +1103,64 @@ class Parser::Lexer
|
|
1098
1103
|
# above.
|
1099
1104
|
|
1100
1105
|
interp_words := |*
|
1101
|
-
interp_code
|
1102
|
-
|
1103
|
-
|
1104
|
-
|
1105
|
-
|
1106
|
-
|
1106
|
+
interp_code => extend_interp_code;
|
1107
|
+
interp_digit_var => extend_interp_digit_var;
|
1108
|
+
interp_var => extend_interp_var;
|
1109
|
+
e_bs escape => extend_string_escaped;
|
1110
|
+
c_space+ => extend_string_space;
|
1111
|
+
c_eol => extend_string_eol;
|
1112
|
+
c_any => extend_string;
|
1107
1113
|
*|;
|
1108
1114
|
|
1109
1115
|
interp_string := |*
|
1110
|
-
interp_code
|
1111
|
-
|
1112
|
-
|
1113
|
-
|
1114
|
-
|
1116
|
+
interp_code => extend_interp_code;
|
1117
|
+
interp_digit_var => extend_interp_digit_var;
|
1118
|
+
interp_var => extend_interp_var;
|
1119
|
+
e_bs escape => extend_string_escaped;
|
1120
|
+
c_eol => extend_string_eol;
|
1121
|
+
c_any => extend_string;
|
1115
1122
|
*|;
|
1116
1123
|
|
1117
1124
|
plain_words := |*
|
1118
|
-
e_bs c_any
|
1119
|
-
c_space+
|
1120
|
-
c_eol
|
1121
|
-
c_any
|
1125
|
+
e_bs c_any => extend_string_escaped;
|
1126
|
+
c_space+ => extend_string_space;
|
1127
|
+
c_eol => extend_string_eol;
|
1128
|
+
c_any => extend_string;
|
1122
1129
|
*|;
|
1123
1130
|
|
1124
1131
|
plain_string := |*
|
1125
|
-
'\\' c_nl
|
1126
|
-
e_bs c_any
|
1127
|
-
c_eol
|
1128
|
-
c_any
|
1132
|
+
'\\' c_nl => extend_string_eol;
|
1133
|
+
e_bs c_any => extend_string_escaped;
|
1134
|
+
c_eol => extend_string_eol;
|
1135
|
+
c_any => extend_string;
|
1129
1136
|
*|;
|
1130
1137
|
|
1131
1138
|
interp_backslash_delimited := |*
|
1132
|
-
interp_code
|
1133
|
-
|
1134
|
-
|
1135
|
-
|
1139
|
+
interp_code => extend_interp_code;
|
1140
|
+
interp_digit_var => extend_interp_digit_var;
|
1141
|
+
interp_var => extend_interp_var;
|
1142
|
+
c_eol => extend_string_eol;
|
1143
|
+
c_any => extend_string;
|
1136
1144
|
*|;
|
1137
1145
|
|
1138
1146
|
plain_backslash_delimited := |*
|
1139
|
-
c_eol
|
1140
|
-
c_any
|
1147
|
+
c_eol => extend_string_eol;
|
1148
|
+
c_any => extend_string;
|
1141
1149
|
*|;
|
1142
1150
|
|
1143
1151
|
interp_backslash_delimited_words := |*
|
1144
|
-
interp_code
|
1145
|
-
|
1146
|
-
|
1147
|
-
|
1148
|
-
|
1152
|
+
interp_code => extend_interp_code;
|
1153
|
+
interp_digit_var => extend_interp_digit_var;
|
1154
|
+
interp_var => extend_interp_var;
|
1155
|
+
c_space+ => extend_string_space;
|
1156
|
+
c_eol => extend_string_eol;
|
1157
|
+
c_any => extend_string;
|
1149
1158
|
*|;
|
1150
1159
|
|
1151
1160
|
plain_backslash_delimited_words := |*
|
1152
|
-
c_space+
|
1153
|
-
c_eol
|
1154
|
-
c_any
|
1161
|
+
c_space+ => extend_string_space;
|
1162
|
+
c_eol => extend_string_eol;
|
1163
|
+
c_any => extend_string;
|
1155
1164
|
*|;
|
1156
1165
|
|
1157
1166
|
regexp_modifiers := |*
|
@@ -1267,6 +1276,12 @@ class Parser::Lexer
|
|
1267
1276
|
|
1268
1277
|
e_lbrack = '[' % {
|
1269
1278
|
@cond.push(false); @cmdarg.push(false)
|
1279
|
+
|
1280
|
+
@paren_nest += 1
|
1281
|
+
};
|
1282
|
+
|
1283
|
+
e_rbrack = ']' % {
|
1284
|
+
@paren_nest -= 1
|
1270
1285
|
};
|
1271
1286
|
|
1272
1287
|
# Ruby 1.9 lambdas require parentheses counting in order to
|
@@ -1324,36 +1339,6 @@ class Parser::Lexer
|
|
1324
1339
|
fnext *stack_pop; fbreak;
|
1325
1340
|
};
|
1326
1341
|
|
1327
|
-
'@' [0-9]+
|
1328
|
-
=> {
|
1329
|
-
if @version < 27
|
1330
|
-
diagnostic :error, :ivar_name, { :name => tok }
|
1331
|
-
end
|
1332
|
-
|
1333
|
-
value = tok[1..-1]
|
1334
|
-
|
1335
|
-
if value[0] == '0'
|
1336
|
-
diagnostic :error, :leading_zero_in_numparam, nil, range(@ts, @te)
|
1337
|
-
end
|
1338
|
-
|
1339
|
-
if value.to_i > NUMPARAM_MAX
|
1340
|
-
diagnostic :error, :too_large_numparam, nil, range(@ts, @te)
|
1341
|
-
end
|
1342
|
-
|
1343
|
-
if !@context.in_block? && !@context.in_lambda?
|
1344
|
-
diagnostic :error, :numparam_outside_block, nil, range(@ts, @te)
|
1345
|
-
end
|
1346
|
-
|
1347
|
-
if !@max_numparam_stack.can_have_numparams?
|
1348
|
-
diagnostic :error, :ordinary_param_defined, nil, range(@ts, @te)
|
1349
|
-
end
|
1350
|
-
|
1351
|
-
@max_numparam_stack.register(value.to_i)
|
1352
|
-
|
1353
|
-
emit(:tNUMPARAM, tok[1..-1])
|
1354
|
-
fnext *stack_pop; fbreak;
|
1355
|
-
};
|
1356
|
-
|
1357
1342
|
instance_var_v
|
1358
1343
|
=> {
|
1359
1344
|
if tok =~ /^@[0-9]/
|
@@ -1519,6 +1504,7 @@ class Parser::Lexer
|
|
1519
1504
|
emit(:tLCURLY, '{'.freeze, @te - 1, @te)
|
1520
1505
|
end
|
1521
1506
|
@command_start = true
|
1507
|
+
@paren_nest += 1
|
1522
1508
|
fnext expr_value; fbreak;
|
1523
1509
|
};
|
1524
1510
|
|
@@ -1679,6 +1665,7 @@ class Parser::Lexer
|
|
1679
1665
|
else
|
1680
1666
|
emit(:tLBRACE_ARG, '{'.freeze)
|
1681
1667
|
end
|
1668
|
+
@paren_nest += 1
|
1682
1669
|
@command_start = true
|
1683
1670
|
fnext expr_value; fbreak;
|
1684
1671
|
};
|
@@ -1927,6 +1914,24 @@ class Parser::Lexer
|
|
1927
1914
|
fgoto expr_end;
|
1928
1915
|
};
|
1929
1916
|
|
1917
|
+
#
|
1918
|
+
# AMBIGUOUS EMPTY BLOCK ARGUMENTS
|
1919
|
+
#
|
1920
|
+
|
1921
|
+
# Ruby >= 2.7 emits it as two tPIPE terminals
|
1922
|
+
# while Ruby < 2.7 as a single tOROP (like in `a || b`)
|
1923
|
+
'||'
|
1924
|
+
=> {
|
1925
|
+
if @version >= 27
|
1926
|
+
emit(:tPIPE, tok(@ts, @ts + 1), @ts, @ts + 1)
|
1927
|
+
fhold;
|
1928
|
+
fnext expr_beg; fbreak;
|
1929
|
+
else
|
1930
|
+
p -= 2
|
1931
|
+
fgoto expr_end;
|
1932
|
+
end
|
1933
|
+
};
|
1934
|
+
|
1930
1935
|
#
|
1931
1936
|
# KEYWORDS AND PUNCTUATION
|
1932
1937
|
#
|
@@ -1941,6 +1946,7 @@ class Parser::Lexer
|
|
1941
1946
|
else
|
1942
1947
|
emit(:tLBRACE, '{'.freeze)
|
1943
1948
|
end
|
1949
|
+
@paren_nest += 1
|
1944
1950
|
fbreak;
|
1945
1951
|
};
|
1946
1952
|
|
@@ -2333,24 +2339,6 @@ class Parser::Lexer
|
|
2333
2339
|
# METHOD CALLS
|
2334
2340
|
#
|
2335
2341
|
|
2336
|
-
'.:' w_space+
|
2337
|
-
=> { emit(:tDOT, '.', @ts, @ts + 1)
|
2338
|
-
emit(:tCOLON, ':', @ts + 1, @ts + 2)
|
2339
|
-
p = p - tok.length + 2
|
2340
|
-
fnext expr_dot; fbreak; };
|
2341
|
-
|
2342
|
-
'.:'
|
2343
|
-
=> {
|
2344
|
-
if @version >= 27
|
2345
|
-
emit_table(PUNCTUATION)
|
2346
|
-
else
|
2347
|
-
emit(:tDOT, tok(@ts, @ts + 1), @ts, @ts + 1)
|
2348
|
-
fhold;
|
2349
|
-
end
|
2350
|
-
|
2351
|
-
fnext expr_dot; fbreak;
|
2352
|
-
};
|
2353
|
-
|
2354
2342
|
'.' | '&.' | '::'
|
2355
2343
|
=> { emit_table(PUNCTUATION)
|
2356
2344
|
fnext expr_dot; fbreak; };
|
@@ -2394,7 +2382,7 @@ class Parser::Lexer
|
|
2394
2382
|
=> { emit_table(PUNCTUATION)
|
2395
2383
|
fnext expr_beg; fbreak; };
|
2396
2384
|
|
2397
|
-
e_rbrace | e_rparen |
|
2385
|
+
e_rbrace | e_rparen | e_rbrack
|
2398
2386
|
=> {
|
2399
2387
|
emit_table(PUNCTUATION)
|
2400
2388
|
|
@@ -2431,6 +2419,17 @@ class Parser::Lexer
|
|
2431
2419
|
=> { emit(:tLBRACK2, '['.freeze)
|
2432
2420
|
fnext expr_beg; fbreak; };
|
2433
2421
|
|
2422
|
+
'...' c_nl
|
2423
|
+
=> {
|
2424
|
+
if @paren_nest == 0
|
2425
|
+
diagnostic :warning, :triple_dot_at_eol, nil, range(@ts, @te - 1)
|
2426
|
+
end
|
2427
|
+
|
2428
|
+
emit(:tDOT3, '...'.freeze, @ts, @te - 1)
|
2429
|
+
fhold;
|
2430
|
+
fnext expr_beg; fbreak;
|
2431
|
+
};
|
2432
|
+
|
2434
2433
|
punctuation_end
|
2435
2434
|
=> { emit_table(PUNCTUATION)
|
2436
2435
|
fnext expr_beg; fbreak; };
|
@@ -3,72 +3,71 @@
|
|
3
3
|
module Parser
|
4
4
|
|
5
5
|
class Lexer::Dedenter
|
6
|
+
# Tab (\t) counts as 8 spaces
|
7
|
+
TAB_WIDTH = 8
|
8
|
+
|
6
9
|
def initialize(dedent_level)
|
7
10
|
@dedent_level = dedent_level
|
8
11
|
@at_line_begin = true
|
9
12
|
@indent_level = 0
|
10
13
|
end
|
11
14
|
|
15
|
+
# For a heredoc like
|
16
|
+
# <<-HERE
|
17
|
+
# a
|
18
|
+
# b
|
19
|
+
# HERE
|
20
|
+
# this method gets called with " a\n" and " b\n"
|
21
|
+
#
|
22
|
+
# However, the following heredoc:
|
23
|
+
#
|
24
|
+
# <<-HERE
|
25
|
+
# a\
|
26
|
+
# b
|
27
|
+
# HERE
|
28
|
+
# calls this method only once with a string " a\\\n b\n"
|
29
|
+
#
|
30
|
+
# This is important because technically it's a single line,
|
31
|
+
# but it has to be concatenated __after__ dedenting.
|
32
|
+
#
|
33
|
+
# It has no effect for non-squiggly heredocs, i.e. it simply removes "\\\n"
|
34
|
+
# Of course, the lexer could do it, but once again: it's all because of dedenting.
|
35
|
+
#
|
12
36
|
def dedent(string)
|
13
|
-
|
14
|
-
last_index = string.length - 1
|
15
|
-
escape = false
|
16
|
-
_at_line_begin = nil
|
37
|
+
lines = string.split("\\\n")
|
17
38
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
offset += 1
|
24
|
-
_at_line_begin = @at_line_begin
|
25
|
-
@at_line_begin = false
|
26
|
-
elsif escape
|
27
|
-
if char == ?\n
|
28
|
-
# trimming \n, starting a new line
|
29
|
-
string.slice!(index - offset)
|
30
|
-
offset += 1
|
31
|
-
@at_line_begin = true
|
32
|
-
space_begin = space_end = index - offset
|
33
|
-
@indent_level = 0
|
34
|
-
elsif char == ?n
|
35
|
-
# replacing \\n to \n
|
36
|
-
string.slice!(index - offset)
|
37
|
-
string.insert(index - offset, ?\n)
|
38
|
-
else
|
39
|
-
# exiting escape mode as it's not an escape sequence
|
40
|
-
@at_line_begin = _at_line_begin
|
41
|
-
escape = false
|
42
|
-
redo
|
43
|
-
end
|
44
|
-
escape = false
|
45
|
-
elsif @at_line_begin
|
46
|
-
if char == ?\n || @indent_level >= @dedent_level
|
47
|
-
string.slice!(space_begin...space_end)
|
48
|
-
offset += space_end - space_begin
|
49
|
-
@at_line_begin = false
|
50
|
-
end
|
39
|
+
if @at_line_begin
|
40
|
+
lines_to_dedent = lines
|
41
|
+
else
|
42
|
+
_first, *lines_to_dedent = lines
|
43
|
+
end
|
51
44
|
|
45
|
+
lines_to_dedent.each do |line|
|
46
|
+
left_to_remove = @dedent_level
|
47
|
+
remove = 0
|
48
|
+
|
49
|
+
line.each_char do |char|
|
50
|
+
break if left_to_remove <= 0
|
52
51
|
case char
|
53
52
|
when ?\s
|
54
|
-
|
55
|
-
|
53
|
+
remove += 1
|
54
|
+
left_to_remove -= 1
|
56
55
|
when ?\t
|
57
|
-
|
58
|
-
|
56
|
+
break if TAB_WIDTH * (remove / TAB_WIDTH + 1) > @dedent_level
|
57
|
+
remove += 1
|
58
|
+
left_to_remove -= TAB_WIDTH
|
59
|
+
else
|
60
|
+
# no more spaces or tabs
|
61
|
+
break
|
59
62
|
end
|
60
|
-
elsif char == ?\n && index == last_index
|
61
|
-
@at_line_begin = true
|
62
|
-
@indent_level = 0
|
63
|
-
space_begin = space_end = index - offset + 1
|
64
63
|
end
|
65
|
-
end
|
66
64
|
|
67
|
-
|
68
|
-
string.slice!(space_begin..space_end)
|
65
|
+
line.slice!(0, remove)
|
69
66
|
end
|
70
67
|
|
71
|
-
|
68
|
+
string.replace(lines.join)
|
69
|
+
|
70
|
+
@at_line_begin = string.end_with?("\n")
|
72
71
|
end
|
73
72
|
|
74
73
|
def interrupt
|
@@ -2,17 +2,23 @@
|
|
2
2
|
|
3
3
|
module Parser
|
4
4
|
|
5
|
-
class
|
5
|
+
class MaxNumparamStack
|
6
|
+
attr_reader :stack
|
7
|
+
|
6
8
|
def initialize
|
7
9
|
@stack = []
|
8
10
|
end
|
9
11
|
|
10
|
-
def
|
12
|
+
def has_ordinary_params!
|
11
13
|
set(-1)
|
12
14
|
end
|
13
15
|
|
14
|
-
def
|
15
|
-
top
|
16
|
+
def has_ordinary_params?
|
17
|
+
top < 0
|
18
|
+
end
|
19
|
+
|
20
|
+
def has_numparams?
|
21
|
+
top > 0
|
16
22
|
end
|
17
23
|
|
18
24
|
def register(numparam)
|