parser 2.6.5.0 → 2.7.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +6 -0
- data/CHANGELOG.md +46 -0
- data/README.md +7 -0
- data/doc/AST_FORMAT.md +346 -20
- data/lib/parser.rb +3 -1
- data/lib/parser/ast/processor.rb +15 -0
- data/lib/parser/base.rb +19 -0
- data/lib/parser/builders/default.rb +245 -12
- data/lib/parser/context.rb +4 -0
- data/lib/parser/current.rb +4 -4
- data/lib/parser/current_arg_stack.rb +43 -0
- data/lib/parser/lexer.rb +11444 -11437
- data/lib/parser/lexer.rl +97 -94
- data/lib/parser/lexer/dedenter.rb +52 -49
- data/lib/parser/{lexer/max_numparam_stack.rb → max_numparam_stack.rb} +10 -4
- data/lib/parser/messages.rb +34 -29
- data/lib/parser/meta.rb +6 -2
- data/lib/parser/ruby27.rb +4174 -3318
- data/lib/parser/ruby27.y +488 -35
- data/lib/parser/static_environment.rb +10 -0
- data/lib/parser/variables_stack.rb +32 -0
- data/lib/parser/version.rb +1 -1
- data/test/helper.rb +1 -0
- data/test/parse_helper.rb +3 -0
- data/test/test_lexer.rb +7 -66
- data/test/test_parser.rb +1788 -123
- metadata +6 -4
data/lib/parser/lexer.rl
CHANGED
@@ -89,16 +89,13 @@ class Parser::Lexer
|
|
89
89
|
|
90
90
|
REGEXP_META_CHARACTERS = Regexp.union(*"\\$()*+.<>?[]^{|}".chars).freeze
|
91
91
|
|
92
|
-
NUMPARAM_MAX = 9
|
93
|
-
|
94
92
|
attr_reader :source_buffer
|
95
|
-
attr_reader :max_numparam_stack
|
96
93
|
|
97
94
|
attr_accessor :diagnostics
|
98
95
|
attr_accessor :static_env
|
99
96
|
attr_accessor :force_utf32
|
100
97
|
|
101
|
-
attr_accessor :cond, :cmdarg, :in_kwarg, :context
|
98
|
+
attr_accessor :cond, :cmdarg, :in_kwarg, :context, :command_start
|
102
99
|
|
103
100
|
attr_accessor :tokens, :comments
|
104
101
|
|
@@ -179,9 +176,6 @@ class Parser::Lexer
|
|
179
176
|
|
180
177
|
# State before =begin / =end block comment
|
181
178
|
@cs_before_block_comment = self.class.lex_en_line_begin
|
182
|
-
|
183
|
-
# Maximum numbered parameters stack
|
184
|
-
@max_numparam_stack = MaxNumparamStack.new
|
185
179
|
end
|
186
180
|
|
187
181
|
def source_buffer=(source_buffer)
|
@@ -255,10 +249,6 @@ class Parser::Lexer
|
|
255
249
|
@cond = @cond_stack.pop
|
256
250
|
end
|
257
251
|
|
258
|
-
def max_numparam
|
259
|
-
@max_numparam_stack.top
|
260
|
-
end
|
261
|
-
|
262
252
|
def dedent_level
|
263
253
|
# We erase @dedent_level as a precaution to avoid accidentally
|
264
254
|
# using a stale value.
|
@@ -457,7 +447,7 @@ class Parser::Lexer
|
|
457
447
|
'=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
|
458
448
|
'<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
|
459
449
|
'{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
|
460
|
-
'!@' => :tBANG, '&.' => :tANDDOT,
|
450
|
+
'!@' => :tBANG, '&.' => :tANDDOT,
|
461
451
|
}
|
462
452
|
|
463
453
|
PUNCTUATION_BEGIN = {
|
@@ -1029,6 +1019,20 @@ class Parser::Lexer
|
|
1029
1019
|
fcall expr_variable;
|
1030
1020
|
}
|
1031
1021
|
|
1022
|
+
# Special case for Ruby >= 2.7
|
1023
|
+
# If interpolated instance/class variable starts with a digit we parse it as a plain substring
|
1024
|
+
# However, "#$1" is still a regular interpolation
|
1025
|
+
interp_digit_var = '#' ('@' | '@@') digit c_alpha*;
|
1026
|
+
|
1027
|
+
action extend_interp_digit_var {
|
1028
|
+
if @version >= 27
|
1029
|
+
literal.extend_string(tok, @ts, @te)
|
1030
|
+
else
|
1031
|
+
message = tok.start_with?('#@@') ? :cvar_name : :ivar_name
|
1032
|
+
diagnostic :error, message, { :name => tok(@ts + 1, @te) }, range(@ts + 1, @te)
|
1033
|
+
end
|
1034
|
+
}
|
1035
|
+
|
1032
1036
|
# Interpolations with code blocks must match nested curly braces, as
|
1033
1037
|
# interpolation ending is ambiguous with a block ending. So, every
|
1034
1038
|
# opening and closing brace should be matched with e_[lr]brace rules,
|
@@ -1074,6 +1078,8 @@ class Parser::Lexer
|
|
1074
1078
|
fbreak;
|
1075
1079
|
end
|
1076
1080
|
end
|
1081
|
+
|
1082
|
+
@paren_nest -= 1
|
1077
1083
|
};
|
1078
1084
|
|
1079
1085
|
action extend_interp_code {
|
@@ -1098,60 +1104,64 @@ class Parser::Lexer
|
|
1098
1104
|
# above.
|
1099
1105
|
|
1100
1106
|
interp_words := |*
|
1101
|
-
interp_code
|
1102
|
-
|
1103
|
-
|
1104
|
-
|
1105
|
-
|
1106
|
-
|
1107
|
+
interp_code => extend_interp_code;
|
1108
|
+
interp_digit_var => extend_interp_digit_var;
|
1109
|
+
interp_var => extend_interp_var;
|
1110
|
+
e_bs escape => extend_string_escaped;
|
1111
|
+
c_space+ => extend_string_space;
|
1112
|
+
c_eol => extend_string_eol;
|
1113
|
+
c_any => extend_string;
|
1107
1114
|
*|;
|
1108
1115
|
|
1109
1116
|
interp_string := |*
|
1110
|
-
interp_code
|
1111
|
-
|
1112
|
-
|
1113
|
-
|
1114
|
-
|
1117
|
+
interp_code => extend_interp_code;
|
1118
|
+
interp_digit_var => extend_interp_digit_var;
|
1119
|
+
interp_var => extend_interp_var;
|
1120
|
+
e_bs escape => extend_string_escaped;
|
1121
|
+
c_eol => extend_string_eol;
|
1122
|
+
c_any => extend_string;
|
1115
1123
|
*|;
|
1116
1124
|
|
1117
1125
|
plain_words := |*
|
1118
|
-
e_bs c_any
|
1119
|
-
c_space+
|
1120
|
-
c_eol
|
1121
|
-
c_any
|
1126
|
+
e_bs c_any => extend_string_escaped;
|
1127
|
+
c_space+ => extend_string_space;
|
1128
|
+
c_eol => extend_string_eol;
|
1129
|
+
c_any => extend_string;
|
1122
1130
|
*|;
|
1123
1131
|
|
1124
1132
|
plain_string := |*
|
1125
|
-
'\\' c_nl
|
1126
|
-
e_bs c_any
|
1127
|
-
c_eol
|
1128
|
-
c_any
|
1133
|
+
'\\' c_nl => extend_string_eol;
|
1134
|
+
e_bs c_any => extend_string_escaped;
|
1135
|
+
c_eol => extend_string_eol;
|
1136
|
+
c_any => extend_string;
|
1129
1137
|
*|;
|
1130
1138
|
|
1131
1139
|
interp_backslash_delimited := |*
|
1132
|
-
interp_code
|
1133
|
-
|
1134
|
-
|
1135
|
-
|
1140
|
+
interp_code => extend_interp_code;
|
1141
|
+
interp_digit_var => extend_interp_digit_var;
|
1142
|
+
interp_var => extend_interp_var;
|
1143
|
+
c_eol => extend_string_eol;
|
1144
|
+
c_any => extend_string;
|
1136
1145
|
*|;
|
1137
1146
|
|
1138
1147
|
plain_backslash_delimited := |*
|
1139
|
-
c_eol
|
1140
|
-
c_any
|
1148
|
+
c_eol => extend_string_eol;
|
1149
|
+
c_any => extend_string;
|
1141
1150
|
*|;
|
1142
1151
|
|
1143
1152
|
interp_backslash_delimited_words := |*
|
1144
|
-
interp_code
|
1145
|
-
|
1146
|
-
|
1147
|
-
|
1148
|
-
|
1153
|
+
interp_code => extend_interp_code;
|
1154
|
+
interp_digit_var => extend_interp_digit_var;
|
1155
|
+
interp_var => extend_interp_var;
|
1156
|
+
c_space+ => extend_string_space;
|
1157
|
+
c_eol => extend_string_eol;
|
1158
|
+
c_any => extend_string;
|
1149
1159
|
*|;
|
1150
1160
|
|
1151
1161
|
plain_backslash_delimited_words := |*
|
1152
|
-
c_space+
|
1153
|
-
c_eol
|
1154
|
-
c_any
|
1162
|
+
c_space+ => extend_string_space;
|
1163
|
+
c_eol => extend_string_eol;
|
1164
|
+
c_any => extend_string;
|
1155
1165
|
*|;
|
1156
1166
|
|
1157
1167
|
regexp_modifiers := |*
|
@@ -1267,6 +1277,12 @@ class Parser::Lexer
|
|
1267
1277
|
|
1268
1278
|
e_lbrack = '[' % {
|
1269
1279
|
@cond.push(false); @cmdarg.push(false)
|
1280
|
+
|
1281
|
+
@paren_nest += 1
|
1282
|
+
};
|
1283
|
+
|
1284
|
+
e_rbrack = ']' % {
|
1285
|
+
@paren_nest -= 1
|
1270
1286
|
};
|
1271
1287
|
|
1272
1288
|
# Ruby 1.9 lambdas require parentheses counting in order to
|
@@ -1324,36 +1340,6 @@ class Parser::Lexer
|
|
1324
1340
|
fnext *stack_pop; fbreak;
|
1325
1341
|
};
|
1326
1342
|
|
1327
|
-
'@' [0-9]+
|
1328
|
-
=> {
|
1329
|
-
if @version < 27
|
1330
|
-
diagnostic :error, :ivar_name, { :name => tok }
|
1331
|
-
end
|
1332
|
-
|
1333
|
-
value = tok[1..-1]
|
1334
|
-
|
1335
|
-
if value[0] == '0'
|
1336
|
-
diagnostic :error, :leading_zero_in_numparam, nil, range(@ts, @te)
|
1337
|
-
end
|
1338
|
-
|
1339
|
-
if value.to_i > NUMPARAM_MAX
|
1340
|
-
diagnostic :error, :too_large_numparam, nil, range(@ts, @te)
|
1341
|
-
end
|
1342
|
-
|
1343
|
-
if !@context.in_block? && !@context.in_lambda?
|
1344
|
-
diagnostic :error, :numparam_outside_block, nil, range(@ts, @te)
|
1345
|
-
end
|
1346
|
-
|
1347
|
-
if !@max_numparam_stack.can_have_numparams?
|
1348
|
-
diagnostic :error, :ordinary_param_defined, nil, range(@ts, @te)
|
1349
|
-
end
|
1350
|
-
|
1351
|
-
@max_numparam_stack.register(value.to_i)
|
1352
|
-
|
1353
|
-
emit(:tNUMPARAM, tok[1..-1])
|
1354
|
-
fnext *stack_pop; fbreak;
|
1355
|
-
};
|
1356
|
-
|
1357
1343
|
instance_var_v
|
1358
1344
|
=> {
|
1359
1345
|
if tok =~ /^@[0-9]/
|
@@ -1519,6 +1505,7 @@ class Parser::Lexer
|
|
1519
1505
|
emit(:tLCURLY, '{'.freeze, @te - 1, @te)
|
1520
1506
|
end
|
1521
1507
|
@command_start = true
|
1508
|
+
@paren_nest += 1
|
1522
1509
|
fnext expr_value; fbreak;
|
1523
1510
|
};
|
1524
1511
|
|
@@ -1679,6 +1666,7 @@ class Parser::Lexer
|
|
1679
1666
|
else
|
1680
1667
|
emit(:tLBRACE_ARG, '{'.freeze)
|
1681
1668
|
end
|
1669
|
+
@paren_nest += 1
|
1682
1670
|
@command_start = true
|
1683
1671
|
fnext expr_value; fbreak;
|
1684
1672
|
};
|
@@ -1927,6 +1915,24 @@ class Parser::Lexer
|
|
1927
1915
|
fgoto expr_end;
|
1928
1916
|
};
|
1929
1917
|
|
1918
|
+
#
|
1919
|
+
# AMBIGUOUS EMPTY BLOCK ARGUMENTS
|
1920
|
+
#
|
1921
|
+
|
1922
|
+
# Ruby >= 2.7 emits it as two tPIPE terminals
|
1923
|
+
# while Ruby < 2.7 as a single tOROP (like in `a || b`)
|
1924
|
+
'||'
|
1925
|
+
=> {
|
1926
|
+
if @version >= 27
|
1927
|
+
emit(:tPIPE, tok(@ts, @ts + 1), @ts, @ts + 1)
|
1928
|
+
fhold;
|
1929
|
+
fnext expr_beg; fbreak;
|
1930
|
+
else
|
1931
|
+
p -= 2
|
1932
|
+
fgoto expr_end;
|
1933
|
+
end
|
1934
|
+
};
|
1935
|
+
|
1930
1936
|
#
|
1931
1937
|
# KEYWORDS AND PUNCTUATION
|
1932
1938
|
#
|
@@ -1941,6 +1947,7 @@ class Parser::Lexer
|
|
1941
1947
|
else
|
1942
1948
|
emit(:tLBRACE, '{'.freeze)
|
1943
1949
|
end
|
1950
|
+
@paren_nest += 1
|
1944
1951
|
fbreak;
|
1945
1952
|
};
|
1946
1953
|
|
@@ -2159,6 +2166,9 @@ class Parser::Lexer
|
|
2159
2166
|
emit_do
|
2160
2167
|
end
|
2161
2168
|
end
|
2169
|
+
if tok == '{'.freeze
|
2170
|
+
@paren_nest += 1
|
2171
|
+
end
|
2162
2172
|
@command_start = true
|
2163
2173
|
|
2164
2174
|
fnext expr_value; fbreak;
|
@@ -2333,24 +2343,6 @@ class Parser::Lexer
|
|
2333
2343
|
# METHOD CALLS
|
2334
2344
|
#
|
2335
2345
|
|
2336
|
-
'.:' w_space+
|
2337
|
-
=> { emit(:tDOT, '.', @ts, @ts + 1)
|
2338
|
-
emit(:tCOLON, ':', @ts + 1, @ts + 2)
|
2339
|
-
p = p - tok.length + 2
|
2340
|
-
fnext expr_dot; fbreak; };
|
2341
|
-
|
2342
|
-
'.:'
|
2343
|
-
=> {
|
2344
|
-
if @version >= 27
|
2345
|
-
emit_table(PUNCTUATION)
|
2346
|
-
else
|
2347
|
-
emit(:tDOT, tok(@ts, @ts + 1), @ts, @ts + 1)
|
2348
|
-
fhold;
|
2349
|
-
end
|
2350
|
-
|
2351
|
-
fnext expr_dot; fbreak;
|
2352
|
-
};
|
2353
|
-
|
2354
2346
|
'.' | '&.' | '::'
|
2355
2347
|
=> { emit_table(PUNCTUATION)
|
2356
2348
|
fnext expr_dot; fbreak; };
|
@@ -2394,7 +2386,7 @@ class Parser::Lexer
|
|
2394
2386
|
=> { emit_table(PUNCTUATION)
|
2395
2387
|
fnext expr_beg; fbreak; };
|
2396
2388
|
|
2397
|
-
e_rbrace | e_rparen |
|
2389
|
+
e_rbrace | e_rparen | e_rbrack
|
2398
2390
|
=> {
|
2399
2391
|
emit_table(PUNCTUATION)
|
2400
2392
|
|
@@ -2431,6 +2423,17 @@ class Parser::Lexer
|
|
2431
2423
|
=> { emit(:tLBRACK2, '['.freeze)
|
2432
2424
|
fnext expr_beg; fbreak; };
|
2433
2425
|
|
2426
|
+
'...' c_nl
|
2427
|
+
=> {
|
2428
|
+
if @paren_nest == 0
|
2429
|
+
diagnostic :warning, :triple_dot_at_eol, nil, range(@ts, @te - 1)
|
2430
|
+
end
|
2431
|
+
|
2432
|
+
emit(:tDOT3, '...'.freeze, @ts, @te - 1)
|
2433
|
+
fhold;
|
2434
|
+
fnext expr_beg; fbreak;
|
2435
|
+
};
|
2436
|
+
|
2434
2437
|
punctuation_end
|
2435
2438
|
=> { emit_table(PUNCTUATION)
|
2436
2439
|
fnext expr_beg; fbreak; };
|
@@ -3,72 +3,75 @@
|
|
3
3
|
module Parser
|
4
4
|
|
5
5
|
class Lexer::Dedenter
|
6
|
+
# Tab (\t) counts as 8 spaces
|
7
|
+
TAB_WIDTH = 8
|
8
|
+
|
6
9
|
def initialize(dedent_level)
|
7
10
|
@dedent_level = dedent_level
|
8
11
|
@at_line_begin = true
|
9
12
|
@indent_level = 0
|
10
13
|
end
|
11
14
|
|
15
|
+
# For a heredoc like
|
16
|
+
# <<-HERE
|
17
|
+
# a
|
18
|
+
# b
|
19
|
+
# HERE
|
20
|
+
# this method gets called with " a\n" and " b\n"
|
21
|
+
#
|
22
|
+
# However, the following heredoc:
|
23
|
+
#
|
24
|
+
# <<-HERE
|
25
|
+
# a\
|
26
|
+
# b
|
27
|
+
# HERE
|
28
|
+
# calls this method only once with a string " a\\\n b\n"
|
29
|
+
#
|
30
|
+
# This is important because technically it's a single line,
|
31
|
+
# but it has to be concatenated __after__ dedenting.
|
32
|
+
#
|
33
|
+
# It has no effect for non-squiggly heredocs, i.e. it simply removes "\\\n"
|
34
|
+
# Of course, the lexer could do it, but once again: it's all because of dedenting.
|
35
|
+
#
|
12
36
|
def dedent(string)
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
37
|
+
original_encoding = string.encoding
|
38
|
+
# Prevent the following error when processing binary encoded source.
|
39
|
+
# "\xC0".split # => ArgumentError (invalid byte sequence in UTF-8)
|
40
|
+
lines = string.force_encoding(Encoding::BINARY).split("\\\n")
|
41
|
+
lines.map! {|s| s.force_encoding(original_encoding) }
|
17
42
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
offset += 1
|
24
|
-
_at_line_begin = @at_line_begin
|
25
|
-
@at_line_begin = false
|
26
|
-
elsif escape
|
27
|
-
if char == ?\n
|
28
|
-
# trimming \n, starting a new line
|
29
|
-
string.slice!(index - offset)
|
30
|
-
offset += 1
|
31
|
-
@at_line_begin = true
|
32
|
-
space_begin = space_end = index - offset
|
33
|
-
@indent_level = 0
|
34
|
-
elsif char == ?n
|
35
|
-
# replacing \\n to \n
|
36
|
-
string.slice!(index - offset)
|
37
|
-
string.insert(index - offset, ?\n)
|
38
|
-
else
|
39
|
-
# exiting escape mode as it's not an escape sequence
|
40
|
-
@at_line_begin = _at_line_begin
|
41
|
-
escape = false
|
42
|
-
redo
|
43
|
-
end
|
44
|
-
escape = false
|
45
|
-
elsif @at_line_begin
|
46
|
-
if char == ?\n || @indent_level >= @dedent_level
|
47
|
-
string.slice!(space_begin...space_end)
|
48
|
-
offset += space_end - space_begin
|
49
|
-
@at_line_begin = false
|
50
|
-
end
|
43
|
+
if @at_line_begin
|
44
|
+
lines_to_dedent = lines
|
45
|
+
else
|
46
|
+
_first, *lines_to_dedent = lines
|
47
|
+
end
|
51
48
|
|
49
|
+
lines_to_dedent.each do |line|
|
50
|
+
left_to_remove = @dedent_level
|
51
|
+
remove = 0
|
52
|
+
|
53
|
+
line.each_char do |char|
|
54
|
+
break if left_to_remove <= 0
|
52
55
|
case char
|
53
56
|
when ?\s
|
54
|
-
|
55
|
-
|
57
|
+
remove += 1
|
58
|
+
left_to_remove -= 1
|
56
59
|
when ?\t
|
57
|
-
|
58
|
-
|
60
|
+
break if TAB_WIDTH * (remove / TAB_WIDTH + 1) > @dedent_level
|
61
|
+
remove += 1
|
62
|
+
left_to_remove -= TAB_WIDTH
|
63
|
+
else
|
64
|
+
# no more spaces or tabs
|
65
|
+
break
|
59
66
|
end
|
60
|
-
elsif char == ?\n && index == last_index
|
61
|
-
@at_line_begin = true
|
62
|
-
@indent_level = 0
|
63
|
-
space_begin = space_end = index - offset + 1
|
64
67
|
end
|
65
|
-
end
|
66
68
|
|
67
|
-
|
68
|
-
string.slice!(space_begin..space_end)
|
69
|
+
line.slice!(0, remove)
|
69
70
|
end
|
70
71
|
|
71
|
-
|
72
|
+
string.replace(lines.join)
|
73
|
+
|
74
|
+
@at_line_begin = string.end_with?("\n")
|
72
75
|
end
|
73
76
|
|
74
77
|
def interrupt
|