parser 2.7.1.5 → 2.7.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/parser/current.rb +1 -1
- data/lib/parser/meta.rb +2 -2
- data/lib/parser/ruby28.rb +8047 -0
- data/lib/parser/version.rb +1 -1
- data/parser.gemspec +1 -20
- metadata +7 -96
- data/.travis.yml +0 -41
- data/.yardopts +0 -21
- data/CHANGELOG.md +0 -1137
- data/CONTRIBUTING.md +0 -17
- data/Gemfile +0 -10
- data/LICENSE.txt +0 -25
- data/README.md +0 -309
- data/Rakefile +0 -167
- data/ci/run_rubocop_specs +0 -14
- data/doc/AST_FORMAT.md +0 -2284
- data/doc/CUSTOMIZATION.md +0 -37
- data/doc/INTERNALS.md +0 -21
- data/doc/css/.gitkeep +0 -0
- data/doc/css/common.css +0 -68
- data/lib/parser/lexer.rl +0 -2550
- data/lib/parser/macruby.y +0 -2208
- data/lib/parser/ruby18.y +0 -1936
- data/lib/parser/ruby19.y +0 -2185
- data/lib/parser/ruby20.y +0 -2363
- data/lib/parser/ruby21.y +0 -2364
- data/lib/parser/ruby22.y +0 -2371
- data/lib/parser/ruby23.y +0 -2377
- data/lib/parser/ruby24.y +0 -2415
- data/lib/parser/ruby25.y +0 -2412
- data/lib/parser/ruby26.y +0 -2420
- data/lib/parser/ruby27.y +0 -2949
- data/lib/parser/ruby30.y +0 -3048
- data/lib/parser/rubymotion.y +0 -2192
- data/test/bug_163/fixtures/input.rb +0 -5
- data/test/bug_163/fixtures/output.rb +0 -5
- data/test/bug_163/rewriter.rb +0 -20
- data/test/helper.rb +0 -103
- data/test/parse_helper.rb +0 -328
- data/test/racc_coverage_helper.rb +0 -133
- data/test/test_ast_processor.rb +0 -32
- data/test/test_base.rb +0 -31
- data/test/test_current.rb +0 -31
- data/test/test_diagnostic.rb +0 -95
- data/test/test_diagnostic_engine.rb +0 -59
- data/test/test_encoding.rb +0 -99
- data/test/test_lexer.rb +0 -3617
- data/test/test_lexer_stack_state.rb +0 -78
- data/test/test_meta.rb +0 -12
- data/test/test_parse_helper.rb +0 -83
- data/test/test_parser.rb +0 -9986
- data/test/test_runner_parse.rb +0 -56
- data/test/test_runner_rewrite.rb +0 -47
- data/test/test_source_buffer.rb +0 -165
- data/test/test_source_comment.rb +0 -36
- data/test/test_source_comment_associator.rb +0 -399
- data/test/test_source_map.rb +0 -14
- data/test/test_source_range.rb +0 -192
- data/test/test_source_rewriter.rb +0 -541
- data/test/test_source_rewriter_action.rb +0 -46
- data/test/test_source_tree_rewriter.rb +0 -361
- data/test/test_static_environment.rb +0 -45
- data/test/using_tree_rewriter/fixtures/input.rb +0 -3
- data/test/using_tree_rewriter/fixtures/output.rb +0 -3
- data/test/using_tree_rewriter/using_tree_rewriter.rb +0 -9
data/doc/CUSTOMIZATION.md
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
# Customizing Parsers
|
2
|
-
|
3
|
-
While the default setup of the parsers provided by this Gem should be suitable
|
4
|
-
for most, some developers might want to change parts of it. An example would be
|
5
|
-
the use of a custom class for nodes instead of `Parser::AST::Node`.
|
6
|
-
|
7
|
-
Customizing the AST is done by creating a custom builder class and passing it
|
8
|
-
to the constructor method of a parser. The default setup comes down to the
|
9
|
-
following:
|
10
|
-
|
11
|
-
builder = Parser::Builders::Default.new
|
12
|
-
parser = Parser::Ruby19.new(builder)
|
13
|
-
|
14
|
-
When creating your own builder class it's best to subclass the default one so
|
15
|
-
that you don't have to redefine every used method again:
|
16
|
-
|
17
|
-
class MyBuilder < Parser::Builders::Default
|
18
|
-
|
19
|
-
end
|
20
|
-
|
21
|
-
builder = MyBuilder.new
|
22
|
-
parser = Parser::Ruby19.new(builder)
|
23
|
-
|
24
|
-
## Custom Node Classes
|
25
|
-
|
26
|
-
To use a custom node class you have to override the method
|
27
|
-
`Parser::Builders::Default#n`:
|
28
|
-
|
29
|
-
class MyBuilder < Parser::Builders::Default
|
30
|
-
def n(type, children, location)
|
31
|
-
return MyNodeClass.new(type, children, :location => location)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
Note that the used class (and corresponding instance) must be compatible with
|
36
|
-
`Parser::AST::Node` so it's best to subclass it and override/add code where
|
37
|
-
needed.
|
data/doc/INTERNALS.md
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
Entry points
|
2
|
-
------------
|
3
|
-
|
4
|
-
Parser should be kept as slim as possible. This includes not loading
|
5
|
-
any potentially large files when they are likely to be unused in practice.
|
6
|
-
|
7
|
-
Parser has five main (classes of) `require` entry points:
|
8
|
-
|
9
|
-
* `require 'parser'`. Main entry point, requires all classes which
|
10
|
-
are used across the entire library.
|
11
|
-
* `require 'parser/rubyXX'`. Version-specific entry point. Can raise
|
12
|
-
a NotImplementedError if current Ruby runtime is unable to parse the
|
13
|
-
requested Ruby version.
|
14
|
-
* `require 'parser/all'`. Requires all available parsers for released
|
15
|
-
versions of Ruby. Can raise NotImplementedError.
|
16
|
-
* `require 'parser/runner'`. Requires all the stuff which is useful for
|
17
|
-
command-line tools but not otherwise.
|
18
|
-
* `require 'parser/runner/X'`. Runner-specific entry point.
|
19
|
-
|
20
|
-
All non-main entry points internally `require 'parser'`. Additionally, all
|
21
|
-
runner-specific entry points internally `require 'parser/runner'`.
|
data/doc/css/.gitkeep
DELETED
File without changes
|
data/doc/css/common.css
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
body
|
2
|
-
{
|
3
|
-
font-size: 14px;
|
4
|
-
line-height: 1.6;
|
5
|
-
margin: 0 auto;
|
6
|
-
max-width: 960px;
|
7
|
-
}
|
8
|
-
|
9
|
-
p code
|
10
|
-
{
|
11
|
-
background: #f2f2f2;
|
12
|
-
padding-left: 3px;
|
13
|
-
padding-right: 3px;
|
14
|
-
}
|
15
|
-
|
16
|
-
pre.code
|
17
|
-
{
|
18
|
-
font-size: 13px;
|
19
|
-
line-height: 1.4;
|
20
|
-
}
|
21
|
-
|
22
|
-
/**
|
23
|
-
* YARD uses generic table styles, using a special class means those tables
|
24
|
-
* don't get messed up.
|
25
|
-
*/
|
26
|
-
.table
|
27
|
-
{
|
28
|
-
border: 1px solid #ccc;
|
29
|
-
border-right: none;
|
30
|
-
border-collapse: separate;
|
31
|
-
border-spacing: 0;
|
32
|
-
text-align: left;
|
33
|
-
}
|
34
|
-
|
35
|
-
.table.full
|
36
|
-
{
|
37
|
-
width: 100%;
|
38
|
-
}
|
39
|
-
|
40
|
-
.table .field_name
|
41
|
-
{
|
42
|
-
min-width: 160px;
|
43
|
-
}
|
44
|
-
|
45
|
-
.table thead tr th.no_sort:first-child
|
46
|
-
{
|
47
|
-
width: 25px;
|
48
|
-
}
|
49
|
-
|
50
|
-
.table thead tr th, .table tbody tr td
|
51
|
-
{
|
52
|
-
border-bottom: 1px solid #ccc;
|
53
|
-
border-right: 1px solid #ccc;
|
54
|
-
min-width: 20px;
|
55
|
-
padding: 8px 5px;
|
56
|
-
text-align: left;
|
57
|
-
vertical-align: top;
|
58
|
-
}
|
59
|
-
|
60
|
-
.table tbody tr:last-child td
|
61
|
-
{
|
62
|
-
border-bottom: none;
|
63
|
-
}
|
64
|
-
|
65
|
-
.table tr:nth-child(odd) td
|
66
|
-
{
|
67
|
-
background: #f9f9f9;
|
68
|
-
}
|
data/lib/parser/lexer.rl
DELETED
@@ -1,2550 +0,0 @@
|
|
1
|
-
%%machine lex; # % fix highlighting
|
2
|
-
|
3
|
-
#
|
4
|
-
# === BEFORE YOU START ===
|
5
|
-
#
|
6
|
-
# Read the Ruby Hacking Guide chapter 11, available in English at
|
7
|
-
# http://whitequark.org/blog/2013/04/01/ruby-hacking-guide-ch-11-finite-state-lexer/
|
8
|
-
#
|
9
|
-
# Remember two things about Ragel scanners:
|
10
|
-
#
|
11
|
-
# 1) Longest match wins.
|
12
|
-
#
|
13
|
-
# 2) If two matches have the same length, the first
|
14
|
-
# in source code wins.
|
15
|
-
#
|
16
|
-
# General rules of making Ragel and Bison happy:
|
17
|
-
#
|
18
|
-
# * `p` (position) and `@te` contain the index of the character
|
19
|
-
# they're pointing to ("current"), plus one. `@ts` contains the index
|
20
|
-
# of the corresponding character. The code for extracting matched token is:
|
21
|
-
#
|
22
|
-
# @source_buffer.slice(@ts...@te)
|
23
|
-
#
|
24
|
-
# * If your input is `foooooooobar` and the rule is:
|
25
|
-
#
|
26
|
-
# 'f' 'o'+
|
27
|
-
#
|
28
|
-
# the result will be:
|
29
|
-
#
|
30
|
-
# foooooooobar
|
31
|
-
# ^ ts=0 ^ p=te=9
|
32
|
-
#
|
33
|
-
# * A Ragel lexer action should not emit more than one token, unless
|
34
|
-
# you know what you are doing.
|
35
|
-
#
|
36
|
-
# * All Ragel commands (fnext, fgoto, ...) end with a semicolon.
|
37
|
-
#
|
38
|
-
# * If an action emits the token and transitions to another state, use
|
39
|
-
# these Ragel commands:
|
40
|
-
#
|
41
|
-
# emit($whatever)
|
42
|
-
# fnext $next_state; fbreak;
|
43
|
-
#
|
44
|
-
# If you perform `fgoto` in an action which neither emits a token nor
|
45
|
-
# rewinds the stream pointer, the parser's side-effectful,
|
46
|
-
# context-sensitive lookahead actions will break in a hard to detect
|
47
|
-
# and debug way.
|
48
|
-
#
|
49
|
-
# * If an action does not emit a token:
|
50
|
-
#
|
51
|
-
# fgoto $next_state;
|
52
|
-
#
|
53
|
-
# * If an action features lookbehind, i.e. matches characters with the
|
54
|
-
# intent of passing them to another action:
|
55
|
-
#
|
56
|
-
# p = @ts - 1
|
57
|
-
# fgoto $next_state;
|
58
|
-
#
|
59
|
-
# or, if the lookbehind consists of a single character:
|
60
|
-
#
|
61
|
-
# fhold; fgoto $next_state;
|
62
|
-
#
|
63
|
-
# * Ragel merges actions. So, if you have `e_lparen = '(' %act` and
|
64
|
-
# `c_lparen = '('` and a lexer action `e_lparen | c_lparen`, the result
|
65
|
-
# _will_ invoke the action `act`.
|
66
|
-
#
|
67
|
-
# e_something stands for "something with **e**mbedded action".
|
68
|
-
#
|
69
|
-
# * EOF is explicit and is matched by `c_eof`. If you want to introspect
|
70
|
-
# the state of the lexer, add this rule to the state:
|
71
|
-
#
|
72
|
-
# c_eof => do_eof;
|
73
|
-
#
|
74
|
-
# * If you proceed past EOF, the lexer will complain:
|
75
|
-
#
|
76
|
-
# NoMethodError: undefined method `ord' for nil:NilClass
|
77
|
-
#
|
78
|
-
|
79
|
-
class Parser::Lexer
|
80
|
-
|
81
|
-
%% write data nofinal;
|
82
|
-
# %
|
83
|
-
|
84
|
-
ESCAPES = {
|
85
|
-
?a.ord => "\a", ?b.ord => "\b", ?e.ord => "\e", ?f.ord => "\f",
|
86
|
-
?n.ord => "\n", ?r.ord => "\r", ?s.ord => "\s", ?t.ord => "\t",
|
87
|
-
?v.ord => "\v", ?\\.ord => "\\"
|
88
|
-
}.freeze
|
89
|
-
|
90
|
-
REGEXP_META_CHARACTERS = Regexp.union(*"\\$()*+.<>?[]^{|}".chars).freeze
|
91
|
-
|
92
|
-
attr_reader :source_buffer
|
93
|
-
|
94
|
-
attr_accessor :diagnostics
|
95
|
-
attr_accessor :static_env
|
96
|
-
attr_accessor :force_utf32
|
97
|
-
|
98
|
-
attr_accessor :cond, :cmdarg, :in_kwarg, :context, :command_start
|
99
|
-
|
100
|
-
attr_accessor :tokens, :comments
|
101
|
-
|
102
|
-
def initialize(version)
|
103
|
-
@version = version
|
104
|
-
@static_env = nil
|
105
|
-
@context = nil
|
106
|
-
|
107
|
-
@tokens = nil
|
108
|
-
@comments = nil
|
109
|
-
|
110
|
-
reset
|
111
|
-
end
|
112
|
-
|
113
|
-
def reset(reset_state=true)
|
114
|
-
# Ragel state:
|
115
|
-
if reset_state
|
116
|
-
# Unit tests set state prior to resetting lexer.
|
117
|
-
@cs = self.class.lex_en_line_begin
|
118
|
-
|
119
|
-
@cond = StackState.new('cond')
|
120
|
-
@cmdarg = StackState.new('cmdarg')
|
121
|
-
@cond_stack = []
|
122
|
-
@cmdarg_stack = []
|
123
|
-
end
|
124
|
-
|
125
|
-
@force_utf32 = false # Set to true by some tests
|
126
|
-
|
127
|
-
@source_pts = nil # @source as a codepoint array
|
128
|
-
|
129
|
-
@p = 0 # stream position (saved manually in #advance)
|
130
|
-
@ts = nil # token start
|
131
|
-
@te = nil # token end
|
132
|
-
@act = 0 # next action
|
133
|
-
|
134
|
-
@stack = [] # state stack
|
135
|
-
@top = 0 # state stack top pointer
|
136
|
-
|
137
|
-
# Lexer state:
|
138
|
-
@token_queue = []
|
139
|
-
@literal_stack = []
|
140
|
-
|
141
|
-
@eq_begin_s = nil # location of last encountered =begin
|
142
|
-
@sharp_s = nil # location of last encountered #
|
143
|
-
|
144
|
-
@newline_s = nil # location of last encountered newline
|
145
|
-
|
146
|
-
@num_base = nil # last numeric base
|
147
|
-
@num_digits_s = nil # starting position of numeric digits
|
148
|
-
@num_suffix_s = nil # starting position of numeric suffix
|
149
|
-
@num_xfrm = nil # numeric suffix-induced transformation
|
150
|
-
|
151
|
-
@escape_s = nil # starting position of current sequence
|
152
|
-
@escape = nil # last escaped sequence, as string
|
153
|
-
|
154
|
-
@herebody_s = nil # starting position of current heredoc line
|
155
|
-
|
156
|
-
# Ruby 1.9 ->() lambdas emit a distinct token if do/{ is
|
157
|
-
# encountered after a matching closing parenthesis.
|
158
|
-
@paren_nest = 0
|
159
|
-
@lambda_stack = []
|
160
|
-
|
161
|
-
# After encountering the closing line of <<~SQUIGGLY_HEREDOC,
|
162
|
-
# we store the indentation level and give it out to the parser
|
163
|
-
# on request. It is not possible to infer indentation level just
|
164
|
-
# from the AST because escape sequences such as `\ ` or `\t` are
|
165
|
-
# expanded inside the lexer, but count as non-whitespace for
|
166
|
-
# indentation purposes.
|
167
|
-
@dedent_level = nil
|
168
|
-
|
169
|
-
# If the lexer is in `command state' (aka expr_value)
|
170
|
-
# at the entry to #advance, it will transition to expr_cmdarg
|
171
|
-
# instead of expr_arg at certain points.
|
172
|
-
@command_start = true
|
173
|
-
|
174
|
-
# True at the end of "def foo a:"
|
175
|
-
@in_kwarg = false
|
176
|
-
|
177
|
-
# State before =begin / =end block comment
|
178
|
-
@cs_before_block_comment = self.class.lex_en_line_begin
|
179
|
-
end
|
180
|
-
|
181
|
-
def source_buffer=(source_buffer)
|
182
|
-
@source_buffer = source_buffer
|
183
|
-
|
184
|
-
if @source_buffer
|
185
|
-
source = @source_buffer.source
|
186
|
-
|
187
|
-
if source.encoding == Encoding::UTF_8
|
188
|
-
@source_pts = source.unpack('U*')
|
189
|
-
else
|
190
|
-
@source_pts = source.unpack('C*')
|
191
|
-
end
|
192
|
-
|
193
|
-
if @source_pts[0] == 0xfeff
|
194
|
-
# Skip byte order mark.
|
195
|
-
@p = 1
|
196
|
-
end
|
197
|
-
else
|
198
|
-
@source_pts = nil
|
199
|
-
end
|
200
|
-
end
|
201
|
-
|
202
|
-
def encoding
|
203
|
-
@source_buffer.source.encoding
|
204
|
-
end
|
205
|
-
|
206
|
-
LEX_STATES = {
|
207
|
-
:line_begin => lex_en_line_begin,
|
208
|
-
:expr_dot => lex_en_expr_dot,
|
209
|
-
:expr_fname => lex_en_expr_fname,
|
210
|
-
:expr_value => lex_en_expr_value,
|
211
|
-
:expr_beg => lex_en_expr_beg,
|
212
|
-
:expr_mid => lex_en_expr_mid,
|
213
|
-
:expr_arg => lex_en_expr_arg,
|
214
|
-
:expr_cmdarg => lex_en_expr_cmdarg,
|
215
|
-
:expr_end => lex_en_expr_end,
|
216
|
-
:expr_endarg => lex_en_expr_endarg,
|
217
|
-
:expr_endfn => lex_en_expr_endfn,
|
218
|
-
:expr_labelarg => lex_en_expr_labelarg,
|
219
|
-
|
220
|
-
:interp_string => lex_en_interp_string,
|
221
|
-
:interp_words => lex_en_interp_words,
|
222
|
-
:plain_string => lex_en_plain_string,
|
223
|
-
:plain_words => lex_en_plain_string,
|
224
|
-
}
|
225
|
-
|
226
|
-
def state
|
227
|
-
LEX_STATES.invert.fetch(@cs, @cs)
|
228
|
-
end
|
229
|
-
|
230
|
-
def state=(state)
|
231
|
-
@cs = LEX_STATES.fetch(state)
|
232
|
-
end
|
233
|
-
|
234
|
-
def push_cmdarg
|
235
|
-
@cmdarg_stack.push(@cmdarg)
|
236
|
-
@cmdarg = StackState.new("cmdarg.#{@cmdarg_stack.count}")
|
237
|
-
end
|
238
|
-
|
239
|
-
def pop_cmdarg
|
240
|
-
@cmdarg = @cmdarg_stack.pop
|
241
|
-
end
|
242
|
-
|
243
|
-
def push_cond
|
244
|
-
@cond_stack.push(@cond)
|
245
|
-
@cond = StackState.new("cond.#{@cond_stack.count}")
|
246
|
-
end
|
247
|
-
|
248
|
-
def pop_cond
|
249
|
-
@cond = @cond_stack.pop
|
250
|
-
end
|
251
|
-
|
252
|
-
def dedent_level
|
253
|
-
# We erase @dedent_level as a precaution to avoid accidentally
|
254
|
-
# using a stale value.
|
255
|
-
dedent_level, @dedent_level = @dedent_level, nil
|
256
|
-
dedent_level
|
257
|
-
end
|
258
|
-
|
259
|
-
# Return next token: [type, value].
|
260
|
-
def advance
|
261
|
-
if @token_queue.any?
|
262
|
-
return @token_queue.shift
|
263
|
-
end
|
264
|
-
|
265
|
-
# Ugly, but dependent on Ragel output. Consider refactoring it somehow.
|
266
|
-
klass = self.class
|
267
|
-
_lex_trans_keys = klass.send :_lex_trans_keys
|
268
|
-
_lex_key_spans = klass.send :_lex_key_spans
|
269
|
-
_lex_index_offsets = klass.send :_lex_index_offsets
|
270
|
-
_lex_indicies = klass.send :_lex_indicies
|
271
|
-
_lex_trans_targs = klass.send :_lex_trans_targs
|
272
|
-
_lex_trans_actions = klass.send :_lex_trans_actions
|
273
|
-
_lex_to_state_actions = klass.send :_lex_to_state_actions
|
274
|
-
_lex_from_state_actions = klass.send :_lex_from_state_actions
|
275
|
-
_lex_eof_trans = klass.send :_lex_eof_trans
|
276
|
-
|
277
|
-
pe = @source_pts.size + 2
|
278
|
-
p, eof = @p, pe
|
279
|
-
|
280
|
-
cmd_state = @command_start
|
281
|
-
@command_start = false
|
282
|
-
|
283
|
-
%% write exec;
|
284
|
-
# %
|
285
|
-
|
286
|
-
# Ragel creates a local variable called `testEof` but it doesn't use
|
287
|
-
# it in any assignment. This dead code is here to swallow the warning.
|
288
|
-
# It has no runtime cost because Ruby doesn't produce any instructions from it.
|
289
|
-
if false
|
290
|
-
testEof
|
291
|
-
end
|
292
|
-
|
293
|
-
@p = p
|
294
|
-
|
295
|
-
if @token_queue.any?
|
296
|
-
@token_queue.shift
|
297
|
-
elsif @cs == klass.lex_error
|
298
|
-
[ false, [ '$error'.freeze, range(p - 1, p) ] ]
|
299
|
-
else
|
300
|
-
eof = @source_pts.size
|
301
|
-
[ false, [ '$eof'.freeze, range(eof, eof) ] ]
|
302
|
-
end
|
303
|
-
end
|
304
|
-
|
305
|
-
protected
|
306
|
-
|
307
|
-
def eof_codepoint?(point)
|
308
|
-
[0x04, 0x1a, 0x00].include? point
|
309
|
-
end
|
310
|
-
|
311
|
-
def version?(*versions)
|
312
|
-
versions.include?(@version)
|
313
|
-
end
|
314
|
-
|
315
|
-
def stack_pop
|
316
|
-
@top -= 1
|
317
|
-
@stack[@top]
|
318
|
-
end
|
319
|
-
|
320
|
-
def encode_escape(ord)
|
321
|
-
ord.chr.force_encoding(@source_buffer.source.encoding)
|
322
|
-
end
|
323
|
-
|
324
|
-
def tok(s = @ts, e = @te)
|
325
|
-
@source_buffer.slice(s...e)
|
326
|
-
end
|
327
|
-
|
328
|
-
def range(s = @ts, e = @te)
|
329
|
-
Parser::Source::Range.new(@source_buffer, s, e)
|
330
|
-
end
|
331
|
-
|
332
|
-
def emit(type, value = tok, s = @ts, e = @te)
|
333
|
-
token = [ type, [ value, range(s, e) ] ]
|
334
|
-
|
335
|
-
@token_queue.push(token)
|
336
|
-
|
337
|
-
@tokens.push(token) if @tokens
|
338
|
-
|
339
|
-
token
|
340
|
-
end
|
341
|
-
|
342
|
-
def emit_table(table, s = @ts, e = @te)
|
343
|
-
value = tok(s, e)
|
344
|
-
|
345
|
-
emit(table[value], value, s, e)
|
346
|
-
end
|
347
|
-
|
348
|
-
def emit_do(do_block=false)
|
349
|
-
if @cond.active?
|
350
|
-
emit(:kDO_COND, 'do'.freeze)
|
351
|
-
elsif @cmdarg.active? || do_block
|
352
|
-
emit(:kDO_BLOCK, 'do'.freeze)
|
353
|
-
else
|
354
|
-
emit(:kDO, 'do'.freeze)
|
355
|
-
end
|
356
|
-
end
|
357
|
-
|
358
|
-
def arg_or_cmdarg(cmd_state)
|
359
|
-
if cmd_state
|
360
|
-
self.class.lex_en_expr_cmdarg
|
361
|
-
else
|
362
|
-
self.class.lex_en_expr_arg
|
363
|
-
end
|
364
|
-
end
|
365
|
-
|
366
|
-
def emit_comment(s = @ts, e = @te)
|
367
|
-
if @comments
|
368
|
-
@comments.push(Parser::Source::Comment.new(range(s, e)))
|
369
|
-
end
|
370
|
-
|
371
|
-
if @tokens
|
372
|
-
@tokens.push([ :tCOMMENT, [ tok(s, e), range(s, e) ] ])
|
373
|
-
end
|
374
|
-
|
375
|
-
nil
|
376
|
-
end
|
377
|
-
|
378
|
-
def diagnostic(type, reason, arguments=nil, location=range, highlights=[])
|
379
|
-
@diagnostics.process(
|
380
|
-
Parser::Diagnostic.new(type, reason, arguments, location, highlights))
|
381
|
-
end
|
382
|
-
|
383
|
-
#
|
384
|
-
# === LITERAL STACK ===
|
385
|
-
#
|
386
|
-
|
387
|
-
def push_literal(*args)
|
388
|
-
new_literal = Literal.new(self, *args)
|
389
|
-
@literal_stack.push(new_literal)
|
390
|
-
next_state_for_literal(new_literal)
|
391
|
-
end
|
392
|
-
|
393
|
-
def next_state_for_literal(literal)
|
394
|
-
if literal.words? && literal.backslash_delimited?
|
395
|
-
if literal.interpolate?
|
396
|
-
self.class.lex_en_interp_backslash_delimited_words
|
397
|
-
else
|
398
|
-
self.class.lex_en_plain_backslash_delimited_words
|
399
|
-
end
|
400
|
-
elsif literal.words? && !literal.backslash_delimited?
|
401
|
-
if literal.interpolate?
|
402
|
-
self.class.lex_en_interp_words
|
403
|
-
else
|
404
|
-
self.class.lex_en_plain_words
|
405
|
-
end
|
406
|
-
elsif !literal.words? && literal.backslash_delimited?
|
407
|
-
if literal.interpolate?
|
408
|
-
self.class.lex_en_interp_backslash_delimited
|
409
|
-
else
|
410
|
-
self.class.lex_en_plain_backslash_delimited
|
411
|
-
end
|
412
|
-
else
|
413
|
-
if literal.interpolate?
|
414
|
-
self.class.lex_en_interp_string
|
415
|
-
else
|
416
|
-
self.class.lex_en_plain_string
|
417
|
-
end
|
418
|
-
end
|
419
|
-
end
|
420
|
-
|
421
|
-
def literal
|
422
|
-
@literal_stack.last
|
423
|
-
end
|
424
|
-
|
425
|
-
def pop_literal
|
426
|
-
old_literal = @literal_stack.pop
|
427
|
-
|
428
|
-
@dedent_level = old_literal.dedent_level
|
429
|
-
|
430
|
-
if old_literal.type == :tREGEXP_BEG
|
431
|
-
# Fetch modifiers.
|
432
|
-
self.class.lex_en_regexp_modifiers
|
433
|
-
else
|
434
|
-
self.class.lex_en_expr_end
|
435
|
-
end
|
436
|
-
end
|
437
|
-
|
438
|
-
# Mapping of strings to parser tokens.
|
439
|
-
|
440
|
-
PUNCTUATION = {
|
441
|
-
'=' => :tEQL, '&' => :tAMPER2, '|' => :tPIPE,
|
442
|
-
'!' => :tBANG, '^' => :tCARET, '+' => :tPLUS,
|
443
|
-
'-' => :tMINUS, '*' => :tSTAR2, '/' => :tDIVIDE,
|
444
|
-
'%' => :tPERCENT, '~' => :tTILDE, ',' => :tCOMMA,
|
445
|
-
';' => :tSEMI, '.' => :tDOT, '..' => :tDOT2,
|
446
|
-
'...' => :tDOT3, '[' => :tLBRACK2, ']' => :tRBRACK,
|
447
|
-
'(' => :tLPAREN2, ')' => :tRPAREN, '?' => :tEH,
|
448
|
-
':' => :tCOLON, '&&' => :tANDOP, '||' => :tOROP,
|
449
|
-
'-@' => :tUMINUS, '+@' => :tUPLUS, '~@' => :tTILDE,
|
450
|
-
'**' => :tPOW, '->' => :tLAMBDA, '=~' => :tMATCH,
|
451
|
-
'!~' => :tNMATCH, '==' => :tEQ, '!=' => :tNEQ,
|
452
|
-
'>' => :tGT, '>>' => :tRSHFT, '>=' => :tGEQ,
|
453
|
-
'<' => :tLT, '<<' => :tLSHFT, '<=' => :tLEQ,
|
454
|
-
'=>' => :tASSOC, '::' => :tCOLON2, '===' => :tEQQ,
|
455
|
-
'<=>' => :tCMP, '[]' => :tAREF, '[]=' => :tASET,
|
456
|
-
'{' => :tLCURLY, '}' => :tRCURLY, '`' => :tBACK_REF2,
|
457
|
-
'!@' => :tBANG, '&.' => :tANDDOT,
|
458
|
-
}
|
459
|
-
|
460
|
-
PUNCTUATION_BEGIN = {
|
461
|
-
'&' => :tAMPER, '*' => :tSTAR, '**' => :tDSTAR,
|
462
|
-
'+' => :tUPLUS, '-' => :tUMINUS, '::' => :tCOLON3,
|
463
|
-
'(' => :tLPAREN, '{' => :tLBRACE, '[' => :tLBRACK,
|
464
|
-
}
|
465
|
-
|
466
|
-
KEYWORDS = {
|
467
|
-
'if' => :kIF_MOD, 'unless' => :kUNLESS_MOD,
|
468
|
-
'while' => :kWHILE_MOD, 'until' => :kUNTIL_MOD,
|
469
|
-
'rescue' => :kRESCUE_MOD, 'defined?' => :kDEFINED,
|
470
|
-
'BEGIN' => :klBEGIN, 'END' => :klEND,
|
471
|
-
}
|
472
|
-
|
473
|
-
KEYWORDS_BEGIN = {
|
474
|
-
'if' => :kIF, 'unless' => :kUNLESS,
|
475
|
-
'while' => :kWHILE, 'until' => :kUNTIL,
|
476
|
-
'rescue' => :kRESCUE, 'defined?' => :kDEFINED,
|
477
|
-
'BEGIN' => :klBEGIN, 'END' => :klEND,
|
478
|
-
}
|
479
|
-
|
480
|
-
%w(class module def undef begin end then elsif else ensure case when
|
481
|
-
for break next redo retry in do return yield super self nil true
|
482
|
-
false and or not alias __FILE__ __LINE__ __ENCODING__).each do |keyword|
|
483
|
-
KEYWORDS_BEGIN[keyword] = KEYWORDS[keyword] = :"k#{keyword.upcase}"
|
484
|
-
end
|
485
|
-
|
486
|
-
%%{
|
487
|
-
# %
|
488
|
-
|
489
|
-
access @;
|
490
|
-
getkey (@source_pts[p] || 0);
|
491
|
-
|
492
|
-
# === CHARACTER CLASSES ===
|
493
|
-
#
|
494
|
-
# Pay close attention to the differences between c_any and any.
|
495
|
-
# c_any does not include EOF and so will cause incorrect behavior
|
496
|
-
# for machine subtraction (any-except rules) and default transitions
|
497
|
-
# for scanners.
|
498
|
-
|
499
|
-
action do_nl {
|
500
|
-
# Record position of a newline for precise location reporting on tNL
|
501
|
-
# tokens.
|
502
|
-
#
|
503
|
-
# This action is embedded directly into c_nl, as it is idempotent and
|
504
|
-
# there are no cases when we need to skip it.
|
505
|
-
@newline_s = p
|
506
|
-
}
|
507
|
-
|
508
|
-
c_nl = '\n' $ do_nl;
|
509
|
-
c_space = [ \t\r\f\v];
|
510
|
-
c_space_nl = c_space | c_nl;
|
511
|
-
|
512
|
-
c_eof = 0x04 | 0x1a | 0 | zlen; # ^D, ^Z, \0, EOF
|
513
|
-
c_eol = c_nl | c_eof;
|
514
|
-
c_any = any - c_eof;
|
515
|
-
|
516
|
-
c_nl_zlen = c_nl | zlen;
|
517
|
-
c_line = any - c_nl_zlen;
|
518
|
-
|
519
|
-
c_unicode = c_any - 0x00..0x7f;
|
520
|
-
c_upper = [A-Z];
|
521
|
-
c_lower = [a-z_] | c_unicode;
|
522
|
-
c_alpha = c_lower | c_upper;
|
523
|
-
c_alnum = c_alpha | [0-9];
|
524
|
-
|
525
|
-
action do_eof {
|
526
|
-
# Sit at EOF indefinitely. #advance would return $eof each time.
|
527
|
-
# This allows feeding the lexer more data if needed; this is only used
|
528
|
-
# in tests.
|
529
|
-
#
|
530
|
-
# Note that this action is not embedded into e_eof like e_heredoc_nl and e_bs
|
531
|
-
# below. This is due to the fact that scanner state at EOF is observed
|
532
|
-
# by tests, and encapsulating it in a rule would break the introspection.
|
533
|
-
fhold; fbreak;
|
534
|
-
}
|
535
|
-
|
536
|
-
#
|
537
|
-
# === TOKEN DEFINITIONS ===
|
538
|
-
#
|
539
|
-
|
540
|
-
# All operators are punctuation. There is more to punctuation
|
541
|
-
# than just operators. Operators can be overridden by user;
|
542
|
-
# punctuation can not.
|
543
|
-
|
544
|
-
# A list of operators which are valid in the function name context, but
|
545
|
-
# have different semantics in others.
|
546
|
-
operator_fname = '[]' | '[]=' | '`' | '-@' | '+@' | '~@' | '!@' ;
|
547
|
-
|
548
|
-
# A list of operators which can occur within an assignment shortcut (+ → +=).
|
549
|
-
operator_arithmetic = '&' | '|' | '&&' | '||' | '^' | '+' | '-' |
|
550
|
-
'*' | '/' | '**' | '~' | '<<' | '>>' | '%' ;
|
551
|
-
|
552
|
-
# A list of all user-definable operators not covered by groups above.
|
553
|
-
operator_rest = '=~' | '!~' | '==' | '!=' | '!' | '===' |
|
554
|
-
'<' | '<=' | '>' | '>=' | '<=>' | '=>' ;
|
555
|
-
|
556
|
-
# Note that `{` and `}` need to be referred to as e_lbrace and e_rbrace,
|
557
|
-
# as they are ambiguous with interpolation `#{}` and should be counted.
|
558
|
-
# These braces are not present in punctuation lists.
|
559
|
-
|
560
|
-
# A list of punctuation which has different meaning when used at the
|
561
|
-
# beginning of expression.
|
562
|
-
punctuation_begin = '-' | '+' | '::' | '(' | '[' |
|
563
|
-
'*' | '**' | '&' ;
|
564
|
-
|
565
|
-
# A list of all punctuation except punctuation_begin.
|
566
|
-
punctuation_end = ',' | '=' | '->' | '(' | '[' | ']' |
|
567
|
-
'::' | '?' | ':' | '.' | '..' | '...' ;
|
568
|
-
|
569
|
-
# A list of keywords which have different meaning at the beginning of expression.
|
570
|
-
keyword_modifier = 'if' | 'unless' | 'while' | 'until' | 'rescue' ;
|
571
|
-
|
572
|
-
# A list of keywords which accept an argument-like expression, i.e. have the
|
573
|
-
# same post-processing as method calls or commands. Example: `yield 1`,
|
574
|
-
# `yield (1)`, `yield(1)`, are interpreted as if `yield` was a function.
|
575
|
-
keyword_with_arg = 'yield' | 'super' | 'not' | 'defined?' ;
|
576
|
-
|
577
|
-
# A list of keywords which accept a literal function name as an argument.
|
578
|
-
keyword_with_fname = 'def' | 'undef' | 'alias' ;
|
579
|
-
|
580
|
-
# A list of keywords which accept an expression after them.
|
581
|
-
keyword_with_value = 'else' | 'case' | 'ensure' | 'module' | 'elsif' | 'then' |
|
582
|
-
'for' | 'in' | 'do' | 'when' | 'begin' | 'class' |
|
583
|
-
'and' | 'or' ;
|
584
|
-
|
585
|
-
# A list of keywords which accept a value, and treat the keywords from
|
586
|
-
# `keyword_modifier` list as modifiers.
|
587
|
-
keyword_with_mid = 'rescue' | 'return' | 'break' | 'next' ;
|
588
|
-
|
589
|
-
# A list of keywords which do not accept an expression after them.
|
590
|
-
keyword_with_end = 'end' | 'self' | 'true' | 'false' | 'retry' |
|
591
|
-
'redo' | 'nil' | 'BEGIN' | 'END' | '__FILE__' |
|
592
|
-
'__LINE__' | '__ENCODING__';
|
593
|
-
|
594
|
-
# All keywords.
|
595
|
-
keyword = keyword_with_value | keyword_with_mid |
|
596
|
-
keyword_with_end | keyword_with_arg |
|
597
|
-
keyword_with_fname | keyword_modifier ;
|
598
|
-
|
599
|
-
constant = c_upper c_alnum*;
|
600
|
-
bareword = c_alpha c_alnum*;
|
601
|
-
|
602
|
-
call_or_var = c_lower c_alnum*;
|
603
|
-
class_var = '@@' bareword;
|
604
|
-
instance_var = '@' bareword;
|
605
|
-
global_var = '$'
|
606
|
-
( bareword | digit+
|
607
|
-
| [`'+~*$&?!@/\\;,.=:<>"] # `
|
608
|
-
| '-' c_alnum
|
609
|
-
)
|
610
|
-
;
|
611
|
-
|
612
|
-
# Ruby accepts (and fails on) variables with leading digit
|
613
|
-
# in literal context, but not in unquoted symbol body.
|
614
|
-
class_var_v = '@@' c_alnum+;
|
615
|
-
instance_var_v = '@' c_alnum+;
|
616
|
-
|
617
|
-
label = bareword [?!]? ':';
|
618
|
-
|
619
|
-
#
|
620
|
-
# === NUMERIC PARSING ===
|
621
|
-
#
|
622
|
-
|
623
|
-
int_hex = ( xdigit+ '_' )* xdigit* '_'? ;
|
624
|
-
int_dec = ( digit+ '_' )* digit* '_'? ;
|
625
|
-
int_bin = ( [01]+ '_' )* [01]* '_'? ;
|
626
|
-
|
627
|
-
flo_int = [1-9] [0-9]* ( '_' digit+ )* | '0';
|
628
|
-
flo_frac = '.' ( digit+ '_' )* digit+;
|
629
|
-
flo_pow = [eE] [+\-]? ( digit+ '_' )* digit+;
|
630
|
-
|
631
|
-
int_suffix =
|
632
|
-
'' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars) } }
|
633
|
-
| 'r' % { @num_xfrm = lambda { |chars| emit(:tRATIONAL, Rational(chars)) } }
|
634
|
-
| 'i' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, chars)) } }
|
635
|
-
| 'ri' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Rational(chars))) } }
|
636
|
-
| 're' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 2); p -= 2 } }
|
637
|
-
| 'if' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 2); p -= 2 } }
|
638
|
-
| 'rescue' % { @num_xfrm = lambda { |chars| emit(:tINTEGER, chars, @ts, @te - 6); p -= 6 } };
|
639
|
-
|
640
|
-
flo_pow_suffix =
|
641
|
-
'' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars)) } }
|
642
|
-
| 'i' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Float(chars))) } }
|
643
|
-
| 'if' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars), @ts, @te - 2); p -= 2 } };
|
644
|
-
|
645
|
-
flo_suffix =
|
646
|
-
flo_pow_suffix
|
647
|
-
| 'r' % { @num_xfrm = lambda { |chars| emit(:tRATIONAL, Rational(chars)) } }
|
648
|
-
| 'ri' % { @num_xfrm = lambda { |chars| emit(:tIMAGINARY, Complex(0, Rational(chars))) } }
|
649
|
-
| 'rescue' % { @num_xfrm = lambda { |chars| emit(:tFLOAT, Float(chars), @ts, @te - 6); p -= 6 } };
|
650
|
-
|
651
|
-
#
|
652
|
-
# === ESCAPE SEQUENCE PARSING ===
|
653
|
-
#
|
654
|
-
|
655
|
-
# Escape parsing code is a Ragel pattern, not a scanner, and therefore
|
656
|
-
# it shouldn't directly raise errors or perform other actions with side effects.
|
657
|
-
# In reality this would probably just mess up error reporting in pathological
|
658
|
-
# cases, though.
|
659
|
-
|
660
|
-
# The amount of code required to parse \M\C stuff correctly is ridiculous.
|
661
|
-
|
662
|
-
escaped_nl = "\\" c_nl;
|
663
|
-
|
664
|
-
action unicode_points {
|
665
|
-
@escape = ""
|
666
|
-
|
667
|
-
codepoints = tok(@escape_s + 2, p - 1)
|
668
|
-
codepoint_s = @escape_s + 2
|
669
|
-
|
670
|
-
if @version < 24
|
671
|
-
if codepoints.start_with?(" ") || codepoints.start_with?("\t")
|
672
|
-
diagnostic :fatal, :invalid_unicode_escape, nil,
|
673
|
-
range(@escape_s + 2, @escape_s + 3)
|
674
|
-
end
|
675
|
-
|
676
|
-
if spaces_p = codepoints.index(/[ \t]{2}/)
|
677
|
-
diagnostic :fatal, :invalid_unicode_escape, nil,
|
678
|
-
range(codepoint_s + spaces_p + 1, codepoint_s + spaces_p + 2)
|
679
|
-
end
|
680
|
-
|
681
|
-
if codepoints.end_with?(" ") || codepoints.end_with?("\t")
|
682
|
-
diagnostic :fatal, :invalid_unicode_escape, nil, range(p - 1, p)
|
683
|
-
end
|
684
|
-
end
|
685
|
-
|
686
|
-
codepoints.scan(/([0-9a-fA-F]+)|([ \t]+)/).each do |(codepoint_str, spaces)|
|
687
|
-
if spaces
|
688
|
-
codepoint_s += spaces.length
|
689
|
-
else
|
690
|
-
codepoint = codepoint_str.to_i(16)
|
691
|
-
|
692
|
-
if codepoint >= 0x110000
|
693
|
-
diagnostic :error, :unicode_point_too_large, nil,
|
694
|
-
range(codepoint_s, codepoint_s + codepoint_str.length)
|
695
|
-
break
|
696
|
-
end
|
697
|
-
|
698
|
-
@escape += codepoint.chr(Encoding::UTF_8)
|
699
|
-
codepoint_s += codepoint_str.length
|
700
|
-
end
|
701
|
-
end
|
702
|
-
}
|
703
|
-
|
704
|
-
# Resolves a single-character escape. The code point just before `p` is
# looked up in ESCAPES; when the table has no entry for it, the source
# character at that position is passed through encode_escape instead.
# The result is stored in @escape.
action unescape_char {
|
705
|
-
codepoint = @source_pts[p - 1]
|
706
|
-
if (@escape = ESCAPES[codepoint]).nil?
|
707
|
-
@escape = encode_escape(@source_buffer.slice(p - 1))
|
708
|
-
end
|
709
|
-
}
|
710
|
-
|
711
|
-
# Reports a fatal :invalid_escape diagnostic. Used by the `escape` pattern
# for malformed \C and \M sequences.
action invalid_complex_escape {
|
712
|
-
diagnostic :fatal, :invalid_escape
|
713
|
-
}
|
714
|
-
|
715
|
-
# Stores the source character just before `p` in @escape. For versions
# >= 27, a character whose ordinal is in 0..8 or 14..31 is rejected with
# a fatal :invalid_escape diagnostic.
action read_post_meta_or_ctrl_char {
|
716
|
-
@escape = @source_buffer.slice(p - 1).chr
|
717
|
-
|
718
|
-
if @version >= 27 && ((0..8).include?(@escape.ord) || (14..31).include?(@escape.ord))
|
719
|
-
diagnostic :fatal, :invalid_escape
|
720
|
-
end
|
721
|
-
}
|
722
|
-
|
723
|
-
# Applies the control-character transformation to the value already held
# in @escape: the ordinal of its first character is masked with 0x9f and
# the result is re-encoded via encode_escape.
action slash_c_char {
|
724
|
-
@escape = encode_escape(@escape[0].ord & 0x9f)
|
725
|
-
}
|
726
|
-
|
727
|
-
# Applies the meta-character transformation to the value already held in
# @escape: bit 0x80 is set on the ordinal of its first character and the
# result is re-encoded via encode_escape.
action slash_m_char {
|
728
|
-
@escape = encode_escape(@escape[0].ord | 0x80)
|
729
|
-
}
|
730
|
-
|
731
|
-
maybe_escaped_char = (
|
732
|
-
'\\' c_any %unescape_char
|
733
|
-
| ( c_any - [\\] ) %read_post_meta_or_ctrl_char
|
734
|
-
);
|
735
|
-
|
736
|
-
maybe_escaped_ctrl_char = ( # why?!
|
737
|
-
'\\' c_any %unescape_char %slash_c_char
|
738
|
-
| '?' % { @escape = "\x7f" }
|
739
|
-
| ( c_any - [\\?] ) %read_post_meta_or_ctrl_char %slash_c_char
|
740
|
-
);
|
741
|
-
|
742
|
-
escape = (
|
743
|
-
# \377
|
744
|
-
[0-7]{1,3}
|
745
|
-
% { @escape = encode_escape(tok(@escape_s, p).to_i(8) % 0x100) }
|
746
|
-
|
747
|
-
# \xff
|
748
|
-
| 'x' xdigit{1,2}
|
749
|
-
% { @escape = encode_escape(tok(@escape_s + 1, p).to_i(16)) }
|
750
|
-
|
751
|
-
# %q[\x]
|
752
|
-
| 'x' ( c_any - xdigit )
|
753
|
-
% {
|
754
|
-
diagnostic :fatal, :invalid_hex_escape, nil, range(@escape_s - 1, p + 2)
|
755
|
-
}
|
756
|
-
|
757
|
-
# \u263a
|
758
|
-
| 'u' xdigit{4}
|
759
|
-
% { @escape = tok(@escape_s + 1, p).to_i(16).chr(Encoding::UTF_8) }
|
760
|
-
|
761
|
-
# \u123
|
762
|
-
| 'u' xdigit{0,3}
|
763
|
-
% {
|
764
|
-
diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
|
765
|
-
}
|
766
|
-
|
767
|
-
# u{not hex} or u{}
|
768
|
-
| 'u{' ( c_any - xdigit - [ \t}] )* '}'
|
769
|
-
% {
|
770
|
-
diagnostic :fatal, :invalid_unicode_escape, nil, range(@escape_s - 1, p)
|
771
|
-
}
|
772
|
-
|
773
|
-
# \u{ \t 123 \t 456 \t\t }
|
774
|
-
| 'u{' [ \t]* ( xdigit{1,6} [ \t]+ )*
|
775
|
-
(
|
776
|
-
( xdigit{1,6} [ \t]* '}'
|
777
|
-
%unicode_points
|
778
|
-
)
|
779
|
-
|
|
780
|
-
( xdigit* ( c_any - xdigit - [ \t}] )+ '}'
|
781
|
-
| ( c_any - [ \t}] )* c_eof
|
782
|
-
| xdigit{7,}
|
783
|
-
) % {
|
784
|
-
diagnostic :fatal, :unterminated_unicode, nil, range(p - 1, p)
|
785
|
-
}
|
786
|
-
)
|
787
|
-
|
788
|
-
# \C-\a \cx
|
789
|
-
| ( 'C-' | 'c' ) escaped_nl?
|
790
|
-
maybe_escaped_ctrl_char
|
791
|
-
|
792
|
-
# \M-a
|
793
|
-
| 'M-' escaped_nl?
|
794
|
-
maybe_escaped_char
|
795
|
-
%slash_m_char
|
796
|
-
|
797
|
-
# \C-\M-f \M-\cf \c\M-f
|
798
|
-
| ( ( 'C-' | 'c' ) escaped_nl? '\\M-'
|
799
|
-
| 'M-\\' escaped_nl? ( 'C-' | 'c' ) ) escaped_nl?
|
800
|
-
maybe_escaped_ctrl_char
|
801
|
-
%slash_m_char
|
802
|
-
|
803
|
-
| 'C' c_any %invalid_complex_escape
|
804
|
-
| 'M' c_any %invalid_complex_escape
|
805
|
-
| ( 'M-\\C' | 'C-\\M' ) c_any %invalid_complex_escape
|
806
|
-
|
807
|
-
| ( c_any - [0-7xuCMc] ) %unescape_char
|
808
|
-
|
809
|
-
| c_eof % {
|
810
|
-
diagnostic :fatal, :escape_eof, nil, range(p - 1, p)
|
811
|
-
}
|
812
|
-
);
|
813
|
-
|
814
|
-
# Use rules in form of `e_bs escape' when you need to parse a sequence.
|
815
|
-
# Matches a backslash. On leaving it, records the current position in
# @escape_s (later read as the position of the first character after the
# backslash) and clears any previously decoded value in @escape.
e_bs = '\\' % {
|
816
|
-
@escape_s = p
|
817
|
-
@escape = nil
|
818
|
-
};
|
819
|
-
|
820
|
-
#
|
821
|
-
# === STRING AND HEREDOC PARSING ===
|
822
|
-
#
|
823
|
-
|
824
|
-
# Heredoc parsing is quite a complex topic. First, consider that heredocs
|
825
|
-
# can be arbitrarily nested. For example:
|
826
|
-
#
|
827
|
-
# puts <<CODE
|
828
|
-
# the result is: #{<<RESULT.inspect
|
829
|
-
# i am a heredoc
|
830
|
-
# RESULT
|
831
|
-
# }
|
832
|
-
# CODE
|
833
|
-
#
|
834
|
-
# which, incidentally, evaluates to:
|
835
|
-
#
|
836
|
-
# the result is: " i am a heredoc\n"
|
837
|
-
#
|
838
|
-
# To parse them, lexer refers to two kinds (remember, nested heredocs)
|
839
|
-
# of positions in the input stream, namely heredoc_e
|
840
|
-
# (HEREDOC declaration End) and @herebody_s (HEREdoc BODY line Start).
|
841
|
-
#
|
842
|
-
# heredoc_e is simply contained inside the corresponding Literal, and
|
843
|
-
# when the heredoc is closed, the lexing is restarted from that position.
|
844
|
-
#
|
845
|
-
# @herebody_s is considerably more complex. First, @herebody_s changes after each
|
846
|
-
# heredoc line is lexed. This way, at '\n' tok(@herebody_s, @te) always
|
847
|
-
# contains the current line, and also when a heredoc is started, @herebody_s
|
848
|
-
# contains the position from which the heredoc will be lexed.
|
849
|
-
#
|
850
|
-
# Second, as (insanity) there are nested heredocs, we need to maintain a
|
851
|
-
# stack of these positions. Each time #push_literal is called, it saves current
|
852
|
-
# @herebody_s to literal.saved_herebody_s, and after an interpolation (possibly
|
853
|
-
# containing other heredocs) is closed, the previous value is restored.
|
854
|
-
|
855
|
-
e_heredoc_nl = c_nl % {
|
856
|
-
# After every heredoc was parsed, @herebody_s contains the
|
857
|
-
# position of next token after all heredocs.
|
858
|
-
if @herebody_s
|
859
|
-
p = @herebody_s
|
860
|
-
@herebody_s = nil
|
861
|
-
end
|
862
|
-
};
|
863
|
-
|
864
|
-
action extend_string {
|
865
|
-
string = tok
|
866
|
-
|
867
|
-
# tLABEL_END is only possible in non-cond context on >= 2.2
|
868
|
-
if @version >= 22 && !@cond.active?
|
869
|
-
lookahead = @source_buffer.slice(@te...@te+2)
|
870
|
-
end
|
871
|
-
|
872
|
-
current_literal = literal
|
873
|
-
if !current_literal.heredoc? &&
|
874
|
-
(token = current_literal.nest_and_try_closing(string, @ts, @te, lookahead))
|
875
|
-
if token[0] == :tLABEL_END
|
876
|
-
p += 1
|
877
|
-
pop_literal
|
878
|
-
fnext expr_labelarg;
|
879
|
-
else
|
880
|
-
fnext *pop_literal;
|
881
|
-
end
|
882
|
-
fbreak;
|
883
|
-
else
|
884
|
-
current_literal.extend_string(string, @ts, @te)
|
885
|
-
end
|
886
|
-
}
|
887
|
-
|
888
|
-
action extend_string_escaped {
|
889
|
-
current_literal = literal
|
890
|
-
# Get the first character after the backslash.
|
891
|
-
escaped_char = @source_buffer.slice(@escape_s).chr
|
892
|
-
|
893
|
-
if current_literal.munge_escape? escaped_char
|
894
|
-
# If this particular literal uses this character as an opening
|
895
|
-
# or closing delimiter, it is an escape sequence for that
|
896
|
-
# particular character. Write it without the backslash.
|
897
|
-
|
898
|
-
if current_literal.regexp? && REGEXP_META_CHARACTERS.match(escaped_char)
|
899
|
-
# Regular expressions should include escaped delimiters in their
|
900
|
-
# escaped form, except when the escaped character is
|
901
|
-
# a closing delimiter but not a regexp metacharacter.
|
902
|
-
#
|
903
|
-
# The backslash itself cannot be used as a closing delimiter
|
904
|
-
# at the same time as an escape symbol, but it is always munged,
|
905
|
-
# so this branch also executes for the non-closing-delimiter case
|
906
|
-
# for the backslash.
|
907
|
-
current_literal.extend_string(tok, @ts, @te)
|
908
|
-
else
|
909
|
-
current_literal.extend_string(escaped_char, @ts, @te)
|
910
|
-
end
|
911
|
-
else
|
912
|
-
# It does not. So this is an actual escape sequence, yay!
|
913
|
-
if current_literal.squiggly_heredoc? && escaped_char == "\n".freeze
|
914
|
-
# Squiggly heredocs like
|
915
|
-
# <<~-HERE
|
916
|
-
# 1\
|
917
|
-
# 2
|
918
|
-
# HERE
|
919
|
-
# treat '\' as a line continuation, but still dedent the body, so the heredoc above becomes "12\n".
|
920
|
-
# This information is emitted as is, without escaping,
|
921
|
-
# later this escape sequence (\\\n) gets handled manually in the Lexer::Dedenter
|
922
|
-
current_literal.extend_string(tok, @ts, @te)
|
923
|
-
elsif current_literal.supports_line_continuation_via_slash? && escaped_char == "\n".freeze
|
924
|
-
# Heredocs, regexp and a few other types of literals support line
|
925
|
-
# continuation via \\\n sequence. The code like
|
926
|
-
# "a\
|
927
|
-
# b"
|
928
|
-
# must be parsed as "ab"
|
929
|
-
current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
|
930
|
-
elsif current_literal.regexp?
|
931
|
-
# Regular expressions should include escape sequences in their
|
932
|
-
# escaped form. On the other hand, escaped newlines are removed (in cases like "\\C-\\\n\\M-x")
|
933
|
-
current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
|
934
|
-
else
|
935
|
-
current_literal.extend_string(@escape || tok, @ts, @te)
|
936
|
-
end
|
937
|
-
end
|
938
|
-
}
|
939
|
-
|
940
|
-
# Extend a string with a newline or an EOF character.
|
941
|
-
# As heredoc closing line can immediately precede EOF, this action
|
942
|
-
# has to handle such case specially.
|
943
|
-
action extend_string_eol {
|
944
|
-
current_literal = literal
|
945
|
-
if @te == pe
|
946
|
-
diagnostic :fatal, :string_eof, nil,
|
947
|
-
range(current_literal.str_s, current_literal.str_s + 1)
|
948
|
-
end
|
949
|
-
|
950
|
-
if current_literal.heredoc?
|
951
|
-
line = tok(@herebody_s, @ts).gsub(/\r+$/, ''.freeze)
|
952
|
-
|
953
|
-
if version?(18, 19, 20)
|
954
|
-
# See ruby:c48b4209c
|
955
|
-
line = line.gsub(/\r.*$/, ''.freeze)
|
956
|
-
end
|
957
|
-
|
958
|
-
# Try ending the heredoc with the complete most recently
|
959
|
-
# scanned line. @herebody_s always refers to the start of such line.
|
960
|
-
if current_literal.nest_and_try_closing(line, @herebody_s, @ts)
|
961
|
-
# Adjust @herebody_s to point to the next line.
|
962
|
-
@herebody_s = @te
|
963
|
-
|
964
|
-
# Continue regular lexing after the heredoc reference (<<END).
|
965
|
-
p = current_literal.heredoc_e - 1
|
966
|
-
fnext *pop_literal; fbreak;
|
967
|
-
else
|
968
|
-
# Calculate indentation level for <<~HEREDOCs.
|
969
|
-
current_literal.infer_indent_level(line)
|
970
|
-
|
971
|
-
# Ditto.
|
972
|
-
@herebody_s = @te
|
973
|
-
end
|
974
|
-
else
|
975
|
-
# Try ending the literal with a newline.
|
976
|
-
if current_literal.nest_and_try_closing(tok, @ts, @te)
|
977
|
-
fnext *pop_literal; fbreak;
|
978
|
-
end
|
979
|
-
|
980
|
-
if @herebody_s
|
981
|
-
# This is a regular literal intertwined with a heredoc. Like:
|
982
|
-
#
|
983
|
-
# p <<-foo+"1
|
984
|
-
# bar
|
985
|
-
# foo
|
986
|
-
# 2"
|
987
|
-
#
|
988
|
-
# which, incidentally, evaluates to "bar\n1\n2".
|
989
|
-
p = @herebody_s - 1
|
990
|
-
@herebody_s = nil
|
991
|
-
end
|
992
|
-
end
|
993
|
-
|
994
|
-
if current_literal.words? && !eof_codepoint?(@source_pts[p])
|
995
|
-
current_literal.extend_space @ts, @te
|
996
|
-
else
|
997
|
-
# A literal newline is appended if the heredoc was _not_ closed
|
998
|
-
# this time (see fbreak above). See also Literal#nest_and_try_closing
|
999
|
-
# for rationale of calling #flush_string here.
|
1000
|
-
current_literal.extend_string tok, @ts, @te
|
1001
|
-
current_literal.flush_string
|
1002
|
-
end
|
1003
|
-
}
|
1004
|
-
|
1005
|
-
# Forwards the matched span (@ts...@te) to the current literal's
# #extend_space. Used by the word-list scanners for runs of `c_space`.
action extend_string_space {
|
1006
|
-
literal.extend_space @ts, @te
|
1007
|
-
}
|
1008
|
-
|
1009
|
-
#
|
1010
|
-
# === INTERPOLATION PARSING ===
|
1011
|
-
#
|
1012
|
-
|
1013
|
-
# Interpolations with immediate variable names simply call into
|
1014
|
-
# the corresponding machine.
|
1015
|
-
|
1016
|
-
interp_var = '#' ( global_var | class_var_v | instance_var_v );
|
1017
|
-
|
1018
|
-
# Handles an `interp_var` match ('#' followed by a global, class or
# instance variable). Flushes the string collected so far, calls
# #extend_content on the current literal, and emits tSTRING_DVAR for the
# single character at @ts. Then it moves `p` back to @ts and calls into
# expr_variable, presumably so that the variable itself is lexed starting
# right after the '#'.
action extend_interp_var {
|
1019
|
-
current_literal = literal
|
1020
|
-
current_literal.flush_string
|
1021
|
-
current_literal.extend_content
|
1022
|
-
|
1023
|
-
emit(:tSTRING_DVAR, nil, @ts, @ts + 1)
|
1024
|
-
|
1025
|
-
p = @ts
|
1026
|
-
fcall expr_variable;
|
1027
|
-
}
|
1028
|
-
|
1029
|
-
# Special case for Ruby >= 2.7
|
1030
|
-
# If interpolated instance/class variable starts with a digit we parse it as a plain substring
|
1031
|
-
# However, "#$1" is still a regular interpolation
|
1032
|
-
interp_digit_var = '#' ('@' | '@@') digit c_alpha*;
|
1033
|
-
|
1034
|
-
# Handles an `interp_digit_var` match, i.e. '#@' or '#@@' followed by a digit.
# For versions >= 27 the matched text is appended to the current literal
# as plain string content. For older versions an error diagnostic is
# reported instead (:cvar_name when the token starts with '#@@',
# :ivar_name otherwise), covering the token without its leading '#'.
action extend_interp_digit_var {
|
1035
|
-
if @version >= 27
|
1036
|
-
literal.extend_string(tok, @ts, @te)
|
1037
|
-
else
|
1038
|
-
message = tok.start_with?('#@@') ? :cvar_name : :ivar_name
|
1039
|
-
diagnostic :error, message, { :name => tok(@ts + 1, @te) }, range(@ts + 1, @te)
|
1040
|
-
end
|
1041
|
-
}
|
1042
|
-
|
1043
|
-
# Interpolations with code blocks must match nested curly braces, as
|
1044
|
-
# interpolation ending is ambiguous with a block ending. So, every
|
1045
|
-
# opening and closing brace should be matched with e_[lr]brace rules,
|
1046
|
-
# which automatically perform the counting.
|
1047
|
-
#
|
1048
|
-
# Note that interpolations can themselves be nested, so brace balance
|
1049
|
-
# is tied to the innermost literal.
|
1050
|
-
#
|
1051
|
-
# Also note that literals themselves should not use e_[lr]brace rules
|
1052
|
-
# when matching their opening and closing delimiters, as the amount of
|
1053
|
-
# braces inside the characters of a string literal is independent.
|
1054
|
-
|
1055
|
-
interp_code = '#{';
|
1056
|
-
|
1057
|
-
e_lbrace = '{' % {
|
1058
|
-
@cond.push(false); @cmdarg.push(false)
|
1059
|
-
|
1060
|
-
current_literal = literal
|
1061
|
-
if current_literal
|
1062
|
-
current_literal.start_interp_brace
|
1063
|
-
end
|
1064
|
-
};
|
1065
|
-
|
1066
|
-
e_rbrace = '}' % {
|
1067
|
-
current_literal = literal
|
1068
|
-
if current_literal
|
1069
|
-
if current_literal.end_interp_brace_and_try_closing
|
1070
|
-
if version?(18, 19)
|
1071
|
-
emit(:tRCURLY, '}'.freeze, p - 1, p)
|
1072
|
-
@cond.lexpop
|
1073
|
-
@cmdarg.lexpop
|
1074
|
-
else
|
1075
|
-
emit(:tSTRING_DEND, '}'.freeze, p - 1, p)
|
1076
|
-
end
|
1077
|
-
|
1078
|
-
if current_literal.saved_herebody_s
|
1079
|
-
@herebody_s = current_literal.saved_herebody_s
|
1080
|
-
end
|
1081
|
-
|
1082
|
-
|
1083
|
-
fhold;
|
1084
|
-
fnext *next_state_for_literal(current_literal);
|
1085
|
-
fbreak;
|
1086
|
-
end
|
1087
|
-
end
|
1088
|
-
|
1089
|
-
@paren_nest -= 1
|
1090
|
-
};
|
1091
|
-
|
1092
|
-
action extend_interp_code {
|
1093
|
-
current_literal = literal
|
1094
|
-
current_literal.flush_string
|
1095
|
-
current_literal.extend_content
|
1096
|
-
|
1097
|
-
emit(:tSTRING_DBEG, '#{'.freeze)
|
1098
|
-
|
1099
|
-
if current_literal.heredoc?
|
1100
|
-
current_literal.saved_herebody_s = @herebody_s
|
1101
|
-
@herebody_s = nil
|
1102
|
-
end
|
1103
|
-
|
1104
|
-
current_literal.start_interp_brace
|
1105
|
-
@command_start = true
|
1106
|
-
fnext expr_value;
|
1107
|
-
fbreak;
|
1108
|
-
}
|
1109
|
-
|
1110
|
-
# Actual string parsers are simply combined from the primitives defined
|
1111
|
-
# above.
|
1112
|
-
|
1113
|
-
interp_words := |*
|
1114
|
-
interp_code => extend_interp_code;
|
1115
|
-
interp_digit_var => extend_interp_digit_var;
|
1116
|
-
interp_var => extend_interp_var;
|
1117
|
-
e_bs escape => extend_string_escaped;
|
1118
|
-
c_space+ => extend_string_space;
|
1119
|
-
c_eol => extend_string_eol;
|
1120
|
-
c_any => extend_string;
|
1121
|
-
*|;
|
1122
|
-
|
1123
|
-
interp_string := |*
|
1124
|
-
interp_code => extend_interp_code;
|
1125
|
-
interp_digit_var => extend_interp_digit_var;
|
1126
|
-
interp_var => extend_interp_var;
|
1127
|
-
e_bs escape => extend_string_escaped;
|
1128
|
-
c_eol => extend_string_eol;
|
1129
|
-
c_any => extend_string;
|
1130
|
-
*|;
|
1131
|
-
|
1132
|
-
plain_words := |*
|
1133
|
-
e_bs c_any => extend_string_escaped;
|
1134
|
-
c_space+ => extend_string_space;
|
1135
|
-
c_eol => extend_string_eol;
|
1136
|
-
c_any => extend_string;
|
1137
|
-
*|;
|
1138
|
-
|
1139
|
-
plain_string := |*
|
1140
|
-
'\\' c_nl => extend_string_eol;
|
1141
|
-
e_bs c_any => extend_string_escaped;
|
1142
|
-
c_eol => extend_string_eol;
|
1143
|
-
c_any => extend_string;
|
1144
|
-
*|;
|
1145
|
-
|
1146
|
-
interp_backslash_delimited := |*
|
1147
|
-
interp_code => extend_interp_code;
|
1148
|
-
interp_digit_var => extend_interp_digit_var;
|
1149
|
-
interp_var => extend_interp_var;
|
1150
|
-
c_eol => extend_string_eol;
|
1151
|
-
c_any => extend_string;
|
1152
|
-
*|;
|
1153
|
-
|
1154
|
-
plain_backslash_delimited := |*
|
1155
|
-
c_eol => extend_string_eol;
|
1156
|
-
c_any => extend_string;
|
1157
|
-
*|;
|
1158
|
-
|
1159
|
-
interp_backslash_delimited_words := |*
|
1160
|
-
interp_code => extend_interp_code;
|
1161
|
-
interp_digit_var => extend_interp_digit_var;
|
1162
|
-
interp_var => extend_interp_var;
|
1163
|
-
c_space+ => extend_string_space;
|
1164
|
-
c_eol => extend_string_eol;
|
1165
|
-
c_any => extend_string;
|
1166
|
-
*|;
|
1167
|
-
|
1168
|
-
plain_backslash_delimited_words := |*
|
1169
|
-
c_space+ => extend_string_space;
|
1170
|
-
c_eol => extend_string_eol;
|
1171
|
-
c_any => extend_string;
|
1172
|
-
*|;
|
1173
|
-
|
1174
|
-
regexp_modifiers := |*
|
1175
|
-
[A-Za-z]+
|
1176
|
-
=> {
|
1177
|
-
unknown_options = tok.scan(/[^imxouesn]/)
|
1178
|
-
if unknown_options.any?
|
1179
|
-
diagnostic :error, :regexp_options,
|
1180
|
-
{ :options => unknown_options.join }
|
1181
|
-
end
|
1182
|
-
|
1183
|
-
emit(:tREGEXP_OPT)
|
1184
|
-
fnext expr_end;
|
1185
|
-
fbreak;
|
1186
|
-
};
|
1187
|
-
|
1188
|
-
any
|
1189
|
-
=> {
|
1190
|
-
emit(:tREGEXP_OPT, tok(@ts, @te - 1), @ts, @te - 1)
|
1191
|
-
fhold;
|
1192
|
-
fgoto expr_end;
|
1193
|
-
};
|
1194
|
-
*|;
|
1195
|
-
|
1196
|
-
#
|
1197
|
-
# === WHITESPACE HANDLING ===
|
1198
|
-
#
|
1199
|
-
|
1200
|
-
# Various contexts in Ruby allow various kinds of whitespace
|
1201
|
-
# to be used. They are grouped to clarify the lexing machines
|
1202
|
-
# and ease collection of comments.
|
1203
|
-
|
1204
|
-
# A line of code with inline #comment at end is always equivalent
|
1205
|
-
# to a line of code ending with just a newline, so an inline
|
1206
|
-
# comment is deemed equivalent to non-newline whitespace
|
1207
|
-
# (c_space character class).
|
1208
|
-
|
1209
|
-
w_space =
|
1210
|
-
c_space+
|
1211
|
-
| '\\' e_heredoc_nl
|
1212
|
-
;
|
1213
|
-
|
1214
|
-
w_comment =
|
1215
|
-
'#' %{ @sharp_s = p - 1 }
|
1216
|
-
# The (p == pe) condition compensates for added "\0" and
|
1217
|
-
# the way Ragel handles EOF.
|
1218
|
-
c_line* %{ emit_comment(@sharp_s, p == pe ? p - 2 : p) }
|
1219
|
-
;
|
1220
|
-
|
1221
|
-
w_space_comment =
|
1222
|
-
w_space
|
1223
|
-
| w_comment
|
1224
|
-
;
|
1225
|
-
|
1226
|
-
# A newline in non-literal context always interoperates with
|
1227
|
-
# here document logic and can always be escaped by a backslash,
|
1228
|
-
# still interoperating with here document logic in the same way,
|
1229
|
-
# yet being invisible to anything else.
|
1230
|
-
#
|
1231
|
-
# To demonstrate:
|
1232
|
-
#
|
1233
|
-
# foo = <<FOO \
|
1234
|
-
# bar
|
1235
|
-
# FOO
|
1236
|
-
# + 2
|
1237
|
-
#
|
1238
|
-
# is equivalent to `foo = "bar\n" + 2`.
|
1239
|
-
|
1240
|
-
w_newline =
|
1241
|
-
e_heredoc_nl;
|
1242
|
-
|
1243
|
-
w_any =
|
1244
|
-
w_space
|
1245
|
-
| w_comment
|
1246
|
-
| w_newline
|
1247
|
-
;
|
1248
|
-
|
1249
|
-
|
1250
|
-
#
|
1251
|
-
# === EXPRESSION PARSING ===
|
1252
|
-
#
|
1253
|
-
|
1254
|
-
# These rules implement a form of manually defined lookahead.
|
1255
|
-
# The default longest-match scanning does not work here due
|
1256
|
-
# to sheer ambiguity.
|
1257
|
-
|
1258
|
-
ambiguous_fid_suffix = # actual parsed
|
1259
|
-
[?!] %{ tm = p } | # a? a?
|
1260
|
-
[?!]'=' %{ tm = p - 2 } # a!=b a != b
|
1261
|
-
;
|
1262
|
-
|
1263
|
-
ambiguous_ident_suffix = # actual parsed
|
1264
|
-
ambiguous_fid_suffix |
|
1265
|
-
'=' %{ tm = p } | # a= a=
|
1266
|
-
'==' %{ tm = p - 2 } | # a==b a == b
|
1267
|
-
'=~' %{ tm = p - 2 } | # a=~b a =~ b
|
1268
|
-
'=>' %{ tm = p - 2 } | # a=>b a => b
|
1269
|
-
'===' %{ tm = p - 3 } # a===b a === b
|
1270
|
-
;
|
1271
|
-
|
1272
|
-
ambiguous_symbol_suffix = # actual parsed
|
1273
|
-
ambiguous_ident_suffix |
|
1274
|
-
'==>' %{ tm = p - 2 } # :a==>b :a= => b
|
1275
|
-
;
|
1276
|
-
|
1277
|
-
# Ambiguous with 1.9 hash labels.
|
1278
|
-
ambiguous_const_suffix = # actual parsed
|
1279
|
-
'::' %{ tm = p - 2 } # A::B A :: B
|
1280
|
-
;
|
1281
|
-
|
1282
|
-
# Resolving kDO/kDO_COND/kDO_BLOCK ambiguity requires embedding
|
1283
|
-
# @cond/@cmdarg-related code to e_lbrack, e_lparen and e_lbrace.
|
1284
|
-
|
1285
|
-
# Matches '['. On leaving it, pushes `false` onto both the @cond and
# @cmdarg stacks and increments the @paren_nest counter.
e_lbrack = '[' % {
|
1286
|
-
@cond.push(false); @cmdarg.push(false)
|
1287
|
-
|
1288
|
-
@paren_nest += 1
|
1289
|
-
};
|
1290
|
-
|
1291
|
-
# Matches ']'. On leaving it, decrements the @paren_nest counter.
e_rbrack = ']' % {
|
1292
|
-
@paren_nest -= 1
|
1293
|
-
};
|
1294
|
-
|
1295
|
-
# Ruby 1.9 lambdas require parentheses counting in order to
|
1296
|
-
# emit correct opening kDO/tLBRACE.
|
1297
|
-
|
1298
|
-
# Matches '('. On leaving it, pushes `false` onto both the @cond and
# @cmdarg stacks and increments the @paren_nest counter. In 1.8 mode it
# additionally sets @command_start.
e_lparen = '(' % {
|
1299
|
-
@cond.push(false); @cmdarg.push(false)
|
1300
|
-
|
1301
|
-
@paren_nest += 1
|
1302
|
-
|
1303
|
-
if version?(18)
|
1304
|
-
@command_start = true
|
1305
|
-
end
|
1306
|
-
};
|
1307
|
-
|
1308
|
-
# Matches ')'. On leaving it, decrements the @paren_nest counter.
e_rparen = ')' % {
|
1309
|
-
@paren_nest -= 1
|
1310
|
-
};
|
1311
|
-
|
1312
|
-
# Ruby is context-sensitive wrt/ local identifiers.
|
1313
|
-
# Emits tIDENTIFIER for the current token and picks the next lexer state.
# If a static environment is present and it declares the token, the next
# state is expr_endfn. Otherwise the state is chosen by
# arg_or_cmdarg(cmd_state).
action local_ident {
|
1314
|
-
emit(:tIDENTIFIER)
|
1315
|
-
|
1316
|
-
if !@static_env.nil? && @static_env.declared?(tok)
|
1317
|
-
fnext expr_endfn; fbreak;
|
1318
|
-
else
|
1319
|
-
fnext *arg_or_cmdarg(cmd_state); fbreak;
|
1320
|
-
end
|
1321
|
-
}
|
1322
|
-
|
1323
|
-
# Variable lexing code is accessed from both expressions and
|
1324
|
-
# string interpolation related code.
|
1325
|
-
#
|
1326
|
-
expr_variable := |*
|
1327
|
-
global_var
|
1328
|
-
=> {
|
1329
|
-
if tok =~ /^\$([1-9][0-9]*)$/
|
1330
|
-
emit(:tNTH_REF, tok(@ts + 1).to_i)
|
1331
|
-
elsif tok =~ /^\$([&`'+])$/
|
1332
|
-
emit(:tBACK_REF)
|
1333
|
-
else
|
1334
|
-
emit(:tGVAR)
|
1335
|
-
end
|
1336
|
-
|
1337
|
-
fnext *stack_pop; fbreak;
|
1338
|
-
};
|
1339
|
-
|
1340
|
-
class_var_v
|
1341
|
-
=> {
|
1342
|
-
if tok =~ /^@@[0-9]/
|
1343
|
-
diagnostic :error, :cvar_name, { :name => tok }
|
1344
|
-
end
|
1345
|
-
|
1346
|
-
emit(:tCVAR)
|
1347
|
-
fnext *stack_pop; fbreak;
|
1348
|
-
};
|
1349
|
-
|
1350
|
-
instance_var_v
|
1351
|
-
=> {
|
1352
|
-
if tok =~ /^@[0-9]/
|
1353
|
-
diagnostic :error, :ivar_name, { :name => tok }
|
1354
|
-
end
|
1355
|
-
|
1356
|
-
emit(:tIVAR)
|
1357
|
-
fnext *stack_pop; fbreak;
|
1358
|
-
};
|
1359
|
-
*|;
|
1360
|
-
|
1361
|
-
# Literal function name in definition (e.g. `def class`).
|
1362
|
-
# Keywords are returned as their respective tokens; this is used
|
1363
|
-
# to support singleton def `def self.foo`. Global variables are
|
1364
|
-
# returned as `tGVAR`; this is used in global variable alias
|
1365
|
-
# statements `alias $a $b`. Symbols are returned verbatim; this
|
1366
|
-
# is used in `alias :a :"b#{foo}"` and `undef :a`.
|
1367
|
-
#
|
1368
|
-
# Transitions to `expr_endfn` afterwards.
|
1369
|
-
#
|
1370
|
-
expr_fname := |*
|
1371
|
-
keyword
|
1372
|
-
=> { emit_table(KEYWORDS_BEGIN);
|
1373
|
-
fnext expr_endfn; fbreak; };
|
1374
|
-
|
1375
|
-
constant
|
1376
|
-
=> { emit(:tCONSTANT)
|
1377
|
-
fnext expr_endfn; fbreak; };
|
1378
|
-
|
1379
|
-
bareword [?=!]?
|
1380
|
-
=> { emit(:tIDENTIFIER)
|
1381
|
-
fnext expr_endfn; fbreak; };
|
1382
|
-
|
1383
|
-
global_var
|
1384
|
-
=> { p = @ts - 1
|
1385
|
-
fnext expr_end; fcall expr_variable; };
|
1386
|
-
|
1387
|
-
# If the handling was to be delegated to expr_end,
|
1388
|
-
# these cases would transition to something other than
|
1389
|
-
# expr_endfn, which is incorrect.
|
1390
|
-
operator_fname |
|
1391
|
-
operator_arithmetic |
|
1392
|
-
operator_rest
|
1393
|
-
=> { emit_table(PUNCTUATION)
|
1394
|
-
fnext expr_endfn; fbreak; };
|
1395
|
-
|
1396
|
-
'::'
|
1397
|
-
=> { fhold; fhold; fgoto expr_end; };
|
1398
|
-
|
1399
|
-
':'
|
1400
|
-
=> { fhold; fgoto expr_beg; };
|
1401
|
-
|
1402
|
-
'%s' c_any
|
1403
|
-
=> {
|
1404
|
-
if version?(23)
|
1405
|
-
type, delimiter = tok[0..-2], tok[-1].chr
|
1406
|
-
fgoto *push_literal(type, delimiter, @ts);
|
1407
|
-
else
|
1408
|
-
p = @ts - 1
|
1409
|
-
fgoto expr_end;
|
1410
|
-
end
|
1411
|
-
};
|
1412
|
-
|
1413
|
-
w_any;
|
1414
|
-
|
1415
|
-
c_any
|
1416
|
-
=> { fhold; fgoto expr_end; };
|
1417
|
-
|
1418
|
-
c_eof => do_eof;
|
1419
|
-
*|;
|
1420
|
-
|
1421
|
-
# After literal function name in definition. Behaves like `expr_end`,
|
1422
|
-
# but allows a tLABEL.
|
1423
|
-
#
|
1424
|
-
# Transitions to `expr_end` afterwards.
|
1425
|
-
#
|
1426
|
-
expr_endfn := |*
|
1427
|
-
label ( any - ':' )
|
1428
|
-
=> { emit(:tLABEL, tok(@ts, @te - 2), @ts, @te - 1)
|
1429
|
-
fhold; fnext expr_labelarg; fbreak; };
|
1430
|
-
|
1431
|
-
w_space_comment;
|
1432
|
-
|
1433
|
-
c_any
|
1434
|
-
=> { fhold; fgoto expr_end; };
|
1435
|
-
|
1436
|
-
c_eof => do_eof;
|
1437
|
-
*|;
|
1438
|
-
|
1439
|
-
# Literal function name in method call (e.g. `a.class`).
|
1440
|
-
#
|
1441
|
-
# Transitions to `expr_arg` afterwards.
|
1442
|
-
#
|
1443
|
-
expr_dot := |*
|
1444
|
-
constant
|
1445
|
-
=> { emit(:tCONSTANT)
|
1446
|
-
fnext *arg_or_cmdarg(cmd_state); fbreak; };
|
1447
|
-
|
1448
|
-
call_or_var
|
1449
|
-
=> { emit(:tIDENTIFIER)
|
1450
|
-
fnext *arg_or_cmdarg(cmd_state); fbreak; };
|
1451
|
-
|
1452
|
-
bareword ambiguous_fid_suffix
|
1453
|
-
=> { emit(:tFID, tok(@ts, tm), @ts, tm)
|
1454
|
-
fnext *arg_or_cmdarg(cmd_state); p = tm - 1; fbreak; };
|
1455
|
-
|
1456
|
-
# See the comment in `expr_fname`.
|
1457
|
-
operator_fname |
|
1458
|
-
operator_arithmetic |
|
1459
|
-
operator_rest
|
1460
|
-
=> { emit_table(PUNCTUATION)
|
1461
|
-
fnext expr_arg; fbreak; };
|
1462
|
-
|
1463
|
-
w_any;
|
1464
|
-
|
1465
|
-
c_any
|
1466
|
-
=> { fhold; fgoto expr_end; };
|
1467
|
-
|
1468
|
-
c_eof => do_eof;
|
1469
|
-
*|;
|
1470
|
-
|
1471
|
-
# The previous token emitted was a `tIDENTIFIER` or `tFID`; no space
|
1472
|
-
# is consumed; the current expression is a command or method call.
|
1473
|
-
#
|
1474
|
-
expr_arg := |*
|
1475
|
-
#
|
1476
|
-
# COMMAND MODE SPECIFIC TOKENS
|
1477
|
-
#
|
1478
|
-
|
1479
|
-
# cmd (1 + 2)
|
1480
|
-
# See below the rationale about expr_endarg.
|
1481
|
-
w_space+ e_lparen
|
1482
|
-
=> {
|
1483
|
-
if version?(18)
|
1484
|
-
emit(:tLPAREN2, '('.freeze, @te - 1, @te)
|
1485
|
-
fnext expr_value; fbreak;
|
1486
|
-
else
|
1487
|
-
emit(:tLPAREN_ARG, '('.freeze, @te - 1, @te)
|
1488
|
-
fnext expr_beg; fbreak;
|
1489
|
-
end
|
1490
|
-
};
|
1491
|
-
|
1492
|
-
# meth(1 + 2)
|
1493
|
-
# Regular method call.
|
1494
|
-
e_lparen
|
1495
|
-
=> { emit(:tLPAREN2, '('.freeze)
|
1496
|
-
fnext expr_beg; fbreak; };
|
1497
|
-
|
1498
|
-
# meth [...]
|
1499
|
-
# Array argument. Compare with indexing `meth[...]`.
|
1500
|
-
w_space+ e_lbrack
|
1501
|
-
=> { emit(:tLBRACK, '['.freeze, @te - 1, @te)
|
1502
|
-
fnext expr_beg; fbreak; };
|
1503
|
-
|
1504
|
-
# cmd {}
|
1505
|
-
# Command: method call without parentheses.
|
1506
|
-
w_space* e_lbrace
|
1507
|
-
=> {
|
1508
|
-
if @lambda_stack.last == @paren_nest
|
1509
|
-
@lambda_stack.pop
|
1510
|
-
emit(:tLAMBEG, '{'.freeze, @te - 1, @te)
|
1511
|
-
else
|
1512
|
-
emit(:tLCURLY, '{'.freeze, @te - 1, @te)
|
1513
|
-
end
|
1514
|
-
@command_start = true
|
1515
|
-
@paren_nest += 1
|
1516
|
-
fnext expr_value; fbreak;
|
1517
|
-
};
|
1518
|
-
|
1519
|
-
#
|
1520
|
-
# AMBIGUOUS TOKENS RESOLVED VIA EXPR_BEG
|
1521
|
-
#
|
1522
|
-
|
1523
|
-
# a??
|
1524
|
-
# Ternary operator
|
1525
|
-
'?' c_space_nl
|
1526
|
-
=> {
|
1527
|
-
# Unlike expr_beg as invoked in the next rule, do not warn
|
1528
|
-
p = @ts - 1
|
1529
|
-
fgoto expr_end;
|
1530
|
-
};
|
1531
|
-
|
1532
|
-
# a ?b, a? ?
|
1533
|
-
# Character literal or ternary operator
|
1534
|
-
w_space* '?'
|
1535
|
-
=> { fhold; fgoto expr_beg; };
|
1536
|
-
|
1537
|
-
# a %{1}, a %[1] (but not "a %=1=" or "a % foo")
|
1538
|
-
# a /foo/ (but not "a / foo" or "a /=foo")
|
1539
|
-
# a <<HEREDOC
|
1540
|
-
w_space+ %{ tm = p }
|
1541
|
-
( [%/] ( c_any - c_space_nl - '=' ) # /
|
1542
|
-
| '<<'
|
1543
|
-
)
|
1544
|
-
=> {
|
1545
|
-
if tok(tm, tm + 1) == '/'.freeze
|
1546
|
-
# Ambiguous regexp literal.
|
1547
|
-
diagnostic :warning, :ambiguous_literal, nil, range(tm, tm + 1)
|
1548
|
-
end
|
1549
|
-
|
1550
|
-
p = tm - 1
|
1551
|
-
fgoto expr_beg;
|
1552
|
-
};
|
1553
|
-
|
1554
|
-
# x *1
|
1555
|
-
# Ambiguous unary plus/minus, splat, kwsplat or block-pass.
|
1556
|
-
w_space+ %{ tm = p } ( '+' | '-' | '*' | '&' | '**' )
|
1557
|
-
=> {
|
1558
|
-
diagnostic :warning, :ambiguous_prefix, { :prefix => tok(tm, @te) },
|
1559
|
-
range(tm, @te)
|
1560
|
-
|
1561
|
-
p = tm - 1
|
1562
|
-
fgoto expr_beg;
|
1563
|
-
};
|
1564
|
-
|
1565
|
-
# x ::Foo
|
1566
|
-
# Ambiguous toplevel constant access.
|
1567
|
-
w_space+ '::'
|
1568
|
-
=> { fhold; fhold; fgoto expr_beg; };
|
1569
|
-
|
1570
|
-
# x:b
|
1571
|
-
# Symbol.
|
1572
|
-
w_space* ':'
|
1573
|
-
=> { fhold; fgoto expr_beg; };
|
1574
|
-
|
1575
|
-
w_space+ label
|
1576
|
-
=> { p = @ts - 1; fgoto expr_beg; };
|
1577
|
-
|
1578
|
-
#
|
1579
|
-
# AMBIGUOUS TOKENS RESOLVED VIA EXPR_END
|
1580
|
-
#
|
1581
|
-
|
1582
|
-
# a ? b
|
1583
|
-
# Ternary operator.
|
1584
|
-
w_space+ %{ tm = p } '?' c_space_nl
|
1585
|
-
=> { p = tm - 1; fgoto expr_end; };
|
1586
|
-
|
1587
|
-
# x + 1: Binary operator or operator-assignment.
|
1588
|
-
w_space* operator_arithmetic
|
1589
|
-
( '=' | c_space_nl )? |
|
1590
|
-
# x rescue y: Modifier keyword.
|
1591
|
-
w_space* keyword_modifier |
|
1592
|
-
# a &. b: Safe navigation operator.
|
1593
|
-
w_space* '&.' |
|
1594
|
-
# Miscellanea.
|
1595
|
-
w_space* punctuation_end
|
1596
|
-
=> {
|
1597
|
-
p = @ts - 1
|
1598
|
-
fgoto expr_end;
|
1599
|
-
};
|
1600
|
-
|
1601
|
-
w_space;
|
1602
|
-
|
1603
|
-
w_comment
|
1604
|
-
=> { fgoto expr_end; };
|
1605
|
-
|
1606
|
-
w_newline
|
1607
|
-
=> { fhold; fgoto expr_end; };
|
1608
|
-
|
1609
|
-
c_any
|
1610
|
-
=> { fhold; fgoto expr_beg; };
|
1611
|
-
|
1612
|
-
c_eof => do_eof;
|
1613
|
-
*|;
|
1614
|
-
|
1615
|
-
# The previous token was an identifier which was seen while in the
|
1616
|
-
# command mode (that is, the state at the beginning of #advance was
|
1617
|
-
# expr_value). This state is very similar to expr_arg, but disambiguates
|
1618
|
-
# two very rare and specific conditions:
|
1619
|
-
# * In 1.8 mode, "foo (lambda do end)".
|
1620
|
-
# * In 1.9+ mode, "f x: -> do foo do end end".
|
1621
|
-
expr_cmdarg := |*
|
1622
|
-
w_space+ e_lparen
|
1623
|
-
=> {
|
1624
|
-
emit(:tLPAREN_ARG, '('.freeze, @te - 1, @te)
|
1625
|
-
if version?(18)
|
1626
|
-
fnext expr_value; fbreak;
|
1627
|
-
else
|
1628
|
-
fnext expr_beg; fbreak;
|
1629
|
-
end
|
1630
|
-
};
|
1631
|
-
|
1632
|
-
w_space* 'do'
|
1633
|
-
=> {
|
1634
|
-
if @cond.active?
|
1635
|
-
emit(:kDO_COND, 'do'.freeze, @te - 2, @te)
|
1636
|
-
else
|
1637
|
-
emit(:kDO, 'do'.freeze, @te - 2, @te)
|
1638
|
-
end
|
1639
|
-
fnext expr_value; fbreak;
|
1640
|
-
};
|
1641
|
-
|
1642
|
-
c_any |
|
1643
|
-
# Disambiguate with the `do' rule above.
|
1644
|
-
w_space* bareword |
|
1645
|
-
w_space* label
|
1646
|
-
=> { p = @ts - 1
|
1647
|
-
fgoto expr_arg; };
|
1648
|
-
|
1649
|
-
c_eof => do_eof;
|
1650
|
-
*|;
|
1651
|
-
|
1652
|
-
# The rationale for this state is pretty complex. Normally, if an argument
|
1653
|
-
# is passed to a command and then there is a block (tLCURLY...tRCURLY),
|
1654
|
-
# the block is attached to the innermost argument (`f` in `m f {}`), or it
|
1655
|
-
# is a parse error (`m 1 {}`). But there is a special case for passing a single
|
1656
|
-
# primary expression grouped with parentheses: if you write `m (1) {}` or
|
1657
|
-
# (2.0 only) `m () {}`, then the block is attached to `m`.
|
1658
|
-
#
|
1659
|
-
# Thus, we recognize the opening `(` of a command (remember, a command is
|
1660
|
-
# a method call without parens) as a tLPAREN_ARG; then, in parser, we recognize
|
1661
|
-
# `tLPAREN_ARG expr rparen` as a `primary_expr` and before rparen, set the
|
1662
|
-
# lexer's state to `expr_endarg`, which makes it emit the possibly following
|
1663
|
-
# `{` as `tLBRACE_ARG`.
|
1664
|
-
#
|
1665
|
-
# The default post-`expr_endarg` state is `expr_end`, so this state also handles
|
1666
|
-
# `do` (as `kDO_BLOCK` in `expr_beg`).
|
1667
|
-
expr_endarg := |*
  # An opening brace is tLAMBEG when the innermost pending lambda was
  # recorded at the current paren nesting, and tLBRACE_ARG otherwise.
  e_lbrace
  => {
    # Decide before popping, the pop changes what the stack reports as last.
    opens_lambda_body = @lambda_stack.last == @paren_nest
    @lambda_stack.pop if opens_lambda_body

    emit(opens_lambda_body ? :tLAMBEG : :tLBRACE_ARG, '{'.freeze)

    @paren_nest += 1
    @command_start = true
    fnext expr_value; fbreak;
  };

  # `do` in this state is emitted through emit_do with its flag set to true.
  'do'
  => {
    emit_do(true)
    fnext expr_value; fbreak;
  };

  w_space_comment;

  # Anything else is handed back, unconsumed, to expr_end.
  c_any
  => { fhold; fgoto expr_end; };

  c_eof => do_eof;
*|;
|
1692
|
-
|
1693
|
-
# The rationale for this state is that several keywords accept a value
|
1694
|
-
# (i.e. should transition to `expr_beg`), do not accept it like a command
|
1695
|
-
# (i.e. not an `expr_arg`), and must behave like a statement, that is,
|
1696
|
-
# accept a modifier if/while/etc.
|
1697
|
-
#
|
1698
|
-
expr_mid := |*
  # A modifier keyword is emitted from the KEYWORDS table and is
  # followed by the beginning of an expression.
  keyword_modifier
  => {
    emit_table(KEYWORDS)
    fnext expr_beg; fbreak;
  };

  # Any other bareword is rescanned from its first character by expr_beg.
  bareword
  => {
    p = @ts - 1
    fgoto expr_beg;
  };

  w_space_comment;

  # A newline is left in the input for expr_end to handle.
  w_newline
  => { fhold; fgoto expr_end; };

  # Every other character is left in the input for expr_beg.
  c_any
  => { fhold; fgoto expr_beg; };

  c_eof => do_eof;
*|;
|
1716
|
-
|
1717
|
-
# Beginning of an expression.
|
1718
|
-
#
|
1719
|
-
# Don't fall through to this state from `c_any`; make sure to handle
|
1720
|
-
# `c_space* c_nl` and let `expr_end` handle the newline.
|
1721
|
-
# Otherwise code like `f\ndef x` gets glued together and the parser
|
1722
|
-
# explodes.
|
1723
|
-
#
|
1724
|
-
expr_beg := |*
|
1725
|
-
# +5, -5, - 5
|
1726
|
-
[+\-] w_any* [0-9]
|
1727
|
-
=> {
|
1728
|
-
emit(:tUNARY_NUM, tok(@ts, @ts + 1), @ts, @ts + 1)
|
1729
|
-
fhold; fnext expr_end; fbreak;
|
1730
|
-
};
|
1731
|
-
|
1732
|
-
# splat *a
|
1733
|
-
'*'
|
1734
|
-
=> { emit(:tSTAR, '*'.freeze)
|
1735
|
-
fbreak; };
|
1736
|
-
|
1737
|
-
#
|
1738
|
-
# STRING AND REGEXP LITERALS
|
1739
|
-
#
|
1740
|
-
|
1741
|
-
# /regexp/oui
|
1742
|
-
# /=/ (disambiguation with /=)
|
1743
|
-
'/' c_any
|
1744
|
-
=> {
|
1745
|
-
type = delimiter = tok[0].chr
|
1746
|
-
fhold; fgoto *push_literal(type, delimiter, @ts);
|
1747
|
-
};
|
1748
|
-
|
1749
|
-
# %<string>
|
1750
|
-
'%' ( any - [A-Za-z] )
|
1751
|
-
=> {
|
1752
|
-
type, delimiter = @source_buffer.slice(@ts).chr, tok[-1].chr
|
1753
|
-
fgoto *push_literal(type, delimiter, @ts);
|
1754
|
-
};
|
1755
|
-
|
1756
|
-
# %w(we are the people)
|
1757
|
-
'%' [A-Za-z]+ c_any
|
1758
|
-
=> {
|
1759
|
-
type, delimiter = tok[0..-2], tok[-1].chr
|
1760
|
-
fgoto *push_literal(type, delimiter, @ts);
|
1761
|
-
};
|
1762
|
-
|
1763
|
-
'%' c_eof
|
1764
|
-
=> {
|
1765
|
-
diagnostic :fatal, :string_eof, nil, range(@ts, @ts + 1)
|
1766
|
-
};
|
1767
|
-
|
1768
|
-
# Heredoc start.
# <<END  | <<'END'  | <<"END"  | <<`END`  |
# <<-END | <<-'END' | <<-"END" | <<-`END` |
# <<~END | <<~'END' | <<~"END" | <<~`END`
#
# heredoc_e is recorded when the identifier has been read and
# new_herebody_s when the rest of the line, newline included, has been read.
'<<' [~\-]?
  ( '"' ( any - '"' )* '"'
  | "'" ( any - "'" )* "'"
  | "`" ( any - "`" )* "`"
  | bareword ) % { heredoc_e = p }
  c_line* c_nl % { new_herebody_s = p }
=> {
  # Split the identifier into dash flag, tilde flag, quote and delimiter.
  tok(@ts, heredoc_e) =~ /^<<(-?)(~?)(["'`]?)(.*)\3$/m
  heredoc_dash, heredoc_tilde, heredoc_quote, delimiter = $1, $2, $3, $4

  dedent_body = !heredoc_tilde.empty?
  indent      = dedent_body || !heredoc_dash.empty?
  # An unquoted identifier gets the same literal type as a double-quoted one.
  type        = heredoc_quote.empty? ? '<<"'.freeze : ('<<'.freeze + heredoc_quote)

  if @version >= 27
    # From 2.7 on any line break inside the identifier is an error.
    if delimiter.include?("\n") || delimiter.include?("\r")
      diagnostic :error, :unterminated_heredoc_id, nil, range(@ts, @ts + 1)
    end
  elsif @version >= 24 && delimiter.include?("\n")
    # 2.4 to 2.6 warn about a trailing newline and strip it.
    # A newline anywhere else in the identifier is fatal.
    if delimiter.end_with?("\n")
      diagnostic :warning, :heredoc_id_ends_with_nl, nil, range(@ts, @ts + 1)
      delimiter = delimiter.rstrip
    else
      diagnostic :fatal, :heredoc_id_has_newline, nil, range(@ts, @ts + 1)
    end
  end

  if dedent_body && version?(18, 19, 20, 21, 22)
    # These versions have no squiggly heredoc. Only the shift operator
    # is emitted and scanning resumes right after it.
    emit(:tLSHFT, '<<'.freeze, @ts, @ts + 2)
    p = @ts + 1
    fnext expr_beg; fbreak;
  else
    fnext *push_literal(type, delimiter, @ts, heredoc_e, indent, dedent_body);

    # Keep an already recorded heredoc body start, otherwise use the one
    # found on this line, then continue scanning from there.
    @herebody_s ||= new_herebody_s
    p = @herebody_s - 1
  end
};
|
1812
|
-
|
1813
|
-
# Escaped unterminated heredoc start
# <<'END  | <<"END  | <<`END  |
# <<-'END | <<-"END | <<-`END |
# <<~'END | <<~"END | <<~`END
#
# If the heredoc is terminated the rule above should handle it.
#
# All three quote styles consume the rest of the identifier up to, but not
# including, the line break. The backtick alternative used to lack the
# repetition, so a backtick followed directly by a newline was not matched
# here, unlike the same input written with either of the other two quotes.
'<<' [~\-]?
  ( '"' (any - c_nl - '"')*
  | "'" (any - c_nl - "'")*
  | "`" (any - c_nl - "`")*
  )
=> {
  # The diagnostic points at the first character of the heredoc start.
  diagnostic :error, :unterminated_heredoc_id, nil, range(@ts, @ts + 1)
};
|
1827
|
-
|
1828
|
-
#
|
1829
|
-
# SYMBOL LITERALS
|
1830
|
-
#
|
1831
|
-
|
1832
|
-
# :&&, :||
|
1833
|
-
':' ('&&' | '||') => {
|
1834
|
-
fhold; fhold;
|
1835
|
-
emit(:tSYMBEG, tok(@ts, @ts + 1), @ts, @ts + 1)
|
1836
|
-
fgoto expr_fname;
|
1837
|
-
};
|
1838
|
-
|
1839
|
-
# :"bar", :'baz'
|
1840
|
-
':' ['"] # '
|
1841
|
-
=> {
|
1842
|
-
type, delimiter = tok, tok[-1].chr
|
1843
|
-
fgoto *push_literal(type, delimiter, @ts);
|
1844
|
-
};
|
1845
|
-
|
1846
|
-
# :!@ is :!
|
1847
|
-
# :~@ is :~
|
1848
|
-
':' [!~] '@'
|
1849
|
-
=> {
|
1850
|
-
emit(:tSYMBOL, tok(@ts + 1, @ts + 2))
|
1851
|
-
fnext expr_end; fbreak;
|
1852
|
-
};
|
1853
|
-
|
1854
|
-
':' bareword ambiguous_symbol_suffix
|
1855
|
-
=> {
|
1856
|
-
emit(:tSYMBOL, tok(@ts + 1, tm), @ts, tm)
|
1857
|
-
p = tm - 1
|
1858
|
-
fnext expr_end; fbreak;
|
1859
|
-
};
|
1860
|
-
|
1861
|
-
':' ( bareword | global_var | class_var | instance_var |
|
1862
|
-
operator_fname | operator_arithmetic | operator_rest )
|
1863
|
-
=> {
|
1864
|
-
emit(:tSYMBOL, tok(@ts + 1), @ts)
|
1865
|
-
fnext expr_end; fbreak;
|
1866
|
-
};
|
1867
|
-
|
1868
|
-
':' ( '@' %{ tm = p - 1; diag_msg = :ivar_name }
|
1869
|
-
| '@@' %{ tm = p - 2; diag_msg = :cvar_name }
|
1870
|
-
) [0-9]*
|
1871
|
-
=> {
|
1872
|
-
if @version >= 27
|
1873
|
-
diagnostic :error, diag_msg, { name: tok(tm, @te) }, range(tm, @te)
|
1874
|
-
else
|
1875
|
-
emit(:tCOLON, tok(@ts, @ts + 1), @ts, @ts + 1)
|
1876
|
-
p = @ts
|
1877
|
-
end
|
1878
|
-
|
1879
|
-
fnext expr_end; fbreak;
|
1880
|
-
};
|
1881
|
-
|
1882
|
-
#
|
1883
|
-
# AMBIGUOUS TERNARY OPERATOR
|
1884
|
-
#
|
1885
|
-
|
1886
|
-
# Character constant, like ?a, ?\n, ?\u1000, and so on
|
1887
|
-
# Don't accept \u escape with multiple codepoints, like \u{1 2 3}
|
1888
|
-
'?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
|
1889
|
-
| (c_any - c_space_nl - e_bs) % { @escape = nil }
|
1890
|
-
)
|
1891
|
-
=> {
|
1892
|
-
value = @escape || tok(@ts + 1)
|
1893
|
-
|
1894
|
-
if version?(18)
|
1895
|
-
emit(:tINTEGER, value.getbyte(0))
|
1896
|
-
else
|
1897
|
-
emit(:tCHARACTER, value)
|
1898
|
-
end
|
1899
|
-
|
1900
|
-
fnext expr_end; fbreak;
|
1901
|
-
};
|
1902
|
-
|
1903
|
-
'?' c_space_nl
|
1904
|
-
=> {
|
1905
|
-
escape = { " " => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
|
1906
|
-
"\v" => '\v', "\f" => '\f' }[@source_buffer.slice(@ts + 1)]
|
1907
|
-
diagnostic :warning, :invalid_escape_use, { :escape => escape }, range
|
1908
|
-
|
1909
|
-
p = @ts - 1
|
1910
|
-
fgoto expr_end;
|
1911
|
-
};
|
1912
|
-
|
1913
|
-
'?' c_eof
|
1914
|
-
=> {
|
1915
|
-
diagnostic :fatal, :incomplete_escape, nil, range(@ts, @ts + 1)
|
1916
|
-
};
|
1917
|
-
|
1918
|
-
# f ?aa : b: Disambiguate with a character literal.
|
1919
|
-
'?' [A-Za-z_] bareword
|
1920
|
-
=> {
|
1921
|
-
p = @ts - 1
|
1922
|
-
fgoto expr_end;
|
1923
|
-
};
|
1924
|
-
|
1925
|
-
#
|
1926
|
-
# AMBIGUOUS EMPTY BLOCK ARGUMENTS
|
1927
|
-
#
|
1928
|
-
|
1929
|
-
# Ruby >= 2.7 emits it as two tPIPE terminals
|
1930
|
-
# while Ruby < 2.7 as a single tOROP (like in `a || b`)
|
1931
|
-
'||'
|
1932
|
-
=> {
|
1933
|
-
if @version >= 27
|
1934
|
-
emit(:tPIPE, tok(@ts, @ts + 1), @ts, @ts + 1)
|
1935
|
-
fhold;
|
1936
|
-
fnext expr_beg; fbreak;
|
1937
|
-
else
|
1938
|
-
p -= 2
|
1939
|
-
fgoto expr_end;
|
1940
|
-
end
|
1941
|
-
};
|
1942
|
-
|
1943
|
-
#
|
1944
|
-
# KEYWORDS AND PUNCTUATION
|
1945
|
-
#
|
1946
|
-
|
1947
|
-
# a({b=>c})
|
1948
|
-
e_lbrace
|
1949
|
-
=> {
|
1950
|
-
if @lambda_stack.last == @paren_nest
|
1951
|
-
@lambda_stack.pop
|
1952
|
-
@command_start = true
|
1953
|
-
emit(:tLAMBEG, '{'.freeze)
|
1954
|
-
else
|
1955
|
-
emit(:tLBRACE, '{'.freeze)
|
1956
|
-
end
|
1957
|
-
@paren_nest += 1
|
1958
|
-
fbreak;
|
1959
|
-
};
|
1960
|
-
|
1961
|
-
# a([1, 2])
|
1962
|
-
e_lbrack
|
1963
|
-
=> { emit(:tLBRACK, '['.freeze)
|
1964
|
-
fbreak; };
|
1965
|
-
|
1966
|
-
# a()
|
1967
|
-
e_lparen
|
1968
|
-
=> { emit(:tLPAREN, '('.freeze)
|
1969
|
-
fbreak; };
|
1970
|
-
|
1971
|
-
# a(+b)
|
1972
|
-
punctuation_begin
|
1973
|
-
=> { emit_table(PUNCTUATION_BEGIN)
|
1974
|
-
fbreak; };
|
1975
|
-
|
1976
|
-
# rescue Exception => e: Block rescue.
|
1977
|
-
# Special because it should transition to expr_mid.
|
1978
|
-
'rescue' %{ tm = p } '=>'?
|
1979
|
-
=> { emit(:kRESCUE, 'rescue'.freeze, @ts, tm)
|
1980
|
-
p = tm - 1
|
1981
|
-
fnext expr_mid; fbreak; };
|
1982
|
-
|
1983
|
-
# if a: Statement if.
|
1984
|
-
keyword_modifier
|
1985
|
-
=> { emit_table(KEYWORDS_BEGIN)
|
1986
|
-
@command_start = true
|
1987
|
-
fnext expr_value; fbreak; };
|
1988
|
-
|
1989
|
-
#
|
1990
|
-
# RUBY 1.9 HASH LABELS
|
1991
|
-
#
|
1992
|
-
|
1993
|
-
label ( any - ':' )
|
1994
|
-
=> {
|
1995
|
-
fhold;
|
1996
|
-
|
1997
|
-
if version?(18)
|
1998
|
-
ident = tok(@ts, @te - 2)
|
1999
|
-
|
2000
|
-
emit((@source_buffer.slice(@ts) =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
|
2001
|
-
ident, @ts, @te - 2)
|
2002
|
-
fhold; # continue as a symbol
|
2003
|
-
|
2004
|
-
if !@static_env.nil? && @static_env.declared?(ident)
|
2005
|
-
fnext expr_end;
|
2006
|
-
else
|
2007
|
-
fnext *arg_or_cmdarg(cmd_state);
|
2008
|
-
end
|
2009
|
-
else
|
2010
|
-
emit(:tLABEL, tok(@ts, @te - 2), @ts, @te - 1)
|
2011
|
-
fnext expr_labelarg;
|
2012
|
-
end
|
2013
|
-
|
2014
|
-
fbreak;
|
2015
|
-
};
|
2016
|
-
|
2017
|
-
#
|
2018
|
-
# RUBY 2.7 BEGINLESS RANGE
|
2019
|
-
|
2020
|
-
'..'
=> {
  # From 2.7 on a double dot at the beginning of an expression gets
  # its own token type, older versions emit the ordinary tDOT2.
  emit(@version >= 27 ? :tBDOT2 : :tDOT2)
  fnext expr_beg; fbreak;
};

'...'
=> {
  # On 3.0 and later, check whether the innermost pending lambda was
  # recorded exactly one paren level below the current one.
  # To reject `->(...)` like `->...`
  inside_lambda_args = @version >= 30 && @lambda_stack.any? &&
                       @lambda_stack.last + 1 == @paren_nest

  # Before 2.7, and inside such lambda arguments, this is the ordinary
  # tDOT3. Everywhere else it is tBDOT3.
  if @version < 27 || inside_lambda_args
    emit(:tDOT3)
  else
    emit(:tBDOT3)
  end

  fnext expr_beg; fbreak;
};
|
2048
|
-
|
2049
|
-
#
|
2050
|
-
# CONTEXT-DEPENDENT VARIABLE LOOKUP OR COMMAND INVOCATION
|
2051
|
-
#
|
2052
|
-
|
2053
|
-
# foo= bar: Disambiguate with bareword rule below.
|
2054
|
-
bareword ambiguous_ident_suffix |
|
2055
|
-
# def foo: Disambiguate with bareword rule below.
|
2056
|
-
keyword
|
2057
|
-
=> { p = @ts - 1
|
2058
|
-
fgoto expr_end; };
|
2059
|
-
|
2060
|
-
# a = 42; a [42]: Indexing.
|
2061
|
-
# def a; end; a [42]: Array argument.
|
2062
|
-
call_or_var
|
2063
|
-
=> local_ident;
|
2064
|
-
|
2065
|
-
(call_or_var - keyword)
|
2066
|
-
% { ident_tok = tok; ident_ts = @ts; ident_te = @te; }
|
2067
|
-
w_space+ '('
|
2068
|
-
=> {
|
2069
|
-
emit(:tIDENTIFIER, ident_tok, ident_ts, ident_te)
|
2070
|
-
p = ident_te - 1
|
2071
|
-
|
2072
|
-
if !@static_env.nil? && @static_env.declared?(ident_tok) && @version < 25
|
2073
|
-
fnext expr_endfn;
|
2074
|
-
else
|
2075
|
-
fnext expr_cmdarg;
|
2076
|
-
end
|
2077
|
-
fbreak;
|
2078
|
-
};
|
2079
|
-
|
2080
|
-
#
|
2081
|
-
# WHITESPACE
|
2082
|
-
#
|
2083
|
-
|
2084
|
-
w_any;
|
2085
|
-
|
2086
|
-
e_heredoc_nl '=begin' ( c_space | c_nl_zlen )
|
2087
|
-
=> {
|
2088
|
-
p = @ts - 1
|
2089
|
-
@cs_before_block_comment = @cs
|
2090
|
-
fgoto line_begin;
|
2091
|
-
};
|
2092
|
-
|
2093
|
-
#
|
2094
|
-
# DEFAULT TRANSITION
|
2095
|
-
#
|
2096
|
-
|
2097
|
-
# The following rules match most binary and all unary operators.
|
2098
|
-
# Rules for binary operators provide better error reporting.
|
2099
|
-
operator_arithmetic '=' |
|
2100
|
-
operator_rest |
|
2101
|
-
punctuation_end |
|
2102
|
-
c_any
|
2103
|
-
=> { p = @ts - 1; fgoto expr_end; };
|
2104
|
-
|
2105
|
-
c_eof => do_eof;
|
2106
|
-
*|;
|
2107
|
-
|
2108
|
-
# Special newline handling for "def a b:"
|
2109
|
-
#
|
2110
|
-
expr_labelarg := |*
  w_space_comment;

  # While @in_kwarg is set the newline is left in the input for expr_end.
  # Otherwise it is consumed and scanning goes on in line_begin.
  w_newline
  => {
    if @in_kwarg
      fhold; fgoto expr_end;
    else
      fgoto line_begin;
    end
  };

  # Every other character is left in the input for expr_beg.
  c_any
  => { fhold; fgoto expr_beg; };

  c_eof => do_eof;
*|;
|
2127
|
-
|
2128
|
-
# Like expr_beg, but no 1.9 label or 2.2 quoted label possible.
|
2129
|
-
#
|
2130
|
-
expr_value := |*
  # a:b: a(:b), a::B, A::B
  # No label is produced here. The text is rescanned from its first
  # character by expr_end.
  label (any - ':')
  => {
    p = @ts - 1
    fgoto expr_end;
  };

  # "bar", 'baz'
  # The quote character serves as both literal type and delimiter.
  ['"] # '
  => {
    fgoto *push_literal(tok, tok, @ts);
  };

  w_space_comment;

  # A newline is consumed and scanning goes on in line_begin.
  w_newline
  => { fgoto line_begin; };

  # Every other character is left in the input for expr_beg.
  c_any
  => { fhold; fgoto expr_beg; };

  c_eof => do_eof;
*|;
|
2152
|
-
|
2153
|
-
expr_end := |*
|
2154
|
-
#
|
2155
|
-
# STABBY LAMBDA
|
2156
|
-
#
|
2157
|
-
|
2158
|
-
'->'
|
2159
|
-
=> {
|
2160
|
-
emit(:tLAMBDA, '->'.freeze, @ts, @ts + 2)
|
2161
|
-
|
2162
|
-
@lambda_stack.push @paren_nest
|
2163
|
-
fnext expr_endfn; fbreak;
|
2164
|
-
};
|
2165
|
-
|
2166
|
-
e_lbrace | 'do'
=> {
  # The token opens a lambda body when the innermost pending lambda was
  # recorded at the current paren nesting. Decide before popping.
  opens_lambda_body = @lambda_stack.last == @paren_nest
  @lambda_stack.pop if opens_lambda_body

  if tok == '{'.freeze
    emit(opens_lambda_body ? :tLAMBEG : :tLCURLY, '{'.freeze)
    # Only the brace form deepens the paren nesting.
    @paren_nest += 1
  elsif opens_lambda_body
    emit(:kDO_LAMBDA, 'do'.freeze)
  else
    emit_do
  end

  @command_start = true

  fnext expr_value; fbreak;
};
|
2190
|
-
|
2191
|
-
#
|
2192
|
-
# KEYWORDS
|
2193
|
-
#
|
2194
|
-
|
2195
|
-
keyword_with_fname
|
2196
|
-
=> { emit_table(KEYWORDS)
|
2197
|
-
fnext expr_fname; fbreak; };
|
2198
|
-
|
2199
|
-
'class' w_any* '<<'
|
2200
|
-
=> { emit(:kCLASS, 'class'.freeze, @ts, @ts + 5)
|
2201
|
-
emit(:tLSHFT, '<<'.freeze, @te - 2, @te)
|
2202
|
-
fnext expr_value; fbreak; };
|
2203
|
-
|
2204
|
-
# a if b:c: Syntax error.
|
2205
|
-
keyword_modifier
|
2206
|
-
=> { emit_table(KEYWORDS)
|
2207
|
-
fnext expr_beg; fbreak; };
|
2208
|
-
|
2209
|
-
# elsif b:c: elsif b(:c)
|
2210
|
-
keyword_with_value
|
2211
|
-
=> { emit_table(KEYWORDS)
|
2212
|
-
@command_start = true
|
2213
|
-
fnext expr_value; fbreak; };
|
2214
|
-
|
2215
|
-
keyword_with_mid
|
2216
|
-
=> { emit_table(KEYWORDS)
|
2217
|
-
fnext expr_mid; fbreak; };
|
2218
|
-
|
2219
|
-
keyword_with_arg
=> {
  emit_table(KEYWORDS)

  # Under 1.8 the keyword not is followed by expr_beg.
  # Every other case continues in expr_arg.
  if version?(18) && tok == 'not'.freeze
    fnext expr_beg;
  else
    fnext expr_arg;
  end
  fbreak;
};
|
2229
|
-
|
2230
|
-
'__ENCODING__'
=> {
  if version?(18)
    # Under 1.8 this is emitted as a plain identifier.
    emit(:tIDENTIFIER)

    # When there is no static environment, or the name is not declared
    # in it, the next state is chosen by arg_or_cmdarg.
    if @static_env.nil? || !@static_env.declared?(tok)
      fnext *arg_or_cmdarg(cmd_state);
    end
  else
    emit(:k__ENCODING__, '__ENCODING__'.freeze)
  end
  fbreak;
};
|
2243
|
-
|
2244
|
-
keyword_with_end
|
2245
|
-
=> { emit_table(KEYWORDS)
|
2246
|
-
fbreak; };
|
2247
|
-
|
2248
|
-
#
|
2249
|
-
# NUMERIC LITERALS
|
2250
|
-
#
|
2251
|
-
|
2252
|
-
( '0' [Xx] %{ @num_base = 16; @num_digits_s = p } int_hex
|
2253
|
-
| '0' [Dd] %{ @num_base = 10; @num_digits_s = p } int_dec
|
2254
|
-
| '0' [Oo] %{ @num_base = 8; @num_digits_s = p } int_dec
|
2255
|
-
| '0' [Bb] %{ @num_base = 2; @num_digits_s = p } int_bin
|
2256
|
-
| [1-9] digit* '_'? %{ @num_base = 10; @num_digits_s = @ts } int_dec
|
2257
|
-
| '0' digit* '_'? %{ @num_base = 8; @num_digits_s = @ts } int_dec
|
2258
|
-
) %{ @num_suffix_s = p } int_suffix
|
2259
|
-
=> {
|
2260
|
-
digits = tok(@num_digits_s, @num_suffix_s)
|
2261
|
-
|
2262
|
-
if digits.end_with? '_'.freeze
|
2263
|
-
diagnostic :error, :trailing_in_number, { :character => '_'.freeze },
|
2264
|
-
range(@te - 1, @te)
|
2265
|
-
elsif digits.empty? && @num_base == 8 && version?(18)
|
2266
|
-
# 1.8 did not raise an error on 0o.
|
2267
|
-
digits = '0'.freeze
|
2268
|
-
elsif digits.empty?
|
2269
|
-
diagnostic :error, :empty_numeric
|
2270
|
-
elsif @num_base == 8 && (invalid_idx = digits.index(/[89]/))
|
2271
|
-
invalid_s = @num_digits_s + invalid_idx
|
2272
|
-
diagnostic :error, :invalid_octal, nil,
|
2273
|
-
range(invalid_s, invalid_s + 1)
|
2274
|
-
end
|
2275
|
-
|
2276
|
-
if version?(18, 19, 20)
|
2277
|
-
emit(:tINTEGER, digits.to_i(@num_base), @ts, @num_suffix_s)
|
2278
|
-
p = @num_suffix_s - 1
|
2279
|
-
else
|
2280
|
-
@num_xfrm.call(digits.to_i(@num_base))
|
2281
|
-
end
|
2282
|
-
fbreak;
|
2283
|
-
};
|
2284
|
-
|
2285
|
-
flo_frac flo_pow?
|
2286
|
-
=> {
|
2287
|
-
diagnostic :error, :no_dot_digit_literal
|
2288
|
-
};
|
2289
|
-
|
2290
|
-
flo_int [eE]
|
2291
|
-
=> {
|
2292
|
-
if version?(18, 19, 20)
|
2293
|
-
diagnostic :error,
|
2294
|
-
:trailing_in_number, { :character => tok(@te - 1, @te) },
|
2295
|
-
range(@te - 1, @te)
|
2296
|
-
else
|
2297
|
-
emit(:tINTEGER, tok(@ts, @te - 1).to_i, @ts, @te - 1)
|
2298
|
-
fhold; fbreak;
|
2299
|
-
end
|
2300
|
-
};
|
2301
|
-
|
2302
|
-
flo_int flo_frac [eE]
|
2303
|
-
=> {
|
2304
|
-
if version?(18, 19, 20)
|
2305
|
-
diagnostic :error,
|
2306
|
-
:trailing_in_number, { :character => tok(@te - 1, @te) },
|
2307
|
-
range(@te - 1, @te)
|
2308
|
-
else
|
2309
|
-
emit(:tFLOAT, tok(@ts, @te - 1).to_f, @ts, @te - 1)
|
2310
|
-
fhold; fbreak;
|
2311
|
-
end
|
2312
|
-
};
|
2313
|
-
|
2314
|
-
flo_int
|
2315
|
-
( flo_frac? flo_pow %{ @num_suffix_s = p } flo_pow_suffix
|
2316
|
-
| flo_frac %{ @num_suffix_s = p } flo_suffix
|
2317
|
-
)
|
2318
|
-
=> {
|
2319
|
-
digits = tok(@ts, @num_suffix_s)
|
2320
|
-
|
2321
|
-
if version?(18, 19, 20)
|
2322
|
-
emit(:tFLOAT, Float(digits), @ts, @num_suffix_s)
|
2323
|
-
p = @num_suffix_s - 1
|
2324
|
-
else
|
2325
|
-
@num_xfrm.call(digits)
|
2326
|
-
end
|
2327
|
-
fbreak;
|
2328
|
-
};
|
2329
|
-
|
2330
|
-
#
|
2331
|
-
# STRING AND XSTRING LITERALS
|
2332
|
-
#
|
2333
|
-
|
2334
|
-
# `echo foo`, "bar", 'baz'
|
2335
|
-
'`' | ['"] # '
|
2336
|
-
=> {
|
2337
|
-
type, delimiter = tok, tok[-1].chr
|
2338
|
-
fgoto *push_literal(type, delimiter, @ts, nil, false, false, true);
|
2339
|
-
};
|
2340
|
-
|
2341
|
-
#
|
2342
|
-
# CONSTANTS AND VARIABLES
|
2343
|
-
#
|
2344
|
-
|
2345
|
-
constant
|
2346
|
-
=> { emit(:tCONSTANT)
|
2347
|
-
fnext *arg_or_cmdarg(cmd_state); fbreak; };
|
2348
|
-
|
2349
|
-
constant ambiguous_const_suffix
|
2350
|
-
=> { emit(:tCONSTANT, tok(@ts, tm), @ts, tm)
|
2351
|
-
p = tm - 1; fbreak; };
|
2352
|
-
|
2353
|
-
global_var | class_var_v | instance_var_v
|
2354
|
-
=> { p = @ts - 1; fcall expr_variable; };
|
2355
|
-
|
2356
|
-
#
|
2357
|
-
# METHOD CALLS
|
2358
|
-
#
|
2359
|
-
|
2360
|
-
'.' | '&.' | '::'
|
2361
|
-
=> { emit_table(PUNCTUATION)
|
2362
|
-
fnext expr_dot; fbreak; };
|
2363
|
-
|
2364
|
-
call_or_var
|
2365
|
-
=> local_ident;
|
2366
|
-
|
2367
|
-
bareword ambiguous_fid_suffix
|
2368
|
-
=> {
|
2369
|
-
if tm == @te
|
2370
|
-
# Suffix was consumed, e.g. foo!
|
2371
|
-
emit(:tFID)
|
2372
|
-
else
|
2373
|
-
# Suffix was not consumed, e.g. foo!=
|
2374
|
-
emit(:tIDENTIFIER, tok(@ts, tm), @ts, tm)
|
2375
|
-
p = tm - 1
|
2376
|
-
end
|
2377
|
-
fnext expr_arg; fbreak;
|
2378
|
-
};
|
2379
|
-
|
2380
|
-
#
|
2381
|
-
# OPERATORS
|
2382
|
-
#
|
2383
|
-
|
2384
|
-
'*' | '=>'
|
2385
|
-
=> {
|
2386
|
-
emit_table(PUNCTUATION)
|
2387
|
-
fgoto expr_value;
|
2388
|
-
};
|
2389
|
-
|
2390
|
-
# When '|', '~', '!', '=>' are used as operators
|
2391
|
-
# they do not accept any symbols (or quoted labels) after.
|
2392
|
-
# Other binary operators accept it.
|
2393
|
-
( operator_arithmetic | operator_rest ) - ( '|' | '~' | '!' | '*' )
|
2394
|
-
=> {
|
2395
|
-
emit_table(PUNCTUATION);
|
2396
|
-
fnext expr_value; fbreak;
|
2397
|
-
};
|
2398
|
-
|
2399
|
-
( e_lparen | '|' | '~' | '!' )
|
2400
|
-
=> { emit_table(PUNCTUATION)
|
2401
|
-
fnext expr_beg; fbreak; };
|
2402
|
-
|
2403
|
-
e_rbrace | e_rparen | e_rbrack
=> {
  emit_table(PUNCTUATION)

  # 2.4 and later call pop on the cond and cmdarg stacks,
  # older versions call lexpop.
  if @version >= 24
    @cond.pop
    @cmdarg.pop
  else
    @cond.lexpop
    @cmdarg.lexpop
  end

  # Only a closing brace or bracket selects a follow-up state here.
  # A closing paren selects none.
  # Open question kept from the original: fnext expr_endfn; ?
  if tok == '}'.freeze || tok == ']'.freeze
    if @version >= 25
      fnext expr_end;
    else
      fnext expr_endarg;
    end
  end

  fbreak;
};
|
2427
|
-
|
2428
|
-
operator_arithmetic '='
|
2429
|
-
=> { emit(:tOP_ASGN, tok(@ts, @te - 1))
|
2430
|
-
fnext expr_beg; fbreak; };
|
2431
|
-
|
2432
|
-
'?'
|
2433
|
-
=> { emit(:tEH, '?'.freeze)
|
2434
|
-
fnext expr_value; fbreak; };
|
2435
|
-
|
2436
|
-
e_lbrack
|
2437
|
-
=> { emit(:tLBRACK2, '['.freeze)
|
2438
|
-
fnext expr_beg; fbreak; };
|
2439
|
-
|
2440
|
-
'...' c_nl
|
2441
|
-
=> {
|
2442
|
-
if @paren_nest == 0
|
2443
|
-
diagnostic :warning, :triple_dot_at_eol, nil, range(@ts, @te - 1)
|
2444
|
-
end
|
2445
|
-
|
2446
|
-
emit(:tDOT3, '...'.freeze, @ts, @te - 1)
|
2447
|
-
fhold;
|
2448
|
-
fnext expr_beg; fbreak;
|
2449
|
-
};
|
2450
|
-
|
2451
|
-
punctuation_end
|
2452
|
-
=> { emit_table(PUNCTUATION)
|
2453
|
-
fnext expr_beg; fbreak; };
|
2454
|
-
|
2455
|
-
#
|
2456
|
-
# WHITESPACE
|
2457
|
-
#
|
2458
|
-
|
2459
|
-
w_space_comment;
|
2460
|
-
|
2461
|
-
w_newline
|
2462
|
-
=> { fgoto leading_dot; };
|
2463
|
-
|
2464
|
-
';'
|
2465
|
-
=> { emit(:tSEMI, ';'.freeze)
|
2466
|
-
@command_start = true
|
2467
|
-
fnext expr_value; fbreak; };
|
2468
|
-
|
2469
|
-
# A backslash followed by another character on the same line is reported
# as an error located at the backslash itself.
'\\' c_line
=> {
  diagnostic :error, :bare_backslash, nil, range(@ts, @ts + 1)
  fhold;
};
|
2473
|
-
|
2474
|
-
c_any
|
2475
|
-
=> {
|
2476
|
-
diagnostic :fatal, :unexpected, { :character => tok.inspect[1..-2] }
|
2477
|
-
};
|
2478
|
-
|
2479
|
-
c_eof => do_eof;
|
2480
|
-
*|;
|
2481
|
-
|
2482
|
-
leading_dot := |*
  # Insane leading dots:
  # a #comment
  #  # post-2.7 comment
  #  .b: a.b

  # Here we use '\n' instead of w_newline to not modify @newline_s
  # and eventually properly emit tNL
  (c_space* w_space_comment '\n')+
  => {
    if @version < 27
      # Before 2.7 a comment line cannot sit between a statement and a
      # leading dot. The statement ends here: emit tNL for the recorded
      # newline and read the next line as a separate statement.
      #
      # Note: block comments before leading dot are not supported on any version of Ruby.
      emit(:tNL, nil, @newline_s, @newline_s + 1)
      fhold; fnext line_begin; fbreak;
    end
  };

  # A leading dot or safe-navigation dot continues the previous line.
  # Scanning resumes in expr_end at the dot, the position saved in tm.
  c_space* %{ tm = p } ('.' | '&.')
  => {
    p = tm - 1
    fgoto expr_end;
  };

  # Anything else ends the statement: emit tNL for the recorded newline
  # and give the character back to line_begin.
  any
  => {
    emit(:tNL, nil, @newline_s, @newline_s + 1)
    fhold; fnext line_begin; fbreak;
  };
*|;
|
2511
|
-
|
2512
|
-
#
|
2513
|
-
# === EMBEDDED DOCUMENT (aka BLOCK COMMENT) PARSING ===
|
2514
|
-
#
|
2515
|
-
|
2516
|
-
line_comment := |*
  # The closing line: emit everything from the recorded start of the
  # block comment up to here as one comment, then return to the state
  # saved in @cs_before_block_comment.
  '=end' c_line* c_nl_zlen
  => {
    emit_comment(@eq_begin_s, @te)
    fgoto *@cs_before_block_comment;
  };

  # Any other complete line inside the block comment is skipped.
  c_line* c_nl;

  # A line ending in zlen rather than a newline (presumably the end of
  # the input, confirm against the zlen definition) is fatal. The
  # diagnostic covers the opening =begin.
  c_line* zlen
  => {
    diagnostic :fatal, :embedded_document, nil,
               range(@eq_begin_s, @eq_begin_s + '=begin'.length)
  };
*|;
|
2531
|
-
|
2532
|
-
line_begin := |*
  w_any;

  # Start of a block comment: record where it begins and let
  # line_comment read it.
  '=begin' ( c_space | c_nl_zlen )
  => {
    @eq_begin_s = @ts
    fgoto line_comment;
  };

  # __END__ on a line of its own moves p to pe - 3.
  # NOTE(review): presumably this skips the rest of the input so that
  # the scanner finishes, confirm against the end-of-input handling.
  '__END__' ( c_eol - zlen )
  => { p = pe - 3 };

  # Anything else starts a statement: set cmd_state and hand the
  # character back to expr_value.
  c_any
  => {
    cmd_state = true
    fhold; fgoto expr_value;
  };

  c_eof => do_eof;
*|;
|
2547
|
-
|
2548
|
-
}%%
|
2549
|
-
# %
|
2550
|
-
end
|