c_lexer 2.5.1.2.0 → 2.5.3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/appveyor.yml +1 -0
- data/c_lexer.gemspec +1 -1
- data/ext/lexer/emit_tables.h +5 -5
- data/ext/lexer/lexer.c +3654 -3647
- data/ext/lexer/lexer.h +29 -29
- data/ext/lexer/lexer.rl +508 -508
- data/ext/lexer/{literal.h → literal/literal.h} +2 -2
- data/ext/lexer/literal/methods.h +1 -1
- data/ext/lexer/stack_state/cmdarg.h +47 -0
- data/ext/lexer/stack_state/cond.h +47 -0
- data/ext/lexer/{stack.h → stack_state/stack.h} +0 -0
- data/ext/lexer/{stack_state.h → stack_state/stack_state.h} +2 -2
- data/lib/c_lexer/version.rb +1 -1
- metadata +9 -9
- data/ext/lexer/cmdarg.h +0 -47
- data/ext/lexer/cond.h +0 -47
data/ext/lexer/lexer.rl
CHANGED
@@ -5,16 +5,16 @@
|
|
5
5
|
#include <stdint.h>
|
6
6
|
#include <stdio.h>
|
7
7
|
|
8
|
-
#include "stack.h"
|
9
|
-
#include "stack_state.h"
|
8
|
+
#include "stack_state/stack.h"
|
9
|
+
#include "stack_state/stack_state.h"
|
10
10
|
#include "lexer.h"
|
11
11
|
|
12
|
-
#define
|
12
|
+
#define GET_LEXER(self) Data_Get_Struct(self, Lexer, lexer)
|
13
13
|
#define STATIC_ENV_DECLARED(name) \
|
14
|
-
|
14
|
+
lexer->static_env != Qnil && RTEST(rb_funcall(lexer->static_env, rb_intern("declared?"), 1, name))
|
15
15
|
|
16
|
-
#include "cmdarg.h"
|
17
|
-
#include "cond.h"
|
16
|
+
#include "stack_state/cmdarg.h"
|
17
|
+
#include "stack_state/cond.h"
|
18
18
|
|
19
19
|
#include "literal/methods.h"
|
20
20
|
#include "emit_tables.h"
|
@@ -24,50 +24,50 @@
|
|
24
24
|
|
25
25
|
static VALUE lexer_alloc(VALUE klass)
|
26
26
|
{
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
ss_stack_init(&
|
49
|
-
ss_stack_init(&
|
50
|
-
lit_stack_init(&
|
51
|
-
|
52
|
-
return Data_Wrap_Struct(klass, lexer_mark, lexer_dealloc,
|
27
|
+
Lexer *lexer = xmalloc(sizeof(Lexer));
|
28
|
+
|
29
|
+
lexer->cs = lexer->p = lexer->pe = 0;
|
30
|
+
lexer->paren_nest = 0;
|
31
|
+
|
32
|
+
lexer->cs_stack = xmalloc(4 * sizeof(int));
|
33
|
+
lexer->cs_stack_top = 0;
|
34
|
+
lexer->cs_stack_size = 4;
|
35
|
+
|
36
|
+
lexer->source_buffer = Qnil;
|
37
|
+
lexer->source = Qnil;
|
38
|
+
lexer->source_pts = Qnil;
|
39
|
+
lexer->token_queue = Qnil;
|
40
|
+
lexer->static_env = Qnil;
|
41
|
+
lexer->lambda_stack = Qnil;
|
42
|
+
lexer->diagnostics = Qnil;
|
43
|
+
lexer->tokens = Qnil;
|
44
|
+
lexer->comments = Qnil;
|
45
|
+
lexer->encoding = Qnil;
|
46
|
+
lexer->escape = Qnil;
|
47
|
+
|
48
|
+
ss_stack_init(&lexer->cond_stack);
|
49
|
+
ss_stack_init(&lexer->cmdarg_stack);
|
50
|
+
lit_stack_init(&lexer->literal_stack);
|
51
|
+
|
52
|
+
return Data_Wrap_Struct(klass, lexer_mark, lexer_dealloc, lexer);
|
53
53
|
}
|
54
54
|
|
55
55
|
static void lexer_mark(void *ptr)
|
56
56
|
{
|
57
|
-
|
58
|
-
rb_gc_mark(
|
59
|
-
rb_gc_mark(
|
60
|
-
rb_gc_mark(
|
61
|
-
rb_gc_mark(
|
62
|
-
rb_gc_mark(
|
63
|
-
rb_gc_mark(
|
64
|
-
rb_gc_mark(
|
65
|
-
rb_gc_mark(
|
66
|
-
rb_gc_mark(
|
67
|
-
rb_gc_mark(
|
68
|
-
rb_gc_mark(
|
69
|
-
|
70
|
-
for (literal *lit =
|
57
|
+
Lexer *lexer = ptr;
|
58
|
+
rb_gc_mark(lexer->source_buffer);
|
59
|
+
rb_gc_mark(lexer->source);
|
60
|
+
rb_gc_mark(lexer->source_pts);
|
61
|
+
rb_gc_mark(lexer->token_queue);
|
62
|
+
rb_gc_mark(lexer->static_env);
|
63
|
+
rb_gc_mark(lexer->lambda_stack);
|
64
|
+
rb_gc_mark(lexer->diagnostics);
|
65
|
+
rb_gc_mark(lexer->tokens);
|
66
|
+
rb_gc_mark(lexer->comments);
|
67
|
+
rb_gc_mark(lexer->encoding);
|
68
|
+
rb_gc_mark(lexer->escape);
|
69
|
+
|
70
|
+
for (literal *lit = lexer->literal_stack.bottom; lit < lexer->literal_stack.top; lit++) {
|
71
71
|
rb_gc_mark(lit->buffer);
|
72
72
|
rb_gc_mark(lit->start_tok);
|
73
73
|
rb_gc_mark(lit->start_delim);
|
@@ -78,25 +78,25 @@ static void lexer_mark(void *ptr)
|
|
78
78
|
|
79
79
|
static void lexer_dealloc(void *ptr)
|
80
80
|
{
|
81
|
-
|
82
|
-
ss_stack_dealloc(&
|
83
|
-
ss_stack_dealloc(&
|
84
|
-
lit_stack_dealloc(&
|
81
|
+
Lexer *lexer = ptr;
|
82
|
+
ss_stack_dealloc(&lexer->cond_stack);
|
83
|
+
ss_stack_dealloc(&lexer->cmdarg_stack);
|
84
|
+
lit_stack_dealloc(&lexer->literal_stack);
|
85
85
|
xfree(ptr);
|
86
86
|
}
|
87
87
|
|
88
88
|
static VALUE lexer_initialize(VALUE self, VALUE version)
|
89
89
|
{
|
90
|
-
|
90
|
+
Lexer* lexer = GET_LEXER(self);
|
91
91
|
|
92
|
-
|
92
|
+
lexer->version = NUM2INT(version);
|
93
93
|
|
94
94
|
return lexer_reset(0, NULL, self);
|
95
95
|
}
|
96
96
|
|
97
97
|
static VALUE lexer_reset(int argc, VALUE *argv, VALUE self)
|
98
98
|
{
|
99
|
-
|
99
|
+
Lexer* lexer = GET_LEXER(self);
|
100
100
|
|
101
101
|
VALUE reset_state;
|
102
102
|
rb_scan_args(argc, argv, "01", &reset_state);
|
@@ -104,21 +104,21 @@ static VALUE lexer_reset(int argc, VALUE *argv, VALUE self)
|
|
104
104
|
reset_state = Qtrue;
|
105
105
|
|
106
106
|
if (RTEST(reset_state)) {
|
107
|
-
|
107
|
+
lexer->cs = lex_en_line_begin;
|
108
108
|
|
109
|
-
|
110
|
-
|
111
|
-
ss_stack_clear(&
|
112
|
-
ss_stack_clear(&
|
109
|
+
lexer->cond = 0;
|
110
|
+
lexer->cmdarg = 0;
|
111
|
+
ss_stack_clear(&lexer->cond_stack);
|
112
|
+
ss_stack_clear(&lexer->cmdarg_stack);
|
113
113
|
}
|
114
114
|
|
115
|
-
|
115
|
+
lexer->force_utf32 = 0;
|
116
116
|
|
117
|
-
|
118
|
-
|
119
|
-
|
117
|
+
lexer->source = Qnil;
|
118
|
+
lexer->source_pts = Qnil;
|
119
|
+
lexer->encoding = Qnil;
|
120
120
|
|
121
|
-
|
121
|
+
lexer->p = 0;
|
122
122
|
// @ts is a local variable
|
123
123
|
// @te is a local variable
|
124
124
|
// @act is a local variable
|
@@ -127,65 +127,65 @@ static VALUE lexer_reset(int argc, VALUE *argv, VALUE self)
|
|
127
127
|
// @top is handled on prepush
|
128
128
|
|
129
129
|
// Lexer state
|
130
|
-
|
131
|
-
lit_stack_clear(&
|
130
|
+
lexer->token_queue = rb_ary_new();
|
131
|
+
lit_stack_clear(&lexer->literal_stack);
|
132
132
|
|
133
|
-
|
133
|
+
lexer->eq_begin_s = 0;
|
134
134
|
// @sharp_s is a local variable
|
135
135
|
|
136
|
-
|
136
|
+
lexer->newline_s = 0;
|
137
137
|
|
138
138
|
// @num_base is a local variable
|
139
139
|
// @num_digits_s is a local variable
|
140
140
|
// @num_suffix_s is a local variable
|
141
141
|
// @num_xfrm is a local variable
|
142
142
|
|
143
|
-
|
144
|
-
|
143
|
+
lexer->escape_s = 0;
|
144
|
+
lexer->escape = Qnil;
|
145
145
|
|
146
|
-
|
146
|
+
lexer->herebody_s = 0;
|
147
147
|
|
148
|
-
|
149
|
-
|
148
|
+
lexer->paren_nest = 0;
|
149
|
+
lexer->lambda_stack = rb_ary_new();
|
150
150
|
|
151
|
-
|
151
|
+
lexer->dedent_level = -1;
|
152
152
|
|
153
153
|
// @command_state is a local variable
|
154
154
|
|
155
|
-
|
155
|
+
lexer->in_kwarg = 0;
|
156
156
|
|
157
|
-
|
157
|
+
lexer->cs_before_block_comment = lex_en_line_begin;
|
158
158
|
|
159
159
|
return self;
|
160
160
|
}
|
161
161
|
|
162
162
|
static VALUE lexer_set_source_buffer(VALUE self, VALUE buffer)
|
163
163
|
{
|
164
|
-
|
164
|
+
Lexer* lexer = GET_LEXER(self);
|
165
165
|
|
166
|
-
|
166
|
+
lexer->source_buffer = buffer;
|
167
167
|
|
168
168
|
if (RTEST(buffer)) {
|
169
|
-
|
170
|
-
|
169
|
+
lexer->source = rb_funcall(buffer, rb_intern("source"), 0);
|
170
|
+
lexer->encoding = rb_obj_encoding(lexer->source);
|
171
171
|
|
172
|
-
if (
|
173
|
-
|
172
|
+
if (lexer->encoding == utf8_encoding) {
|
173
|
+
lexer->source_pts = rb_funcall(lexer->source, rb_intern("unpack"), 1, rb_str_new2("U*"));
|
174
174
|
} else {
|
175
|
-
|
175
|
+
lexer->source_pts = rb_funcall(lexer->source, rb_intern("unpack"), 1, rb_str_new2("C*"));
|
176
176
|
}
|
177
177
|
|
178
|
-
|
178
|
+
lexer->pe = RARRAY_LEN(lexer->source_pts) + 2; /* pretend there is a null at the end */
|
179
179
|
|
180
|
-
VALUE source_pt = rb_ary_entry(
|
180
|
+
VALUE source_pt = rb_ary_entry(lexer->source_pts, 0);
|
181
181
|
if (source_pt != Qnil && NUM2INT(source_pt) == 0xfeff) {
|
182
|
-
|
182
|
+
lexer->p = 1;
|
183
183
|
}
|
184
184
|
} else {
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
185
|
+
lexer->source = Qnil;
|
186
|
+
lexer->source_pts = Qnil;
|
187
|
+
lexer->encoding = Qnil;
|
188
|
+
lexer->pe = 0;
|
189
189
|
}
|
190
190
|
|
191
191
|
return self;
|
@@ -193,9 +193,9 @@ static VALUE lexer_set_source_buffer(VALUE self, VALUE buffer)
|
|
193
193
|
|
194
194
|
static VALUE lexer_get_state(VALUE self)
|
195
195
|
{
|
196
|
-
|
196
|
+
Lexer* lexer = GET_LEXER(self);
|
197
197
|
|
198
|
-
switch (
|
198
|
+
switch (lexer->cs) {
|
199
199
|
case lex_en_line_begin: return ID2SYM(rb_intern("line_begin"));
|
200
200
|
case lex_en_expr_dot: return ID2SYM(rb_intern("expr_dot"));
|
201
201
|
case lex_en_expr_fname: return ID2SYM(rb_intern("expr_fname"));
|
@@ -220,42 +220,42 @@ static VALUE lexer_get_state(VALUE self)
|
|
220
220
|
|
221
221
|
static VALUE lexer_set_state(VALUE self, VALUE state_sym)
|
222
222
|
{
|
223
|
-
|
223
|
+
Lexer* lexer = GET_LEXER(self);
|
224
224
|
const char *state_name = rb_id2name(SYM2ID(state_sym));
|
225
225
|
|
226
226
|
if (strcmp(state_name, "line_begin") == 0)
|
227
|
-
|
227
|
+
lexer->cs = lex_en_line_begin;
|
228
228
|
else if (strcmp(state_name, "expr_dot") == 0)
|
229
|
-
|
229
|
+
lexer->cs = lex_en_expr_dot;
|
230
230
|
else if (strcmp(state_name, "expr_fname") == 0)
|
231
|
-
|
231
|
+
lexer->cs = lex_en_expr_fname;
|
232
232
|
else if (strcmp(state_name, "expr_value") == 0)
|
233
|
-
|
233
|
+
lexer->cs = lex_en_expr_value;
|
234
234
|
else if (strcmp(state_name, "expr_beg") == 0)
|
235
|
-
|
235
|
+
lexer->cs = lex_en_expr_beg;
|
236
236
|
else if (strcmp(state_name, "expr_mid") == 0)
|
237
|
-
|
237
|
+
lexer->cs = lex_en_expr_mid;
|
238
238
|
else if (strcmp(state_name, "expr_arg") == 0)
|
239
|
-
|
239
|
+
lexer->cs = lex_en_expr_arg;
|
240
240
|
else if (strcmp(state_name, "expr_cmdarg") == 0)
|
241
|
-
|
241
|
+
lexer->cs = lex_en_expr_cmdarg;
|
242
242
|
else if (strcmp(state_name, "expr_end") == 0)
|
243
|
-
|
243
|
+
lexer->cs = lex_en_expr_end;
|
244
244
|
else if (strcmp(state_name, "expr_endarg") == 0)
|
245
|
-
|
245
|
+
lexer->cs = lex_en_expr_endarg;
|
246
246
|
else if (strcmp(state_name, "expr_endfn") == 0)
|
247
|
-
|
247
|
+
lexer->cs = lex_en_expr_endfn;
|
248
248
|
else if (strcmp(state_name, "expr_labelarg") == 0)
|
249
|
-
|
249
|
+
lexer->cs = lex_en_expr_labelarg;
|
250
250
|
|
251
251
|
else if (strcmp(state_name, "interp_string") == 0)
|
252
|
-
|
252
|
+
lexer->cs = lex_en_interp_string;
|
253
253
|
else if (strcmp(state_name, "interp_words") == 0)
|
254
|
-
|
254
|
+
lexer->cs = lex_en_interp_words;
|
255
255
|
else if (strcmp(state_name, "plain_string") == 0)
|
256
|
-
|
256
|
+
lexer->cs = lex_en_plain_string;
|
257
257
|
else if (strcmp(state_name, "plain_words") == 0)
|
258
|
-
|
258
|
+
lexer->cs = lex_en_plain_words;
|
259
259
|
else
|
260
260
|
rb_raise(rb_eArgError, "Invalid state: %s", state_name);
|
261
261
|
|
@@ -264,52 +264,52 @@ static VALUE lexer_set_state(VALUE self, VALUE state_sym)
|
|
264
264
|
|
265
265
|
static VALUE lexer_push_cmdarg(VALUE self)
|
266
266
|
{
|
267
|
-
|
268
|
-
ss_stack_push(&
|
269
|
-
|
267
|
+
Lexer* lexer = GET_LEXER(self);
|
268
|
+
ss_stack_push(&lexer->cmdarg_stack, lexer->cmdarg);
|
269
|
+
lexer->cmdarg = 0;
|
270
270
|
return Qnil;
|
271
271
|
}
|
272
272
|
|
273
273
|
static VALUE lexer_pop_cmdarg(VALUE self)
|
274
274
|
{
|
275
|
-
|
276
|
-
|
275
|
+
Lexer* lexer = GET_LEXER(self);
|
276
|
+
lexer->cmdarg = ss_stack_pop(&lexer->cmdarg_stack);
|
277
277
|
return Qnil;
|
278
278
|
}
|
279
279
|
|
280
280
|
static VALUE lexer_push_cond(VALUE self)
|
281
281
|
{
|
282
|
-
|
283
|
-
ss_stack_push(&
|
284
|
-
|
282
|
+
Lexer* lexer = GET_LEXER(self);
|
283
|
+
ss_stack_push(&lexer->cond_stack, lexer->cond);
|
284
|
+
lexer->cond = 0;
|
285
285
|
return Qnil;
|
286
286
|
}
|
287
287
|
|
288
288
|
static VALUE lexer_pop_cond(VALUE self)
|
289
289
|
{
|
290
|
-
|
291
|
-
|
290
|
+
Lexer* lexer = GET_LEXER(self);
|
291
|
+
lexer->cond = ss_stack_pop(&lexer->cond_stack);
|
292
292
|
return Qnil;
|
293
293
|
}
|
294
294
|
|
295
295
|
static VALUE lexer_get_in_kwarg(VALUE self)
|
296
296
|
{
|
297
|
-
|
298
|
-
return
|
297
|
+
Lexer* lexer = GET_LEXER(self);
|
298
|
+
return lexer->in_kwarg ? Qtrue : Qfalse;
|
299
299
|
}
|
300
300
|
|
301
301
|
static VALUE lexer_set_in_kwarg(VALUE self, VALUE val)
|
302
302
|
{
|
303
|
-
|
304
|
-
|
303
|
+
Lexer* lexer = GET_LEXER(self);
|
304
|
+
lexer->in_kwarg = RTEST(val) ? 1 : 0;
|
305
305
|
return val;
|
306
306
|
}
|
307
307
|
|
308
308
|
static VALUE lexer_get_dedent_level(VALUE self)
|
309
309
|
{
|
310
|
-
|
311
|
-
int result =
|
312
|
-
|
310
|
+
Lexer* lexer = GET_LEXER(self);
|
311
|
+
int result = lexer->dedent_level;
|
312
|
+
lexer->dedent_level = -1;
|
313
313
|
if (result == -1)
|
314
314
|
return Qnil;
|
315
315
|
else
|
@@ -327,22 +327,22 @@ static VALUE lexer_advance(VALUE self)
|
|
327
327
|
int num_base = 0;
|
328
328
|
long p, pe, eof, ts = 0, te = 0, tm = 0, sharp_s = 0, heredoc_e = 0;
|
329
329
|
long num_digits_s = 0, num_suffix_s = 0;
|
330
|
-
void (*num_xfrm)(
|
331
|
-
|
330
|
+
void (*num_xfrm)(Lexer*, VALUE, long, long); /* numeric suffix-induced transformation */
|
331
|
+
Lexer *lexer;
|
332
332
|
int *stack;
|
333
333
|
VALUE ident_tok = Qnil;
|
334
334
|
long ident_ts = 0, ident_te = 0;
|
335
335
|
long numeric_s = 0;
|
336
|
-
Data_Get_Struct(self,
|
336
|
+
Data_Get_Struct(self, Lexer, lexer);
|
337
337
|
|
338
|
-
if (RARRAY_LEN(
|
339
|
-
return rb_ary_shift(
|
338
|
+
if (RARRAY_LEN(lexer->token_queue) > 0)
|
339
|
+
return rb_ary_shift(lexer->token_queue);
|
340
340
|
|
341
|
-
cs =
|
342
|
-
p =
|
343
|
-
pe = eof =
|
344
|
-
stack =
|
345
|
-
top =
|
341
|
+
cs = lexer->cs;
|
342
|
+
p = lexer->p;
|
343
|
+
pe = eof = lexer->pe;
|
344
|
+
stack = lexer->cs_stack;
|
345
|
+
top = lexer->cs_stack_top;
|
346
346
|
|
347
347
|
command_state = (cs == lex_en_expr_value || cs == lex_en_line_begin);
|
348
348
|
|
@@ -350,18 +350,18 @@ static VALUE lexer_advance(VALUE self)
|
|
350
350
|
write exec;
|
351
351
|
}%%
|
352
352
|
|
353
|
-
|
354
|
-
|
355
|
-
|
353
|
+
lexer->p = p;
|
354
|
+
lexer->cs = cs;
|
355
|
+
lexer->cs_stack_top = top;
|
356
356
|
|
357
|
-
if (RARRAY_LEN(
|
358
|
-
return rb_ary_shift(
|
357
|
+
if (RARRAY_LEN(lexer->token_queue) > 0) {
|
358
|
+
return rb_ary_shift(lexer->token_queue);
|
359
359
|
} else if (cs == lex_error) {
|
360
|
-
VALUE info = rb_ary_new3(2, rb_str_new2("$error"), range(
|
360
|
+
VALUE info = rb_ary_new3(2, rb_str_new2("$error"), range(lexer, p - 1, p));
|
361
361
|
VALUE token = rb_ary_new3(2, Qfalse, info);
|
362
362
|
return token;
|
363
363
|
} else {
|
364
|
-
VALUE info = rb_ary_new3(2, rb_str_new2("$eof"), range(
|
364
|
+
VALUE info = rb_ary_new3(2, rb_str_new2("$eof"), range(lexer, eof - 2, eof - 2));
|
365
365
|
VALUE token = rb_ary_new3(2, Qfalse, info);
|
366
366
|
return token;
|
367
367
|
}
|
@@ -372,62 +372,62 @@ static inline void force_encoding(VALUE str, VALUE enc)
|
|
372
372
|
rb_enc_associate(str, rb_to_encoding(enc));
|
373
373
|
}
|
374
374
|
|
375
|
-
static void emit_token(
|
375
|
+
static void emit_token(Lexer *lexer, VALUE type, VALUE value, long start, long end)
|
376
376
|
{
|
377
|
-
VALUE info = rb_ary_new3(2, value, range(
|
377
|
+
VALUE info = rb_ary_new3(2, value, range(lexer, start, end));
|
378
378
|
VALUE token = rb_ary_new3(2, type, info);
|
379
379
|
|
380
|
-
rb_ary_push(
|
380
|
+
rb_ary_push(lexer->token_queue, token);
|
381
381
|
|
382
|
-
if (
|
383
|
-
rb_ary_push(
|
382
|
+
if (lexer->tokens != Qnil)
|
383
|
+
rb_ary_push(lexer->tokens, token);
|
384
384
|
}
|
385
385
|
|
386
|
-
static void emit_comment(
|
386
|
+
static void emit_comment(Lexer *lexer, long start, long end)
|
387
387
|
{
|
388
388
|
VALUE rng = Qnil;
|
389
389
|
|
390
|
-
if (
|
391
|
-
rng = range(
|
390
|
+
if (lexer->tokens != Qnil) {
|
391
|
+
rng = range(lexer, start, end);
|
392
392
|
|
393
|
-
VALUE info = rb_ary_new3(2, tok(
|
393
|
+
VALUE info = rb_ary_new3(2, tok(lexer, start, end), rng);
|
394
394
|
VALUE token = rb_ary_new3(2, tCOMMENT, info);
|
395
|
-
rb_ary_push(
|
395
|
+
rb_ary_push(lexer->tokens, token);
|
396
396
|
}
|
397
397
|
|
398
|
-
if (
|
398
|
+
if (lexer->comments != Qnil) {
|
399
399
|
if (rng == Qnil)
|
400
|
-
rng = range(
|
400
|
+
rng = range(lexer, start, end);
|
401
401
|
VALUE comment = rb_class_new_instance(1, &rng, comment_klass);
|
402
|
-
rb_ary_push(
|
402
|
+
rb_ary_push(lexer->comments, comment);
|
403
403
|
}
|
404
404
|
}
|
405
405
|
|
406
|
-
static void emit_do(
|
406
|
+
static void emit_do(Lexer *lexer, int do_block, long ts, long te)
|
407
407
|
{
|
408
|
-
if (stack_state_active(&
|
408
|
+
if (stack_state_active(&lexer->cond))
|
409
409
|
emit(kDO_COND);
|
410
|
-
else if (stack_state_active(&
|
410
|
+
else if (stack_state_active(&lexer->cmdarg) || do_block)
|
411
411
|
emit(kDO_BLOCK);
|
412
412
|
else
|
413
413
|
emit(kDO);
|
414
414
|
}
|
415
415
|
|
416
|
-
static VALUE tok(
|
416
|
+
static VALUE tok(Lexer *lexer, long start, long end)
|
417
417
|
{
|
418
|
-
return rb_str_substr(
|
418
|
+
return rb_str_substr(lexer->source, start, end - start);
|
419
419
|
}
|
420
420
|
|
421
|
-
static VALUE range(
|
421
|
+
static VALUE range(Lexer *lexer, long start, long end)
|
422
422
|
{
|
423
423
|
VALUE args[3];
|
424
|
-
args[0] =
|
424
|
+
args[0] = lexer->source_buffer;
|
425
425
|
args[1] = INT2NUM(start);
|
426
426
|
args[2] = INT2NUM(end);
|
427
427
|
return rb_class_new_instance(3, args, range_klass);
|
428
428
|
}
|
429
429
|
|
430
|
-
static void diagnostic(
|
430
|
+
static void diagnostic(Lexer *lexer, VALUE type, VALUE reason,
|
431
431
|
VALUE arguments, VALUE loc, VALUE hilights)
|
432
432
|
{
|
433
433
|
VALUE args[5];
|
@@ -437,15 +437,15 @@ static void diagnostic(lexer_state *state, VALUE type, VALUE reason,
|
|
437
437
|
args[3] = loc;
|
438
438
|
args[4] = hilights;
|
439
439
|
VALUE diagnostic = rb_class_new_instance(5, args, diagnostic_klass);
|
440
|
-
rb_funcall(
|
440
|
+
rb_funcall(lexer->diagnostics, rb_intern("process"), 1, diagnostic);
|
441
441
|
}
|
442
442
|
|
443
|
-
static int get_codepoint(
|
443
|
+
static int get_codepoint(Lexer *lexer, long p)
|
444
444
|
{
|
445
|
-
if (p >= RARRAY_LEN(
|
445
|
+
if (p >= RARRAY_LEN(lexer->source_pts))
|
446
446
|
return 0;
|
447
447
|
else
|
448
|
-
return NUM2INT(rb_ary_entry(
|
448
|
+
return NUM2INT(rb_ary_entry(lexer->source_pts, p));
|
449
449
|
}
|
450
450
|
|
451
451
|
static int arg_or_cmdarg(int command_state)
|
@@ -558,53 +558,53 @@ static int find_8_or_9(VALUE str)
|
|
558
558
|
return -1;
|
559
559
|
}
|
560
560
|
|
561
|
-
static void emit_int(
|
561
|
+
static void emit_int(Lexer *lexer, VALUE val, long start, long end)
|
562
562
|
{
|
563
|
-
emit_token(
|
563
|
+
emit_token(lexer, tINTEGER, val, start, end);
|
564
564
|
}
|
565
565
|
|
566
|
-
static void emit_rational(
|
566
|
+
static void emit_rational(Lexer *lexer, VALUE val, long start, long end)
|
567
567
|
{
|
568
|
-
emit_token(
|
568
|
+
emit_token(lexer, tRATIONAL, rb_Rational1(val), start, end);
|
569
569
|
}
|
570
570
|
|
571
|
-
static void emit_complex(
|
571
|
+
static void emit_complex(Lexer *lexer, VALUE val, long start, long end)
|
572
572
|
{
|
573
|
-
emit_token(
|
573
|
+
emit_token(lexer, tIMAGINARY, rb_Complex(Qzero, val), start, end);
|
574
574
|
}
|
575
575
|
|
576
|
-
static void emit_complex_rational(
|
576
|
+
static void emit_complex_rational(Lexer *lexer, VALUE val, long start, long end)
|
577
577
|
{
|
578
|
-
emit_token(
|
578
|
+
emit_token(lexer, tIMAGINARY, rb_Complex(Qzero, rb_Rational1(val)), start, end);
|
579
579
|
}
|
580
580
|
|
581
|
-
static void emit_float(
|
581
|
+
static void emit_float(Lexer *lexer, VALUE val, long start, long end)
|
582
582
|
{
|
583
|
-
emit_token(
|
583
|
+
emit_token(lexer, tFLOAT, rb_Float(val), start, end);
|
584
584
|
}
|
585
585
|
|
586
|
-
static void emit_complex_float(
|
586
|
+
static void emit_complex_float(Lexer *lexer, VALUE val, long start, long end)
|
587
587
|
{
|
588
|
-
emit_token(
|
588
|
+
emit_token(lexer, tIMAGINARY, rb_Complex(Qzero, rb_Float(val)), start, end);
|
589
589
|
}
|
590
590
|
|
591
|
-
static void emit_int_followed_by_if(
|
591
|
+
static void emit_int_followed_by_if(Lexer *lexer, VALUE val, long start, long end)
|
592
592
|
{
|
593
|
-
emit_token(
|
593
|
+
emit_token(lexer, tINTEGER, val, start, end);
|
594
594
|
}
|
595
595
|
|
596
|
-
static void emit_int_followed_by_rescue(
|
596
|
+
static void emit_int_followed_by_rescue(Lexer *lexer, VALUE val, long start, long end)
|
597
597
|
{
|
598
|
-
emit_token(
|
598
|
+
emit_token(lexer, tINTEGER, val, start, end);
|
599
599
|
}
|
600
600
|
|
601
|
-
static void emit_float_followed_by_if(
|
601
|
+
static void emit_float_followed_by_if(Lexer *lexer, VALUE val, long start, long end)
|
602
602
|
{
|
603
|
-
emit_token(
|
603
|
+
emit_token(lexer, tFLOAT, rb_Float(val), start, end);
|
604
604
|
}
|
605
|
-
static void emit_float_followed_by_rescue(
|
605
|
+
static void emit_float_followed_by_rescue(Lexer *lexer, VALUE val, long start, long end)
|
606
606
|
{
|
607
|
-
emit_token(
|
607
|
+
emit_token(lexer, tFLOAT, rb_Float(val), start, end);
|
608
608
|
}
|
609
609
|
|
610
610
|
static int next_state_for_literal(literal *lit) {
|
@@ -635,23 +635,23 @@ static int next_state_for_literal(literal *lit) {
|
|
635
635
|
}
|
636
636
|
}
|
637
637
|
|
638
|
-
static int push_literal(
|
638
|
+
static int push_literal(Lexer *lexer, VALUE str_type, VALUE delimiter,
|
639
639
|
long str_s, long heredoc_e, int indent, int dedent_body,
|
640
640
|
int label_allowed)
|
641
641
|
{
|
642
642
|
literal lit;
|
643
|
-
literal_init(&lit,
|
643
|
+
literal_init(&lit, lexer, str_type, delimiter, str_s, heredoc_e, indent,
|
644
644
|
dedent_body, label_allowed);
|
645
|
-
lit_stack_push(&
|
645
|
+
lit_stack_push(&lexer->literal_stack, lit);
|
646
646
|
|
647
647
|
return next_state_for_literal(&lit);
|
648
648
|
}
|
649
649
|
|
650
|
-
static int pop_literal(
|
650
|
+
static int pop_literal(Lexer *lexer)
|
651
651
|
{
|
652
|
-
literal old_literal = lit_stack_pop(&
|
652
|
+
literal old_literal = lit_stack_pop(&lexer->literal_stack);
|
653
653
|
|
654
|
-
|
654
|
+
lexer->dedent_level = old_literal.dedent_level;
|
655
655
|
|
656
656
|
if (old_literal.start_tok == tREGEXP_BEG) {
|
657
657
|
return lex_en_regexp_modifiers;
|
@@ -986,20 +986,20 @@ void Init_lexer()
|
|
986
986
|
|
987
987
|
%%{
|
988
988
|
alphtype int;
|
989
|
-
getkey (get_codepoint(
|
989
|
+
getkey (get_codepoint(lexer, p));
|
990
990
|
|
991
991
|
prepush {
|
992
992
|
/* grow the state stack as needed */
|
993
|
-
if (
|
994
|
-
int *new_stack = xmalloc(
|
995
|
-
memcpy(new_stack,
|
996
|
-
xfree(
|
997
|
-
stack =
|
998
|
-
|
993
|
+
if (lexer->cs_stack_top == lexer->cs_stack_size) {
|
994
|
+
int *new_stack = xmalloc(lexer->cs_stack_size * 2 * sizeof(int));
|
995
|
+
memcpy(new_stack, lexer->cs_stack, lexer->cs_stack_size * sizeof(int));
|
996
|
+
xfree(lexer->cs_stack);
|
997
|
+
stack = lexer->cs_stack = new_stack;
|
998
|
+
lexer->cs_stack_size = lexer->cs_stack_size * 2;
|
999
999
|
}
|
1000
1000
|
}
|
1001
1001
|
|
1002
|
-
action do_nl {
|
1002
|
+
action do_nl { lexer->newline_s = p; }
|
1003
1003
|
|
1004
1004
|
c_nl = '\n' $ do_nl;
|
1005
1005
|
c_space = [ \t\r\f\v];
|
@@ -1094,30 +1094,30 @@ void Init_lexer()
|
|
1094
1094
|
escaped_nl = "\\" c_nl;
|
1095
1095
|
|
1096
1096
|
action unicode_points {
|
1097
|
-
|
1097
|
+
lexer->escape = rb_str_new2("");
|
1098
1098
|
|
1099
|
-
VALUE codepoints = tok(
|
1100
|
-
long codepoint_s =
|
1099
|
+
VALUE codepoints = tok(lexer, lexer->escape_s + 2, p - 1);
|
1100
|
+
long codepoint_s = lexer->escape_s + 2;
|
1101
1101
|
|
1102
1102
|
VALUE regexp;
|
1103
1103
|
|
1104
|
-
if (
|
1104
|
+
if (lexer->version < 24) {
|
1105
1105
|
if (str_start_with_p(codepoints, " ") || str_start_with_p(codepoints, "\t")) {
|
1106
|
-
diagnostic(
|
1107
|
-
range(
|
1106
|
+
diagnostic(lexer, severity_error, invalid_unicode_escape, Qnil,
|
1107
|
+
range(lexer, lexer->escape_s + 2, lexer->escape_s + 3), empty_array);
|
1108
1108
|
}
|
1109
1109
|
|
1110
1110
|
regexp = rb_reg_regcomp(rb_str_new2("[ \\t]{2}"));
|
1111
1111
|
VALUE space_p = rb_funcall(codepoints, rb_intern("index"), 1, regexp);
|
1112
1112
|
|
1113
1113
|
if (RTEST(space_p)) {
|
1114
|
-
diagnostic(
|
1115
|
-
range(
|
1114
|
+
diagnostic(lexer, severity_error, invalid_unicode_escape, Qnil,
|
1115
|
+
range(lexer, codepoint_s + NUM2INT(space_p) + 1, codepoint_s + NUM2INT(space_p) + 1), empty_array);
|
1116
1116
|
}
|
1117
1117
|
|
1118
1118
|
if (str_end_with_p(codepoints, " ") || str_end_with_p(codepoints, "\t")) {
|
1119
|
-
diagnostic(
|
1120
|
-
range(
|
1119
|
+
diagnostic(lexer, severity_error, invalid_unicode_escape, Qnil,
|
1120
|
+
range(lexer, p - 1, p), empty_array);
|
1121
1121
|
}
|
1122
1122
|
}
|
1123
1123
|
|
@@ -1136,93 +1136,93 @@ void Init_lexer()
|
|
1136
1136
|
} else {
|
1137
1137
|
VALUE codepoint = rb_str_to_inum(codepoint_str, 16, 0);
|
1138
1138
|
if (NUM2INT(codepoint) >= 0x110000) {
|
1139
|
-
diagnostic(
|
1140
|
-
range(
|
1139
|
+
diagnostic(lexer, severity_error, unicode_point_too_large, Qnil,
|
1140
|
+
range(lexer, codepoint_s, codepoint_s + RSTRING_LEN(codepoint_str)), empty_array);
|
1141
1141
|
break;
|
1142
1142
|
}
|
1143
1143
|
|
1144
1144
|
codepoint = rb_funcall(codepoint, rb_intern("chr"), 1, utf8_encoding);
|
1145
|
-
|
1145
|
+
lexer->escape = rb_str_plus(lexer->escape, codepoint);
|
1146
1146
|
codepoint_s += RSTRING_LEN(codepoint_str);
|
1147
1147
|
}
|
1148
1148
|
}
|
1149
1149
|
}
|
1150
1150
|
|
1151
1151
|
action unescape_char {
|
1152
|
-
char c = NUM2INT(rb_ary_entry(
|
1153
|
-
|
1152
|
+
char c = NUM2INT(rb_ary_entry(lexer->source_pts, p - 1));
|
1153
|
+
lexer->escape = unescape_char(c);
|
1154
1154
|
|
1155
|
-
if (
|
1156
|
-
VALUE codepoint = rb_funcall(
|
1157
|
-
|
1158
|
-
force_encoding(codepoint,
|
1155
|
+
if (lexer->escape == Qnil) {
|
1156
|
+
VALUE codepoint = rb_funcall(lexer->source_buffer, rb_intern("slice"), 1, INT2NUM(p - 1));
|
1157
|
+
lexer->escape = codepoint;
|
1158
|
+
force_encoding(codepoint, lexer->encoding);
|
1159
1159
|
}
|
1160
1160
|
}
|
1161
1161
|
|
1162
1162
|
action invalid_complex_escape {
|
1163
|
-
diagnostic(
|
1163
|
+
diagnostic(lexer, fatal, invalid_escape, Qnil, range(lexer, ts, te),
|
1164
1164
|
empty_array);
|
1165
1165
|
}
|
1166
1166
|
|
1167
1167
|
action slash_c_char {
|
1168
|
-
char c = *RSTRING_PTR(
|
1169
|
-
|
1170
|
-
force_encoding(
|
1168
|
+
char c = *RSTRING_PTR(lexer->escape) & 0x9f;
|
1169
|
+
lexer->escape = rb_str_new(&c, 1);
|
1170
|
+
force_encoding(lexer->escape, lexer->encoding);
|
1171
1171
|
}
|
1172
1172
|
|
1173
1173
|
action slash_m_char {
|
1174
|
-
char c = *RSTRING_PTR(
|
1175
|
-
|
1176
|
-
force_encoding(
|
1174
|
+
char c = *RSTRING_PTR(lexer->escape) | 0x80;
|
1175
|
+
lexer->escape = rb_str_new(&c, 1);
|
1176
|
+
force_encoding(lexer->escape, lexer->encoding);
|
1177
1177
|
}
|
1178
1178
|
|
1179
1179
|
maybe_escaped_char = (
|
1180
1180
|
'\\' c_any %unescape_char
|
1181
|
-
| ( c_any - [\\] ) % {
|
1181
|
+
| ( c_any - [\\] ) % { lexer->escape = rb_str_substr(lexer->source, p - 1, 1); }
|
1182
1182
|
);
|
1183
1183
|
|
1184
1184
|
maybe_escaped_ctrl_char = (
|
1185
1185
|
'\\' c_any %unescape_char %slash_c_char
|
1186
|
-
| '?' % {
|
1187
|
-
| ( c_any - [\\?] ) % {
|
1186
|
+
| '?' % { lexer->escape = rb_str_new2("\x7f"); }
|
1187
|
+
| ( c_any - [\\?] ) % { lexer->escape = rb_str_substr(lexer->source, p - 1, 1); } %slash_c_char
|
1188
1188
|
);
|
1189
1189
|
|
1190
1190
|
escape = (
|
1191
1191
|
[0-7]{1,3} % {
|
1192
|
-
VALUE token = tok(
|
1192
|
+
VALUE token = tok(lexer, lexer->escape_s, p);
|
1193
1193
|
char c = NUM2INT(rb_str_to_inum(token, 8, 0));
|
1194
1194
|
c = c % 0x100;
|
1195
|
-
|
1196
|
-
force_encoding(
|
1195
|
+
lexer->escape = rb_str_new(&c, 1);
|
1196
|
+
force_encoding(lexer->escape, lexer->encoding);
|
1197
1197
|
}
|
1198
1198
|
|
1199
1199
|
| 'x' xdigit{1,2} % {
|
1200
|
-
VALUE token = tok(
|
1200
|
+
VALUE token = tok(lexer, lexer->escape_s + 1, p);
|
1201
1201
|
char c = NUM2INT(rb_str_to_inum(token, 16, 0));
|
1202
|
-
|
1203
|
-
force_encoding(
|
1202
|
+
lexer->escape = rb_str_new(&c, 1);
|
1203
|
+
force_encoding(lexer->escape, lexer->encoding);
|
1204
1204
|
}
|
1205
1205
|
|
1206
1206
|
| 'x' ( c_any - xdigit )
|
1207
1207
|
% {
|
1208
|
-
diagnostic(
|
1209
|
-
range(
|
1208
|
+
diagnostic(lexer, fatal, invalid_hex_escape, Qnil,
|
1209
|
+
range(lexer, lexer->escape_s - 1, p + 2), empty_array);
|
1210
1210
|
}
|
1211
1211
|
|
1212
1212
|
| 'u' xdigit{4} % {
|
1213
|
-
VALUE token = tok(
|
1213
|
+
VALUE token = tok(lexer, lexer->escape_s + 1, p);
|
1214
1214
|
int i = NUM2INT(rb_str_to_inum(token, 16, 0));
|
1215
|
-
|
1215
|
+
lexer->escape = rb_enc_uint_chr(i, rb_to_encoding(utf8_encoding));
|
1216
1216
|
}
|
1217
1217
|
|
1218
1218
|
| 'u' xdigit{0,3} % {
|
1219
|
-
diagnostic(
|
1220
|
-
range(
|
1219
|
+
diagnostic(lexer, fatal, invalid_unicode_escape, Qnil,
|
1220
|
+
range(lexer, lexer->escape_s - 1, p), empty_array);
|
1221
1221
|
}
|
1222
1222
|
|
1223
1223
|
| 'u{' ( c_any - xdigit - [ \t}] )* '}' % {
|
1224
|
-
diagnostic(
|
1225
|
-
range(
|
1224
|
+
diagnostic(lexer, fatal, invalid_unicode_escape, Qnil,
|
1225
|
+
range(lexer, lexer->escape_s - 1, p), empty_array);
|
1226
1226
|
}
|
1227
1227
|
|
1228
1228
|
| 'u{' [ \t]* ( xdigit{1,6} [ \t]+ )*
|
@@ -1235,8 +1235,8 @@ void Init_lexer()
|
|
1235
1235
|
| ( c_any - [ \t}] )* c_eof
|
1236
1236
|
| xdigit{7,}
|
1237
1237
|
) % {
|
1238
|
-
diagnostic(
|
1239
|
-
range(
|
1238
|
+
diagnostic(lexer, fatal, invalid_unicode_escape, Qnil,
|
1239
|
+
range(lexer, p - 1, p), empty_array);
|
1240
1240
|
}
|
1241
1241
|
)
|
1242
1242
|
|
@@ -1259,42 +1259,42 @@ void Init_lexer()
|
|
1259
1259
|
| ( c_any - [0-7xuCMc] ) %unescape_char
|
1260
1260
|
|
1261
1261
|
| c_eof % {
|
1262
|
-
diagnostic(
|
1262
|
+
diagnostic(lexer, fatal, escape_eof, Qnil, range(lexer, p - 1, p),
|
1263
1263
|
empty_array);
|
1264
1264
|
}
|
1265
1265
|
);
|
1266
1266
|
|
1267
1267
|
e_bs = '\\' % {
|
1268
|
-
|
1269
|
-
|
1268
|
+
lexer->escape_s = p;
|
1269
|
+
lexer->escape = Qnil;
|
1270
1270
|
};
|
1271
1271
|
|
1272
1272
|
e_heredoc_nl = c_nl % {
|
1273
|
-
if (
|
1274
|
-
p =
|
1275
|
-
|
1273
|
+
if (lexer->herebody_s) {
|
1274
|
+
p = lexer->herebody_s;
|
1275
|
+
lexer->herebody_s = 0;
|
1276
1276
|
}
|
1277
1277
|
};
|
1278
1278
|
|
1279
1279
|
action extend_string {
|
1280
|
-
VALUE string = tok(
|
1280
|
+
VALUE string = tok(lexer, ts, te);
|
1281
1281
|
VALUE lookahead = Qnil;
|
1282
1282
|
|
1283
|
-
if (
|
1284
|
-
lookahead = tok(
|
1283
|
+
if (lexer->version >= 22 && !stack_state_active(&lexer->cond)) {
|
1284
|
+
lookahead = tok(lexer, te, te + 2);
|
1285
1285
|
}
|
1286
1286
|
|
1287
|
-
literal *current_literal = lit_stack_top(&
|
1287
|
+
literal *current_literal = lit_stack_top(&lexer->literal_stack);
|
1288
1288
|
|
1289
1289
|
if (!current_literal->heredoc_e &&
|
1290
1290
|
literal_nest_and_try_closing(current_literal, string, ts, te, lookahead)) {
|
1291
|
-
VALUE token = array_last(
|
1291
|
+
VALUE token = array_last(lexer->token_queue);
|
1292
1292
|
if (rb_ary_entry(token, 0) == tLABEL_END) {
|
1293
1293
|
p += 1;
|
1294
|
-
pop_literal(
|
1294
|
+
pop_literal(lexer);
|
1295
1295
|
fnext expr_labelarg;
|
1296
1296
|
} else {
|
1297
|
-
fnext *pop_literal(
|
1297
|
+
fnext *pop_literal(lexer);
|
1298
1298
|
}
|
1299
1299
|
|
1300
1300
|
fbreak;
|
@@ -1304,93 +1304,93 @@ void Init_lexer()
|
|
1304
1304
|
}
|
1305
1305
|
|
1306
1306
|
action extend_string_escaped {
|
1307
|
-
literal *current_literal = lit_stack_top(&
|
1308
|
-
VALUE escaped_char = rb_str_substr(
|
1307
|
+
literal *current_literal = lit_stack_top(&lexer->literal_stack);
|
1308
|
+
VALUE escaped_char = rb_str_substr(lexer->source, lexer->escape_s, 1);
|
1309
1309
|
|
1310
1310
|
if (literal_munge_escape_p(current_literal, escaped_char)) {
|
1311
1311
|
if (literal_regexp_p(current_literal) && is_regexp_metachar(escaped_char)) {
|
1312
|
-
literal_extend_string(current_literal, tok(
|
1312
|
+
literal_extend_string(current_literal, tok(lexer, ts, te), ts, te);
|
1313
1313
|
} else {
|
1314
1314
|
literal_extend_string(current_literal, escaped_char, ts, te);
|
1315
1315
|
}
|
1316
1316
|
} else {
|
1317
1317
|
if (literal_regexp_p(current_literal)) {
|
1318
|
-
VALUE token = tok(
|
1318
|
+
VALUE token = tok(lexer, ts, te);
|
1319
1319
|
rb_funcall(token, rb_intern("gsub!"), 2, escaped_newline, blank_string);
|
1320
1320
|
literal_extend_string(current_literal, token, ts, te);
|
1321
1321
|
} else if (literal_heredoc_p(current_literal) && newline_char_p(escaped_char)) {
|
1322
1322
|
if (literal_squiggly_heredoc_p(current_literal)) {
|
1323
|
-
literal_extend_string(current_literal, tok(
|
1323
|
+
literal_extend_string(current_literal, tok(lexer, ts, te), ts, te);
|
1324
1324
|
} else {
|
1325
|
-
VALUE token = tok(
|
1325
|
+
VALUE token = tok(lexer, ts, te);
|
1326
1326
|
rb_funcall(token, rb_intern("gsub!"), 2, escaped_newline, blank_string);
|
1327
1327
|
literal_extend_string(current_literal, token, ts, te);
|
1328
1328
|
}
|
1329
|
-
} else if (
|
1330
|
-
literal_extend_string(current_literal, tok(
|
1329
|
+
} else if (lexer->escape == Qnil) {
|
1330
|
+
literal_extend_string(current_literal, tok(lexer, ts, te), ts, te);
|
1331
1331
|
} else {
|
1332
|
-
literal_extend_string(current_literal,
|
1332
|
+
literal_extend_string(current_literal, lexer->escape, ts, te);
|
1333
1333
|
}
|
1334
1334
|
}
|
1335
1335
|
}
|
1336
1336
|
|
1337
1337
|
action extend_string_eol {
|
1338
|
-
literal *current_literal = lit_stack_top(&
|
1338
|
+
literal *current_literal = lit_stack_top(&lexer->literal_stack);
|
1339
1339
|
long str_s = current_literal->str_s;
|
1340
1340
|
|
1341
1341
|
if (te == pe) {
|
1342
|
-
diagnostic(
|
1343
|
-
range(
|
1342
|
+
diagnostic(lexer, fatal, string_eof, Qnil,
|
1343
|
+
range(lexer, str_s, str_s + 1), empty_array);
|
1344
1344
|
}
|
1345
1345
|
|
1346
1346
|
if (literal_heredoc_p(current_literal)) {
|
1347
|
-
VALUE line = tok(
|
1347
|
+
VALUE line = tok(lexer, lexer->herebody_s, ts);
|
1348
1348
|
rb_funcall(line, rb_intern("gsub!"), 2, crs_to_eol, blank_string);
|
1349
1349
|
|
1350
|
-
if (
|
1350
|
+
if (lexer->version >= 18 && lexer->version <= 20) {
|
1351
1351
|
rb_funcall(line, rb_intern("gsub!"), 2, cr_then_anything_to_eol, blank_string);
|
1352
1352
|
}
|
1353
1353
|
|
1354
|
-
if (literal_nest_and_try_closing(current_literal, line,
|
1355
|
-
|
1354
|
+
if (literal_nest_and_try_closing(current_literal, line, lexer->herebody_s, ts, Qnil)) {
|
1355
|
+
lexer->herebody_s = te;
|
1356
1356
|
p = current_literal->heredoc_e - 1;
|
1357
|
-
fnext *pop_literal(
|
1357
|
+
fnext *pop_literal(lexer); fbreak;
|
1358
1358
|
} else {
|
1359
1359
|
literal_infer_indent_level(current_literal, line);
|
1360
|
-
|
1360
|
+
lexer->herebody_s = te;
|
1361
1361
|
}
|
1362
1362
|
} else {
|
1363
|
-
if (literal_nest_and_try_closing(current_literal, tok(
|
1364
|
-
fnext *pop_literal(
|
1363
|
+
if (literal_nest_and_try_closing(current_literal, tok(lexer, ts, te), ts, te, Qnil)) {
|
1364
|
+
fnext *pop_literal(lexer); fbreak;
|
1365
1365
|
}
|
1366
1366
|
|
1367
|
-
if (
|
1368
|
-
p =
|
1369
|
-
|
1367
|
+
if (lexer->herebody_s) {
|
1368
|
+
p = lexer->herebody_s - 1;
|
1369
|
+
lexer->herebody_s = 0;
|
1370
1370
|
}
|
1371
1371
|
}
|
1372
1372
|
|
1373
|
-
if (literal_words_p(current_literal) && !eof_codepoint(get_codepoint(
|
1373
|
+
if (literal_words_p(current_literal) && !eof_codepoint(get_codepoint(lexer, p))) {
|
1374
1374
|
literal_extend_space(current_literal, ts, te);
|
1375
1375
|
} else {
|
1376
|
-
literal_extend_string(current_literal, tok(
|
1376
|
+
literal_extend_string(current_literal, tok(lexer, ts, te), ts, te);
|
1377
1377
|
literal_flush_string(current_literal);
|
1378
1378
|
}
|
1379
1379
|
}
|
1380
1380
|
|
1381
1381
|
action extend_string_space {
|
1382
|
-
literal *current_literal = lit_stack_top(&
|
1382
|
+
literal *current_literal = lit_stack_top(&lexer->literal_stack);
|
1383
1383
|
literal_extend_space(current_literal, ts, te);
|
1384
1384
|
}
|
1385
1385
|
|
1386
1386
|
interp_var = '#' ( global_var | class_var_v | instance_var_v );
|
1387
1387
|
|
1388
1388
|
action extend_interp_var {
|
1389
|
-
literal *current_literal = lit_stack_top(&
|
1389
|
+
literal *current_literal = lit_stack_top(&lexer->literal_stack);
|
1390
1390
|
literal_flush_string(current_literal);
|
1391
1391
|
literal_extend_content(current_literal);
|
1392
1392
|
|
1393
|
-
emit_token(
|
1393
|
+
emit_token(lexer, tSTRING_DVAR, Qnil, ts, ts + 1);
|
1394
1394
|
|
1395
1395
|
p = ts;
|
1396
1396
|
fcall expr_variable;
|
@@ -1399,34 +1399,34 @@ void Init_lexer()
|
|
1399
1399
|
interp_code = '#{';
|
1400
1400
|
|
1401
1401
|
e_lbrace = '{' % {
|
1402
|
-
stack_state_push(&
|
1403
|
-
stack_state_push(&
|
1402
|
+
stack_state_push(&lexer->cond, 0);
|
1403
|
+
stack_state_push(&lexer->cmdarg, 0);
|
1404
1404
|
|
1405
|
-
literal *current_literal = lit_stack_top(&
|
1405
|
+
literal *current_literal = lit_stack_top(&lexer->literal_stack);
|
1406
1406
|
if (current_literal != NULL) {
|
1407
1407
|
literal_start_interp_brace(current_literal);
|
1408
1408
|
}
|
1409
1409
|
};
|
1410
1410
|
|
1411
1411
|
e_rbrace = '}' % {
|
1412
|
-
literal *current_literal = lit_stack_top(&
|
1412
|
+
literal *current_literal = lit_stack_top(&lexer->literal_stack);
|
1413
1413
|
if (current_literal != NULL) {
|
1414
1414
|
if (literal_end_interp_brace_and_try_closing(current_literal)) {
|
1415
|
-
if (
|
1416
|
-
emit_token(
|
1417
|
-
if (
|
1418
|
-
stack_state_lexpop(&
|
1419
|
-
stack_state_lexpop(&
|
1415
|
+
if (lexer->version == 18 || lexer->version == 19) {
|
1416
|
+
emit_token(lexer, tRCURLY, rb_str_new2("}"), p - 1, p);
|
1417
|
+
if (lexer->version < 24) {
|
1418
|
+
stack_state_lexpop(&lexer->cond);
|
1419
|
+
stack_state_lexpop(&lexer->cmdarg);
|
1420
1420
|
} else {
|
1421
|
-
stack_state_pop(&
|
1422
|
-
stack_state_pop(&
|
1421
|
+
stack_state_pop(&lexer->cond);
|
1422
|
+
stack_state_pop(&lexer->cmdarg);
|
1423
1423
|
}
|
1424
1424
|
} else {
|
1425
|
-
emit_token(
|
1425
|
+
emit_token(lexer, tSTRING_DEND, rb_str_new2("}"), p - 1, p);
|
1426
1426
|
}
|
1427
1427
|
|
1428
1428
|
if (current_literal->herebody_s) {
|
1429
|
-
|
1429
|
+
lexer->herebody_s = current_literal->herebody_s;
|
1430
1430
|
}
|
1431
1431
|
|
1432
1432
|
fhold;
|
@@ -1437,15 +1437,15 @@ void Init_lexer()
|
|
1437
1437
|
};
|
1438
1438
|
|
1439
1439
|
action extend_interp_code {
|
1440
|
-
literal *current_literal = lit_stack_top(&
|
1440
|
+
literal *current_literal = lit_stack_top(&lexer->literal_stack);
|
1441
1441
|
literal_flush_string(current_literal);
|
1442
1442
|
literal_extend_content(current_literal);
|
1443
1443
|
|
1444
|
-
emit_token(
|
1444
|
+
emit_token(lexer, tSTRING_DBEG, rb_str_new2("#{"), ts, te);
|
1445
1445
|
|
1446
1446
|
if (current_literal->heredoc_e) {
|
1447
|
-
current_literal->herebody_s =
|
1448
|
-
|
1447
|
+
current_literal->herebody_s = lexer->herebody_s;
|
1448
|
+
lexer->herebody_s = 0;
|
1449
1449
|
}
|
1450
1450
|
|
1451
1451
|
literal_start_interp_brace(current_literal);
|
@@ -1513,13 +1513,13 @@ void Init_lexer()
|
|
1513
1513
|
regexp_modifiers := |*
|
1514
1514
|
[A-Za-z]+
|
1515
1515
|
=> {
|
1516
|
-
VALUE unknown_options = find_unknown_options(tok(
|
1516
|
+
VALUE unknown_options = find_unknown_options(tok(lexer, ts, te));
|
1517
1517
|
|
1518
1518
|
if (unknown_options != Qnil) {
|
1519
1519
|
VALUE hash = rb_hash_new();
|
1520
1520
|
rb_hash_aset(hash, ID2SYM(rb_intern("options")), unknown_options);
|
1521
|
-
diagnostic(
|
1522
|
-
range(
|
1521
|
+
diagnostic(lexer, severity_error, regexp_options, hash,
|
1522
|
+
range(lexer, ts, te), empty_array);
|
1523
1523
|
}
|
1524
1524
|
|
1525
1525
|
emit(tREGEXP_OPT);
|
@@ -1529,7 +1529,7 @@ void Init_lexer()
|
|
1529
1529
|
|
1530
1530
|
any
|
1531
1531
|
=> {
|
1532
|
-
emit_token(
|
1532
|
+
emit_token(lexer, tREGEXP_OPT, tok(lexer, ts, te - 1), ts, te - 1);
|
1533
1533
|
fhold;
|
1534
1534
|
fgoto expr_end;
|
1535
1535
|
};
|
@@ -1542,7 +1542,7 @@ void Init_lexer()
|
|
1542
1542
|
|
1543
1543
|
w_comment =
|
1544
1544
|
'#' %{ sharp_s = p - 1; }
|
1545
|
-
c_line* %{ emit_comment(
|
1545
|
+
c_line* %{ emit_comment(lexer, sharp_s, p == pe ? p - 2 : p); }
|
1546
1546
|
;
|
1547
1547
|
|
1548
1548
|
w_space_comment =
|
@@ -1583,22 +1583,22 @@ void Init_lexer()
|
|
1583
1583
|
;
|
1584
1584
|
|
1585
1585
|
e_lbrack = '[' % {
|
1586
|
-
stack_state_push(&
|
1587
|
-
stack_state_push(&
|
1586
|
+
stack_state_push(&lexer->cond, 0);
|
1587
|
+
stack_state_push(&lexer->cmdarg, 0);
|
1588
1588
|
};
|
1589
1589
|
|
1590
1590
|
e_lparen = '(' % {
|
1591
|
-
stack_state_push(&
|
1592
|
-
stack_state_push(&
|
1593
|
-
|
1591
|
+
stack_state_push(&lexer->cond, 0);
|
1592
|
+
stack_state_push(&lexer->cmdarg, 0);
|
1593
|
+
lexer->paren_nest += 1;
|
1594
1594
|
};
|
1595
1595
|
|
1596
1596
|
e_rparen = ')' % {
|
1597
|
-
|
1597
|
+
lexer->paren_nest -= 1;
|
1598
1598
|
};
|
1599
1599
|
|
1600
1600
|
action local_ident {
|
1601
|
-
VALUE str = tok(
|
1601
|
+
VALUE str = tok(lexer, ts, te);
|
1602
1602
|
emit(tIDENTIFIER);
|
1603
1603
|
|
1604
1604
|
if (STATIC_ENV_DECLARED(str)) {
|
@@ -1610,11 +1610,11 @@ void Init_lexer()
|
|
1610
1610
|
|
1611
1611
|
expr_variable := |*
|
1612
1612
|
global_var => {
|
1613
|
-
VALUE str = tok(
|
1613
|
+
VALUE str = tok(lexer, ts, te);
|
1614
1614
|
|
1615
1615
|
if (is_nthref(str)) {
|
1616
|
-
VALUE integer = rb_str_to_inum(tok(
|
1617
|
-
emit_token(
|
1616
|
+
VALUE integer = rb_str_to_inum(tok(lexer, ts + 1, te), 10, 0);
|
1617
|
+
emit_token(lexer, tNTH_REF, integer, ts, te);
|
1618
1618
|
} else if (is_backref(str)) {
|
1619
1619
|
emit(tBACK_REF);
|
1620
1620
|
} else {
|
@@ -1625,12 +1625,12 @@ void Init_lexer()
|
|
1625
1625
|
};
|
1626
1626
|
|
1627
1627
|
class_var_v => {
|
1628
|
-
VALUE str = tok(
|
1628
|
+
VALUE str = tok(lexer, ts, te);
|
1629
1629
|
|
1630
1630
|
if (bad_cvar_name(str)) {
|
1631
1631
|
VALUE hash = rb_hash_new();
|
1632
1632
|
rb_hash_aset(hash, ID2SYM(rb_intern("name")), str);
|
1633
|
-
diagnostic(
|
1633
|
+
diagnostic(lexer, severity_error, cvar_name, hash, range(lexer, ts, te), empty_array);
|
1634
1634
|
}
|
1635
1635
|
|
1636
1636
|
emit(tCVAR);
|
@@ -1638,12 +1638,12 @@ void Init_lexer()
|
|
1638
1638
|
};
|
1639
1639
|
|
1640
1640
|
instance_var_v => {
|
1641
|
-
VALUE str = tok(
|
1641
|
+
VALUE str = tok(lexer, ts, te);
|
1642
1642
|
|
1643
1643
|
if (bad_ivar_name(str)) {
|
1644
1644
|
VALUE hash = rb_hash_new();
|
1645
1645
|
rb_hash_aset(hash, ID2SYM(rb_intern("name")), str);
|
1646
|
-
diagnostic(
|
1646
|
+
diagnostic(lexer, severity_error, ivar_name, hash, range(lexer, ts, te), empty_array);
|
1647
1647
|
}
|
1648
1648
|
|
1649
1649
|
emit(tIVAR);
|
@@ -1653,7 +1653,7 @@ void Init_lexer()
|
|
1653
1653
|
|
1654
1654
|
expr_fname := |*
|
1655
1655
|
keyword
|
1656
|
-
=> { emit_table_KEYWORDS_BEGIN(
|
1656
|
+
=> { emit_table_KEYWORDS_BEGIN(lexer, tok(lexer, ts, te), ts, te);
|
1657
1657
|
fnext expr_endfn; fbreak; };
|
1658
1658
|
|
1659
1659
|
constant => { emit(tCONSTANT); fnext expr_endfn; fbreak; };
|
@@ -1665,7 +1665,7 @@ void Init_lexer()
|
|
1665
1665
|
operator_fname |
|
1666
1666
|
operator_arithmetic |
|
1667
1667
|
operator_rest
|
1668
|
-
=> { emit_table_PUNCTUATION(
|
1668
|
+
=> { emit_table_PUNCTUATION(lexer, tok(lexer, ts, te), ts, te);
|
1669
1669
|
fnext expr_endfn; fbreak; };
|
1670
1670
|
|
1671
1671
|
'::' => { fhold; fhold; fgoto expr_end; };
|
@@ -1674,13 +1674,13 @@ void Init_lexer()
|
|
1674
1674
|
|
1675
1675
|
'%s' c_any
|
1676
1676
|
=> {
|
1677
|
-
if (
|
1678
|
-
VALUE type = rb_str_substr(
|
1679
|
-
VALUE delimiter = rb_str_substr(
|
1677
|
+
if (lexer->version == 23) {
|
1678
|
+
VALUE type = rb_str_substr(lexer->source, ts, te - ts - 1);
|
1679
|
+
VALUE delimiter = rb_str_substr(lexer->source, te - 1, 1);
|
1680
1680
|
if (delimiter == Qnil)
|
1681
1681
|
delimiter = blank_string;
|
1682
1682
|
|
1683
|
-
fgoto *push_literal(
|
1683
|
+
fgoto *push_literal(lexer, type, delimiter, ts, 0, 0, 0, 0);
|
1684
1684
|
} else {
|
1685
1685
|
p = ts - 1;
|
1686
1686
|
fgoto expr_end;
|
@@ -1696,7 +1696,7 @@ void Init_lexer()
|
|
1696
1696
|
|
1697
1697
|
expr_endfn := |*
|
1698
1698
|
label ( any - ':' ) => {
|
1699
|
-
emit_token(
|
1699
|
+
emit_token(lexer, tLABEL, tok(lexer, ts, te - 2), ts, te - 1);
|
1700
1700
|
fhold; fnext expr_labelarg; fbreak;
|
1701
1701
|
};
|
1702
1702
|
|
@@ -1713,13 +1713,13 @@ void Init_lexer()
|
|
1713
1713
|
call_or_var => { emit(tIDENTIFIER); fnext *arg_or_cmdarg(command_state); fbreak; };
|
1714
1714
|
|
1715
1715
|
bareword ambiguous_fid_suffix
|
1716
|
-
=> { emit_token(
|
1716
|
+
=> { emit_token(lexer, tFID, tok(lexer, ts, tm), ts, tm);
|
1717
1717
|
fnext *arg_or_cmdarg(command_state); p = tm - 1; fbreak; };
|
1718
1718
|
|
1719
1719
|
operator_fname |
|
1720
1720
|
operator_arithmetic |
|
1721
1721
|
operator_rest
|
1722
|
-
=> { emit_table_PUNCTUATION(
|
1722
|
+
=> { emit_table_PUNCTUATION(lexer, tok(lexer, ts, te), ts, te);
|
1723
1723
|
fnext expr_arg; fbreak; };
|
1724
1724
|
|
1725
1725
|
w_any;
|
@@ -1732,11 +1732,11 @@ void Init_lexer()
|
|
1732
1732
|
|
1733
1733
|
expr_arg := |*
|
1734
1734
|
w_space+ e_lparen => {
|
1735
|
-
if (
|
1736
|
-
emit_token(
|
1735
|
+
if (lexer->version == 18) {
|
1736
|
+
emit_token(lexer, tLPAREN2, rb_str_new2("("), te - 1, te);
|
1737
1737
|
fnext expr_value; fbreak;
|
1738
1738
|
} else {
|
1739
|
-
emit_token(
|
1739
|
+
emit_token(lexer, tLPAREN_ARG, rb_str_new2("("), te - 1, te);
|
1740
1740
|
fnext expr_beg; fbreak;
|
1741
1741
|
}
|
1742
1742
|
};
|
@@ -1744,17 +1744,17 @@ void Init_lexer()
|
|
1744
1744
|
e_lparen => { emit(tLPAREN2); fnext expr_beg; fbreak; };
|
1745
1745
|
|
1746
1746
|
w_space+ e_lbrack => {
|
1747
|
-
emit_token(
|
1747
|
+
emit_token(lexer, tLBRACK, rb_str_new2("["), te - 1, te);
|
1748
1748
|
fnext expr_beg; fbreak;
|
1749
1749
|
};
|
1750
1750
|
|
1751
1751
|
w_space* e_lbrace => {
|
1752
|
-
VALUE val = array_last(
|
1753
|
-
if (val != Qnil && NUM2INT(val) ==
|
1754
|
-
rb_ary_pop(
|
1755
|
-
emit_token(
|
1752
|
+
VALUE val = array_last(lexer->lambda_stack);
|
1753
|
+
if (val != Qnil && NUM2INT(val) == lexer->paren_nest) {
|
1754
|
+
rb_ary_pop(lexer->lambda_stack);
|
1755
|
+
emit_token(lexer, tLAMBEG, rb_str_new2("{"), te - 1, te);
|
1756
1756
|
} else {
|
1757
|
-
emit_token(
|
1757
|
+
emit_token(lexer, tLCURLY, rb_str_new2("{"), te - 1, te);
|
1758
1758
|
}
|
1759
1759
|
fnext expr_value; fbreak;
|
1760
1760
|
};
|
@@ -1765,9 +1765,9 @@ void Init_lexer()
|
|
1765
1765
|
|
1766
1766
|
w_space+ %{ tm = p; }
|
1767
1767
|
( [%/] ( c_any - c_space_nl - '=' ) | '<<' ) => {
|
1768
|
-
if (NUM2INT(rb_ary_entry(
|
1769
|
-
diagnostic(
|
1770
|
-
range(
|
1768
|
+
if (NUM2INT(rb_ary_entry(lexer->source_pts, tm)) == '/') {
|
1769
|
+
diagnostic(lexer, warning, ambiguous_literal, Qnil,
|
1770
|
+
range(lexer, tm, tm + 1), empty_array);
|
1771
1771
|
}
|
1772
1772
|
|
1773
1773
|
p = tm - 1;
|
@@ -1776,9 +1776,9 @@ void Init_lexer()
|
|
1776
1776
|
|
1777
1777
|
w_space+ %{ tm = p; } ( '+' | '-' | '*' | '&' | '**' ) => {
|
1778
1778
|
VALUE hash = rb_hash_new();
|
1779
|
-
VALUE str = tok(
|
1779
|
+
VALUE str = tok(lexer, tm, te);
|
1780
1780
|
rb_hash_aset(hash, prefix, str);
|
1781
|
-
diagnostic(
|
1781
|
+
diagnostic(lexer, warning, ambiguous_prefix, hash, range(lexer, tm, te),
|
1782
1782
|
empty_array);
|
1783
1783
|
|
1784
1784
|
p = tm - 1;
|
@@ -1817,8 +1817,8 @@ void Init_lexer()
|
|
1817
1817
|
expr_cmdarg := |*
|
1818
1818
|
w_space+ e_lparen
|
1819
1819
|
=> {
|
1820
|
-
emit_token(
|
1821
|
-
if (
|
1820
|
+
emit_token(lexer, tLPAREN_ARG, rb_str_new2("("), te - 1, te);
|
1821
|
+
if (lexer->version == 18) {
|
1822
1822
|
fnext expr_value; fbreak;
|
1823
1823
|
} else {
|
1824
1824
|
fnext expr_beg; fbreak;
|
@@ -1827,10 +1827,10 @@ void Init_lexer()
|
|
1827
1827
|
|
1828
1828
|
w_space* 'do'
|
1829
1829
|
=> {
|
1830
|
-
if (stack_state_active(&
|
1831
|
-
emit_token(
|
1830
|
+
if (stack_state_active(&lexer->cond)) {
|
1831
|
+
emit_token(lexer, kDO_COND, rb_str_new2("do"), te - 2, te);
|
1832
1832
|
} else {
|
1833
|
-
emit_token(
|
1833
|
+
emit_token(lexer, kDO, rb_str_new2("do"), te - 2, te);
|
1834
1834
|
}
|
1835
1835
|
fnext expr_value; fbreak;
|
1836
1836
|
};
|
@@ -1846,17 +1846,17 @@ void Init_lexer()
|
|
1846
1846
|
|
1847
1847
|
expr_endarg := |*
|
1848
1848
|
e_lbrace => {
|
1849
|
-
VALUE val = array_last(
|
1850
|
-
if (val != Qnil && NUM2INT(val) ==
|
1851
|
-
rb_ary_pop(
|
1852
|
-
emit_token(
|
1849
|
+
VALUE val = array_last(lexer->lambda_stack);
|
1850
|
+
if (val != Qnil && NUM2INT(val) == lexer->paren_nest) {
|
1851
|
+
rb_ary_pop(lexer->lambda_stack);
|
1852
|
+
emit_token(lexer, tLAMBEG, rb_str_new2("{"), te - 1, te);
|
1853
1853
|
} else {
|
1854
|
-
emit_token(
|
1854
|
+
emit_token(lexer, tLBRACE_ARG, rb_str_new2("{"), te - 1, te);
|
1855
1855
|
}
|
1856
1856
|
fnext expr_value; fbreak;
|
1857
1857
|
};
|
1858
1858
|
|
1859
|
-
'do' => { emit_do(
|
1859
|
+
'do' => { emit_do(lexer, 1, ts, te); fnext expr_value; fbreak; };
|
1860
1860
|
|
1861
1861
|
w_space_comment;
|
1862
1862
|
|
@@ -1868,7 +1868,7 @@ void Init_lexer()
|
|
1868
1868
|
|
1869
1869
|
expr_mid := |*
|
1870
1870
|
keyword_modifier
|
1871
|
-
=> { emit_table_KEYWORDS(
|
1871
|
+
=> { emit_table_KEYWORDS(lexer, tok(lexer, ts, te), ts, te);
|
1872
1872
|
fnext expr_beg; fbreak; };
|
1873
1873
|
|
1874
1874
|
bareword => { p = ts - 1; fgoto expr_beg; };
|
@@ -1884,38 +1884,38 @@ void Init_lexer()
|
|
1884
1884
|
|
1885
1885
|
expr_beg := |*
|
1886
1886
|
[+\-] w_any* [0-9] => {
|
1887
|
-
emit_token(
|
1887
|
+
emit_token(lexer, tUNARY_NUM, tok(lexer, ts, ts + 1), ts, ts + 1);
|
1888
1888
|
fhold; fnext expr_end; fbreak;
|
1889
1889
|
};
|
1890
1890
|
|
1891
1891
|
'*' => { emit(tSTAR); fbreak; };
|
1892
1892
|
|
1893
1893
|
'/' c_any => {
|
1894
|
-
VALUE delimiter = rb_str_substr(
|
1895
|
-
fhold; fgoto *push_literal(
|
1894
|
+
VALUE delimiter = rb_str_substr(lexer->source, ts, 1);
|
1895
|
+
fhold; fgoto *push_literal(lexer, delimiter, delimiter, ts, 0, 0, 0, 0);
|
1896
1896
|
};
|
1897
1897
|
|
1898
1898
|
'%' ( any - [A-Za-z] ) => {
|
1899
|
-
VALUE type = rb_str_substr(
|
1900
|
-
VALUE delimiter = rb_str_substr(
|
1899
|
+
VALUE type = rb_str_substr(lexer->source, ts, 1);
|
1900
|
+
VALUE delimiter = rb_str_substr(lexer->source, te - 1, 1);
|
1901
1901
|
if (delimiter == Qnil)
|
1902
1902
|
delimiter = blank_string;
|
1903
1903
|
|
1904
|
-
fgoto *push_literal(
|
1904
|
+
fgoto *push_literal(lexer, type, delimiter, ts, 0, 0, 0, 0);
|
1905
1905
|
};
|
1906
1906
|
|
1907
1907
|
'%' [A-Za-z]+ c_any => {
|
1908
|
-
VALUE type = rb_str_substr(
|
1909
|
-
VALUE delimiter = rb_str_substr(
|
1908
|
+
VALUE type = rb_str_substr(lexer->source, ts, te - ts - 1);
|
1909
|
+
VALUE delimiter = rb_str_substr(lexer->source, te - 1, 1);
|
1910
1910
|
if (delimiter == Qnil)
|
1911
1911
|
delimiter = blank_string;
|
1912
1912
|
|
1913
|
-
fgoto *push_literal(
|
1913
|
+
fgoto *push_literal(lexer, type, delimiter, ts, 0, 0, 0, 0);
|
1914
1914
|
};
|
1915
1915
|
|
1916
1916
|
'%' c_eof => {
|
1917
|
-
diagnostic(
|
1918
|
-
range(
|
1917
|
+
diagnostic(lexer, fatal, string_eof, Qnil,
|
1918
|
+
range(lexer, ts, ts + 1), empty_array);
|
1919
1919
|
};
|
1920
1920
|
|
1921
1921
|
'<<' [~\-]?
|
@@ -1923,9 +1923,9 @@ void Init_lexer()
|
|
1923
1923
|
| "'" ( any - "'" )* "'"
|
1924
1924
|
| "`" ( any - "`" )* "`"
|
1925
1925
|
| bareword ) % { heredoc_e = p; }
|
1926
|
-
c_line* c_nl % { if (!
|
1926
|
+
c_line* c_nl % { if (!lexer->herebody_s) lexer->herebody_s = p; } => {
|
1927
1927
|
|
1928
|
-
VALUE heredoc = tok(
|
1928
|
+
VALUE heredoc = tok(lexer, ts, heredoc_e);
|
1929
1929
|
VALUE type;
|
1930
1930
|
char *cp = RSTRING_PTR(heredoc);
|
1931
1931
|
int indent = 0, dedent_body = 0;
|
@@ -1959,100 +1959,100 @@ void Init_lexer()
|
|
1959
1959
|
type = rb_str_new2("<<\"");
|
1960
1960
|
}
|
1961
1961
|
|
1962
|
-
VALUE delimiter = tok(
|
1962
|
+
VALUE delimiter = tok(lexer, rng_s, rng_e);
|
1963
1963
|
|
1964
|
-
if (
|
1964
|
+
if (lexer->version >= 24) {
|
1965
1965
|
if (NUM2INT(rb_funcall(delimiter, rb_intern("count"), 1, newline)) > 0) {
|
1966
1966
|
if (str_end_with_p(delimiter, "\n")) {
|
1967
|
-
diagnostic(
|
1968
|
-
range(
|
1967
|
+
diagnostic(lexer, warning, heredoc_id_ends_with_nl, Qnil,
|
1968
|
+
range(lexer, ts, ts + 1), empty_array);
|
1969
1969
|
|
1970
1970
|
delimiter = rb_funcall(delimiter, rb_intern("rstrip"), 0);
|
1971
1971
|
} else {
|
1972
|
-
diagnostic(
|
1973
|
-
range(
|
1972
|
+
diagnostic(lexer, fatal, heredoc_id_has_newline, Qnil,
|
1973
|
+
range(lexer, ts, ts + 1), empty_array);
|
1974
1974
|
}
|
1975
1975
|
}
|
1976
1976
|
}
|
1977
1977
|
|
1978
|
-
if (dedent_body &&
|
1979
|
-
emit_token(
|
1978
|
+
if (dedent_body && lexer->version >= 18 && lexer->version <= 22) {
|
1979
|
+
emit_token(lexer, tLSHFT, rb_str_new2("<<"), ts, ts + 2);
|
1980
1980
|
p = ts + 1;
|
1981
1981
|
fnext expr_beg; fbreak;
|
1982
1982
|
} else {
|
1983
|
-
fnext *push_literal(
|
1983
|
+
fnext *push_literal(lexer, type, delimiter, ts, heredoc_e, indent,
|
1984
1984
|
dedent_body, 0);
|
1985
|
-
p =
|
1985
|
+
p = lexer->herebody_s - 1;
|
1986
1986
|
}
|
1987
1987
|
};
|
1988
1988
|
|
1989
1989
|
':' ('&&' | '||') => {
|
1990
1990
|
fhold; fhold;
|
1991
|
-
emit_token(
|
1991
|
+
emit_token(lexer, tSYMBEG, tok(lexer, ts, ts + 1), ts, ts + 1);
|
1992
1992
|
fgoto expr_fname;
|
1993
1993
|
};
|
1994
1994
|
|
1995
1995
|
':' ['"] => { /* ' */
|
1996
|
-
VALUE type = tok(
|
1997
|
-
VALUE delimiter = tok(
|
1998
|
-
fgoto *push_literal(
|
1996
|
+
VALUE type = tok(lexer, ts, te);
|
1997
|
+
VALUE delimiter = tok(lexer, te - 1, te);
|
1998
|
+
fgoto *push_literal(lexer, type, delimiter, ts, 0, 0, 0, 0);
|
1999
1999
|
};
|
2000
2000
|
|
2001
2001
|
':' [!~] '@'
|
2002
2002
|
=> {
|
2003
|
-
emit_token(
|
2003
|
+
emit_token(lexer, tSYMBOL, tok(lexer, ts + 1, ts + 2), ts, te);
|
2004
2004
|
fnext expr_end; fbreak;
|
2005
2005
|
};
|
2006
2006
|
|
2007
2007
|
':' bareword ambiguous_symbol_suffix => {
|
2008
|
-
emit_token(
|
2008
|
+
emit_token(lexer, tSYMBOL, tok(lexer, ts + 1, tm), ts, tm);
|
2009
2009
|
p = tm - 1;
|
2010
2010
|
fnext expr_end; fbreak;
|
2011
2011
|
};
|
2012
2012
|
|
2013
2013
|
':' ( bareword | global_var | class_var | instance_var |
|
2014
2014
|
operator_fname | operator_arithmetic | operator_rest ) => {
|
2015
|
-
emit_token(
|
2015
|
+
emit_token(lexer, tSYMBOL, tok(lexer, ts + 1, te), ts, te);
|
2016
2016
|
fnext expr_end; fbreak;
|
2017
2017
|
};
|
2018
2018
|
|
2019
2019
|
'?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
|
2020
|
-
| (c_any - c_space_nl - e_bs) % {
|
2020
|
+
| (c_any - c_space_nl - e_bs) % { lexer->escape = Qnil; }
|
2021
2021
|
) => {
|
2022
|
-
VALUE value =
|
2022
|
+
VALUE value = lexer->escape;
|
2023
2023
|
if (value == Qnil)
|
2024
|
-
value = tok(
|
2024
|
+
value = tok(lexer, ts + 1, te);
|
2025
2025
|
|
2026
|
-
if (
|
2027
|
-
emit_token(
|
2026
|
+
if (lexer->version == 18)
|
2027
|
+
emit_token(lexer, tINTEGER, rb_funcall(value, rb_intern("getbyte"), 1, INT2NUM(0)), ts, te);
|
2028
2028
|
else
|
2029
|
-
emit_token(
|
2029
|
+
emit_token(lexer, tCHARACTER, value, ts, te);
|
2030
2030
|
|
2031
2031
|
fnext expr_end; fbreak;
|
2032
2032
|
};
|
2033
2033
|
|
2034
2034
|
'?' c_space_nl => {
|
2035
|
-
VALUE escape = escape_char(rb_str_subseq(
|
2035
|
+
VALUE escape = escape_char(rb_str_subseq(lexer->source, ts + 1, 1));
|
2036
2036
|
VALUE hash = rb_hash_new();
|
2037
2037
|
rb_hash_aset(hash, ID2SYM(rb_intern("escape")), escape);
|
2038
|
-
diagnostic(
|
2039
|
-
range(
|
2038
|
+
diagnostic(lexer, warning, invalid_escape_use, hash,
|
2039
|
+
range(lexer, ts, te), empty_array);
|
2040
2040
|
|
2041
2041
|
p = ts - 1;
|
2042
2042
|
fgoto expr_end;
|
2043
2043
|
};
|
2044
2044
|
|
2045
2045
|
'?' c_eof => {
|
2046
|
-
diagnostic(
|
2047
|
-
range(
|
2046
|
+
diagnostic(lexer, fatal, incomplete_escape, Qnil,
|
2047
|
+
range(lexer, ts, ts + 1), empty_array);
|
2048
2048
|
};
|
2049
2049
|
|
2050
2050
|
'?' [A-Za-z_] bareword => { p = ts - 1; fgoto expr_end; };
|
2051
2051
|
|
2052
2052
|
e_lbrace => {
|
2053
|
-
VALUE val = array_last(
|
2054
|
-
if (val != Qnil && NUM2INT(val) ==
|
2055
|
-
rb_ary_pop(
|
2053
|
+
VALUE val = array_last(lexer->lambda_stack);
|
2054
|
+
if (val != Qnil && NUM2INT(val) == lexer->paren_nest) {
|
2055
|
+
rb_ary_pop(lexer->lambda_stack);
|
2056
2056
|
emit(tLAMBEG);
|
2057
2057
|
} else {
|
2058
2058
|
emit(tLBRACE);
|
@@ -2061,37 +2061,37 @@ void Init_lexer()
|
|
2061
2061
|
};
|
2062
2062
|
|
2063
2063
|
e_lbrack => {
|
2064
|
-
emit_token(
|
2064
|
+
emit_token(lexer, tLBRACK, tok(lexer, ts, te), ts, te);
|
2065
2065
|
fbreak;
|
2066
2066
|
};
|
2067
2067
|
|
2068
2068
|
e_lparen => {
|
2069
|
-
emit_token(
|
2069
|
+
emit_token(lexer, tLPAREN, tok(lexer, ts, te), ts, te);
|
2070
2070
|
fbreak;
|
2071
2071
|
};
|
2072
2072
|
|
2073
2073
|
punctuation_begin
|
2074
|
-
=> { emit_table_PUNCTUATION_BEGIN(
|
2074
|
+
=> { emit_table_PUNCTUATION_BEGIN(lexer, tok(lexer, ts, te), ts, te);
|
2075
2075
|
fbreak; };
|
2076
2076
|
|
2077
2077
|
'rescue' %{ tm = p; } '=>'? => {
|
2078
|
-
emit_token(
|
2078
|
+
emit_token(lexer, kRESCUE, tok(lexer, ts, tm), ts, tm);
|
2079
2079
|
p = tm - 1;
|
2080
2080
|
fnext expr_mid; fbreak;
|
2081
2081
|
};
|
2082
2082
|
|
2083
2083
|
keyword_modifier
|
2084
|
-
=> { emit_table_KEYWORDS_BEGIN(
|
2084
|
+
=> { emit_table_KEYWORDS_BEGIN(lexer, tok(lexer, ts, te), ts, te);
|
2085
2085
|
fnext expr_value; fbreak; };
|
2086
2086
|
|
2087
2087
|
label ( any - ':' )
|
2088
2088
|
=> {
|
2089
2089
|
fhold;
|
2090
2090
|
|
2091
|
-
if (
|
2092
|
-
VALUE ident = tok(
|
2091
|
+
if (lexer->version == 18) {
|
2092
|
+
VALUE ident = tok(lexer, ts, te - 2);
|
2093
2093
|
|
2094
|
-
emit_token(
|
2094
|
+
emit_token(lexer, is_capitalized(ident) ? tCONSTANT : tIDENTIFIER,
|
2095
2095
|
ident, ts, te - 2);
|
2096
2096
|
fhold;
|
2097
2097
|
|
@@ -2101,7 +2101,7 @@ void Init_lexer()
|
|
2101
2101
|
fnext *arg_or_cmdarg(command_state);
|
2102
2102
|
}
|
2103
2103
|
} else {
|
2104
|
-
emit_token(
|
2104
|
+
emit_token(lexer, tLABEL, tok(lexer, ts, te - 2), ts, te - 1);
|
2105
2105
|
fnext expr_labelarg;
|
2106
2106
|
}
|
2107
2107
|
|
@@ -2113,13 +2113,13 @@ void Init_lexer()
|
|
2113
2113
|
call_or_var => local_ident;
|
2114
2114
|
|
2115
2115
|
(call_or_var - keyword)
|
2116
|
-
% { ident_tok = tok(
|
2116
|
+
% { ident_tok = tok(lexer, ts, te); ident_ts = ts; ident_te = te; }
|
2117
2117
|
w_space+ '('
|
2118
2118
|
=> {
|
2119
|
-
emit_token(
|
2119
|
+
emit_token(lexer, tIDENTIFIER, ident_tok, ident_ts, ident_te);
|
2120
2120
|
p = ident_te - 1;
|
2121
2121
|
|
2122
|
-
if (STATIC_ENV_DECLARED(ident_tok) &&
|
2122
|
+
if (STATIC_ENV_DECLARED(ident_tok) && lexer->version < 25) {
|
2123
2123
|
fnext expr_endfn;
|
2124
2124
|
} else {
|
2125
2125
|
fnext expr_cmdarg;
|
@@ -2132,7 +2132,7 @@ void Init_lexer()
|
|
2132
2132
|
|
2133
2133
|
e_heredoc_nl '=begin' ( c_space | c_nl_zlen ) => {
|
2134
2134
|
p = ts - 1;
|
2135
|
-
|
2135
|
+
lexer->cs_before_block_comment = lexer->cs;
|
2136
2136
|
fgoto line_begin;
|
2137
2137
|
};
|
2138
2138
|
|
@@ -2149,7 +2149,7 @@ void Init_lexer()
|
|
2149
2149
|
w_space_comment;
|
2150
2150
|
|
2151
2151
|
w_newline => {
|
2152
|
-
if (
|
2152
|
+
if (lexer->in_kwarg) {
|
2153
2153
|
fhold; fgoto expr_end;
|
2154
2154
|
} else {
|
2155
2155
|
fgoto line_begin;
|
@@ -2165,8 +2165,8 @@ void Init_lexer()
|
|
2165
2165
|
label (any - ':') => { p = ts - 1; fgoto expr_end; };
|
2166
2166
|
|
2167
2167
|
['"] => { /* ' */
|
2168
|
-
VALUE type = tok(
|
2169
|
-
fgoto *push_literal(
|
2168
|
+
VALUE type = tok(lexer, ts, te);
|
2169
|
+
fgoto *push_literal(lexer, type, type, ts, 0, 0, 0, 0);
|
2170
2170
|
};
|
2171
2171
|
|
2172
2172
|
w_space_comment;
|
@@ -2180,15 +2180,15 @@ void Init_lexer()
|
|
2180
2180
|
|
2181
2181
|
expr_end := |*
|
2182
2182
|
'->' => {
|
2183
|
-
emit_token(
|
2184
|
-
rb_ary_push(
|
2183
|
+
emit_token(lexer, tLAMBDA, tok(lexer, ts, ts + 2), ts, ts + 2);
|
2184
|
+
rb_ary_push(lexer->lambda_stack, INT2NUM(lexer->paren_nest));
|
2185
2185
|
fnext expr_endfn; fbreak;
|
2186
2186
|
};
|
2187
2187
|
|
2188
2188
|
e_lbrace => {
|
2189
|
-
VALUE val = array_last(
|
2190
|
-
if (val != Qnil && NUM2INT(val) ==
|
2191
|
-
rb_ary_pop(
|
2189
|
+
VALUE val = array_last(lexer->lambda_stack);
|
2190
|
+
if (val != Qnil && NUM2INT(val) == lexer->paren_nest) {
|
2191
|
+
rb_ary_pop(lexer->lambda_stack);
|
2192
2192
|
emit(tLAMBEG);
|
2193
2193
|
} else {
|
2194
2194
|
emit(tLCURLY);
|
@@ -2197,43 +2197,43 @@ void Init_lexer()
|
|
2197
2197
|
};
|
2198
2198
|
|
2199
2199
|
'do' => {
|
2200
|
-
VALUE val = array_last(
|
2201
|
-
if (val != Qnil && NUM2INT(val) ==
|
2202
|
-
rb_ary_pop(
|
2200
|
+
VALUE val = array_last(lexer->lambda_stack);
|
2201
|
+
if (val != Qnil && NUM2INT(val) == lexer->paren_nest) {
|
2202
|
+
rb_ary_pop(lexer->lambda_stack);
|
2203
2203
|
emit(kDO_LAMBDA);
|
2204
2204
|
} else {
|
2205
|
-
emit_do(
|
2205
|
+
emit_do(lexer, 0, ts, te);
|
2206
2206
|
}
|
2207
2207
|
fnext expr_value; fbreak;
|
2208
2208
|
};
|
2209
2209
|
|
2210
2210
|
keyword_with_fname
|
2211
|
-
=> { emit_table_KEYWORDS(
|
2211
|
+
=> { emit_table_KEYWORDS(lexer, tok(lexer, ts, te), ts, te);
|
2212
2212
|
fnext expr_fname; fbreak; };
|
2213
2213
|
|
2214
2214
|
'class' w_any* '<<'
|
2215
|
-
=> { emit_token(
|
2216
|
-
emit_token(
|
2215
|
+
=> { emit_token(lexer, kCLASS, rb_str_new2("class"), ts, ts + 5);
|
2216
|
+
emit_token(lexer, tLSHFT, rb_str_new2("<<"), te - 2, te);
|
2217
2217
|
fnext expr_value; fbreak; };
|
2218
2218
|
|
2219
2219
|
keyword_modifier
|
2220
|
-
=> { emit_table_KEYWORDS(
|
2220
|
+
=> { emit_table_KEYWORDS(lexer, tok(lexer, ts, te), ts, te);
|
2221
2221
|
fnext expr_beg; fbreak; };
|
2222
2222
|
|
2223
2223
|
keyword_with_value
|
2224
|
-
=> { emit_table_KEYWORDS(
|
2224
|
+
=> { emit_table_KEYWORDS(lexer, tok(lexer, ts, te), ts, te);
|
2225
2225
|
fnext expr_value; fbreak; };
|
2226
2226
|
|
2227
2227
|
keyword_with_mid
|
2228
|
-
=> { emit_table_KEYWORDS(
|
2228
|
+
=> { emit_table_KEYWORDS(lexer, tok(lexer, ts, te), ts, te);
|
2229
2229
|
fnext expr_mid; fbreak; };
|
2230
2230
|
|
2231
2231
|
keyword_with_arg
|
2232
2232
|
=> {
|
2233
|
-
VALUE keyword = tok(
|
2234
|
-
emit_table_KEYWORDS(
|
2233
|
+
VALUE keyword = tok(lexer, ts, te);
|
2234
|
+
emit_table_KEYWORDS(lexer, keyword, ts, te);
|
2235
2235
|
|
2236
|
-
if (
|
2236
|
+
if (lexer->version == 18 && strcmp(RSTRING_PTR(keyword), "not") == 0) {
|
2237
2237
|
fnext expr_beg; fbreak;
|
2238
2238
|
} else {
|
2239
2239
|
fnext expr_arg; fbreak;
|
@@ -2241,8 +2241,8 @@ void Init_lexer()
|
|
2241
2241
|
};
|
2242
2242
|
|
2243
2243
|
'__ENCODING__' => {
|
2244
|
-
if (
|
2245
|
-
VALUE str = tok(
|
2244
|
+
if (lexer->version == 18) {
|
2245
|
+
VALUE str = tok(lexer, ts, te);
|
2246
2246
|
emit(tIDENTIFIER);
|
2247
2247
|
|
2248
2248
|
if (STATIC_ENV_DECLARED(str)) {
|
@@ -2257,7 +2257,7 @@ void Init_lexer()
|
|
2257
2257
|
};
|
2258
2258
|
|
2259
2259
|
keyword_with_end
|
2260
|
-
=> { emit_table_KEYWORDS(
|
2260
|
+
=> { emit_table_KEYWORDS(lexer, tok(lexer, ts, te), ts, te);
|
2261
2261
|
fbreak; };
|
2262
2262
|
|
2263
2263
|
( '0' [Xx] %{ num_base = 16; num_digits_s = p; } int_hex
|
@@ -2269,30 +2269,30 @@ void Init_lexer()
|
|
2269
2269
|
) %{ num_suffix_s = p; } int_suffix
|
2270
2270
|
=> {
|
2271
2271
|
int invalid_idx;
|
2272
|
-
VALUE digits = tok(
|
2272
|
+
VALUE digits = tok(lexer, num_digits_s, num_suffix_s);
|
2273
2273
|
|
2274
|
-
if (NUM2INT(rb_ary_entry(
|
2274
|
+
if (NUM2INT(rb_ary_entry(lexer->source_pts, num_suffix_s - 1)) == '_') {
|
2275
2275
|
VALUE hash = rb_hash_new();
|
2276
2276
|
rb_hash_aset(hash, character, rb_str_new2("_"));
|
2277
|
-
diagnostic(
|
2278
|
-
range(
|
2279
|
-
} else if (RSTRING_LEN(digits) == 0 && num_base == 8 &&
|
2277
|
+
diagnostic(lexer, severity_error, trailing_in_number, hash,
|
2278
|
+
range(lexer, te - 1, te), empty_array);
|
2279
|
+
} else if (RSTRING_LEN(digits) == 0 && num_base == 8 && lexer->version == 18) {
|
2280
2280
|
digits = rb_str_new2("0");
|
2281
2281
|
} else if (RSTRING_LEN(digits) == 0) {
|
2282
|
-
diagnostic(
|
2283
|
-
range(
|
2282
|
+
diagnostic(lexer, severity_error, empty_numeric, Qnil,
|
2283
|
+
range(lexer, ts, te), empty_array);
|
2284
2284
|
} else if (num_base == 8 && (invalid_idx = find_8_or_9(digits)) != -1) {
|
2285
2285
|
long invalid_s = num_digits_s + invalid_idx;
|
2286
|
-
diagnostic(
|
2287
|
-
range(
|
2286
|
+
diagnostic(lexer, severity_error, invalid_octal, Qnil,
|
2287
|
+
range(lexer, invalid_s, invalid_s + 1), empty_array);
|
2288
2288
|
}
|
2289
2289
|
|
2290
2290
|
VALUE integer = rb_str_to_inum(digits, num_base, 0);
|
2291
|
-
if (
|
2292
|
-
emit_token(
|
2291
|
+
if (lexer->version >= 18 && lexer->version <= 20) {
|
2292
|
+
emit_token(lexer, tINTEGER, integer, numeric_s, num_suffix_s);
|
2293
2293
|
p = num_suffix_s - 1;
|
2294
2294
|
} else {
|
2295
|
-
num_xfrm(
|
2295
|
+
num_xfrm(lexer, integer, numeric_s, te);
|
2296
2296
|
}
|
2297
2297
|
|
2298
2298
|
fbreak;
|
@@ -2300,34 +2300,34 @@ void Init_lexer()
|
|
2300
2300
|
|
2301
2301
|
flo_frac flo_pow?
|
2302
2302
|
=> {
|
2303
|
-
diagnostic(
|
2304
|
-
range(
|
2303
|
+
diagnostic(lexer, severity_error, no_dot_digit_literal, Qnil,
|
2304
|
+
range(lexer, ts, te), empty_array);
|
2305
2305
|
};
|
2306
2306
|
|
2307
2307
|
flo_int [eE]
|
2308
2308
|
=> {
|
2309
|
-
if (
|
2309
|
+
if (lexer->version >= 18 && lexer->version <= 20) {
|
2310
2310
|
VALUE hash = rb_hash_new();
|
2311
|
-
rb_hash_aset(hash, character, tok(
|
2312
|
-
diagnostic(
|
2313
|
-
range(
|
2311
|
+
rb_hash_aset(hash, character, tok(lexer, te - 1, te));
|
2312
|
+
diagnostic(lexer, severity_error, trailing_in_number, hash,
|
2313
|
+
range(lexer, te - 1, te), empty_array);
|
2314
2314
|
} else {
|
2315
|
-
VALUE integer = rb_str_to_inum(tok(
|
2316
|
-
emit_token(
|
2315
|
+
VALUE integer = rb_str_to_inum(tok(lexer, ts, te - 1), 10, 0);
|
2316
|
+
emit_token(lexer, tINTEGER, integer, ts, te - 1);
|
2317
2317
|
fhold; fbreak;
|
2318
2318
|
}
|
2319
2319
|
};
|
2320
2320
|
|
2321
2321
|
flo_int flo_frac [eE]
|
2322
2322
|
=> {
|
2323
|
-
if (
|
2323
|
+
if (lexer->version >= 18 && lexer->version <= 20) {
|
2324
2324
|
VALUE hash = rb_hash_new();
|
2325
|
-
rb_hash_aset(hash, character, tok(
|
2326
|
-
diagnostic(
|
2327
|
-
range(
|
2325
|
+
rb_hash_aset(hash, character, tok(lexer, te - 1, te));
|
2326
|
+
diagnostic(lexer, severity_error, trailing_in_number, hash,
|
2327
|
+
range(lexer, te - 1, te), empty_array);
|
2328
2328
|
} else {
|
2329
|
-
VALUE fval = rb_funcall(tok(
|
2330
|
-
emit_token(
|
2329
|
+
VALUE fval = rb_funcall(tok(lexer, ts, te - 1), rb_intern("to_f"), 0);
|
2330
|
+
emit_token(lexer, tFLOAT, fval, ts, te - 1);
|
2331
2331
|
fhold; fbreak;
|
2332
2332
|
}
|
2333
2333
|
};
|
@@ -2337,28 +2337,28 @@ void Init_lexer()
|
|
2337
2337
|
| flo_frac %{ num_suffix_s = p; } flo_suffix
|
2338
2338
|
)
|
2339
2339
|
=> {
|
2340
|
-
VALUE digits = tok(
|
2340
|
+
VALUE digits = tok(lexer, ts, num_suffix_s);
|
2341
2341
|
|
2342
|
-
if (
|
2342
|
+
if (lexer->version >= 18 && lexer->version <= 20) {
|
2343
2343
|
VALUE fval = rb_Float(digits);
|
2344
|
-
emit_token(
|
2344
|
+
emit_token(lexer, tFLOAT, fval, ts, num_suffix_s);
|
2345
2345
|
p = num_suffix_s - 1;
|
2346
2346
|
} else {
|
2347
|
-
num_xfrm(
|
2347
|
+
num_xfrm(lexer, digits, ts, te);
|
2348
2348
|
}
|
2349
2349
|
fbreak;
|
2350
2350
|
};
|
2351
2351
|
|
2352
2352
|
'`' | ['"] => { /* ' */
|
2353
|
-
VALUE type = tok(
|
2354
|
-
VALUE delimiter = tok(
|
2355
|
-
fgoto *push_literal(
|
2353
|
+
VALUE type = tok(lexer, ts, te);
|
2354
|
+
VALUE delimiter = tok(lexer, te - 1, te);
|
2355
|
+
fgoto *push_literal(lexer, type, delimiter, ts, 0, 0, 0, 1);
|
2356
2356
|
};
|
2357
2357
|
|
2358
2358
|
constant => { emit(tCONSTANT); fnext *arg_or_cmdarg(command_state); fbreak; };
|
2359
2359
|
|
2360
2360
|
constant ambiguous_const_suffix => {
|
2361
|
-
emit_token(
|
2361
|
+
emit_token(lexer, tCONSTANT, tok(lexer, ts, tm), ts, tm);
|
2362
2362
|
p = tm - 1;
|
2363
2363
|
fbreak;
|
2364
2364
|
};
|
@@ -2367,7 +2367,7 @@ void Init_lexer()
|
|
2367
2367
|
=> { p = ts - 1; fcall expr_variable; };
|
2368
2368
|
|
2369
2369
|
'.' | '&.' | '::'
|
2370
|
-
=> { emit_table_PUNCTUATION(
|
2370
|
+
=> { emit_table_PUNCTUATION(lexer, tok(lexer, ts, te), ts, te);
|
2371
2371
|
fnext expr_dot; fbreak; };
|
2372
2372
|
|
2373
2373
|
call_or_var => local_ident;
|
@@ -2376,39 +2376,39 @@ void Init_lexer()
|
|
2376
2376
|
if (tm == te) {
|
2377
2377
|
emit(tFID);
|
2378
2378
|
} else {
|
2379
|
-
emit_token(
|
2379
|
+
emit_token(lexer, tIDENTIFIER, tok(lexer, ts, tm), ts, tm);
|
2380
2380
|
p = tm - 1;
|
2381
2381
|
}
|
2382
2382
|
fnext expr_arg; fbreak;
|
2383
2383
|
};
|
2384
2384
|
|
2385
|
-
'*' => {
|
2386
|
-
|
2385
|
+
'*' | '=>' => {
|
2386
|
+
emit_table_PUNCTUATION(lexer, tok(lexer, ts, te), ts, te);
|
2387
2387
|
fgoto expr_value;
|
2388
2388
|
};
|
2389
2389
|
|
2390
2390
|
( operator_arithmetic | operator_rest ) - ( '|' | '~' | '!' | '*' )
|
2391
2391
|
=> {
|
2392
|
-
emit_table_PUNCTUATION(
|
2392
|
+
emit_table_PUNCTUATION(lexer, tok(lexer, ts, te), ts, te);
|
2393
2393
|
fnext expr_value; fbreak;
|
2394
2394
|
};
|
2395
2395
|
|
2396
2396
|
( e_lparen | '|' | '~' | '!' )
|
2397
|
-
=> { emit_table_PUNCTUATION(
|
2397
|
+
=> { emit_table_PUNCTUATION(lexer, tok(lexer, ts, te), ts, te);
|
2398
2398
|
fnext expr_beg; fbreak; };
|
2399
2399
|
|
2400
2400
|
e_rbrace => {
|
2401
2401
|
emit(tRCURLY);
|
2402
2402
|
|
2403
|
-
if (
|
2404
|
-
stack_state_lexpop(&
|
2405
|
-
stack_state_lexpop(&
|
2403
|
+
if (lexer->version < 24) {
|
2404
|
+
stack_state_lexpop(&lexer->cond);
|
2405
|
+
stack_state_lexpop(&lexer->cmdarg);
|
2406
2406
|
} else {
|
2407
|
-
stack_state_pop(&
|
2408
|
-
stack_state_pop(&
|
2407
|
+
stack_state_pop(&lexer->cond);
|
2408
|
+
stack_state_pop(&lexer->cmdarg);
|
2409
2409
|
}
|
2410
2410
|
|
2411
|
-
if (
|
2411
|
+
if (lexer->version >= 25) {
|
2412
2412
|
fnext expr_end;
|
2413
2413
|
} else {
|
2414
2414
|
fnext expr_endarg;
|
@@ -2420,12 +2420,12 @@ void Init_lexer()
|
|
2420
2420
|
e_rparen => {
|
2421
2421
|
emit(tRPAREN);
|
2422
2422
|
|
2423
|
-
if (
|
2424
|
-
stack_state_lexpop(&
|
2425
|
-
stack_state_lexpop(&
|
2423
|
+
if (lexer->version < 24) {
|
2424
|
+
stack_state_lexpop(&lexer->cond);
|
2425
|
+
stack_state_lexpop(&lexer->cmdarg);
|
2426
2426
|
} else {
|
2427
|
-
stack_state_pop(&
|
2428
|
-
stack_state_pop(&
|
2427
|
+
stack_state_pop(&lexer->cond);
|
2428
|
+
stack_state_pop(&lexer->cmdarg);
|
2429
2429
|
}
|
2430
2430
|
|
2431
2431
|
fbreak;
|
@@ -2434,15 +2434,15 @@ void Init_lexer()
|
|
2434
2434
|
']' => {
|
2435
2435
|
emit(tRBRACK);
|
2436
2436
|
|
2437
|
-
if (
|
2438
|
-
stack_state_lexpop(&
|
2439
|
-
stack_state_lexpop(&
|
2437
|
+
if (lexer->version < 24) {
|
2438
|
+
stack_state_lexpop(&lexer->cond);
|
2439
|
+
stack_state_lexpop(&lexer->cmdarg);
|
2440
2440
|
} else {
|
2441
|
-
stack_state_pop(&
|
2442
|
-
stack_state_pop(&
|
2441
|
+
stack_state_pop(&lexer->cond);
|
2442
|
+
stack_state_pop(&lexer->cmdarg);
|
2443
2443
|
}
|
2444
2444
|
|
2445
|
-
if (
|
2445
|
+
if (lexer->version >= 25) {
|
2446
2446
|
fnext expr_end;
|
2447
2447
|
} else {
|
2448
2448
|
fnext expr_endarg;
|
@@ -2452,7 +2452,7 @@ void Init_lexer()
|
|
2452
2452
|
};
|
2453
2453
|
|
2454
2454
|
operator_arithmetic '='
|
2455
|
-
=> { emit_token(
|
2455
|
+
=> { emit_token(lexer, tOP_ASGN, tok(lexer, ts, te - 1), ts, te);
|
2456
2456
|
fnext expr_beg; fbreak; };
|
2457
2457
|
|
2458
2458
|
'?' => { emit(tEH); fnext expr_value; fbreak; };
|
@@ -2460,7 +2460,7 @@ void Init_lexer()
|
|
2460
2460
|
e_lbrack => { emit(tLBRACK2); fnext expr_beg; fbreak; };
|
2461
2461
|
|
2462
2462
|
punctuation_end
|
2463
|
-
=> { emit_table_PUNCTUATION(
|
2463
|
+
=> { emit_table_PUNCTUATION(lexer, tok(lexer, ts, te), ts, te);
|
2464
2464
|
fnext expr_beg; fbreak; };
|
2465
2465
|
|
2466
2466
|
w_space_comment;
|
@@ -2470,17 +2470,17 @@ void Init_lexer()
|
|
2470
2470
|
';' => { emit(tSEMI); fnext expr_value; fbreak; };
|
2471
2471
|
|
2472
2472
|
'\\' c_line {
|
2473
|
-
diagnostic(
|
2474
|
-
range(
|
2473
|
+
diagnostic(lexer, severity_error, bare_backslash, Qnil,
|
2474
|
+
range(lexer, ts, ts + 1), empty_array);
|
2475
2475
|
fhold;
|
2476
2476
|
};
|
2477
2477
|
|
2478
2478
|
c_any
|
2479
2479
|
=> {
|
2480
2480
|
VALUE hash = rb_hash_new();
|
2481
|
-
VALUE str = rb_str_inspect(tok(
|
2481
|
+
VALUE str = rb_str_inspect(tok(lexer, ts, te));
|
2482
2482
|
rb_hash_aset(hash, character, rb_str_substr(str, 1, NUM2INT(rb_str_length(str)) - 2));
|
2483
|
-
diagnostic(
|
2483
|
+
diagnostic(lexer, fatal, unexpected, hash, range(lexer, ts, te), empty_array);
|
2484
2484
|
};
|
2485
2485
|
|
2486
2486
|
c_eof => do_eof;
|
@@ -2490,22 +2490,22 @@ void Init_lexer()
|
|
2490
2490
|
c_space* %{ tm = p; } ('.' | '&.') => { p = tm - 1; fgoto expr_end; };
|
2491
2491
|
|
2492
2492
|
any => {
|
2493
|
-
emit_token(
|
2493
|
+
emit_token(lexer, tNL, Qnil, lexer->newline_s, lexer->newline_s + 1);
|
2494
2494
|
fhold; fnext line_begin; fbreak;
|
2495
2495
|
};
|
2496
2496
|
*|;
|
2497
2497
|
|
2498
2498
|
line_comment := |*
|
2499
2499
|
'=end' c_line* c_nl_zlen => {
|
2500
|
-
emit_comment(
|
2501
|
-
fgoto *
|
2500
|
+
emit_comment(lexer, lexer->eq_begin_s, te);
|
2501
|
+
fgoto *lexer->cs_before_block_comment;
|
2502
2502
|
};
|
2503
2503
|
|
2504
2504
|
c_line* c_nl;
|
2505
2505
|
|
2506
2506
|
c_line* zlen => {
|
2507
|
-
diagnostic(
|
2508
|
-
range(
|
2507
|
+
diagnostic(lexer, fatal, embedded_document, Qnil,
|
2508
|
+
range(lexer, lexer->eq_begin_s, lexer->eq_begin_s + 6),
|
2509
2509
|
empty_array);
|
2510
2510
|
};
|
2511
2511
|
*|;
|
@@ -2514,7 +2514,7 @@ void Init_lexer()
|
|
2514
2514
|
w_any;
|
2515
2515
|
|
2516
2516
|
'=begin' ( c_space | c_nl_zlen ) => {
|
2517
|
-
|
2517
|
+
lexer->eq_begin_s = ts;
|
2518
2518
|
fgoto line_comment;
|
2519
2519
|
};
|
2520
2520
|
|