c_lexer 2.5.1.2.0 → 2.5.3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/appveyor.yml +1 -0
- data/c_lexer.gemspec +1 -1
- data/ext/lexer/emit_tables.h +5 -5
- data/ext/lexer/lexer.c +3654 -3647
- data/ext/lexer/lexer.h +29 -29
- data/ext/lexer/lexer.rl +508 -508
- data/ext/lexer/{literal.h → literal/literal.h} +2 -2
- data/ext/lexer/literal/methods.h +1 -1
- data/ext/lexer/stack_state/cmdarg.h +47 -0
- data/ext/lexer/stack_state/cond.h +47 -0
- data/ext/lexer/{stack.h → stack_state/stack.h} +0 -0
- data/ext/lexer/{stack_state.h → stack_state/stack_state.h} +2 -2
- data/lib/c_lexer/version.rb +1 -1
- metadata +9 -9
- data/ext/lexer/cmdarg.h +0 -47
- data/ext/lexer/cond.h +0 -47
data/ext/lexer/lexer.rl
CHANGED
@@ -5,16 +5,16 @@
|
|
5
5
|
#include <stdint.h>
|
6
6
|
#include <stdio.h>
|
7
7
|
|
8
|
-
#include "stack.h"
|
9
|
-
#include "stack_state.h"
|
8
|
+
#include "stack_state/stack.h"
|
9
|
+
#include "stack_state/stack_state.h"
|
10
10
|
#include "lexer.h"
|
11
11
|
|
12
|
-
#define
|
12
|
+
#define GET_LEXER(self) Data_Get_Struct(self, Lexer, lexer)
|
13
13
|
#define STATIC_ENV_DECLARED(name) \
|
14
|
-
|
14
|
+
lexer->static_env != Qnil && RTEST(rb_funcall(lexer->static_env, rb_intern("declared?"), 1, name))
|
15
15
|
|
16
|
-
#include "cmdarg.h"
|
17
|
-
#include "cond.h"
|
16
|
+
#include "stack_state/cmdarg.h"
|
17
|
+
#include "stack_state/cond.h"
|
18
18
|
|
19
19
|
#include "literal/methods.h"
|
20
20
|
#include "emit_tables.h"
|
@@ -24,50 +24,50 @@
|
|
24
24
|
|
25
25
|
static VALUE lexer_alloc(VALUE klass)
|
26
26
|
{
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
ss_stack_init(&
|
49
|
-
ss_stack_init(&
|
50
|
-
lit_stack_init(&
|
51
|
-
|
52
|
-
return Data_Wrap_Struct(klass, lexer_mark, lexer_dealloc,
|
27
|
+
Lexer *lexer = xmalloc(sizeof(Lexer));
|
28
|
+
|
29
|
+
lexer->cs = lexer->p = lexer->pe = 0;
|
30
|
+
lexer->paren_nest = 0;
|
31
|
+
|
32
|
+
lexer->cs_stack = xmalloc(4 * sizeof(int));
|
33
|
+
lexer->cs_stack_top = 0;
|
34
|
+
lexer->cs_stack_size = 4;
|
35
|
+
|
36
|
+
lexer->source_buffer = Qnil;
|
37
|
+
lexer->source = Qnil;
|
38
|
+
lexer->source_pts = Qnil;
|
39
|
+
lexer->token_queue = Qnil;
|
40
|
+
lexer->static_env = Qnil;
|
41
|
+
lexer->lambda_stack = Qnil;
|
42
|
+
lexer->diagnostics = Qnil;
|
43
|
+
lexer->tokens = Qnil;
|
44
|
+
lexer->comments = Qnil;
|
45
|
+
lexer->encoding = Qnil;
|
46
|
+
lexer->escape = Qnil;
|
47
|
+
|
48
|
+
ss_stack_init(&lexer->cond_stack);
|
49
|
+
ss_stack_init(&lexer->cmdarg_stack);
|
50
|
+
lit_stack_init(&lexer->literal_stack);
|
51
|
+
|
52
|
+
return Data_Wrap_Struct(klass, lexer_mark, lexer_dealloc, lexer);
|
53
53
|
}
|
54
54
|
|
55
55
|
static void lexer_mark(void *ptr)
|
56
56
|
{
|
57
|
-
|
58
|
-
rb_gc_mark(
|
59
|
-
rb_gc_mark(
|
60
|
-
rb_gc_mark(
|
61
|
-
rb_gc_mark(
|
62
|
-
rb_gc_mark(
|
63
|
-
rb_gc_mark(
|
64
|
-
rb_gc_mark(
|
65
|
-
rb_gc_mark(
|
66
|
-
rb_gc_mark(
|
67
|
-
rb_gc_mark(
|
68
|
-
rb_gc_mark(
|
69
|
-
|
70
|
-
for (literal *lit =
|
57
|
+
Lexer *lexer = ptr;
|
58
|
+
rb_gc_mark(lexer->source_buffer);
|
59
|
+
rb_gc_mark(lexer->source);
|
60
|
+
rb_gc_mark(lexer->source_pts);
|
61
|
+
rb_gc_mark(lexer->token_queue);
|
62
|
+
rb_gc_mark(lexer->static_env);
|
63
|
+
rb_gc_mark(lexer->lambda_stack);
|
64
|
+
rb_gc_mark(lexer->diagnostics);
|
65
|
+
rb_gc_mark(lexer->tokens);
|
66
|
+
rb_gc_mark(lexer->comments);
|
67
|
+
rb_gc_mark(lexer->encoding);
|
68
|
+
rb_gc_mark(lexer->escape);
|
69
|
+
|
70
|
+
for (literal *lit = lexer->literal_stack.bottom; lit < lexer->literal_stack.top; lit++) {
|
71
71
|
rb_gc_mark(lit->buffer);
|
72
72
|
rb_gc_mark(lit->start_tok);
|
73
73
|
rb_gc_mark(lit->start_delim);
|
@@ -78,25 +78,25 @@ static void lexer_mark(void *ptr)
|
|
78
78
|
|
79
79
|
static void lexer_dealloc(void *ptr)
|
80
80
|
{
|
81
|
-
|
82
|
-
ss_stack_dealloc(&
|
83
|
-
ss_stack_dealloc(&
|
84
|
-
lit_stack_dealloc(&
|
81
|
+
Lexer *lexer = ptr;
|
82
|
+
ss_stack_dealloc(&lexer->cond_stack);
|
83
|
+
ss_stack_dealloc(&lexer->cmdarg_stack);
|
84
|
+
lit_stack_dealloc(&lexer->literal_stack);
|
85
85
|
xfree(ptr);
|
86
86
|
}
|
87
87
|
|
88
88
|
static VALUE lexer_initialize(VALUE self, VALUE version)
|
89
89
|
{
|
90
|
-
|
90
|
+
Lexer* lexer = GET_LEXER(self);
|
91
91
|
|
92
|
-
|
92
|
+
lexer->version = NUM2INT(version);
|
93
93
|
|
94
94
|
return lexer_reset(0, NULL, self);
|
95
95
|
}
|
96
96
|
|
97
97
|
static VALUE lexer_reset(int argc, VALUE *argv, VALUE self)
|
98
98
|
{
|
99
|
-
|
99
|
+
Lexer* lexer = GET_LEXER(self);
|
100
100
|
|
101
101
|
VALUE reset_state;
|
102
102
|
rb_scan_args(argc, argv, "01", &reset_state);
|
@@ -104,21 +104,21 @@ static VALUE lexer_reset(int argc, VALUE *argv, VALUE self)
|
|
104
104
|
reset_state = Qtrue;
|
105
105
|
|
106
106
|
if (RTEST(reset_state)) {
|
107
|
-
|
107
|
+
lexer->cs = lex_en_line_begin;
|
108
108
|
|
109
|
-
|
110
|
-
|
111
|
-
ss_stack_clear(&
|
112
|
-
ss_stack_clear(&
|
109
|
+
lexer->cond = 0;
|
110
|
+
lexer->cmdarg = 0;
|
111
|
+
ss_stack_clear(&lexer->cond_stack);
|
112
|
+
ss_stack_clear(&lexer->cmdarg_stack);
|
113
113
|
}
|
114
114
|
|
115
|
-
|
115
|
+
lexer->force_utf32 = 0;
|
116
116
|
|
117
|
-
|
118
|
-
|
119
|
-
|
117
|
+
lexer->source = Qnil;
|
118
|
+
lexer->source_pts = Qnil;
|
119
|
+
lexer->encoding = Qnil;
|
120
120
|
|
121
|
-
|
121
|
+
lexer->p = 0;
|
122
122
|
// @ts is a local variable
|
123
123
|
// @te is a local variable
|
124
124
|
// @act is a local variable
|
@@ -127,65 +127,65 @@ static VALUE lexer_reset(int argc, VALUE *argv, VALUE self)
|
|
127
127
|
// @top is handled on prepush
|
128
128
|
|
129
129
|
// Lexer state
|
130
|
-
|
131
|
-
lit_stack_clear(&
|
130
|
+
lexer->token_queue = rb_ary_new();
|
131
|
+
lit_stack_clear(&lexer->literal_stack);
|
132
132
|
|
133
|
-
|
133
|
+
lexer->eq_begin_s = 0;
|
134
134
|
// @sharp_s is a local variable
|
135
135
|
|
136
|
-
|
136
|
+
lexer->newline_s = 0;
|
137
137
|
|
138
138
|
// @num_base is a local variable
|
139
139
|
// @num_digits_s is a local variable
|
140
140
|
// @num_suffix_s is a local variable
|
141
141
|
// @num_xfrm is a local variable
|
142
142
|
|
143
|
-
|
144
|
-
|
143
|
+
lexer->escape_s = 0;
|
144
|
+
lexer->escape = Qnil;
|
145
145
|
|
146
|
-
|
146
|
+
lexer->herebody_s = 0;
|
147
147
|
|
148
|
-
|
149
|
-
|
148
|
+
lexer->paren_nest = 0;
|
149
|
+
lexer->lambda_stack = rb_ary_new();
|
150
150
|
|
151
|
-
|
151
|
+
lexer->dedent_level = -1;
|
152
152
|
|
153
153
|
// @command_state is a local variable
|
154
154
|
|
155
|
-
|
155
|
+
lexer->in_kwarg = 0;
|
156
156
|
|
157
|
-
|
157
|
+
lexer->cs_before_block_comment = lex_en_line_begin;
|
158
158
|
|
159
159
|
return self;
|
160
160
|
}
|
161
161
|
|
162
162
|
static VALUE lexer_set_source_buffer(VALUE self, VALUE buffer)
|
163
163
|
{
|
164
|
-
|
164
|
+
Lexer* lexer = GET_LEXER(self);
|
165
165
|
|
166
|
-
|
166
|
+
lexer->source_buffer = buffer;
|
167
167
|
|
168
168
|
if (RTEST(buffer)) {
|
169
|
-
|
170
|
-
|
169
|
+
lexer->source = rb_funcall(buffer, rb_intern("source"), 0);
|
170
|
+
lexer->encoding = rb_obj_encoding(lexer->source);
|
171
171
|
|
172
|
-
if (
|
173
|
-
|
172
|
+
if (lexer->encoding == utf8_encoding) {
|
173
|
+
lexer->source_pts = rb_funcall(lexer->source, rb_intern("unpack"), 1, rb_str_new2("U*"));
|
174
174
|
} else {
|
175
|
-
|
175
|
+
lexer->source_pts = rb_funcall(lexer->source, rb_intern("unpack"), 1, rb_str_new2("C*"));
|
176
176
|
}
|
177
177
|
|
178
|
-
|
178
|
+
lexer->pe = RARRAY_LEN(lexer->source_pts) + 2; /* pretend there is a null at the end */
|
179
179
|
|
180
|
-
VALUE source_pt = rb_ary_entry(
|
180
|
+
VALUE source_pt = rb_ary_entry(lexer->source_pts, 0);
|
181
181
|
if (source_pt != Qnil && NUM2INT(source_pt) == 0xfeff) {
|
182
|
-
|
182
|
+
lexer->p = 1;
|
183
183
|
}
|
184
184
|
} else {
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
185
|
+
lexer->source = Qnil;
|
186
|
+
lexer->source_pts = Qnil;
|
187
|
+
lexer->encoding = Qnil;
|
188
|
+
lexer->pe = 0;
|
189
189
|
}
|
190
190
|
|
191
191
|
return self;
|
@@ -193,9 +193,9 @@ static VALUE lexer_set_source_buffer(VALUE self, VALUE buffer)
|
|
193
193
|
|
194
194
|
static VALUE lexer_get_state(VALUE self)
|
195
195
|
{
|
196
|
-
|
196
|
+
Lexer* lexer = GET_LEXER(self);
|
197
197
|
|
198
|
-
switch (
|
198
|
+
switch (lexer->cs) {
|
199
199
|
case lex_en_line_begin: return ID2SYM(rb_intern("line_begin"));
|
200
200
|
case lex_en_expr_dot: return ID2SYM(rb_intern("expr_dot"));
|
201
201
|
case lex_en_expr_fname: return ID2SYM(rb_intern("expr_fname"));
|
@@ -220,42 +220,42 @@ static VALUE lexer_get_state(VALUE self)
|
|
220
220
|
|
221
221
|
static VALUE lexer_set_state(VALUE self, VALUE state_sym)
|
222
222
|
{
|
223
|
-
|
223
|
+
Lexer* lexer = GET_LEXER(self);
|
224
224
|
const char *state_name = rb_id2name(SYM2ID(state_sym));
|
225
225
|
|
226
226
|
if (strcmp(state_name, "line_begin") == 0)
|
227
|
-
|
227
|
+
lexer->cs = lex_en_line_begin;
|
228
228
|
else if (strcmp(state_name, "expr_dot") == 0)
|
229
|
-
|
229
|
+
lexer->cs = lex_en_expr_dot;
|
230
230
|
else if (strcmp(state_name, "expr_fname") == 0)
|
231
|
-
|
231
|
+
lexer->cs = lex_en_expr_fname;
|
232
232
|
else if (strcmp(state_name, "expr_value") == 0)
|
233
|
-
|
233
|
+
lexer->cs = lex_en_expr_value;
|
234
234
|
else if (strcmp(state_name, "expr_beg") == 0)
|
235
|
-
|
235
|
+
lexer->cs = lex_en_expr_beg;
|
236
236
|
else if (strcmp(state_name, "expr_mid") == 0)
|
237
|
-
|
237
|
+
lexer->cs = lex_en_expr_mid;
|
238
238
|
else if (strcmp(state_name, "expr_arg") == 0)
|
239
|
-
|
239
|
+
lexer->cs = lex_en_expr_arg;
|
240
240
|
else if (strcmp(state_name, "expr_cmdarg") == 0)
|
241
|
-
|
241
|
+
lexer->cs = lex_en_expr_cmdarg;
|
242
242
|
else if (strcmp(state_name, "expr_end") == 0)
|
243
|
-
|
243
|
+
lexer->cs = lex_en_expr_end;
|
244
244
|
else if (strcmp(state_name, "expr_endarg") == 0)
|
245
|
-
|
245
|
+
lexer->cs = lex_en_expr_endarg;
|
246
246
|
else if (strcmp(state_name, "expr_endfn") == 0)
|
247
|
-
|
247
|
+
lexer->cs = lex_en_expr_endfn;
|
248
248
|
else if (strcmp(state_name, "expr_labelarg") == 0)
|
249
|
-
|
249
|
+
lexer->cs = lex_en_expr_labelarg;
|
250
250
|
|
251
251
|
else if (strcmp(state_name, "interp_string") == 0)
|
252
|
-
|
252
|
+
lexer->cs = lex_en_interp_string;
|
253
253
|
else if (strcmp(state_name, "interp_words") == 0)
|
254
|
-
|
254
|
+
lexer->cs = lex_en_interp_words;
|
255
255
|
else if (strcmp(state_name, "plain_string") == 0)
|
256
|
-
|
256
|
+
lexer->cs = lex_en_plain_string;
|
257
257
|
else if (strcmp(state_name, "plain_words") == 0)
|
258
|
-
|
258
|
+
lexer->cs = lex_en_plain_words;
|
259
259
|
else
|
260
260
|
rb_raise(rb_eArgError, "Invalid state: %s", state_name);
|
261
261
|
|
@@ -264,52 +264,52 @@ static VALUE lexer_set_state(VALUE self, VALUE state_sym)
|
|
264
264
|
|
265
265
|
static VALUE lexer_push_cmdarg(VALUE self)
|
266
266
|
{
|
267
|
-
|
268
|
-
ss_stack_push(&
|
269
|
-
|
267
|
+
Lexer* lexer = GET_LEXER(self);
|
268
|
+
ss_stack_push(&lexer->cmdarg_stack, lexer->cmdarg);
|
269
|
+
lexer->cmdarg = 0;
|
270
270
|
return Qnil;
|
271
271
|
}
|
272
272
|
|
273
273
|
static VALUE lexer_pop_cmdarg(VALUE self)
|
274
274
|
{
|
275
|
-
|
276
|
-
|
275
|
+
Lexer* lexer = GET_LEXER(self);
|
276
|
+
lexer->cmdarg = ss_stack_pop(&lexer->cmdarg_stack);
|
277
277
|
return Qnil;
|
278
278
|
}
|
279
279
|
|
280
280
|
static VALUE lexer_push_cond(VALUE self)
|
281
281
|
{
|
282
|
-
|
283
|
-
ss_stack_push(&
|
284
|
-
|
282
|
+
Lexer* lexer = GET_LEXER(self);
|
283
|
+
ss_stack_push(&lexer->cond_stack, lexer->cond);
|
284
|
+
lexer->cond = 0;
|
285
285
|
return Qnil;
|
286
286
|
}
|
287
287
|
|
288
288
|
static VALUE lexer_pop_cond(VALUE self)
|
289
289
|
{
|
290
|
-
|
291
|
-
|
290
|
+
Lexer* lexer = GET_LEXER(self);
|
291
|
+
lexer->cond = ss_stack_pop(&lexer->cond_stack);
|
292
292
|
return Qnil;
|
293
293
|
}
|
294
294
|
|
295
295
|
static VALUE lexer_get_in_kwarg(VALUE self)
|
296
296
|
{
|
297
|
-
|
298
|
-
return
|
297
|
+
Lexer* lexer = GET_LEXER(self);
|
298
|
+
return lexer->in_kwarg ? Qtrue : Qfalse;
|
299
299
|
}
|
300
300
|
|
301
301
|
static VALUE lexer_set_in_kwarg(VALUE self, VALUE val)
|
302
302
|
{
|
303
|
-
|
304
|
-
|
303
|
+
Lexer* lexer = GET_LEXER(self);
|
304
|
+
lexer->in_kwarg = RTEST(val) ? 1 : 0;
|
305
305
|
return val;
|
306
306
|
}
|
307
307
|
|
308
308
|
static VALUE lexer_get_dedent_level(VALUE self)
|
309
309
|
{
|
310
|
-
|
311
|
-
int result =
|
312
|
-
|
310
|
+
Lexer* lexer = GET_LEXER(self);
|
311
|
+
int result = lexer->dedent_level;
|
312
|
+
lexer->dedent_level = -1;
|
313
313
|
if (result == -1)
|
314
314
|
return Qnil;
|
315
315
|
else
|
@@ -327,22 +327,22 @@ static VALUE lexer_advance(VALUE self)
|
|
327
327
|
int num_base = 0;
|
328
328
|
long p, pe, eof, ts = 0, te = 0, tm = 0, sharp_s = 0, heredoc_e = 0;
|
329
329
|
long num_digits_s = 0, num_suffix_s = 0;
|
330
|
-
void (*num_xfrm)(
|
331
|
-
|
330
|
+
void (*num_xfrm)(Lexer*, VALUE, long, long); /* numeric suffix-induced transformation */
|
331
|
+
Lexer *lexer;
|
332
332
|
int *stack;
|
333
333
|
VALUE ident_tok = Qnil;
|
334
334
|
long ident_ts = 0, ident_te = 0;
|
335
335
|
long numeric_s = 0;
|
336
|
-
Data_Get_Struct(self,
|
336
|
+
Data_Get_Struct(self, Lexer, lexer);
|
337
337
|
|
338
|
-
if (RARRAY_LEN(
|
339
|
-
return rb_ary_shift(
|
338
|
+
if (RARRAY_LEN(lexer->token_queue) > 0)
|
339
|
+
return rb_ary_shift(lexer->token_queue);
|
340
340
|
|
341
|
-
cs =
|
342
|
-
p =
|
343
|
-
pe = eof =
|
344
|
-
stack =
|
345
|
-
top =
|
341
|
+
cs = lexer->cs;
|
342
|
+
p = lexer->p;
|
343
|
+
pe = eof = lexer->pe;
|
344
|
+
stack = lexer->cs_stack;
|
345
|
+
top = lexer->cs_stack_top;
|
346
346
|
|
347
347
|
command_state = (cs == lex_en_expr_value || cs == lex_en_line_begin);
|
348
348
|
|
@@ -350,18 +350,18 @@ static VALUE lexer_advance(VALUE self)
|
|
350
350
|
write exec;
|
351
351
|
}%%
|
352
352
|
|
353
|
-
|
354
|
-
|
355
|
-
|
353
|
+
lexer->p = p;
|
354
|
+
lexer->cs = cs;
|
355
|
+
lexer->cs_stack_top = top;
|
356
356
|
|
357
|
-
if (RARRAY_LEN(
|
358
|
-
return rb_ary_shift(
|
357
|
+
if (RARRAY_LEN(lexer->token_queue) > 0) {
|
358
|
+
return rb_ary_shift(lexer->token_queue);
|
359
359
|
} else if (cs == lex_error) {
|
360
|
-
VALUE info = rb_ary_new3(2, rb_str_new2("$error"), range(
|
360
|
+
VALUE info = rb_ary_new3(2, rb_str_new2("$error"), range(lexer, p - 1, p));
|
361
361
|
VALUE token = rb_ary_new3(2, Qfalse, info);
|
362
362
|
return token;
|
363
363
|
} else {
|
364
|
-
VALUE info = rb_ary_new3(2, rb_str_new2("$eof"), range(
|
364
|
+
VALUE info = rb_ary_new3(2, rb_str_new2("$eof"), range(lexer, eof - 2, eof - 2));
|
365
365
|
VALUE token = rb_ary_new3(2, Qfalse, info);
|
366
366
|
return token;
|
367
367
|
}
|
@@ -372,62 +372,62 @@ static inline void force_encoding(VALUE str, VALUE enc)
|
|
372
372
|
rb_enc_associate(str, rb_to_encoding(enc));
|
373
373
|
}
|
374
374
|
|
375
|
-
static void emit_token(
|
375
|
+
static void emit_token(Lexer *lexer, VALUE type, VALUE value, long start, long end)
|
376
376
|
{
|
377
|
-
VALUE info = rb_ary_new3(2, value, range(
|
377
|
+
VALUE info = rb_ary_new3(2, value, range(lexer, start, end));
|
378
378
|
VALUE token = rb_ary_new3(2, type, info);
|
379
379
|
|
380
|
-
rb_ary_push(
|
380
|
+
rb_ary_push(lexer->token_queue, token);
|
381
381
|
|
382
|
-
if (
|
383
|
-
rb_ary_push(
|
382
|
+
if (lexer->tokens != Qnil)
|
383
|
+
rb_ary_push(lexer->tokens, token);
|
384
384
|
}
|
385
385
|
|
386
|
-
static void emit_comment(
|
386
|
+
static void emit_comment(Lexer *lexer, long start, long end)
|
387
387
|
{
|
388
388
|
VALUE rng = Qnil;
|
389
389
|
|
390
|
-
if (
|
391
|
-
rng = range(
|
390
|
+
if (lexer->tokens != Qnil) {
|
391
|
+
rng = range(lexer, start, end);
|
392
392
|
|
393
|
-
VALUE info = rb_ary_new3(2, tok(
|
393
|
+
VALUE info = rb_ary_new3(2, tok(lexer, start, end), rng);
|
394
394
|
VALUE token = rb_ary_new3(2, tCOMMENT, info);
|
395
|
-
rb_ary_push(
|
395
|
+
rb_ary_push(lexer->tokens, token);
|
396
396
|
}
|
397
397
|
|
398
|
-
if (
|
398
|
+
if (lexer->comments != Qnil) {
|
399
399
|
if (rng == Qnil)
|
400
|
-
rng = range(
|
400
|
+
rng = range(lexer, start, end);
|
401
401
|
VALUE comment = rb_class_new_instance(1, &rng, comment_klass);
|
402
|
-
rb_ary_push(
|
402
|
+
rb_ary_push(lexer->comments, comment);
|
403
403
|
}
|
404
404
|
}
|
405
405
|
|
406
|
-
static void emit_do(
|
406
|
+
static void emit_do(Lexer *lexer, int do_block, long ts, long te)
|
407
407
|
{
|
408
|
-
if (stack_state_active(&
|
408
|
+
if (stack_state_active(&lexer->cond))
|
409
409
|
emit(kDO_COND);
|
410
|
-
else if (stack_state_active(&
|
410
|
+
else if (stack_state_active(&lexer->cmdarg) || do_block)
|
411
411
|
emit(kDO_BLOCK);
|
412
412
|
else
|
413
413
|
emit(kDO);
|
414
414
|
}
|
415
415
|
|
416
|
-
static VALUE tok(
|
416
|
+
static VALUE tok(Lexer *lexer, long start, long end)
|
417
417
|
{
|
418
|
-
return rb_str_substr(
|
418
|
+
return rb_str_substr(lexer->source, start, end - start);
|
419
419
|
}
|
420
420
|
|
421
|
-
static VALUE range(
|
421
|
+
static VALUE range(Lexer *lexer, long start, long end)
|
422
422
|
{
|
423
423
|
VALUE args[3];
|
424
|
-
args[0] =
|
424
|
+
args[0] = lexer->source_buffer;
|
425
425
|
args[1] = INT2NUM(start);
|
426
426
|
args[2] = INT2NUM(end);
|
427
427
|
return rb_class_new_instance(3, args, range_klass);
|
428
428
|
}
|
429
429
|
|
430
|
-
static void diagnostic(
|
430
|
+
static void diagnostic(Lexer *lexer, VALUE type, VALUE reason,
|
431
431
|
VALUE arguments, VALUE loc, VALUE hilights)
|
432
432
|
{
|
433
433
|
VALUE args[5];
|
@@ -437,15 +437,15 @@ static void diagnostic(lexer_state *state, VALUE type, VALUE reason,
|
|
437
437
|
args[3] = loc;
|
438
438
|
args[4] = hilights;
|
439
439
|
VALUE diagnostic = rb_class_new_instance(5, args, diagnostic_klass);
|
440
|
-
rb_funcall(
|
440
|
+
rb_funcall(lexer->diagnostics, rb_intern("process"), 1, diagnostic);
|
441
441
|
}
|
442
442
|
|
443
|
-
static int get_codepoint(
|
443
|
+
static int get_codepoint(Lexer *lexer, long p)
|
444
444
|
{
|
445
|
-
if (p >= RARRAY_LEN(
|
445
|
+
if (p >= RARRAY_LEN(lexer->source_pts))
|
446
446
|
return 0;
|
447
447
|
else
|
448
|
-
return NUM2INT(rb_ary_entry(
|
448
|
+
return NUM2INT(rb_ary_entry(lexer->source_pts, p));
|
449
449
|
}
|
450
450
|
|
451
451
|
static int arg_or_cmdarg(int command_state)
|
@@ -558,53 +558,53 @@ static int find_8_or_9(VALUE str)
|
|
558
558
|
return -1;
|
559
559
|
}
|
560
560
|
|
561
|
-
static void emit_int(
|
561
|
+
static void emit_int(Lexer *lexer, VALUE val, long start, long end)
|
562
562
|
{
|
563
|
-
emit_token(
|
563
|
+
emit_token(lexer, tINTEGER, val, start, end);
|
564
564
|
}
|
565
565
|
|
566
|
-
static void emit_rational(
|
566
|
+
static void emit_rational(Lexer *lexer, VALUE val, long start, long end)
|
567
567
|
{
|
568
|
-
emit_token(
|
568
|
+
emit_token(lexer, tRATIONAL, rb_Rational1(val), start, end);
|
569
569
|
}
|
570
570
|
|
571
|
-
static void emit_complex(
|
571
|
+
static void emit_complex(Lexer *lexer, VALUE val, long start, long end)
|
572
572
|
{
|
573
|
-
emit_token(
|
573
|
+
emit_token(lexer, tIMAGINARY, rb_Complex(Qzero, val), start, end);
|
574
574
|
}
|
575
575
|
|
576
|
-
static void emit_complex_rational(
|
576
|
+
static void emit_complex_rational(Lexer *lexer, VALUE val, long start, long end)
|
577
577
|
{
|
578
|
-
emit_token(
|
578
|
+
emit_token(lexer, tIMAGINARY, rb_Complex(Qzero, rb_Rational1(val)), start, end);
|
579
579
|
}
|
580
580
|
|
581
|
-
static void emit_float(
|
581
|
+
static void emit_float(Lexer *lexer, VALUE val, long start, long end)
|
582
582
|
{
|
583
|
-
emit_token(
|
583
|
+
emit_token(lexer, tFLOAT, rb_Float(val), start, end);
|
584
584
|
}
|
585
585
|
|
586
|
-
static void emit_complex_float(
|
586
|
+
static void emit_complex_float(Lexer *lexer, VALUE val, long start, long end)
|
587
587
|
{
|
588
|
-
emit_token(
|
588
|
+
emit_token(lexer, tIMAGINARY, rb_Complex(Qzero, rb_Float(val)), start, end);
|
589
589
|
}
|
590
590
|
|
591
|
-
static void emit_int_followed_by_if(
|
591
|
+
static void emit_int_followed_by_if(Lexer *lexer, VALUE val, long start, long end)
|
592
592
|
{
|
593
|
-
emit_token(
|
593
|
+
emit_token(lexer, tINTEGER, val, start, end);
|
594
594
|
}
|
595
595
|
|
596
|
-
static void emit_int_followed_by_rescue(
|
596
|
+
static void emit_int_followed_by_rescue(Lexer *lexer, VALUE val, long start, long end)
|
597
597
|
{
|
598
|
-
emit_token(
|
598
|
+
emit_token(lexer, tINTEGER, val, start, end);
|
599
599
|
}
|
600
600
|
|
601
|
-
static void emit_float_followed_by_if(
|
601
|
+
static void emit_float_followed_by_if(Lexer *lexer, VALUE val, long start, long end)
|
602
602
|
{
|
603
|
-
emit_token(
|
603
|
+
emit_token(lexer, tFLOAT, rb_Float(val), start, end);
|
604
604
|
}
|
605
|
-
static void emit_float_followed_by_rescue(
|
605
|
+
static void emit_float_followed_by_rescue(Lexer *lexer, VALUE val, long start, long end)
|
606
606
|
{
|
607
|
-
emit_token(
|
607
|
+
emit_token(lexer, tFLOAT, rb_Float(val), start, end);
|
608
608
|
}
|
609
609
|
|
610
610
|
static int next_state_for_literal(literal *lit) {
|
@@ -635,23 +635,23 @@ static int next_state_for_literal(literal *lit) {
|
|
635
635
|
}
|
636
636
|
}
|
637
637
|
|
638
|
-
static int push_literal(
|
638
|
+
static int push_literal(Lexer *lexer, VALUE str_type, VALUE delimiter,
|
639
639
|
long str_s, long heredoc_e, int indent, int dedent_body,
|
640
640
|
int label_allowed)
|
641
641
|
{
|
642
642
|
literal lit;
|
643
|
-
literal_init(&lit,
|
643
|
+
literal_init(&lit, lexer, str_type, delimiter, str_s, heredoc_e, indent,
|
644
644
|
dedent_body, label_allowed);
|
645
|
-
lit_stack_push(&
|
645
|
+
lit_stack_push(&lexer->literal_stack, lit);
|
646
646
|
|
647
647
|
return next_state_for_literal(&lit);
|
648
648
|
}
|
649
649
|
|
650
|
-
static int pop_literal(
|
650
|
+
static int pop_literal(Lexer *lexer)
|
651
651
|
{
|
652
|
-
literal old_literal = lit_stack_pop(&
|
652
|
+
literal old_literal = lit_stack_pop(&lexer->literal_stack);
|
653
653
|
|
654
|
-
|
654
|
+
lexer->dedent_level = old_literal.dedent_level;
|
655
655
|
|
656
656
|
if (old_literal.start_tok == tREGEXP_BEG) {
|
657
657
|
return lex_en_regexp_modifiers;
|
@@ -986,20 +986,20 @@ void Init_lexer()
|
|
986
986
|
|
987
987
|
%%{
|
988
988
|
alphtype int;
|
989
|
-
getkey (get_codepoint(
|
989
|
+
getkey (get_codepoint(lexer, p));
|
990
990
|
|
991
991
|
prepush {
|
992
992
|
/* grow the state stack as needed */
|
993
|
-
if (
|
994
|
-
int *new_stack = xmalloc(
|
995
|
-
memcpy(new_stack,
|
996
|
-
xfree(
|
997
|
-
stack =
|
998
|
-
|
993
|
+
if (lexer->cs_stack_top == lexer->cs_stack_size) {
|
994
|
+
int *new_stack = xmalloc(lexer->cs_stack_size * 2 * sizeof(int));
|
995
|
+
memcpy(new_stack, lexer->cs_stack, lexer->cs_stack_size * sizeof(int));
|
996
|
+
xfree(lexer->cs_stack);
|
997
|
+
stack = lexer->cs_stack = new_stack;
|
998
|
+
lexer->cs_stack_size = lexer->cs_stack_size * 2;
|
999
999
|
}
|
1000
1000
|
}
|
1001
1001
|
|
1002
|
-
action do_nl {
|
1002
|
+
action do_nl { lexer->newline_s = p; }
|
1003
1003
|
|
1004
1004
|
c_nl = '\n' $ do_nl;
|
1005
1005
|
c_space = [ \t\r\f\v];
|
@@ -1094,30 +1094,30 @@ void Init_lexer()
|
|
1094
1094
|
escaped_nl = "\\" c_nl;
|
1095
1095
|
|
1096
1096
|
action unicode_points {
|
1097
|
-
|
1097
|
+
lexer->escape = rb_str_new2("");
|
1098
1098
|
|
1099
|
-
VALUE codepoints = tok(
|
1100
|
-
long codepoint_s =
|
1099
|
+
VALUE codepoints = tok(lexer, lexer->escape_s + 2, p - 1);
|
1100
|
+
long codepoint_s = lexer->escape_s + 2;
|
1101
1101
|
|
1102
1102
|
VALUE regexp;
|
1103
1103
|
|
1104
|
-
if (
|
1104
|
+
if (lexer->version < 24) {
|
1105
1105
|
if (str_start_with_p(codepoints, " ") || str_start_with_p(codepoints, "\t")) {
|
1106
|
-
diagnostic(
|
1107
|
-
range(
|
1106
|
+
diagnostic(lexer, severity_error, invalid_unicode_escape, Qnil,
|
1107
|
+
range(lexer, lexer->escape_s + 2, lexer->escape_s + 3), empty_array);
|
1108
1108
|
}
|
1109
1109
|
|
1110
1110
|
regexp = rb_reg_regcomp(rb_str_new2("[ \\t]{2}"));
|
1111
1111
|
VALUE space_p = rb_funcall(codepoints, rb_intern("index"), 1, regexp);
|
1112
1112
|
|
1113
1113
|
if (RTEST(space_p)) {
|
1114
|
-
diagnostic(
|
1115
|
-
range(
|
1114
|
+
diagnostic(lexer, severity_error, invalid_unicode_escape, Qnil,
|
1115
|
+
range(lexer, codepoint_s + NUM2INT(space_p) + 1, codepoint_s + NUM2INT(space_p) + 1), empty_array);
|
1116
1116
|
}
|
1117
1117
|
|
1118
1118
|
if (str_end_with_p(codepoints, " ") || str_end_with_p(codepoints, "\t")) {
|
1119
|
-
diagnostic(
|
1120
|
-
range(
|
1119
|
+
diagnostic(lexer, severity_error, invalid_unicode_escape, Qnil,
|
1120
|
+
range(lexer, p - 1, p), empty_array);
|
1121
1121
|
}
|
1122
1122
|
}
|
1123
1123
|
|
@@ -1136,93 +1136,93 @@ void Init_lexer()
|
|
1136
1136
|
} else {
|
1137
1137
|
VALUE codepoint = rb_str_to_inum(codepoint_str, 16, 0);
|
1138
1138
|
if (NUM2INT(codepoint) >= 0x110000) {
|
1139
|
-
diagnostic(
|
1140
|
-
range(
|
1139
|
+
diagnostic(lexer, severity_error, unicode_point_too_large, Qnil,
|
1140
|
+
range(lexer, codepoint_s, codepoint_s + RSTRING_LEN(codepoint_str)), empty_array);
|
1141
1141
|
break;
|
1142
1142
|
}
|
1143
1143
|
|
1144
1144
|
codepoint = rb_funcall(codepoint, rb_intern("chr"), 1, utf8_encoding);
|
1145
|
-
|
1145
|
+
lexer->escape = rb_str_plus(lexer->escape, codepoint);
|
1146
1146
|
codepoint_s += RSTRING_LEN(codepoint_str);
|
1147
1147
|
}
|
1148
1148
|
}
|
1149
1149
|
}
|
1150
1150
|
|
1151
1151
|
action unescape_char {
|
1152
|
-
char c = NUM2INT(rb_ary_entry(
|
1153
|
-
|
1152
|
+
char c = NUM2INT(rb_ary_entry(lexer->source_pts, p - 1));
|
1153
|
+
lexer->escape = unescape_char(c);
|
1154
1154
|
|
1155
|
-
if (
|
1156
|
-
VALUE codepoint = rb_funcall(
|
1157
|
-
|
1158
|
-
force_encoding(codepoint,
|
1155
|
+
if (lexer->escape == Qnil) {
|
1156
|
+
VALUE codepoint = rb_funcall(lexer->source_buffer, rb_intern("slice"), 1, INT2NUM(p - 1));
|
1157
|
+
lexer->escape = codepoint;
|
1158
|
+
force_encoding(codepoint, lexer->encoding);
|
1159
1159
|
}
|
1160
1160
|
}
|
1161
1161
|
|
1162
1162
|
action invalid_complex_escape {
|
1163
|
-
diagnostic(
|
1163
|
+
diagnostic(lexer, fatal, invalid_escape, Qnil, range(lexer, ts, te),
|
1164
1164
|
empty_array);
|
1165
1165
|
}
|
1166
1166
|
|
1167
1167
|
action slash_c_char {
|
1168
|
-
char c = *RSTRING_PTR(
|
1169
|
-
|
1170
|
-
force_encoding(
|
1168
|
+
char c = *RSTRING_PTR(lexer->escape) & 0x9f;
|
1169
|
+
lexer->escape = rb_str_new(&c, 1);
|
1170
|
+
force_encoding(lexer->escape, lexer->encoding);
|
1171
1171
|
}
|
1172
1172
|
|
1173
1173
|
action slash_m_char {
|
1174
|
-
char c = *RSTRING_PTR(
|
1175
|
-
|
1176
|
-
force_encoding(
|
1174
|
+
char c = *RSTRING_PTR(lexer->escape) | 0x80;
|
1175
|
+
lexer->escape = rb_str_new(&c, 1);
|
1176
|
+
force_encoding(lexer->escape, lexer->encoding);
|
1177
1177
|
}
|
1178
1178
|
|
1179
1179
|
maybe_escaped_char = (
|
1180
1180
|
'\\' c_any %unescape_char
|
1181
|
-
| ( c_any - [\\] ) % {
|
1181
|
+
| ( c_any - [\\] ) % { lexer->escape = rb_str_substr(lexer->source, p - 1, 1); }
|
1182
1182
|
);
|
1183
1183
|
|
1184
1184
|
maybe_escaped_ctrl_char = (
|
1185
1185
|
'\\' c_any %unescape_char %slash_c_char
|
1186
|
-
| '?' % {
|
1187
|
-
| ( c_any - [\\?] ) % {
|
1186
|
+
| '?' % { lexer->escape = rb_str_new2("\x7f"); }
|
1187
|
+
| ( c_any - [\\?] ) % { lexer->escape = rb_str_substr(lexer->source, p - 1, 1); } %slash_c_char
|
1188
1188
|
);
|
1189
1189
|
|
1190
1190
|
escape = (
|
1191
1191
|
[0-7]{1,3} % {
|
1192
|
-
VALUE token = tok(
|
1192
|
+
VALUE token = tok(lexer, lexer->escape_s, p);
|
1193
1193
|
char c = NUM2INT(rb_str_to_inum(token, 8, 0));
|
1194
1194
|
c = c % 0x100;
|
1195
|
-
|
1196
|
-
force_encoding(
|
1195
|
+
lexer->escape = rb_str_new(&c, 1);
|
1196
|
+
force_encoding(lexer->escape, lexer->encoding);
|
1197
1197
|
}
|
1198
1198
|
|
1199
1199
|
| 'x' xdigit{1,2} % {
|
1200
|
-
VALUE token = tok(
|
1200
|
+
VALUE token = tok(lexer, lexer->escape_s + 1, p);
|
1201
1201
|
char c = NUM2INT(rb_str_to_inum(token, 16, 0));
|
1202
|
-
|
1203
|
-
force_encoding(
|
1202
|
+
lexer->escape = rb_str_new(&c, 1);
|
1203
|
+
force_encoding(lexer->escape, lexer->encoding);
|
1204
1204
|
}
|
1205
1205
|
|
1206
1206
|
| 'x' ( c_any - xdigit )
|
1207
1207
|
% {
|
1208
|
-
diagnostic(
|
1209
|
-
range(
|
1208
|
+
diagnostic(lexer, fatal, invalid_hex_escape, Qnil,
|
1209
|
+
range(lexer, lexer->escape_s - 1, p + 2), empty_array);
|
1210
1210
|
}
|
1211
1211
|
|
1212
1212
|
| 'u' xdigit{4} % {
|
1213
|
-
VALUE token = tok(
|
1213
|
+
VALUE token = tok(lexer, lexer->escape_s + 1, p);
|
1214
1214
|
int i = NUM2INT(rb_str_to_inum(token, 16, 0));
|
1215
|
-
|
1215
|
+
lexer->escape = rb_enc_uint_chr(i, rb_to_encoding(utf8_encoding));
|
1216
1216
|
}
|
1217
1217
|
|
1218
1218
|
| 'u' xdigit{0,3} % {
|
1219
|
-
diagnostic(
|
1220
|
-
range(
|
1219
|
+
diagnostic(lexer, fatal, invalid_unicode_escape, Qnil,
|
1220
|
+
range(lexer, lexer->escape_s - 1, p), empty_array);
|
1221
1221
|
}
|
1222
1222
|
|
1223
1223
|
| 'u{' ( c_any - xdigit - [ \t}] )* '}' % {
|
1224
|
-
diagnostic(
|
1225
|
-
range(
|
1224
|
+
diagnostic(lexer, fatal, invalid_unicode_escape, Qnil,
|
1225
|
+
range(lexer, lexer->escape_s - 1, p), empty_array);
|
1226
1226
|
}
|
1227
1227
|
|
1228
1228
|
| 'u{' [ \t]* ( xdigit{1,6} [ \t]+ )*
|
@@ -1235,8 +1235,8 @@ void Init_lexer()
|
|
1235
1235
|
| ( c_any - [ \t}] )* c_eof
|
1236
1236
|
| xdigit{7,}
|
1237
1237
|
) % {
|
1238
|
-
diagnostic(
|
1239
|
-
range(
|
1238
|
+
diagnostic(lexer, fatal, invalid_unicode_escape, Qnil,
|
1239
|
+
range(lexer, p - 1, p), empty_array);
|
1240
1240
|
}
|
1241
1241
|
)
|
1242
1242
|
|
@@ -1259,42 +1259,42 @@ void Init_lexer()
|
|
1259
1259
|
| ( c_any - [0-7xuCMc] ) %unescape_char
|
1260
1260
|
|
1261
1261
|
| c_eof % {
|
1262
|
-
diagnostic(
|
1262
|
+
diagnostic(lexer, fatal, escape_eof, Qnil, range(lexer, p - 1, p),
|
1263
1263
|
empty_array);
|
1264
1264
|
}
|
1265
1265
|
);
|
1266
1266
|
|
1267
1267
|
e_bs = '\\' % {
|
1268
|
-
|
1269
|
-
|
1268
|
+
lexer->escape_s = p;
|
1269
|
+
lexer->escape = Qnil;
|
1270
1270
|
};
|
1271
1271
|
|
1272
1272
|
e_heredoc_nl = c_nl % {
|
1273
|
-
if (
|
1274
|
-
p =
|
1275
|
-
|
1273
|
+
if (lexer->herebody_s) {
|
1274
|
+
p = lexer->herebody_s;
|
1275
|
+
lexer->herebody_s = 0;
|
1276
1276
|
}
|
1277
1277
|
};
|
1278
1278
|
|
1279
1279
|
action extend_string {
|
1280
|
-
VALUE string = tok(
|
1280
|
+
VALUE string = tok(lexer, ts, te);
|
1281
1281
|
VALUE lookahead = Qnil;
|
1282
1282
|
|
1283
|
-
if (
|
1284
|
-
lookahead = tok(
|
1283
|
+
if (lexer->version >= 22 && !stack_state_active(&lexer->cond)) {
|
1284
|
+
lookahead = tok(lexer, te, te + 2);
|
1285
1285
|
}
|
1286
1286
|
|
1287
|
-
literal *current_literal = lit_stack_top(&
|
1287
|
+
literal *current_literal = lit_stack_top(&lexer->literal_stack);
|
1288
1288
|
|
1289
1289
|
if (!current_literal->heredoc_e &&
|
1290
1290
|
literal_nest_and_try_closing(current_literal, string, ts, te, lookahead)) {
|
1291
|
-
VALUE token = array_last(
|
1291
|
+
VALUE token = array_last(lexer->token_queue);
|
1292
1292
|
if (rb_ary_entry(token, 0) == tLABEL_END) {
|
1293
1293
|
p += 1;
|
1294
|
-
pop_literal(
|
1294
|
+
pop_literal(lexer);
|
1295
1295
|
fnext expr_labelarg;
|
1296
1296
|
} else {
|
1297
|
-
fnext *pop_literal(
|
1297
|
+
fnext *pop_literal(lexer);
|
1298
1298
|
}
|
1299
1299
|
|
1300
1300
|
fbreak;
|
@@ -1304,93 +1304,93 @@ void Init_lexer()
|
|
1304
1304
|
}
|
1305
1305
|
|
1306
1306
|
action extend_string_escaped {
|
1307
|
-
literal *current_literal = lit_stack_top(&
|
1308
|
-
VALUE escaped_char = rb_str_substr(
|
1307
|
+
literal *current_literal = lit_stack_top(&lexer->literal_stack);
|
1308
|
+
VALUE escaped_char = rb_str_substr(lexer->source, lexer->escape_s, 1);
|
1309
1309
|
|
1310
1310
|
if (literal_munge_escape_p(current_literal, escaped_char)) {
|
1311
1311
|
if (literal_regexp_p(current_literal) && is_regexp_metachar(escaped_char)) {
|
1312
|
-
literal_extend_string(current_literal, tok(
|
1312
|
+
literal_extend_string(current_literal, tok(lexer, ts, te), ts, te);
|
1313
1313
|
} else {
|
1314
1314
|
literal_extend_string(current_literal, escaped_char, ts, te);
|
1315
1315
|
}
|
1316
1316
|
} else {
|
1317
1317
|
if (literal_regexp_p(current_literal)) {
|
1318
|
-
VALUE token = tok(
|
1318
|
+
VALUE token = tok(lexer, ts, te);
|
1319
1319
|
rb_funcall(token, rb_intern("gsub!"), 2, escaped_newline, blank_string);
|
1320
1320
|
literal_extend_string(current_literal, token, ts, te);
|
1321
1321
|
} else if (literal_heredoc_p(current_literal) && newline_char_p(escaped_char)) {
|
1322
1322
|
if (literal_squiggly_heredoc_p(current_literal)) {
|
1323
|
-
literal_extend_string(current_literal, tok(
|
1323
|
+
literal_extend_string(current_literal, tok(lexer, ts, te), ts, te);
|
1324
1324
|
} else {
|
1325
|
-
VALUE token = tok(
|
1325
|
+
VALUE token = tok(lexer, ts, te);
|
1326
1326
|
rb_funcall(token, rb_intern("gsub!"), 2, escaped_newline, blank_string);
|
1327
1327
|
literal_extend_string(current_literal, token, ts, te);
|
1328
1328
|
}
|
1329
|
-
} else if (
|
1330
|
-
literal_extend_string(current_literal, tok(
|
1329
|
+
} else if (lexer->escape == Qnil) {
|
1330
|
+
literal_extend_string(current_literal, tok(lexer, ts, te), ts, te);
|
1331
1331
|
} else {
|
1332
|
-
literal_extend_string(current_literal,
|
1332
|
+
literal_extend_string(current_literal, lexer->escape, ts, te);
|
1333
1333
|
}
|
1334
1334
|
}
|
1335
1335
|
}
|
1336
1336
|
|
1337
1337
|
action extend_string_eol {
|
1338
|
-
literal *current_literal = lit_stack_top(&
|
1338
|
+
literal *current_literal = lit_stack_top(&lexer->literal_stack);
|
1339
1339
|
long str_s = current_literal->str_s;
|
1340
1340
|
|
1341
1341
|
if (te == pe) {
|
1342
|
-
diagnostic(
|
1343
|
-
range(
|
1342
|
+
diagnostic(lexer, fatal, string_eof, Qnil,
|
1343
|
+
range(lexer, str_s, str_s + 1), empty_array);
|
1344
1344
|
}
|
1345
1345
|
|
1346
1346
|
if (literal_heredoc_p(current_literal)) {
|
1347
|
-
VALUE line = tok(
|
1347
|
+
VALUE line = tok(lexer, lexer->herebody_s, ts);
|
1348
1348
|
rb_funcall(line, rb_intern("gsub!"), 2, crs_to_eol, blank_string);
|
1349
1349
|
|
1350
|
-
if (
|
1350
|
+
if (lexer->version >= 18 && lexer->version <= 20) {
|
1351
1351
|
rb_funcall(line, rb_intern("gsub!"), 2, cr_then_anything_to_eol, blank_string);
|
1352
1352
|
}
|
1353
1353
|
|
1354
|
-
if (literal_nest_and_try_closing(current_literal, line,
|
1355
|
-
|
1354
|
+
if (literal_nest_and_try_closing(current_literal, line, lexer->herebody_s, ts, Qnil)) {
|
1355
|
+
lexer->herebody_s = te;
|
1356
1356
|
p = current_literal->heredoc_e - 1;
|
1357
|
-
fnext *pop_literal(
|
1357
|
+
fnext *pop_literal(lexer); fbreak;
|
1358
1358
|
} else {
|
1359
1359
|
literal_infer_indent_level(current_literal, line);
|
1360
|
-
|
1360
|
+
lexer->herebody_s = te;
|
1361
1361
|
}
|
1362
1362
|
} else {
|
1363
|
-
if (literal_nest_and_try_closing(current_literal, tok(
|
1364
|
-
fnext *pop_literal(
|
1363
|
+
if (literal_nest_and_try_closing(current_literal, tok(lexer, ts, te), ts, te, Qnil)) {
|
1364
|
+
fnext *pop_literal(lexer); fbreak;
|
1365
1365
|
}
|
1366
1366
|
|
1367
|
-
if (
|
1368
|
-
p =
|
1369
|
-
|
1367
|
+
if (lexer->herebody_s) {
|
1368
|
+
p = lexer->herebody_s - 1;
|
1369
|
+
lexer->herebody_s = 0;
|
1370
1370
|
}
|
1371
1371
|
}
|
1372
1372
|
|
1373
|
-
if (literal_words_p(current_literal) && !eof_codepoint(get_codepoint(
|
1373
|
+
if (literal_words_p(current_literal) && !eof_codepoint(get_codepoint(lexer, p))) {
|
1374
1374
|
literal_extend_space(current_literal, ts, te);
|
1375
1375
|
} else {
|
1376
|
-
literal_extend_string(current_literal, tok(
|
1376
|
+
literal_extend_string(current_literal, tok(lexer, ts, te), ts, te);
|
1377
1377
|
literal_flush_string(current_literal);
|
1378
1378
|
}
|
1379
1379
|
}
|
1380
1380
|
|
1381
1381
|
action extend_string_space {
|
1382
|
-
literal *current_literal = lit_stack_top(&
|
1382
|
+
literal *current_literal = lit_stack_top(&lexer->literal_stack);
|
1383
1383
|
literal_extend_space(current_literal, ts, te);
|
1384
1384
|
}
|
1385
1385
|
|
1386
1386
|
interp_var = '#' ( global_var | class_var_v | instance_var_v );
|
1387
1387
|
|
1388
1388
|
action extend_interp_var {
|
1389
|
-
literal *current_literal = lit_stack_top(&
|
1389
|
+
literal *current_literal = lit_stack_top(&lexer->literal_stack);
|
1390
1390
|
literal_flush_string(current_literal);
|
1391
1391
|
literal_extend_content(current_literal);
|
1392
1392
|
|
1393
|
-
emit_token(
|
1393
|
+
emit_token(lexer, tSTRING_DVAR, Qnil, ts, ts + 1);
|
1394
1394
|
|
1395
1395
|
p = ts;
|
1396
1396
|
fcall expr_variable;
|
@@ -1399,34 +1399,34 @@ void Init_lexer()
|
|
1399
1399
|
interp_code = '#{';
|
1400
1400
|
|
1401
1401
|
e_lbrace = '{' % {
|
1402
|
-
stack_state_push(&
|
1403
|
-
stack_state_push(&
|
1402
|
+
stack_state_push(&lexer->cond, 0);
|
1403
|
+
stack_state_push(&lexer->cmdarg, 0);
|
1404
1404
|
|
1405
|
-
literal *current_literal = lit_stack_top(&
|
1405
|
+
literal *current_literal = lit_stack_top(&lexer->literal_stack);
|
1406
1406
|
if (current_literal != NULL) {
|
1407
1407
|
literal_start_interp_brace(current_literal);
|
1408
1408
|
}
|
1409
1409
|
};
|
1410
1410
|
|
1411
1411
|
e_rbrace = '}' % {
|
1412
|
-
literal *current_literal = lit_stack_top(&
|
1412
|
+
literal *current_literal = lit_stack_top(&lexer->literal_stack);
|
1413
1413
|
if (current_literal != NULL) {
|
1414
1414
|
if (literal_end_interp_brace_and_try_closing(current_literal)) {
|
1415
|
-
if (
|
1416
|
-
emit_token(
|
1417
|
-
if (
|
1418
|
-
stack_state_lexpop(&
|
1419
|
-
stack_state_lexpop(&
|
1415
|
+
if (lexer->version == 18 || lexer->version == 19) {
|
1416
|
+
emit_token(lexer, tRCURLY, rb_str_new2("}"), p - 1, p);
|
1417
|
+
if (lexer->version < 24) {
|
1418
|
+
stack_state_lexpop(&lexer->cond);
|
1419
|
+
stack_state_lexpop(&lexer->cmdarg);
|
1420
1420
|
} else {
|
1421
|
-
stack_state_pop(&
|
1422
|
-
stack_state_pop(&
|
1421
|
+
stack_state_pop(&lexer->cond);
|
1422
|
+
stack_state_pop(&lexer->cmdarg);
|
1423
1423
|
}
|
1424
1424
|
} else {
|
1425
|
-
emit_token(
|
1425
|
+
emit_token(lexer, tSTRING_DEND, rb_str_new2("}"), p - 1, p);
|
1426
1426
|
}
|
1427
1427
|
|
1428
1428
|
if (current_literal->herebody_s) {
|
1429
|
-
|
1429
|
+
lexer->herebody_s = current_literal->herebody_s;
|
1430
1430
|
}
|
1431
1431
|
|
1432
1432
|
fhold;
|
@@ -1437,15 +1437,15 @@ void Init_lexer()
|
|
1437
1437
|
};
|
1438
1438
|
|
1439
1439
|
action extend_interp_code {
|
1440
|
-
literal *current_literal = lit_stack_top(&
|
1440
|
+
literal *current_literal = lit_stack_top(&lexer->literal_stack);
|
1441
1441
|
literal_flush_string(current_literal);
|
1442
1442
|
literal_extend_content(current_literal);
|
1443
1443
|
|
1444
|
-
emit_token(
|
1444
|
+
emit_token(lexer, tSTRING_DBEG, rb_str_new2("#{"), ts, te);
|
1445
1445
|
|
1446
1446
|
if (current_literal->heredoc_e) {
|
1447
|
-
current_literal->herebody_s =
|
1448
|
-
|
1447
|
+
current_literal->herebody_s = lexer->herebody_s;
|
1448
|
+
lexer->herebody_s = 0;
|
1449
1449
|
}
|
1450
1450
|
|
1451
1451
|
literal_start_interp_brace(current_literal);
|
@@ -1513,13 +1513,13 @@ void Init_lexer()
|
|
1513
1513
|
regexp_modifiers := |*
|
1514
1514
|
[A-Za-z]+
|
1515
1515
|
=> {
|
1516
|
-
VALUE unknown_options = find_unknown_options(tok(
|
1516
|
+
VALUE unknown_options = find_unknown_options(tok(lexer, ts, te));
|
1517
1517
|
|
1518
1518
|
if (unknown_options != Qnil) {
|
1519
1519
|
VALUE hash = rb_hash_new();
|
1520
1520
|
rb_hash_aset(hash, ID2SYM(rb_intern("options")), unknown_options);
|
1521
|
-
diagnostic(
|
1522
|
-
range(
|
1521
|
+
diagnostic(lexer, severity_error, regexp_options, hash,
|
1522
|
+
range(lexer, ts, te), empty_array);
|
1523
1523
|
}
|
1524
1524
|
|
1525
1525
|
emit(tREGEXP_OPT);
|
@@ -1529,7 +1529,7 @@ void Init_lexer()
|
|
1529
1529
|
|
1530
1530
|
any
|
1531
1531
|
=> {
|
1532
|
-
emit_token(
|
1532
|
+
emit_token(lexer, tREGEXP_OPT, tok(lexer, ts, te - 1), ts, te - 1);
|
1533
1533
|
fhold;
|
1534
1534
|
fgoto expr_end;
|
1535
1535
|
};
|
@@ -1542,7 +1542,7 @@ void Init_lexer()
|
|
1542
1542
|
|
1543
1543
|
w_comment =
|
1544
1544
|
'#' %{ sharp_s = p - 1; }
|
1545
|
-
c_line* %{ emit_comment(
|
1545
|
+
c_line* %{ emit_comment(lexer, sharp_s, p == pe ? p - 2 : p); }
|
1546
1546
|
;
|
1547
1547
|
|
1548
1548
|
w_space_comment =
|
@@ -1583,22 +1583,22 @@ void Init_lexer()
|
|
1583
1583
|
;
|
1584
1584
|
|
1585
1585
|
e_lbrack = '[' % {
|
1586
|
-
stack_state_push(&
|
1587
|
-
stack_state_push(&
|
1586
|
+
stack_state_push(&lexer->cond, 0);
|
1587
|
+
stack_state_push(&lexer->cmdarg, 0);
|
1588
1588
|
};
|
1589
1589
|
|
1590
1590
|
e_lparen = '(' % {
|
1591
|
-
stack_state_push(&
|
1592
|
-
stack_state_push(&
|
1593
|
-
|
1591
|
+
stack_state_push(&lexer->cond, 0);
|
1592
|
+
stack_state_push(&lexer->cmdarg, 0);
|
1593
|
+
lexer->paren_nest += 1;
|
1594
1594
|
};
|
1595
1595
|
|
1596
1596
|
e_rparen = ')' % {
|
1597
|
-
|
1597
|
+
lexer->paren_nest -= 1;
|
1598
1598
|
};
|
1599
1599
|
|
1600
1600
|
action local_ident {
|
1601
|
-
VALUE str = tok(
|
1601
|
+
VALUE str = tok(lexer, ts, te);
|
1602
1602
|
emit(tIDENTIFIER);
|
1603
1603
|
|
1604
1604
|
if (STATIC_ENV_DECLARED(str)) {
|
@@ -1610,11 +1610,11 @@ void Init_lexer()
|
|
1610
1610
|
|
1611
1611
|
expr_variable := |*
|
1612
1612
|
global_var => {
|
1613
|
-
VALUE str = tok(
|
1613
|
+
VALUE str = tok(lexer, ts, te);
|
1614
1614
|
|
1615
1615
|
if (is_nthref(str)) {
|
1616
|
-
VALUE integer = rb_str_to_inum(tok(
|
1617
|
-
emit_token(
|
1616
|
+
VALUE integer = rb_str_to_inum(tok(lexer, ts + 1, te), 10, 0);
|
1617
|
+
emit_token(lexer, tNTH_REF, integer, ts, te);
|
1618
1618
|
} else if (is_backref(str)) {
|
1619
1619
|
emit(tBACK_REF);
|
1620
1620
|
} else {
|
@@ -1625,12 +1625,12 @@ void Init_lexer()
|
|
1625
1625
|
};
|
1626
1626
|
|
1627
1627
|
class_var_v => {
|
1628
|
-
VALUE str = tok(
|
1628
|
+
VALUE str = tok(lexer, ts, te);
|
1629
1629
|
|
1630
1630
|
if (bad_cvar_name(str)) {
|
1631
1631
|
VALUE hash = rb_hash_new();
|
1632
1632
|
rb_hash_aset(hash, ID2SYM(rb_intern("name")), str);
|
1633
|
-
diagnostic(
|
1633
|
+
diagnostic(lexer, severity_error, cvar_name, hash, range(lexer, ts, te), empty_array);
|
1634
1634
|
}
|
1635
1635
|
|
1636
1636
|
emit(tCVAR);
|
@@ -1638,12 +1638,12 @@ void Init_lexer()
|
|
1638
1638
|
};
|
1639
1639
|
|
1640
1640
|
instance_var_v => {
|
1641
|
-
VALUE str = tok(
|
1641
|
+
VALUE str = tok(lexer, ts, te);
|
1642
1642
|
|
1643
1643
|
if (bad_ivar_name(str)) {
|
1644
1644
|
VALUE hash = rb_hash_new();
|
1645
1645
|
rb_hash_aset(hash, ID2SYM(rb_intern("name")), str);
|
1646
|
-
diagnostic(
|
1646
|
+
diagnostic(lexer, severity_error, ivar_name, hash, range(lexer, ts, te), empty_array);
|
1647
1647
|
}
|
1648
1648
|
|
1649
1649
|
emit(tIVAR);
|
@@ -1653,7 +1653,7 @@ void Init_lexer()
|
|
1653
1653
|
|
1654
1654
|
expr_fname := |*
|
1655
1655
|
keyword
|
1656
|
-
=> { emit_table_KEYWORDS_BEGIN(
|
1656
|
+
=> { emit_table_KEYWORDS_BEGIN(lexer, tok(lexer, ts, te), ts, te);
|
1657
1657
|
fnext expr_endfn; fbreak; };
|
1658
1658
|
|
1659
1659
|
constant => { emit(tCONSTANT); fnext expr_endfn; fbreak; };
|
@@ -1665,7 +1665,7 @@ void Init_lexer()
|
|
1665
1665
|
operator_fname |
|
1666
1666
|
operator_arithmetic |
|
1667
1667
|
operator_rest
|
1668
|
-
=> { emit_table_PUNCTUATION(
|
1668
|
+
=> { emit_table_PUNCTUATION(lexer, tok(lexer, ts, te), ts, te);
|
1669
1669
|
fnext expr_endfn; fbreak; };
|
1670
1670
|
|
1671
1671
|
'::' => { fhold; fhold; fgoto expr_end; };
|
@@ -1674,13 +1674,13 @@ void Init_lexer()
|
|
1674
1674
|
|
1675
1675
|
'%s' c_any
|
1676
1676
|
=> {
|
1677
|
-
if (
|
1678
|
-
VALUE type = rb_str_substr(
|
1679
|
-
VALUE delimiter = rb_str_substr(
|
1677
|
+
if (lexer->version == 23) {
|
1678
|
+
VALUE type = rb_str_substr(lexer->source, ts, te - ts - 1);
|
1679
|
+
VALUE delimiter = rb_str_substr(lexer->source, te - 1, 1);
|
1680
1680
|
if (delimiter == Qnil)
|
1681
1681
|
delimiter = blank_string;
|
1682
1682
|
|
1683
|
-
fgoto *push_literal(
|
1683
|
+
fgoto *push_literal(lexer, type, delimiter, ts, 0, 0, 0, 0);
|
1684
1684
|
} else {
|
1685
1685
|
p = ts - 1;
|
1686
1686
|
fgoto expr_end;
|
@@ -1696,7 +1696,7 @@ void Init_lexer()
|
|
1696
1696
|
|
1697
1697
|
expr_endfn := |*
|
1698
1698
|
label ( any - ':' ) => {
|
1699
|
-
emit_token(
|
1699
|
+
emit_token(lexer, tLABEL, tok(lexer, ts, te - 2), ts, te - 1);
|
1700
1700
|
fhold; fnext expr_labelarg; fbreak;
|
1701
1701
|
};
|
1702
1702
|
|
@@ -1713,13 +1713,13 @@ void Init_lexer()
|
|
1713
1713
|
call_or_var => { emit(tIDENTIFIER); fnext *arg_or_cmdarg(command_state); fbreak; };
|
1714
1714
|
|
1715
1715
|
bareword ambiguous_fid_suffix
|
1716
|
-
=> { emit_token(
|
1716
|
+
=> { emit_token(lexer, tFID, tok(lexer, ts, tm), ts, tm);
|
1717
1717
|
fnext *arg_or_cmdarg(command_state); p = tm - 1; fbreak; };
|
1718
1718
|
|
1719
1719
|
operator_fname |
|
1720
1720
|
operator_arithmetic |
|
1721
1721
|
operator_rest
|
1722
|
-
=> { emit_table_PUNCTUATION(
|
1722
|
+
=> { emit_table_PUNCTUATION(lexer, tok(lexer, ts, te), ts, te);
|
1723
1723
|
fnext expr_arg; fbreak; };
|
1724
1724
|
|
1725
1725
|
w_any;
|
@@ -1732,11 +1732,11 @@ void Init_lexer()
|
|
1732
1732
|
|
1733
1733
|
expr_arg := |*
|
1734
1734
|
w_space+ e_lparen => {
|
1735
|
-
if (
|
1736
|
-
emit_token(
|
1735
|
+
if (lexer->version == 18) {
|
1736
|
+
emit_token(lexer, tLPAREN2, rb_str_new2("("), te - 1, te);
|
1737
1737
|
fnext expr_value; fbreak;
|
1738
1738
|
} else {
|
1739
|
-
emit_token(
|
1739
|
+
emit_token(lexer, tLPAREN_ARG, rb_str_new2("("), te - 1, te);
|
1740
1740
|
fnext expr_beg; fbreak;
|
1741
1741
|
}
|
1742
1742
|
};
|
@@ -1744,17 +1744,17 @@ void Init_lexer()
|
|
1744
1744
|
e_lparen => { emit(tLPAREN2); fnext expr_beg; fbreak; };
|
1745
1745
|
|
1746
1746
|
w_space+ e_lbrack => {
|
1747
|
-
emit_token(
|
1747
|
+
emit_token(lexer, tLBRACK, rb_str_new2("["), te - 1, te);
|
1748
1748
|
fnext expr_beg; fbreak;
|
1749
1749
|
};
|
1750
1750
|
|
1751
1751
|
w_space* e_lbrace => {
|
1752
|
-
VALUE val = array_last(
|
1753
|
-
if (val != Qnil && NUM2INT(val) ==
|
1754
|
-
rb_ary_pop(
|
1755
|
-
emit_token(
|
1752
|
+
VALUE val = array_last(lexer->lambda_stack);
|
1753
|
+
if (val != Qnil && NUM2INT(val) == lexer->paren_nest) {
|
1754
|
+
rb_ary_pop(lexer->lambda_stack);
|
1755
|
+
emit_token(lexer, tLAMBEG, rb_str_new2("{"), te - 1, te);
|
1756
1756
|
} else {
|
1757
|
-
emit_token(
|
1757
|
+
emit_token(lexer, tLCURLY, rb_str_new2("{"), te - 1, te);
|
1758
1758
|
}
|
1759
1759
|
fnext expr_value; fbreak;
|
1760
1760
|
};
|
@@ -1765,9 +1765,9 @@ void Init_lexer()
|
|
1765
1765
|
|
1766
1766
|
w_space+ %{ tm = p; }
|
1767
1767
|
( [%/] ( c_any - c_space_nl - '=' ) | '<<' ) => {
|
1768
|
-
if (NUM2INT(rb_ary_entry(
|
1769
|
-
diagnostic(
|
1770
|
-
range(
|
1768
|
+
if (NUM2INT(rb_ary_entry(lexer->source_pts, tm)) == '/') {
|
1769
|
+
diagnostic(lexer, warning, ambiguous_literal, Qnil,
|
1770
|
+
range(lexer, tm, tm + 1), empty_array);
|
1771
1771
|
}
|
1772
1772
|
|
1773
1773
|
p = tm - 1;
|
@@ -1776,9 +1776,9 @@ void Init_lexer()
|
|
1776
1776
|
|
1777
1777
|
w_space+ %{ tm = p; } ( '+' | '-' | '*' | '&' | '**' ) => {
|
1778
1778
|
VALUE hash = rb_hash_new();
|
1779
|
-
VALUE str = tok(
|
1779
|
+
VALUE str = tok(lexer, tm, te);
|
1780
1780
|
rb_hash_aset(hash, prefix, str);
|
1781
|
-
diagnostic(
|
1781
|
+
diagnostic(lexer, warning, ambiguous_prefix, hash, range(lexer, tm, te),
|
1782
1782
|
empty_array);
|
1783
1783
|
|
1784
1784
|
p = tm - 1;
|
@@ -1817,8 +1817,8 @@ void Init_lexer()
|
|
1817
1817
|
expr_cmdarg := |*
|
1818
1818
|
w_space+ e_lparen
|
1819
1819
|
=> {
|
1820
|
-
emit_token(
|
1821
|
-
if (
|
1820
|
+
emit_token(lexer, tLPAREN_ARG, rb_str_new2("("), te - 1, te);
|
1821
|
+
if (lexer->version == 18) {
|
1822
1822
|
fnext expr_value; fbreak;
|
1823
1823
|
} else {
|
1824
1824
|
fnext expr_beg; fbreak;
|
@@ -1827,10 +1827,10 @@ void Init_lexer()
|
|
1827
1827
|
|
1828
1828
|
w_space* 'do'
|
1829
1829
|
=> {
|
1830
|
-
if (stack_state_active(&
|
1831
|
-
emit_token(
|
1830
|
+
if (stack_state_active(&lexer->cond)) {
|
1831
|
+
emit_token(lexer, kDO_COND, rb_str_new2("do"), te - 2, te);
|
1832
1832
|
} else {
|
1833
|
-
emit_token(
|
1833
|
+
emit_token(lexer, kDO, rb_str_new2("do"), te - 2, te);
|
1834
1834
|
}
|
1835
1835
|
fnext expr_value; fbreak;
|
1836
1836
|
};
|
@@ -1846,17 +1846,17 @@ void Init_lexer()
|
|
1846
1846
|
|
1847
1847
|
expr_endarg := |*
|
1848
1848
|
e_lbrace => {
|
1849
|
-
VALUE val = array_last(
|
1850
|
-
if (val != Qnil && NUM2INT(val) ==
|
1851
|
-
rb_ary_pop(
|
1852
|
-
emit_token(
|
1849
|
+
VALUE val = array_last(lexer->lambda_stack);
|
1850
|
+
if (val != Qnil && NUM2INT(val) == lexer->paren_nest) {
|
1851
|
+
rb_ary_pop(lexer->lambda_stack);
|
1852
|
+
emit_token(lexer, tLAMBEG, rb_str_new2("{"), te - 1, te);
|
1853
1853
|
} else {
|
1854
|
-
emit_token(
|
1854
|
+
emit_token(lexer, tLBRACE_ARG, rb_str_new2("{"), te - 1, te);
|
1855
1855
|
}
|
1856
1856
|
fnext expr_value; fbreak;
|
1857
1857
|
};
|
1858
1858
|
|
1859
|
-
'do' => { emit_do(
|
1859
|
+
'do' => { emit_do(lexer, 1, ts, te); fnext expr_value; fbreak; };
|
1860
1860
|
|
1861
1861
|
w_space_comment;
|
1862
1862
|
|
@@ -1868,7 +1868,7 @@ void Init_lexer()
|
|
1868
1868
|
|
1869
1869
|
expr_mid := |*
|
1870
1870
|
keyword_modifier
|
1871
|
-
=> { emit_table_KEYWORDS(
|
1871
|
+
=> { emit_table_KEYWORDS(lexer, tok(lexer, ts, te), ts, te);
|
1872
1872
|
fnext expr_beg; fbreak; };
|
1873
1873
|
|
1874
1874
|
bareword => { p = ts - 1; fgoto expr_beg; };
|
@@ -1884,38 +1884,38 @@ void Init_lexer()
|
|
1884
1884
|
|
1885
1885
|
expr_beg := |*
|
1886
1886
|
[+\-] w_any* [0-9] => {
|
1887
|
-
emit_token(
|
1887
|
+
emit_token(lexer, tUNARY_NUM, tok(lexer, ts, ts + 1), ts, ts + 1);
|
1888
1888
|
fhold; fnext expr_end; fbreak;
|
1889
1889
|
};
|
1890
1890
|
|
1891
1891
|
'*' => { emit(tSTAR); fbreak; };
|
1892
1892
|
|
1893
1893
|
'/' c_any => {
|
1894
|
-
VALUE delimiter = rb_str_substr(
|
1895
|
-
fhold; fgoto *push_literal(
|
1894
|
+
VALUE delimiter = rb_str_substr(lexer->source, ts, 1);
|
1895
|
+
fhold; fgoto *push_literal(lexer, delimiter, delimiter, ts, 0, 0, 0, 0);
|
1896
1896
|
};
|
1897
1897
|
|
1898
1898
|
'%' ( any - [A-Za-z] ) => {
|
1899
|
-
VALUE type = rb_str_substr(
|
1900
|
-
VALUE delimiter = rb_str_substr(
|
1899
|
+
VALUE type = rb_str_substr(lexer->source, ts, 1);
|
1900
|
+
VALUE delimiter = rb_str_substr(lexer->source, te - 1, 1);
|
1901
1901
|
if (delimiter == Qnil)
|
1902
1902
|
delimiter = blank_string;
|
1903
1903
|
|
1904
|
-
fgoto *push_literal(
|
1904
|
+
fgoto *push_literal(lexer, type, delimiter, ts, 0, 0, 0, 0);
|
1905
1905
|
};
|
1906
1906
|
|
1907
1907
|
'%' [A-Za-z]+ c_any => {
|
1908
|
-
VALUE type = rb_str_substr(
|
1909
|
-
VALUE delimiter = rb_str_substr(
|
1908
|
+
VALUE type = rb_str_substr(lexer->source, ts, te - ts - 1);
|
1909
|
+
VALUE delimiter = rb_str_substr(lexer->source, te - 1, 1);
|
1910
1910
|
if (delimiter == Qnil)
|
1911
1911
|
delimiter = blank_string;
|
1912
1912
|
|
1913
|
-
fgoto *push_literal(
|
1913
|
+
fgoto *push_literal(lexer, type, delimiter, ts, 0, 0, 0, 0);
|
1914
1914
|
};
|
1915
1915
|
|
1916
1916
|
'%' c_eof => {
|
1917
|
-
diagnostic(
|
1918
|
-
range(
|
1917
|
+
diagnostic(lexer, fatal, string_eof, Qnil,
|
1918
|
+
range(lexer, ts, ts + 1), empty_array);
|
1919
1919
|
};
|
1920
1920
|
|
1921
1921
|
'<<' [~\-]?
|
@@ -1923,9 +1923,9 @@ void Init_lexer()
|
|
1923
1923
|
| "'" ( any - "'" )* "'"
|
1924
1924
|
| "`" ( any - "`" )* "`"
|
1925
1925
|
| bareword ) % { heredoc_e = p; }
|
1926
|
-
c_line* c_nl % { if (!
|
1926
|
+
c_line* c_nl % { if (!lexer->herebody_s) lexer->herebody_s = p; } => {
|
1927
1927
|
|
1928
|
-
VALUE heredoc = tok(
|
1928
|
+
VALUE heredoc = tok(lexer, ts, heredoc_e);
|
1929
1929
|
VALUE type;
|
1930
1930
|
char *cp = RSTRING_PTR(heredoc);
|
1931
1931
|
int indent = 0, dedent_body = 0;
|
@@ -1959,100 +1959,100 @@ void Init_lexer()
|
|
1959
1959
|
type = rb_str_new2("<<\"");
|
1960
1960
|
}
|
1961
1961
|
|
1962
|
-
VALUE delimiter = tok(
|
1962
|
+
VALUE delimiter = tok(lexer, rng_s, rng_e);
|
1963
1963
|
|
1964
|
-
if (
|
1964
|
+
if (lexer->version >= 24) {
|
1965
1965
|
if (NUM2INT(rb_funcall(delimiter, rb_intern("count"), 1, newline)) > 0) {
|
1966
1966
|
if (str_end_with_p(delimiter, "\n")) {
|
1967
|
-
diagnostic(
|
1968
|
-
range(
|
1967
|
+
diagnostic(lexer, warning, heredoc_id_ends_with_nl, Qnil,
|
1968
|
+
range(lexer, ts, ts + 1), empty_array);
|
1969
1969
|
|
1970
1970
|
delimiter = rb_funcall(delimiter, rb_intern("rstrip"), 0);
|
1971
1971
|
} else {
|
1972
|
-
diagnostic(
|
1973
|
-
range(
|
1972
|
+
diagnostic(lexer, fatal, heredoc_id_has_newline, Qnil,
|
1973
|
+
range(lexer, ts, ts + 1), empty_array);
|
1974
1974
|
}
|
1975
1975
|
}
|
1976
1976
|
}
|
1977
1977
|
|
1978
|
-
if (dedent_body &&
|
1979
|
-
emit_token(
|
1978
|
+
if (dedent_body && lexer->version >= 18 && lexer->version <= 22) {
|
1979
|
+
emit_token(lexer, tLSHFT, rb_str_new2("<<"), ts, ts + 2);
|
1980
1980
|
p = ts + 1;
|
1981
1981
|
fnext expr_beg; fbreak;
|
1982
1982
|
} else {
|
1983
|
-
fnext *push_literal(
|
1983
|
+
fnext *push_literal(lexer, type, delimiter, ts, heredoc_e, indent,
|
1984
1984
|
dedent_body, 0);
|
1985
|
-
p =
|
1985
|
+
p = lexer->herebody_s - 1;
|
1986
1986
|
}
|
1987
1987
|
};
|
1988
1988
|
|
1989
1989
|
':' ('&&' | '||') => {
|
1990
1990
|
fhold; fhold;
|
1991
|
-
emit_token(
|
1991
|
+
emit_token(lexer, tSYMBEG, tok(lexer, ts, ts + 1), ts, ts + 1);
|
1992
1992
|
fgoto expr_fname;
|
1993
1993
|
};
|
1994
1994
|
|
1995
1995
|
':' ['"] => { /* ' */
|
1996
|
-
VALUE type = tok(
|
1997
|
-
VALUE delimiter = tok(
|
1998
|
-
fgoto *push_literal(
|
1996
|
+
VALUE type = tok(lexer, ts, te);
|
1997
|
+
VALUE delimiter = tok(lexer, te - 1, te);
|
1998
|
+
fgoto *push_literal(lexer, type, delimiter, ts, 0, 0, 0, 0);
|
1999
1999
|
};
|
2000
2000
|
|
2001
2001
|
':' [!~] '@'
|
2002
2002
|
=> {
|
2003
|
-
emit_token(
|
2003
|
+
emit_token(lexer, tSYMBOL, tok(lexer, ts + 1, ts + 2), ts, te);
|
2004
2004
|
fnext expr_end; fbreak;
|
2005
2005
|
};
|
2006
2006
|
|
2007
2007
|
':' bareword ambiguous_symbol_suffix => {
|
2008
|
-
emit_token(
|
2008
|
+
emit_token(lexer, tSYMBOL, tok(lexer, ts + 1, tm), ts, tm);
|
2009
2009
|
p = tm - 1;
|
2010
2010
|
fnext expr_end; fbreak;
|
2011
2011
|
};
|
2012
2012
|
|
2013
2013
|
':' ( bareword | global_var | class_var | instance_var |
|
2014
2014
|
operator_fname | operator_arithmetic | operator_rest ) => {
|
2015
|
-
emit_token(
|
2015
|
+
emit_token(lexer, tSYMBOL, tok(lexer, ts + 1, te), ts, te);
|
2016
2016
|
fnext expr_end; fbreak;
|
2017
2017
|
};
|
2018
2018
|
|
2019
2019
|
'?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
|
2020
|
-
| (c_any - c_space_nl - e_bs) % {
|
2020
|
+
| (c_any - c_space_nl - e_bs) % { lexer->escape = Qnil; }
|
2021
2021
|
) => {
|
2022
|
-
VALUE value =
|
2022
|
+
VALUE value = lexer->escape;
|
2023
2023
|
if (value == Qnil)
|
2024
|
-
value = tok(
|
2024
|
+
value = tok(lexer, ts + 1, te);
|
2025
2025
|
|
2026
|
-
if (
|
2027
|
-
emit_token(
|
2026
|
+
if (lexer->version == 18)
|
2027
|
+
emit_token(lexer, tINTEGER, rb_funcall(value, rb_intern("getbyte"), 1, INT2NUM(0)), ts, te);
|
2028
2028
|
else
|
2029
|
-
emit_token(
|
2029
|
+
emit_token(lexer, tCHARACTER, value, ts, te);
|
2030
2030
|
|
2031
2031
|
fnext expr_end; fbreak;
|
2032
2032
|
};
|
2033
2033
|
|
2034
2034
|
'?' c_space_nl => {
|
2035
|
-
VALUE escape = escape_char(rb_str_subseq(
|
2035
|
+
VALUE escape = escape_char(rb_str_subseq(lexer->source, ts + 1, 1));
|
2036
2036
|
VALUE hash = rb_hash_new();
|
2037
2037
|
rb_hash_aset(hash, ID2SYM(rb_intern("escape")), escape);
|
2038
|
-
diagnostic(
|
2039
|
-
range(
|
2038
|
+
diagnostic(lexer, warning, invalid_escape_use, hash,
|
2039
|
+
range(lexer, ts, te), empty_array);
|
2040
2040
|
|
2041
2041
|
p = ts - 1;
|
2042
2042
|
fgoto expr_end;
|
2043
2043
|
};
|
2044
2044
|
|
2045
2045
|
'?' c_eof => {
|
2046
|
-
diagnostic(
|
2047
|
-
range(
|
2046
|
+
diagnostic(lexer, fatal, incomplete_escape, Qnil,
|
2047
|
+
range(lexer, ts, ts + 1), empty_array);
|
2048
2048
|
};
|
2049
2049
|
|
2050
2050
|
'?' [A-Za-z_] bareword => { p = ts - 1; fgoto expr_end; };
|
2051
2051
|
|
2052
2052
|
e_lbrace => {
|
2053
|
-
VALUE val = array_last(
|
2054
|
-
if (val != Qnil && NUM2INT(val) ==
|
2055
|
-
rb_ary_pop(
|
2053
|
+
VALUE val = array_last(lexer->lambda_stack);
|
2054
|
+
if (val != Qnil && NUM2INT(val) == lexer->paren_nest) {
|
2055
|
+
rb_ary_pop(lexer->lambda_stack);
|
2056
2056
|
emit(tLAMBEG);
|
2057
2057
|
} else {
|
2058
2058
|
emit(tLBRACE);
|
@@ -2061,37 +2061,37 @@ void Init_lexer()
|
|
2061
2061
|
};
|
2062
2062
|
|
2063
2063
|
e_lbrack => {
|
2064
|
-
emit_token(
|
2064
|
+
emit_token(lexer, tLBRACK, tok(lexer, ts, te), ts, te);
|
2065
2065
|
fbreak;
|
2066
2066
|
};
|
2067
2067
|
|
2068
2068
|
e_lparen => {
|
2069
|
-
emit_token(
|
2069
|
+
emit_token(lexer, tLPAREN, tok(lexer, ts, te), ts, te);
|
2070
2070
|
fbreak;
|
2071
2071
|
};
|
2072
2072
|
|
2073
2073
|
punctuation_begin
|
2074
|
-
=> { emit_table_PUNCTUATION_BEGIN(
|
2074
|
+
=> { emit_table_PUNCTUATION_BEGIN(lexer, tok(lexer, ts, te), ts, te);
|
2075
2075
|
fbreak; };
|
2076
2076
|
|
2077
2077
|
'rescue' %{ tm = p; } '=>'? => {
|
2078
|
-
emit_token(
|
2078
|
+
emit_token(lexer, kRESCUE, tok(lexer, ts, tm), ts, tm);
|
2079
2079
|
p = tm - 1;
|
2080
2080
|
fnext expr_mid; fbreak;
|
2081
2081
|
};
|
2082
2082
|
|
2083
2083
|
keyword_modifier
|
2084
|
-
=> { emit_table_KEYWORDS_BEGIN(
|
2084
|
+
=> { emit_table_KEYWORDS_BEGIN(lexer, tok(lexer, ts, te), ts, te);
|
2085
2085
|
fnext expr_value; fbreak; };
|
2086
2086
|
|
2087
2087
|
label ( any - ':' )
|
2088
2088
|
=> {
|
2089
2089
|
fhold;
|
2090
2090
|
|
2091
|
-
if (
|
2092
|
-
VALUE ident = tok(
|
2091
|
+
if (lexer->version == 18) {
|
2092
|
+
VALUE ident = tok(lexer, ts, te - 2);
|
2093
2093
|
|
2094
|
-
emit_token(
|
2094
|
+
emit_token(lexer, is_capitalized(ident) ? tCONSTANT : tIDENTIFIER,
|
2095
2095
|
ident, ts, te - 2);
|
2096
2096
|
fhold;
|
2097
2097
|
|
@@ -2101,7 +2101,7 @@ void Init_lexer()
|
|
2101
2101
|
fnext *arg_or_cmdarg(command_state);
|
2102
2102
|
}
|
2103
2103
|
} else {
|
2104
|
-
emit_token(
|
2104
|
+
emit_token(lexer, tLABEL, tok(lexer, ts, te - 2), ts, te - 1);
|
2105
2105
|
fnext expr_labelarg;
|
2106
2106
|
}
|
2107
2107
|
|
@@ -2113,13 +2113,13 @@ void Init_lexer()
|
|
2113
2113
|
call_or_var => local_ident;
|
2114
2114
|
|
2115
2115
|
(call_or_var - keyword)
|
2116
|
-
% { ident_tok = tok(
|
2116
|
+
% { ident_tok = tok(lexer, ts, te); ident_ts = ts; ident_te = te; }
|
2117
2117
|
w_space+ '('
|
2118
2118
|
=> {
|
2119
|
-
emit_token(
|
2119
|
+
emit_token(lexer, tIDENTIFIER, ident_tok, ident_ts, ident_te);
|
2120
2120
|
p = ident_te - 1;
|
2121
2121
|
|
2122
|
-
if (STATIC_ENV_DECLARED(ident_tok) &&
|
2122
|
+
if (STATIC_ENV_DECLARED(ident_tok) && lexer->version < 25) {
|
2123
2123
|
fnext expr_endfn;
|
2124
2124
|
} else {
|
2125
2125
|
fnext expr_cmdarg;
|
@@ -2132,7 +2132,7 @@ void Init_lexer()
|
|
2132
2132
|
|
2133
2133
|
e_heredoc_nl '=begin' ( c_space | c_nl_zlen ) => {
|
2134
2134
|
p = ts - 1;
|
2135
|
-
|
2135
|
+
lexer->cs_before_block_comment = lexer->cs;
|
2136
2136
|
fgoto line_begin;
|
2137
2137
|
};
|
2138
2138
|
|
@@ -2149,7 +2149,7 @@ void Init_lexer()
|
|
2149
2149
|
w_space_comment;
|
2150
2150
|
|
2151
2151
|
w_newline => {
|
2152
|
-
if (
|
2152
|
+
if (lexer->in_kwarg) {
|
2153
2153
|
fhold; fgoto expr_end;
|
2154
2154
|
} else {
|
2155
2155
|
fgoto line_begin;
|
@@ -2165,8 +2165,8 @@ void Init_lexer()
|
|
2165
2165
|
label (any - ':') => { p = ts - 1; fgoto expr_end; };
|
2166
2166
|
|
2167
2167
|
['"] => { /* ' */
|
2168
|
-
VALUE type = tok(
|
2169
|
-
fgoto *push_literal(
|
2168
|
+
VALUE type = tok(lexer, ts, te);
|
2169
|
+
fgoto *push_literal(lexer, type, type, ts, 0, 0, 0, 0);
|
2170
2170
|
};
|
2171
2171
|
|
2172
2172
|
w_space_comment;
|
@@ -2180,15 +2180,15 @@ void Init_lexer()
|
|
2180
2180
|
|
2181
2181
|
expr_end := |*
|
2182
2182
|
'->' => {
|
2183
|
-
emit_token(
|
2184
|
-
rb_ary_push(
|
2183
|
+
emit_token(lexer, tLAMBDA, tok(lexer, ts, ts + 2), ts, ts + 2);
|
2184
|
+
rb_ary_push(lexer->lambda_stack, INT2NUM(lexer->paren_nest));
|
2185
2185
|
fnext expr_endfn; fbreak;
|
2186
2186
|
};
|
2187
2187
|
|
2188
2188
|
e_lbrace => {
|
2189
|
-
VALUE val = array_last(
|
2190
|
-
if (val != Qnil && NUM2INT(val) ==
|
2191
|
-
rb_ary_pop(
|
2189
|
+
VALUE val = array_last(lexer->lambda_stack);
|
2190
|
+
if (val != Qnil && NUM2INT(val) == lexer->paren_nest) {
|
2191
|
+
rb_ary_pop(lexer->lambda_stack);
|
2192
2192
|
emit(tLAMBEG);
|
2193
2193
|
} else {
|
2194
2194
|
emit(tLCURLY);
|
@@ -2197,43 +2197,43 @@ void Init_lexer()
|
|
2197
2197
|
};
|
2198
2198
|
|
2199
2199
|
'do' => {
|
2200
|
-
VALUE val = array_last(
|
2201
|
-
if (val != Qnil && NUM2INT(val) ==
|
2202
|
-
rb_ary_pop(
|
2200
|
+
VALUE val = array_last(lexer->lambda_stack);
|
2201
|
+
if (val != Qnil && NUM2INT(val) == lexer->paren_nest) {
|
2202
|
+
rb_ary_pop(lexer->lambda_stack);
|
2203
2203
|
emit(kDO_LAMBDA);
|
2204
2204
|
} else {
|
2205
|
-
emit_do(
|
2205
|
+
emit_do(lexer, 0, ts, te);
|
2206
2206
|
}
|
2207
2207
|
fnext expr_value; fbreak;
|
2208
2208
|
};
|
2209
2209
|
|
2210
2210
|
keyword_with_fname
|
2211
|
-
=> { emit_table_KEYWORDS(
|
2211
|
+
=> { emit_table_KEYWORDS(lexer, tok(lexer, ts, te), ts, te);
|
2212
2212
|
fnext expr_fname; fbreak; };
|
2213
2213
|
|
2214
2214
|
'class' w_any* '<<'
|
2215
|
-
=> { emit_token(
|
2216
|
-
emit_token(
|
2215
|
+
=> { emit_token(lexer, kCLASS, rb_str_new2("class"), ts, ts + 5);
|
2216
|
+
emit_token(lexer, tLSHFT, rb_str_new2("<<"), te - 2, te);
|
2217
2217
|
fnext expr_value; fbreak; };
|
2218
2218
|
|
2219
2219
|
keyword_modifier
|
2220
|
-
=> { emit_table_KEYWORDS(
|
2220
|
+
=> { emit_table_KEYWORDS(lexer, tok(lexer, ts, te), ts, te);
|
2221
2221
|
fnext expr_beg; fbreak; };
|
2222
2222
|
|
2223
2223
|
keyword_with_value
|
2224
|
-
=> { emit_table_KEYWORDS(
|
2224
|
+
=> { emit_table_KEYWORDS(lexer, tok(lexer, ts, te), ts, te);
|
2225
2225
|
fnext expr_value; fbreak; };
|
2226
2226
|
|
2227
2227
|
keyword_with_mid
|
2228
|
-
=> { emit_table_KEYWORDS(
|
2228
|
+
=> { emit_table_KEYWORDS(lexer, tok(lexer, ts, te), ts, te);
|
2229
2229
|
fnext expr_mid; fbreak; };
|
2230
2230
|
|
2231
2231
|
keyword_with_arg
|
2232
2232
|
=> {
|
2233
|
-
VALUE keyword = tok(
|
2234
|
-
emit_table_KEYWORDS(
|
2233
|
+
VALUE keyword = tok(lexer, ts, te);
|
2234
|
+
emit_table_KEYWORDS(lexer, keyword, ts, te);
|
2235
2235
|
|
2236
|
-
if (
|
2236
|
+
if (lexer->version == 18 && strcmp(RSTRING_PTR(keyword), "not") == 0) {
|
2237
2237
|
fnext expr_beg; fbreak;
|
2238
2238
|
} else {
|
2239
2239
|
fnext expr_arg; fbreak;
|
@@ -2241,8 +2241,8 @@ void Init_lexer()
|
|
2241
2241
|
};
|
2242
2242
|
|
2243
2243
|
'__ENCODING__' => {
|
2244
|
-
if (
|
2245
|
-
VALUE str = tok(
|
2244
|
+
if (lexer->version == 18) {
|
2245
|
+
VALUE str = tok(lexer, ts, te);
|
2246
2246
|
emit(tIDENTIFIER);
|
2247
2247
|
|
2248
2248
|
if (STATIC_ENV_DECLARED(str)) {
|
@@ -2257,7 +2257,7 @@ void Init_lexer()
|
|
2257
2257
|
};
|
2258
2258
|
|
2259
2259
|
keyword_with_end
|
2260
|
-
=> { emit_table_KEYWORDS(
|
2260
|
+
=> { emit_table_KEYWORDS(lexer, tok(lexer, ts, te), ts, te);
|
2261
2261
|
fbreak; };
|
2262
2262
|
|
2263
2263
|
( '0' [Xx] %{ num_base = 16; num_digits_s = p; } int_hex
|
@@ -2269,30 +2269,30 @@ void Init_lexer()
|
|
2269
2269
|
) %{ num_suffix_s = p; } int_suffix
|
2270
2270
|
=> {
|
2271
2271
|
int invalid_idx;
|
2272
|
-
VALUE digits = tok(
|
2272
|
+
VALUE digits = tok(lexer, num_digits_s, num_suffix_s);
|
2273
2273
|
|
2274
|
-
if (NUM2INT(rb_ary_entry(
|
2274
|
+
if (NUM2INT(rb_ary_entry(lexer->source_pts, num_suffix_s - 1)) == '_') {
|
2275
2275
|
VALUE hash = rb_hash_new();
|
2276
2276
|
rb_hash_aset(hash, character, rb_str_new2("_"));
|
2277
|
-
diagnostic(
|
2278
|
-
range(
|
2279
|
-
} else if (RSTRING_LEN(digits) == 0 && num_base == 8 &&
|
2277
|
+
diagnostic(lexer, severity_error, trailing_in_number, hash,
|
2278
|
+
range(lexer, te - 1, te), empty_array);
|
2279
|
+
} else if (RSTRING_LEN(digits) == 0 && num_base == 8 && lexer->version == 18) {
|
2280
2280
|
digits = rb_str_new2("0");
|
2281
2281
|
} else if (RSTRING_LEN(digits) == 0) {
|
2282
|
-
diagnostic(
|
2283
|
-
range(
|
2282
|
+
diagnostic(lexer, severity_error, empty_numeric, Qnil,
|
2283
|
+
range(lexer, ts, te), empty_array);
|
2284
2284
|
} else if (num_base == 8 && (invalid_idx = find_8_or_9(digits)) != -1) {
|
2285
2285
|
long invalid_s = num_digits_s + invalid_idx;
|
2286
|
-
diagnostic(
|
2287
|
-
range(
|
2286
|
+
diagnostic(lexer, severity_error, invalid_octal, Qnil,
|
2287
|
+
range(lexer, invalid_s, invalid_s + 1), empty_array);
|
2288
2288
|
}
|
2289
2289
|
|
2290
2290
|
VALUE integer = rb_str_to_inum(digits, num_base, 0);
|
2291
|
-
if (
|
2292
|
-
emit_token(
|
2291
|
+
if (lexer->version >= 18 && lexer->version <= 20) {
|
2292
|
+
emit_token(lexer, tINTEGER, integer, numeric_s, num_suffix_s);
|
2293
2293
|
p = num_suffix_s - 1;
|
2294
2294
|
} else {
|
2295
|
-
num_xfrm(
|
2295
|
+
num_xfrm(lexer, integer, numeric_s, te);
|
2296
2296
|
}
|
2297
2297
|
|
2298
2298
|
fbreak;
|
@@ -2300,34 +2300,34 @@ void Init_lexer()
|
|
2300
2300
|
|
2301
2301
|
flo_frac flo_pow?
|
2302
2302
|
=> {
|
2303
|
-
diagnostic(
|
2304
|
-
range(
|
2303
|
+
diagnostic(lexer, severity_error, no_dot_digit_literal, Qnil,
|
2304
|
+
range(lexer, ts, te), empty_array);
|
2305
2305
|
};
|
2306
2306
|
|
2307
2307
|
flo_int [eE]
|
2308
2308
|
=> {
|
2309
|
-
if (
|
2309
|
+
if (lexer->version >= 18 && lexer->version <= 20) {
|
2310
2310
|
VALUE hash = rb_hash_new();
|
2311
|
-
rb_hash_aset(hash, character, tok(
|
2312
|
-
diagnostic(
|
2313
|
-
range(
|
2311
|
+
rb_hash_aset(hash, character, tok(lexer, te - 1, te));
|
2312
|
+
diagnostic(lexer, severity_error, trailing_in_number, hash,
|
2313
|
+
range(lexer, te - 1, te), empty_array);
|
2314
2314
|
} else {
|
2315
|
-
VALUE integer = rb_str_to_inum(tok(
|
2316
|
-
emit_token(
|
2315
|
+
VALUE integer = rb_str_to_inum(tok(lexer, ts, te - 1), 10, 0);
|
2316
|
+
emit_token(lexer, tINTEGER, integer, ts, te - 1);
|
2317
2317
|
fhold; fbreak;
|
2318
2318
|
}
|
2319
2319
|
};
|
2320
2320
|
|
2321
2321
|
flo_int flo_frac [eE]
|
2322
2322
|
=> {
|
2323
|
-
if (
|
2323
|
+
if (lexer->version >= 18 && lexer->version <= 20) {
|
2324
2324
|
VALUE hash = rb_hash_new();
|
2325
|
-
rb_hash_aset(hash, character, tok(
|
2326
|
-
diagnostic(
|
2327
|
-
range(
|
2325
|
+
rb_hash_aset(hash, character, tok(lexer, te - 1, te));
|
2326
|
+
diagnostic(lexer, severity_error, trailing_in_number, hash,
|
2327
|
+
range(lexer, te - 1, te), empty_array);
|
2328
2328
|
} else {
|
2329
|
-
VALUE fval = rb_funcall(tok(
|
2330
|
-
emit_token(
|
2329
|
+
VALUE fval = rb_funcall(tok(lexer, ts, te - 1), rb_intern("to_f"), 0);
|
2330
|
+
emit_token(lexer, tFLOAT, fval, ts, te - 1);
|
2331
2331
|
fhold; fbreak;
|
2332
2332
|
}
|
2333
2333
|
};
|
@@ -2337,28 +2337,28 @@ void Init_lexer()
|
|
2337
2337
|
| flo_frac %{ num_suffix_s = p; } flo_suffix
|
2338
2338
|
)
|
2339
2339
|
=> {
|
2340
|
-
VALUE digits = tok(
|
2340
|
+
VALUE digits = tok(lexer, ts, num_suffix_s);
|
2341
2341
|
|
2342
|
-
if (
|
2342
|
+
if (lexer->version >= 18 && lexer->version <= 20) {
|
2343
2343
|
VALUE fval = rb_Float(digits);
|
2344
|
-
emit_token(
|
2344
|
+
emit_token(lexer, tFLOAT, fval, ts, num_suffix_s);
|
2345
2345
|
p = num_suffix_s - 1;
|
2346
2346
|
} else {
|
2347
|
-
num_xfrm(
|
2347
|
+
num_xfrm(lexer, digits, ts, te);
|
2348
2348
|
}
|
2349
2349
|
fbreak;
|
2350
2350
|
};
|
2351
2351
|
|
2352
2352
|
'`' | ['"] => { /* ' */
|
2353
|
-
VALUE type = tok(
|
2354
|
-
VALUE delimiter = tok(
|
2355
|
-
fgoto *push_literal(
|
2353
|
+
VALUE type = tok(lexer, ts, te);
|
2354
|
+
VALUE delimiter = tok(lexer, te - 1, te);
|
2355
|
+
fgoto *push_literal(lexer, type, delimiter, ts, 0, 0, 0, 1);
|
2356
2356
|
};
|
2357
2357
|
|
2358
2358
|
constant => { emit(tCONSTANT); fnext *arg_or_cmdarg(command_state); fbreak; };
|
2359
2359
|
|
2360
2360
|
constant ambiguous_const_suffix => {
|
2361
|
-
emit_token(
|
2361
|
+
emit_token(lexer, tCONSTANT, tok(lexer, ts, tm), ts, tm);
|
2362
2362
|
p = tm - 1;
|
2363
2363
|
fbreak;
|
2364
2364
|
};
|
@@ -2367,7 +2367,7 @@ void Init_lexer()
|
|
2367
2367
|
=> { p = ts - 1; fcall expr_variable; };
|
2368
2368
|
|
2369
2369
|
'.' | '&.' | '::'
|
2370
|
-
=> { emit_table_PUNCTUATION(
|
2370
|
+
=> { emit_table_PUNCTUATION(lexer, tok(lexer, ts, te), ts, te);
|
2371
2371
|
fnext expr_dot; fbreak; };
|
2372
2372
|
|
2373
2373
|
call_or_var => local_ident;
|
@@ -2376,39 +2376,39 @@ void Init_lexer()
|
|
2376
2376
|
if (tm == te) {
|
2377
2377
|
emit(tFID);
|
2378
2378
|
} else {
|
2379
|
-
emit_token(
|
2379
|
+
emit_token(lexer, tIDENTIFIER, tok(lexer, ts, tm), ts, tm);
|
2380
2380
|
p = tm - 1;
|
2381
2381
|
}
|
2382
2382
|
fnext expr_arg; fbreak;
|
2383
2383
|
};
|
2384
2384
|
|
2385
|
-
'*' => {
|
2386
|
-
|
2385
|
+
'*' | '=>' => {
|
2386
|
+
emit_table_PUNCTUATION(lexer, tok(lexer, ts, te), ts, te);
|
2387
2387
|
fgoto expr_value;
|
2388
2388
|
};
|
2389
2389
|
|
2390
2390
|
( operator_arithmetic | operator_rest ) - ( '|' | '~' | '!' | '*' )
|
2391
2391
|
=> {
|
2392
|
-
emit_table_PUNCTUATION(
|
2392
|
+
emit_table_PUNCTUATION(lexer, tok(lexer, ts, te), ts, te);
|
2393
2393
|
fnext expr_value; fbreak;
|
2394
2394
|
};
|
2395
2395
|
|
2396
2396
|
( e_lparen | '|' | '~' | '!' )
|
2397
|
-
=> { emit_table_PUNCTUATION(
|
2397
|
+
=> { emit_table_PUNCTUATION(lexer, tok(lexer, ts, te), ts, te);
|
2398
2398
|
fnext expr_beg; fbreak; };
|
2399
2399
|
|
2400
2400
|
e_rbrace => {
|
2401
2401
|
emit(tRCURLY);
|
2402
2402
|
|
2403
|
-
if (
|
2404
|
-
stack_state_lexpop(&
|
2405
|
-
stack_state_lexpop(&
|
2403
|
+
if (lexer->version < 24) {
|
2404
|
+
stack_state_lexpop(&lexer->cond);
|
2405
|
+
stack_state_lexpop(&lexer->cmdarg);
|
2406
2406
|
} else {
|
2407
|
-
stack_state_pop(&
|
2408
|
-
stack_state_pop(&
|
2407
|
+
stack_state_pop(&lexer->cond);
|
2408
|
+
stack_state_pop(&lexer->cmdarg);
|
2409
2409
|
}
|
2410
2410
|
|
2411
|
-
if (
|
2411
|
+
if (lexer->version >= 25) {
|
2412
2412
|
fnext expr_end;
|
2413
2413
|
} else {
|
2414
2414
|
fnext expr_endarg;
|
@@ -2420,12 +2420,12 @@ void Init_lexer()
|
|
2420
2420
|
e_rparen => {
|
2421
2421
|
emit(tRPAREN);
|
2422
2422
|
|
2423
|
-
if (
|
2424
|
-
stack_state_lexpop(&
|
2425
|
-
stack_state_lexpop(&
|
2423
|
+
if (lexer->version < 24) {
|
2424
|
+
stack_state_lexpop(&lexer->cond);
|
2425
|
+
stack_state_lexpop(&lexer->cmdarg);
|
2426
2426
|
} else {
|
2427
|
-
stack_state_pop(&
|
2428
|
-
stack_state_pop(&
|
2427
|
+
stack_state_pop(&lexer->cond);
|
2428
|
+
stack_state_pop(&lexer->cmdarg);
|
2429
2429
|
}
|
2430
2430
|
|
2431
2431
|
fbreak;
|
@@ -2434,15 +2434,15 @@ void Init_lexer()
|
|
2434
2434
|
']' => {
|
2435
2435
|
emit(tRBRACK);
|
2436
2436
|
|
2437
|
-
if (
|
2438
|
-
stack_state_lexpop(&
|
2439
|
-
stack_state_lexpop(&
|
2437
|
+
if (lexer->version < 24) {
|
2438
|
+
stack_state_lexpop(&lexer->cond);
|
2439
|
+
stack_state_lexpop(&lexer->cmdarg);
|
2440
2440
|
} else {
|
2441
|
-
stack_state_pop(&
|
2442
|
-
stack_state_pop(&
|
2441
|
+
stack_state_pop(&lexer->cond);
|
2442
|
+
stack_state_pop(&lexer->cmdarg);
|
2443
2443
|
}
|
2444
2444
|
|
2445
|
-
if (
|
2445
|
+
if (lexer->version >= 25) {
|
2446
2446
|
fnext expr_end;
|
2447
2447
|
} else {
|
2448
2448
|
fnext expr_endarg;
|
@@ -2452,7 +2452,7 @@ void Init_lexer()
|
|
2452
2452
|
};
|
2453
2453
|
|
2454
2454
|
operator_arithmetic '='
|
2455
|
-
=> { emit_token(
|
2455
|
+
=> { emit_token(lexer, tOP_ASGN, tok(lexer, ts, te - 1), ts, te);
|
2456
2456
|
fnext expr_beg; fbreak; };
|
2457
2457
|
|
2458
2458
|
'?' => { emit(tEH); fnext expr_value; fbreak; };
|
@@ -2460,7 +2460,7 @@ void Init_lexer()
|
|
2460
2460
|
e_lbrack => { emit(tLBRACK2); fnext expr_beg; fbreak; };
|
2461
2461
|
|
2462
2462
|
punctuation_end
|
2463
|
-
=> { emit_table_PUNCTUATION(
|
2463
|
+
=> { emit_table_PUNCTUATION(lexer, tok(lexer, ts, te), ts, te);
|
2464
2464
|
fnext expr_beg; fbreak; };
|
2465
2465
|
|
2466
2466
|
w_space_comment;
|
@@ -2470,17 +2470,17 @@ void Init_lexer()
|
|
2470
2470
|
';' => { emit(tSEMI); fnext expr_value; fbreak; };
|
2471
2471
|
|
2472
2472
|
'\\' c_line {
|
2473
|
-
diagnostic(
|
2474
|
-
range(
|
2473
|
+
diagnostic(lexer, severity_error, bare_backslash, Qnil,
|
2474
|
+
range(lexer, ts, ts + 1), empty_array);
|
2475
2475
|
fhold;
|
2476
2476
|
};
|
2477
2477
|
|
2478
2478
|
c_any
|
2479
2479
|
=> {
|
2480
2480
|
VALUE hash = rb_hash_new();
|
2481
|
-
VALUE str = rb_str_inspect(tok(
|
2481
|
+
VALUE str = rb_str_inspect(tok(lexer, ts, te));
|
2482
2482
|
rb_hash_aset(hash, character, rb_str_substr(str, 1, NUM2INT(rb_str_length(str)) - 2));
|
2483
|
-
diagnostic(
|
2483
|
+
diagnostic(lexer, fatal, unexpected, hash, range(lexer, ts, te), empty_array);
|
2484
2484
|
};
|
2485
2485
|
|
2486
2486
|
c_eof => do_eof;
|
@@ -2490,22 +2490,22 @@ void Init_lexer()
|
|
2490
2490
|
c_space* %{ tm = p; } ('.' | '&.') => { p = tm - 1; fgoto expr_end; };
|
2491
2491
|
|
2492
2492
|
any => {
|
2493
|
-
emit_token(
|
2493
|
+
emit_token(lexer, tNL, Qnil, lexer->newline_s, lexer->newline_s + 1);
|
2494
2494
|
fhold; fnext line_begin; fbreak;
|
2495
2495
|
};
|
2496
2496
|
*|;
|
2497
2497
|
|
2498
2498
|
line_comment := |*
|
2499
2499
|
'=end' c_line* c_nl_zlen => {
|
2500
|
-
emit_comment(
|
2501
|
-
fgoto *
|
2500
|
+
emit_comment(lexer, lexer->eq_begin_s, te);
|
2501
|
+
fgoto *lexer->cs_before_block_comment;
|
2502
2502
|
};
|
2503
2503
|
|
2504
2504
|
c_line* c_nl;
|
2505
2505
|
|
2506
2506
|
c_line* zlen => {
|
2507
|
-
diagnostic(
|
2508
|
-
range(
|
2507
|
+
diagnostic(lexer, fatal, embedded_document, Qnil,
|
2508
|
+
range(lexer, lexer->eq_begin_s, lexer->eq_begin_s + 6),
|
2509
2509
|
empty_array);
|
2510
2510
|
};
|
2511
2511
|
*|;
|
@@ -2514,7 +2514,7 @@ void Init_lexer()
|
|
2514
2514
|
w_any;
|
2515
2515
|
|
2516
2516
|
'=begin' ( c_space | c_nl_zlen ) => {
|
2517
|
-
|
2517
|
+
lexer->eq_begin_s = ts;
|
2518
2518
|
fgoto line_comment;
|
2519
2519
|
};
|
2520
2520
|
|