c_lexer 2.5.1.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.gitmodules +3 -0
- data/.travis.yml +7 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +53 -0
- data/README.md +26 -0
- data/Rakefile +51 -0
- data/bin/c-ruby-parse +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/c_lexer.gemspec +36 -0
- data/ext/lexer/cmdarg.h +47 -0
- data/ext/lexer/cond.h +47 -0
- data/ext/lexer/emit_tables.h +177 -0
- data/ext/lexer/extconf.rb +5 -0
- data/ext/lexer/lexer.h +308 -0
- data/ext/lexer/lexer.rl +2522 -0
- data/ext/lexer/literal.h +62 -0
- data/ext/lexer/literal/methods.h +371 -0
- data/ext/lexer/literal/type.h +28 -0
- data/ext/lexer/stack.h +41 -0
- data/ext/lexer/stack_state.h +48 -0
- data/lib/c_lexer.rb +80 -0
- data/lib/c_lexer/version.rb +3 -0
- metadata +195 -0
data/ext/lexer/lexer.h
ADDED
@@ -0,0 +1,308 @@
|
|
1
|
+
#ifndef LEXER_H
|
2
|
+
#define LEXER_H
|
3
|
+
|
4
|
+
typedef struct lexer_state lexer_state;
|
5
|
+
#include "literal.h"
|
6
|
+
define_stack_type(lit_stack, literal, {0});
|
7
|
+
|
8
|
+
struct lexer_state {
|
9
|
+
uint cs; /* DFA state */
|
10
|
+
long p; /* stream position */
|
11
|
+
long pe; /* end-of-stream position */
|
12
|
+
|
13
|
+
int *cs_stack;
|
14
|
+
int cs_stack_top;
|
15
|
+
int cs_stack_size;
|
16
|
+
|
17
|
+
VALUE token_queue;
|
18
|
+
VALUE static_env;
|
19
|
+
|
20
|
+
VALUE source_buffer;
|
21
|
+
VALUE source; /* source code string */
|
22
|
+
VALUE source_pts; /* source as a codepoint array */
|
23
|
+
VALUE encoding;
|
24
|
+
|
25
|
+
VALUE tokens;
|
26
|
+
VALUE comments;
|
27
|
+
|
28
|
+
stack_state cond;
|
29
|
+
stack_state cmdarg;
|
30
|
+
ss_stack cond_stack; /* for saving 'cond' */
|
31
|
+
ss_stack cmdarg_stack; /* for saving 'cmdarg' */
|
32
|
+
|
33
|
+
VALUE lambda_stack;
|
34
|
+
int paren_nest;
|
35
|
+
lit_stack literal_stack;
|
36
|
+
|
37
|
+
int version;
|
38
|
+
int in_kwarg;
|
39
|
+
int force_utf32;
|
40
|
+
|
41
|
+
VALUE diagnostics;
|
42
|
+
|
43
|
+
long newline_s; /* position of last newline encountered */
|
44
|
+
long eq_begin_s;
|
45
|
+
long herebody_s;
|
46
|
+
long escape_s;
|
47
|
+
|
48
|
+
int dedent_level;
|
49
|
+
|
50
|
+
VALUE escape;
|
51
|
+
|
52
|
+
uint cs_before_block_comment;
|
53
|
+
};
|
54
|
+
|
55
|
+
static void lexer_mark(void*);
|
56
|
+
static void lexer_dealloc(void*);
|
57
|
+
static VALUE lexer_reset(int, VALUE*, VALUE);
|
58
|
+
|
59
|
+
static void emit_token(lexer_state*, VALUE, VALUE, long, long);
|
60
|
+
static void emit_comment(lexer_state*, long, long);
|
61
|
+
static void emit_do(lexer_state*, int, long, long);
|
62
|
+
|
63
|
+
static VALUE tok(lexer_state*, long, long);
|
64
|
+
static VALUE range(lexer_state*, long, long);
|
65
|
+
static void diagnostic(lexer_state*, VALUE, VALUE, VALUE, VALUE, VALUE);
|
66
|
+
static int get_codepoint(lexer_state*, long);
|
67
|
+
static int arg_or_cmdarg(int);
|
68
|
+
static int is_nthref(VALUE);
|
69
|
+
static int is_backref(VALUE);
|
70
|
+
static int is_capitalized(VALUE);
|
71
|
+
static int is_regexp_metachar(VALUE);
|
72
|
+
static int eof_codepoint(int);
|
73
|
+
static VALUE find_unknown_options(VALUE);
|
74
|
+
static int bad_cvar_name(VALUE);
|
75
|
+
static int bad_ivar_name(VALUE);
|
76
|
+
static int find_8_or_9(VALUE str);
|
77
|
+
static void emit_int(lexer_state*, VALUE, long, long);
|
78
|
+
static void emit_rational(lexer_state*, VALUE, long, long);
|
79
|
+
static void emit_complex(lexer_state*, VALUE, long, long);
|
80
|
+
static void emit_complex_rational(lexer_state*, VALUE, long, long);
|
81
|
+
static void emit_float(lexer_state*, VALUE, long, long);
|
82
|
+
static void emit_complex_float(lexer_state*, VALUE, long, long);
|
83
|
+
static void emit_int_followed_by_if(lexer_state*, VALUE, long, long);
|
84
|
+
static void emit_int_followed_by_rescue(lexer_state*, VALUE, long, long);
|
85
|
+
static void emit_float_followed_by_if(lexer_state*, VALUE, long, long);
|
86
|
+
static void emit_float_followed_by_rescue(lexer_state*, VALUE, long, long);
|
87
|
+
static int push_literal(lexer_state*, VALUE, VALUE, long, long, int, int, int);
|
88
|
+
static int pop_literal(lexer_state*);
|
89
|
+
static VALUE array_last(VALUE);
|
90
|
+
static VALUE unescape_char(char);
|
91
|
+
static VALUE escape_char(VALUE);
|
92
|
+
static inline int str_start_with_p(VALUE, const char*);
|
93
|
+
static inline int str_end_with_p(VALUE, const char*);
|
94
|
+
static inline void force_encoding(VALUE, VALUE);
|
95
|
+
|
96
|
+
/*
 * Emit a token of the given type covering the current match.
 * Expands to a call that uses `state`, `ts` and `te`, so those three names
 * must be in scope wherever the macro is used.
 */
#define emit(type) emit_token(state, type, tok(state, ts, te), ts, te)

/*
 * Define `lexer_get_<name>(self)`, which unwraps the lexer_state behind
 * `self` and returns its VALUE member `name`.
 */
#define def_lexer_attr_reader(name)                  \
  static VALUE lexer_get_ ## name(VALUE self) {      \
    lexer_state *state;                              \
    Data_Get_Struct(self, lexer_state, state);       \
    return state->name; }

/*
 * Define `lexer_set_<name>(self, val)`, which stores `val` into the VALUE
 * member `name` and returns it.
 *
 * The last line deliberately has no trailing backslash: a continuation there
 * would splice whatever line follows the macro into its body.
 */
#define def_lexer_attr_writer(name)                         \
  static VALUE lexer_set_ ## name(VALUE self, VALUE val) {  \
    lexer_state *state;                                     \
    Data_Get_Struct(self, lexer_state, state);              \
    return state->name = val; }

/* Define both the reader and the writer for member `name`. */
#define def_lexer_attribute(name) \
  def_lexer_attr_reader(name)     \
  def_lexer_attr_writer(name)
|
113
|
+
|
114
|
+
/* The Ruby Integer 0. */
#define Qzero INT2NUM(0)

/*
 * Assign to the global VALUE `name` the Ruby Symbol spelled like the
 * identifier itself, and register the variable's address with the GC.
 *
 * Wrapped in do { } while (0) so the two statements behave as one statement
 * (for example as the body of an unbraced `if`). Use it with a trailing
 * semicolon, as before.
 */
#define init_symbol(name)                 \
  do {                                    \
    name = ID2SYM(rb_intern(#name));      \
    rb_gc_register_address(&name);        \
  } while (0)
|
119
|
+
|
120
|
+
/*
 * Global Ruby object references used by the lexer.
 *
 * NOTE(review): these are definitions, not `extern` declarations, so every
 * translation unit that includes this header gets its own (tentative)
 * definition -- confirm that only one translation unit includes it.
 */

/* k-prefixed token types (e.g. kDO, kDO_COND, kDO_BLOCK are emitted for `do`) */
VALUE k__ENCODING__, k__FILE__, k__LINE__, kALIAS, kAND, kBEGIN, klBEGIN,
      kBREAK, kCASE, kCLASS, kDEF, kDEFINED, kDO, kDO_BLOCK, kDO_COND,
      kDO_LAMBDA, kELSE, kELSIF, kEND, klEND, kENSURE, kFALSE, kFOR, kIF,
      kIF_MOD, kIN, kMODULE, kNEXT, kNIL, kNOT, kOR, kREDO, kRESCUE,
      kRESCUE_MOD, kRETRY, kRETURN, kSELF, kSUPER, kTHEN, kTRUE, kUNDEF,
      kUNLESS, kUNLESS_MOD, kUNTIL, kUNTIL_MOD, kWHEN, kWHILE, kWHILE_MOD,
      kYIELD;

/* t-prefixed token types */
VALUE tAMPER, tAMPER2, tANDDOT, tANDOP, tAREF, tASET, tASSOC, tBACK_REF,
      tBACK_REF2, tBANG, tCARET, tCHARACTER, tCMP, tCOLON, tCOLON2, tCOLON3,
      tCOMMA, tCOMMENT, tCONSTANT, tCVAR, tDIVIDE, tDOT, tDOT2, tDOT3,
      tDSTAR, tEH, tEQ, tEQL, tEQQ, tFID, tFLOAT, tGEQ, tGT, tGVAR,
      tIDENTIFIER, tIMAGINARY, tINTEGER, tIVAR, tLABEL, tLABEL_END, tLAMBDA,
      tLAMBEG, tLBRACE, tLBRACE_ARG, tLBRACK, tLBRACK2, tLCURLY, tLEQ,
      tLPAREN, tLPAREN_ARG, tLPAREN2, tLSHFT, tLT, tMATCH, tMINUS, tNEQ, tNL,
      tNMATCH, tNTH_REF, tOP_ASGN, tOROP, tPERCENT, tPIPE, tPLUS, tPOW,
      tQWORDS_BEG, tQSYMBOLS_BEG, tRATIONAL, tRBRACK, tRCURLY, tREGEXP_BEG,
      tREGEXP_OPT, tRPAREN, tRSHFT, tSEMI, tSPACE, tSTAR, tSTAR2, tSTRING,
      tSTRING_BEG, tSTRING_CONTENT, tSTRING_DBEG, tSTRING_DEND, tSTRING_DVAR,
      tSTRING_END, tSYMBEG, tSYMBOL, tSYMBOLS_BEG, tTILDE, tUMINUS,
      tUNARY_NUM, tUPLUS, tWORDS_BEG, tXSTRING_BEG;

/* classes instantiated by the lexer */
VALUE comment_klass, diagnostic_klass, range_klass;

/* presumably diagnostic severities -- confirm against the initialisation code */
VALUE severity_error, fatal, warning;

/* presumably diagnostic reasons -- confirm against the initialisation code */
VALUE ambiguous_literal, ambiguous_prefix, bare_backslash, character,
      cvar_name, embedded_document, empty_numeric, escape_eof,
      incomplete_escape, invalid_escape, invalid_escape_use,
      invalid_hex_escape, invalid_octal, invalid_unicode_escape, ivar_name,
      heredoc_id_ends_with_nl, heredoc_id_has_newline, no_dot_digit_literal,
      prefix, regexp_options, string_eof, trailing_in_number, unexpected,
      unexpected_percent_str, unicode_point_too_large, unterminated_unicode;

/* shared helper objects */
VALUE empty_array, blank_string, newline, escaped_newline, utf8_encoding,
      cr_then_anything_to_eol, crs_to_eol;
|
307
|
+
|
308
|
+
#endif
|
data/ext/lexer/lexer.rl
ADDED
@@ -0,0 +1,2522 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <ruby/encoding.h>
|
3
|
+
#include <ruby/re.h>
|
4
|
+
|
5
|
+
#include <stdint.h>
|
6
|
+
#include <stdio.h>
|
7
|
+
|
8
|
+
#include "stack.h"
|
9
|
+
#include "stack_state.h"
|
10
|
+
#include "lexer.h"
|
11
|
+
|
12
|
+
/*
 * Declare `lexer_state *s` and point it at the struct wrapped by the Ruby
 * object `l`. Expands to a declaration followed by a statement, so it can
 * only be used where a declaration is allowed.
 */
#define INIT_LEXER_STATE(l, s) lexer_state *s; Data_Get_Struct(l, lexer_state, s);

/*
 * True when the lexer has a static environment and that environment answers
 * truthy to `declared?(name)`. Requires a `state` pointer in scope.
 *
 * The expansion is fully parenthesized so that it stays a single operand
 * when combined with operators at the use site, e.g. `!STATIC_ENV_DECLARED(x)`.
 */
#define STATIC_ENV_DECLARED(name) \
  ((state->static_env != Qnil) && \
   RTEST(rb_funcall(state->static_env, rb_intern("declared?"), 1, (name))))
|
15
|
+
|
16
|
+
#include "cmdarg.h"
|
17
|
+
#include "cond.h"
|
18
|
+
|
19
|
+
#include "literal/methods.h"
|
20
|
+
#include "emit_tables.h"
|
21
|
+
|
22
|
+
%%machine lex;
|
23
|
+
%%write data;
|
24
|
+
|
25
|
+
/*
 * Allocation function for the lexer class.
 *
 * Builds a lexer_state, gives it an initial 4-entry DFA state stack, sets
 * every Ruby object reference to nil, initialises the three embedded stacks
 * and wraps the result in a Data object of class `klass`.
 */
static VALUE lexer_alloc(VALUE klass)
{
  /*
   * Zero-fill the struct: members that are not assigned below (cond, cmdarg,
   * version, in_kwarg, dedent_level, the *_s positions, ...) would otherwise
   * hold indeterminate values until the lexer is reset.
   */
  lexer_state *state = xcalloc(1, sizeof(*state));

  state->cs = 0;
  state->p = 0;
  state->pe = 0;
  state->paren_nest = 0;

  state->cs_stack_size = 4;
  state->cs_stack_top  = 0;
  state->cs_stack      = xmalloc(state->cs_stack_size * sizeof(int));

  /* nil is not all-bits-zero, so VALUE members are set explicitly */
  state->source_buffer = Qnil;
  state->source        = Qnil;
  state->source_pts    = Qnil;
  state->token_queue   = Qnil;
  state->static_env    = Qnil;
  state->lambda_stack  = Qnil;
  state->diagnostics   = Qnil;
  state->tokens        = Qnil;
  state->comments      = Qnil;
  state->encoding      = Qnil;
  state->escape        = Qnil;

  ss_stack_init(&state->cond_stack);
  ss_stack_init(&state->cmdarg_stack);
  lit_stack_init(&state->literal_stack);

  return Data_Wrap_Struct(klass, lexer_mark, lexer_dealloc, state);
}
|
54
|
+
|
55
|
+
static void lexer_mark(void *ptr)
|
56
|
+
{
|
57
|
+
lexer_state *state = ptr;
|
58
|
+
rb_gc_mark(state->source_buffer);
|
59
|
+
rb_gc_mark(state->source);
|
60
|
+
rb_gc_mark(state->source_pts);
|
61
|
+
rb_gc_mark(state->token_queue);
|
62
|
+
rb_gc_mark(state->static_env);
|
63
|
+
rb_gc_mark(state->lambda_stack);
|
64
|
+
rb_gc_mark(state->diagnostics);
|
65
|
+
rb_gc_mark(state->tokens);
|
66
|
+
rb_gc_mark(state->comments);
|
67
|
+
rb_gc_mark(state->encoding);
|
68
|
+
rb_gc_mark(state->escape);
|
69
|
+
|
70
|
+
for (literal *lit = state->literal_stack.bottom; lit < state->literal_stack.top; lit++) {
|
71
|
+
rb_gc_mark(lit->buffer);
|
72
|
+
rb_gc_mark(lit->start_tok);
|
73
|
+
rb_gc_mark(lit->start_delim);
|
74
|
+
rb_gc_mark(lit->end_delim);
|
75
|
+
rb_gc_mark(lit->delimiter);
|
76
|
+
}
|
77
|
+
}
|
78
|
+
|
79
|
+
static void lexer_dealloc(void *ptr)
|
80
|
+
{
|
81
|
+
lexer_state *state = ptr;
|
82
|
+
ss_stack_dealloc(&state->cond_stack);
|
83
|
+
ss_stack_dealloc(&state->cmdarg_stack);
|
84
|
+
lit_stack_dealloc(&state->literal_stack);
|
85
|
+
xfree(ptr);
|
86
|
+
}
|
87
|
+
|
88
|
+
/*
 * Initializer: records the requested version (converted to a C int) and
 * performs a full reset. Returns whatever lexer_reset returns.
 */
static VALUE lexer_initialize(VALUE self, VALUE version)
{
  lexer_state *state;
  Data_Get_Struct(self, lexer_state, state);

  state->version = NUM2INT(version);

  /* no arguments: reset with the default reset_state */
  return lexer_reset(0, NULL, self);
}
|
96
|
+
|
97
|
+
/*
 * reset(reset_state = true)
 *
 * Returns the lexer to its initial condition. The DFA state and the
 * cond/cmdarg state (with their save stacks) are only cleared when
 * `reset_state` is truthy or omitted; everything else is always cleared.
 * Returns self.
 */
static VALUE lexer_reset(int argc, VALUE *argv, VALUE self)
{
  lexer_state *state;
  VALUE reset_state;

  Data_Get_Struct(self, lexer_state, state);
  rb_scan_args(argc, argv, "01", &reset_state);

  /* an omitted (nil) argument counts as true */
  if (NIL_P(reset_state) || RTEST(reset_state)) {
    state->cs     = lex_en_line_begin;
    state->cond   = 0;
    state->cmdarg = 0;
    ss_stack_clear(&state->cond_stack);
    ss_stack_clear(&state->cmdarg_stack);
  }

  state->force_utf32 = 0;

  /* source and position */
  state->source     = Qnil;
  state->source_pts = Qnil;
  state->encoding   = Qnil;
  state->p          = 0;

  /* queued tokens and open literals */
  state->token_queue = rb_ary_new();
  lit_stack_clear(&state->literal_stack);

  /* remembered stream positions */
  state->eq_begin_s = 0;
  state->newline_s  = 0;
  state->escape_s   = 0;
  state->escape     = Qnil;
  state->herebody_s = 0;

  /* nesting bookkeeping */
  state->paren_nest   = 0;
  state->lambda_stack = rb_ary_new();

  state->dedent_level = -1;
  state->in_kwarg     = 0;

  state->cs_before_block_comment = lex_en_line_begin;

  return self;
}
|
161
|
+
|
162
|
+
/*
 * source_buffer=(buffer)
 *
 * Stores the buffer and derives the source string, its encoding and a
 * codepoint array from it. A falsy buffer clears all of those. Returns self.
 */
static VALUE lexer_set_source_buffer(VALUE self, VALUE buffer)
{
  lexer_state *state;
  const char *unpack_format;
  VALUE first_pt;

  Data_Get_Struct(self, lexer_state, state);
  state->source_buffer = buffer;

  if (!RTEST(buffer)) {
    state->source     = Qnil;
    state->source_pts = Qnil;
    state->encoding   = Qnil;
    state->pe         = 0;
    return self;
  }

  state->source   = rb_funcall(buffer, rb_intern("source"), 0);
  state->encoding = rb_obj_encoding(state->source);

  /* UTF-8 sources are unpacked into codepoints, anything else into bytes */
  unpack_format = (state->encoding == utf8_encoding) ? "U*" : "C*";
  state->source_pts = rb_funcall(state->source, rb_intern("unpack"), 1,
                                 rb_str_new2(unpack_format));

  /* pretend there is a null at the end */
  state->pe = RARRAY_LEN(state->source_pts) + 2;

  /* start after a leading U+FEFF */
  first_pt = rb_ary_entry(state->source_pts, 0);
  if (first_pt != Qnil && NUM2INT(first_pt) == 0xfeff)
    state->p = 1;

  return self;
}
|
193
|
+
|
194
|
+
/*
 * state
 *
 * Returns the current DFA state as a Symbol. Raises RuntimeError when the
 * state is not one of the named entry points listed below.
 */
static VALUE lexer_get_state(VALUE self)
{
  lexer_state *state;
  size_t i;

  Data_Get_Struct(self, lexer_state, state);

  const struct { unsigned int cs; const char *name; } known[] = {
    { lex_en_line_begin,    "line_begin"    },
    { lex_en_expr_dot,      "expr_dot"      },
    { lex_en_expr_fname,    "expr_fname"    },
    { lex_en_expr_value,    "expr_value"    },
    { lex_en_expr_beg,      "expr_beg"      },
    { lex_en_expr_mid,      "expr_mid"      },
    { lex_en_expr_arg,      "expr_arg"      },
    { lex_en_expr_cmdarg,   "expr_cmdarg"   },
    { lex_en_expr_end,      "expr_end"      },
    { lex_en_expr_endarg,   "expr_endarg"   },
    { lex_en_expr_endfn,    "expr_endfn"    },
    { lex_en_expr_labelarg, "expr_labelarg" },

    { lex_en_interp_string, "interp_string" },
    { lex_en_interp_words,  "interp_words"  },
    { lex_en_plain_string,  "plain_string"  },
    { lex_en_plain_words,   "plain_words"   }
  };

  for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
    if (known[i].cs == state->cs)
      return ID2SYM(rb_intern(known[i].name));
  }

  rb_raise(rb_eRuntimeError, "Lexer state variable is borked");
  return Qnil; /* not reached: rb_raise does not return */
}
|
220
|
+
|
221
|
+
/*
 * state=(symbol)
 *
 * Sets the DFA state from its symbolic name. Raises ArgumentError for a name
 * that is not one of the entry points listed below. Returns the symbol.
 */
static VALUE lexer_set_state(VALUE self, VALUE state_sym)
{
  lexer_state *state;
  const char *state_name;
  size_t i;

  Data_Get_Struct(self, lexer_state, state);
  state_name = rb_id2name(SYM2ID(state_sym));

  const struct { const char *name; int cs; } known[] = {
    { "line_begin",    lex_en_line_begin    },
    { "expr_dot",      lex_en_expr_dot      },
    { "expr_fname",    lex_en_expr_fname    },
    { "expr_value",    lex_en_expr_value    },
    { "expr_beg",      lex_en_expr_beg      },
    { "expr_mid",      lex_en_expr_mid      },
    { "expr_arg",      lex_en_expr_arg      },
    { "expr_cmdarg",   lex_en_expr_cmdarg   },
    { "expr_end",      lex_en_expr_end      },
    { "expr_endarg",   lex_en_expr_endarg   },
    { "expr_endfn",    lex_en_expr_endfn    },
    { "expr_labelarg", lex_en_expr_labelarg },

    { "interp_string", lex_en_interp_string },
    { "interp_words",  lex_en_interp_words  },
    { "plain_string",  lex_en_plain_string  },
    { "plain_words",   lex_en_plain_words   }
  };

  for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
    if (strcmp(state_name, known[i].name) == 0) {
      state->cs = known[i].cs;
      return state_sym;
    }
  }

  rb_raise(rb_eArgError, "Invalid state: %s", state_name);
  return state_sym; /* not reached: rb_raise does not return */
}
|
264
|
+
|
265
|
+
/* push_cmdarg: saves the current cmdarg state on its stack and clears it. */
static VALUE lexer_push_cmdarg(VALUE self)
{
  lexer_state *lexer;
  Data_Get_Struct(self, lexer_state, lexer);

  ss_stack_push(&lexer->cmdarg_stack, lexer->cmdarg);
  lexer->cmdarg = 0;

  return Qnil;
}

/* pop_cmdarg: restores the most recently saved cmdarg state. */
static VALUE lexer_pop_cmdarg(VALUE self)
{
  lexer_state *lexer;
  Data_Get_Struct(self, lexer_state, lexer);

  lexer->cmdarg = ss_stack_pop(&lexer->cmdarg_stack);

  return Qnil;
}

/* push_cond: saves the current cond state on its stack and clears it. */
static VALUE lexer_push_cond(VALUE self)
{
  lexer_state *lexer;
  Data_Get_Struct(self, lexer_state, lexer);

  ss_stack_push(&lexer->cond_stack, lexer->cond);
  lexer->cond = 0;

  return Qnil;
}

/* pop_cond: restores the most recently saved cond state. */
static VALUE lexer_pop_cond(VALUE self)
{
  lexer_state *lexer;
  Data_Get_Struct(self, lexer_state, lexer);

  lexer->cond = ss_stack_pop(&lexer->cond_stack);

  return Qnil;
}
|
294
|
+
|
295
|
+
/* in_kwarg: returns true or false according to the stored flag. */
static VALUE lexer_get_in_kwarg(VALUE self)
{
  lexer_state *lexer;
  Data_Get_Struct(self, lexer_state, lexer);

  if (lexer->in_kwarg)
    return Qtrue;
  return Qfalse;
}

/* in_kwarg=(val): stores the truthiness of `val` as 0 or 1; returns `val`. */
static VALUE lexer_set_in_kwarg(VALUE self, VALUE val)
{
  lexer_state *lexer;
  Data_Get_Struct(self, lexer_state, lexer);

  if (RTEST(val))
    lexer->in_kwarg = 1;
  else
    lexer->in_kwarg = 0;

  return val;
}
|
307
|
+
|
308
|
+
/*
 * dedent_level
 *
 * Returns the recorded dedent level as an Integer, or nil when none is
 * recorded (-1). Reading the value also clears it back to -1.
 */
static VALUE lexer_get_dedent_level(VALUE self)
{
  lexer_state *lexer;
  int level;

  Data_Get_Struct(self, lexer_state, lexer);

  level = lexer->dedent_level;
  lexer->dedent_level = -1; /* the value is consumed by this read */

  return (level == -1) ? Qnil : INT2NUM(level);
}
|
318
|
+
|
319
|
+
/*
 * force_utf32=(arg)
 *
 * Accepts the value and returns it unchanged without touching the lexer
 * state.
 * NOTE(review): the state has a `force_utf32` member that this setter never
 * writes -- presumably a deliberate no-op; confirm.
 */
static VALUE lexer_set_force_utf32(VALUE self, VALUE arg)
{
  (void)self; /* receiver is intentionally unused */
  return arg;
}
|
323
|
+
|
324
|
+
/*
 * advance
 *
 * Returns the next token. If tokens are already queued, the first one is
 * returned immediately. Otherwise the generated state machine is run from the
 * saved position, and afterwards:
 *   - a newly queued token is returned if there is one;
 *   - if the machine ended in its error state, [false, ["$error", range]] is
 *     returned, the range covering the codepoint before `p`;
 *   - otherwise [false, ["$eof", range]] is returned, with an empty range two
 *     positions before `pe`.
 *
 * The local variables below are referenced by name from the machine's
 * actions, so none of them may be renamed or removed.
 */
static VALUE lexer_advance(VALUE self)
{
  int cs, act = 0, top, command_state;
  int num_base = 0;
  long p, pe, eof, ts = 0, te = 0, tm = 0, sharp_s = 0, heredoc_e = 0;
  long num_digits_s = 0, num_suffix_s = 0;
  /* numeric suffix-induced transformation; starts out NULL instead of holding
   * an indeterminate pointer value */
  void (*num_xfrm)(lexer_state*, VALUE, long, long) = NULL;
  lexer_state *state;
  int *stack;
  VALUE ident_tok = Qnil;
  long ident_ts = 0, ident_te = 0;
  long numeric_s = 0;
  Data_Get_Struct(self, lexer_state, state);

  if (RARRAY_LEN(state->token_queue) > 0)
    return rb_ary_shift(state->token_queue);

  /* load the machine registers from the persistent state */
  cs = state->cs;
  p = state->p;
  pe = eof = state->pe;
  stack = state->cs_stack;
  top = state->cs_stack_top;

  command_state = (cs == lex_en_expr_value || cs == lex_en_line_begin);

  %%{
    write exec;
  }%%

  /* write the registers back so the next call resumes here */
  state->p = p;
  state->cs = cs;
  state->cs_stack_top = top;

  if (RARRAY_LEN(state->token_queue) > 0) {
    return rb_ary_shift(state->token_queue);
  } else if (cs == lex_error) {
    VALUE info = rb_ary_new3(2, rb_str_new2("$error"), range(state, p - 1, p));
    VALUE token = rb_ary_new3(2, Qfalse, info);
    return token;
  } else {
    VALUE info = rb_ary_new3(2, rb_str_new2("$eof"), range(state, eof - 2, eof - 2));
    VALUE token = rb_ary_new3(2, Qfalse, info);
    return token;
  }
}
|
369
|
+
|
370
|
+
/* Associate `str` with the encoding designated by `enc`. */
static inline void force_encoding(VALUE str, VALUE enc)
{
  rb_encoding *target = rb_to_encoding(enc);
  rb_enc_associate(str, target);
}
|
374
|
+
|
375
|
+
/*
 * Build the token [type, [value, range(start, end)]], append it to the token
 * queue and, when token collection is enabled (`tokens` is not nil), to the
 * `tokens` array as well.
 */
static void emit_token(lexer_state *state, VALUE type, VALUE value, long start, long end)
{
  VALUE location = range(state, start, end);
  VALUE payload  = rb_ary_new3(2, value, location);
  VALUE token    = rb_ary_new3(2, type, payload);

  rb_ary_push(state->token_queue, token);

  if (!NIL_P(state->tokens)) {
    rb_ary_push(state->tokens, token);
  }
}
|
385
|
+
|
386
|
+
/*
 * Record a comment spanning [start, end).
 *
 * When `tokens` is collected, a tCOMMENT token is appended to it; when
 * `comments` is collected, a comment object built from the range is appended
 * to it. The range object is created at most once and shared by both.
 */
static void emit_comment(lexer_state *state, long start, long end)
{
  VALUE span = Qnil;

  if (!NIL_P(state->tokens)) {
    VALUE payload, token;

    span    = range(state, start, end);
    payload = rb_ary_new3(2, tok(state, start, end), span);
    token   = rb_ary_new3(2, tCOMMENT, payload);
    rb_ary_push(state->tokens, token);
  }

  if (!NIL_P(state->comments)) {
    VALUE comment;

    if (NIL_P(span)) {
      span = range(state, start, end);
    }
    comment = rb_class_new_instance(1, &span, comment_klass);
    rb_ary_push(state->comments, comment);
  }
}
|
405
|
+
|
406
|
+
/*
 * Emit the `do` keyword spanning [ts, te) with the token type chosen by
 * context: kDO_COND while cond is active, else kDO_BLOCK while cmdarg is
 * active or `do_block` is set, else kDO.
 */
static void emit_do(lexer_state *state, int do_block, long ts, long te)
{
  VALUE type;

  if (stack_state_active(&state->cond)) {
    type = kDO_COND;
  } else if (stack_state_active(&state->cmdarg) || do_block) {
    type = kDO_BLOCK;
  } else {
    type = kDO;
  }

  emit_token(state, type, tok(state, ts, te), ts, te);
}
|
415
|
+
|
416
|
+
/* Return the substring of the source from `start` up to (not including) `end`. */
static VALUE tok(lexer_state *state, long start, long end)
{
  const long length = end - start;
  return rb_str_substr(state->source, start, length);
}
|
420
|
+
|
421
|
+
/*
 * Instantiate range_klass with (source_buffer, start, end).
 *
 * The offsets are `long`, so they are converted with LONG2NUM; INT2NUM takes
 * an `int` and would truncate offsets that do not fit in one.
 */
static VALUE range(lexer_state *state, long start, long end)
{
  VALUE args[3];
  args[0] = state->source_buffer;
  args[1] = LONG2NUM(start);
  args[2] = LONG2NUM(end);
  return rb_class_new_instance(3, args, range_klass);
}
|
429
|
+
|
430
|
+
/*
 * Instantiate diagnostic_klass with (type, reason, arguments, loc, hilights)
 * and pass the new object to `process` on the lexer's diagnostics object.
 */
static void diagnostic(lexer_state *state, VALUE type, VALUE reason,
                       VALUE arguments, VALUE loc, VALUE hilights)
{
  VALUE ctor_args[5] = { type, reason, arguments, loc, hilights };
  VALUE report = rb_class_new_instance(5, ctor_args, diagnostic_klass);

  rb_funcall(state->diagnostics, rb_intern("process"), 1, report);
}
|
442
|
+
|
443
|
+
/*
 * Return the codepoint at index `p` of the codepoint array, or 0 when `p` is
 * at or beyond its end.
 */
static int get_codepoint(lexer_state *state, long p)
{
  if (p < RARRAY_LEN(state->source_pts)) {
    VALUE pt = rb_ary_entry(state->source_pts, p);
    return NUM2INT(pt);
  }
  return 0;
}
|
450
|
+
|
451
|
+
static int arg_or_cmdarg(int command_state)
|
452
|
+
{
|
453
|
+
if (command_state) {
|
454
|
+
return lex_en_expr_cmdarg;
|
455
|
+
} else {
|
456
|
+
return lex_en_expr_arg;
|
457
|
+
}
|
458
|
+
}
|
459
|
+
|
460
|
+
/*
 * True when the string is `$` followed by a digit 1-9 and then any number of
 * digits 0-9, up to the first NUL byte.
 * NOTE(review): scanning relies on the string data being NUL-terminated.
 */
static int is_nthref(VALUE str)
{
  const char *s = RSTRING_PTR(str);

  if (s[0] != '$')
    return 0;
  if (s[1] < '1' || s[1] > '9')
    return 0;

  for (s += 2; *s != '\0'; s++) {
    if (*s < '0' || *s > '9')
      return 0;
  }

  return 1;
}
|
476
|
+
|
477
|
+
/*
 * True when the string is exactly `$` followed by one of & ` ' + and then a
 * NUL byte.
 */
static int is_backref(VALUE str)
{
  const char *s = RSTRING_PTR(str);

  if (s[0] != '$')
    return 0;

  switch (s[1]) {
  case '&':
  case '`':
  case '\'':
  case '+':
    break;
  default:
    return 0;
  }

  return s[2] == '\0'; /* are we at end of string? */
}
|
489
|
+
|
490
|
+
/* True when the first byte of the string is an ASCII upper-case letter. */
static int is_capitalized(VALUE str)
{
  const char first = RSTRING_PTR(str)[0];
  return 'A' <= first && first <= 'Z';
}
|
495
|
+
|
496
|
+
/*
 * True when the first byte of the string is one of
 *   \ $ ( ) * + . < > ? [ ] ^ { | }
 */
static int is_regexp_metachar(VALUE str)
{
  switch (*RSTRING_PTR(str)) {
  case '\\': case '$': case '(': case ')': case '*':
  case '+':  case '.': case '<': case '>': case '?':
  case '[':  case ']': case '^': case '{': case '|':
  case '}':
    return 1;
  default:
    return 0;
  }
}
|
504
|
+
|
505
|
+
/* True for the codepoints treated as end of input: 0x00, 0x04 and 0x1a. */
static int eof_codepoint(int codepoint)
{
  switch (codepoint) {
  case 0x00:
  case 0x04:
  case 0x1a:
    return 1;
  default:
    return 0;
  }
}
|
509
|
+
|
510
|
+
/*
 * Collect every byte of `str` (up to the first NUL) that is not one of the
 * option letters i m x o u e s n.
 *
 * Returns nil when there is no such byte, otherwise a new String holding the
 * offending bytes in their original order.
 */
static VALUE find_unknown_options(VALUE str)
{
  const char *cursor = RSTRING_PTR(str);
  VALUE unknown = Qnil;
  char c;

  while ((c = *cursor++) != '\0') {
    switch (c) {
    case 'i': case 'm': case 'x': case 'o':
    case 'u': case 'e': case 's': case 'n':
      continue; /* known option: move on to the next byte */
    default:
      break;
    }

    if (NIL_P(unknown)) {
      unknown = rb_str_new(&c, 1);
    } else {
      /* append the byte in place rather than allocating a temporary
       * one-byte String for every append */
      rb_str_cat(unknown, &c, 1);
    }
  }

  return unknown;
}
|
528
|
+
|
529
|
+
/* True when the string starts with `@@` immediately followed by a digit. */
static int bad_cvar_name(VALUE str)
{
  const char *s = RSTRING_PTR(str);

  if (s[0] != '@')
    return 0;
  if (s[1] != '@')
    return 0;

  return '0' <= s[2] && s[2] <= '9';
}
|
537
|
+
|
538
|
+
/* True when the string starts with `@` immediately followed by a digit. */
static int bad_ivar_name(VALUE str)
{
  const char *s = RSTRING_PTR(str);

  if (s[0] != '@')
    return 0;

  return '0' <= s[1] && s[1] <= '9';
}
|
545
|
+
|
546
|
+
/*
 * Return the byte index of the first '8' or '9' in the string (scanning up
 * to the first NUL), or -1 when there is none.
 */
static int find_8_or_9(VALUE str)
{
  const char *s = RSTRING_PTR(str);
  int i;

  for (i = 0; s[i] != '\0'; i++) {
    if (s[i] == '8' || s[i] == '9')
      return i;
  }

  return -1;
}
|
560
|
+
|
561
|
+
/*
 * Numeric literal emitters. They all share one signature so that any of them
 * can be stored in a single function pointer; each converts `val` as needed
 * and emits one token spanning [start, end).
 */

/* tINTEGER carrying `val` unchanged. */
static void emit_int(lexer_state *state, VALUE val, long start, long end)
{
  emit_token(state, tINTEGER, val, start, end);
}

/* tRATIONAL carrying Rational(val). */
static void emit_rational(lexer_state *state, VALUE val, long start, long end)
{
  VALUE rational = rb_Rational1(val);
  emit_token(state, tRATIONAL, rational, start, end);
}

/* tIMAGINARY carrying Complex(0, val). */
static void emit_complex(lexer_state *state, VALUE val, long start, long end)
{
  VALUE imaginary = rb_Complex(Qzero, val);
  emit_token(state, tIMAGINARY, imaginary, start, end);
}

/* tIMAGINARY carrying Complex(0, Rational(val)). */
static void emit_complex_rational(lexer_state *state, VALUE val, long start, long end)
{
  VALUE imaginary = rb_Complex(Qzero, rb_Rational1(val));
  emit_token(state, tIMAGINARY, imaginary, start, end);
}

/* tFLOAT carrying Float(val). */
static void emit_float(lexer_state *state, VALUE val, long start, long end)
{
  VALUE number = rb_Float(val);
  emit_token(state, tFLOAT, number, start, end);
}

/* tIMAGINARY carrying Complex(0, Float(val)). */
static void emit_complex_float(lexer_state *state, VALUE val, long start, long end)
{
  VALUE imaginary = rb_Complex(Qzero, rb_Float(val));
  emit_token(state, tIMAGINARY, imaginary, start, end);
}

/* Same output as emit_int; kept as a separate function with its own address. */
static void emit_int_followed_by_if(lexer_state *state, VALUE val, long start, long end)
{
  emit_token(state, tINTEGER, val, start, end);
}

/* Same output as emit_int; kept as a separate function with its own address. */
static void emit_int_followed_by_rescue(lexer_state *state, VALUE val, long start, long end)
{
  emit_token(state, tINTEGER, val, start, end);
}

/* Same output as emit_float; kept as a separate function with its own address. */
static void emit_float_followed_by_if(lexer_state *state, VALUE val, long start, long end)
{
  VALUE number = rb_Float(val);
  emit_token(state, tFLOAT, number, start, end);
}

/* Same output as emit_float; kept as a separate function with its own address. */
static void emit_float_followed_by_rescue(lexer_state *state, VALUE val, long start, long end)
{
  VALUE number = rb_Float(val);
  emit_token(state, tFLOAT, number, start, end);
}
|
609
|
+
|
610
|
+
/*
 * Choose the entry point used to lex the body of `lit` from three
 * properties: whether it is a words literal, whether it is
 * backslash-delimited, and whether it interpolates.
 */
static int next_state_for_literal(literal *lit) {
  const int is_words     = literal_words_p(lit) != 0;
  const int is_backslash = literal_backslash_delimited_p(lit) != 0;

  if (is_words) {
    if (is_backslash) {
      return lit->interpolate ? lex_en_interp_backslash_delimited_words
                              : lex_en_plain_backslash_delimited_words;
    }
    return lit->interpolate ? lex_en_interp_words : lex_en_plain_words;
  }

  if (is_backslash) {
    return lit->interpolate ? lex_en_interp_backslash_delimited
                            : lex_en_plain_backslash_delimited;
  }

  return lit->interpolate ? lex_en_interp_string : lex_en_plain_string;
}
|
637
|
+
|
638
|
+
/*
 * Initialise a new literal from the given parameters, push it onto
 * state->literal_stack, and return the Ragel state that should scan its body.
 *
 * All arguments after `state` are forwarded verbatim to literal_init().
 * The returned state is computed from the freshly initialised local value
 * (the same value that was handed to lit_stack_push()).
 */
static int push_literal(lexer_state *state, VALUE str_type, VALUE delimiter,
                        long str_s, long heredoc_e, int indent, int dedent_body,
                        int label_allowed)
{
  literal fresh;

  literal_init(&fresh, state, str_type, delimiter, str_s, heredoc_e, indent,
               dedent_body, label_allowed);
  lit_stack_push(&state->literal_stack, fresh);

  return next_state_for_literal(&fresh);
}
|
649
|
+
|
650
|
+
/*
 * Pop the innermost literal off state->literal_stack and copy its
 * dedent_level into the lexer state.
 *
 * Returns the Ragel state to continue in: lex_en_regexp_modifiers when the
 * popped literal was opened by tREGEXP_BEG, lex_en_expr_end otherwise.
 */
static int pop_literal(lexer_state *state)
{
  literal popped = lit_stack_pop(&state->literal_stack);

  state->dedent_level = popped.dedent_level;

  return (popped.start_tok == tREGEXP_BEG) ? lex_en_regexp_modifiers
                                           : lex_en_expr_end;
}
|
662
|
+
|
663
|
+
/*
 * Return the final element of a Ruby Array, or Qnil when the array is empty.
 */
static VALUE array_last(VALUE array)
{
  const long count = RARRAY_LEN(array);

  return count ? rb_ary_entry(array, count - 1) : Qnil;
}
|
671
|
+
|
672
|
+
static VALUE unescape_char(char c)
|
673
|
+
{
|
674
|
+
switch (c) {
|
675
|
+
case 'a': return rb_str_new("\a", 1);
|
676
|
+
case 'b': return rb_str_new("\b", 1);
|
677
|
+
case 'e': return rb_str_new("\e", 1);
|
678
|
+
case 'f': return rb_str_new("\f", 1);
|
679
|
+
case 'n': return rb_str_new("\n", 1);
|
680
|
+
case 'r': return rb_str_new("\r", 1);
|
681
|
+
case 's': return rb_str_new(" ", 1);
|
682
|
+
case 't': return rb_str_new("\t", 1);
|
683
|
+
case 'v': return rb_str_new("\v", 1);
|
684
|
+
default: return Qnil;
|
685
|
+
}
|
686
|
+
}
|
687
|
+
|
688
|
+
/*
 * Map the first byte of `str` to its two-character backslash spelling.
 *
 * Recognised bytes are form feed, newline, carriage return, space, tab and
 * vertical tab ("\\f", "\\n", "\\r", "\\s", "\\t", "\\v").  Returns a new Ruby
 * String for those, and Qnil for any other byte or for an empty string.
 */
static VALUE escape_char(VALUE str)
{
  /* Do not read a byte that is not part of the string's contents. */
  if (RSTRING_LEN(str) == 0) {
    return Qnil;
  }

  switch (*RSTRING_PTR(str)) {
  case '\f': return rb_str_new("\\f", 2);
  case '\n': return rb_str_new("\\n", 2);
  case '\r': return rb_str_new("\\r", 2);
  case ' ':  return rb_str_new("\\s", 2);
  case '\t': return rb_str_new("\\t", 2);
  case '\v': return rb_str_new("\\v", 2);
  default:   return Qnil;
  }
}
|
702
|
+
|
703
|
+
// assumes that pattern contains only one char
|
704
|
+
static inline int str_start_with_p(VALUE str, const char *pattern)
|
705
|
+
{
|
706
|
+
if (RSTRING_LEN(str) == 0) {
|
707
|
+
return 0;
|
708
|
+
}
|
709
|
+
return (memcmp(RSTRING_PTR(str), pattern, 1) == 0);
|
710
|
+
}
|
711
|
+
|
712
|
+
// assumes that pattern contains only one char
|
713
|
+
static inline int str_end_with_p(VALUE str, const char *pattern)
|
714
|
+
{
|
715
|
+
if (RSTRING_LEN(str) == 0) {
|
716
|
+
return 0;
|
717
|
+
}
|
718
|
+
return (memcmp(RSTRING_PTR(str) + RSTRING_LEN(str) - 1, pattern, 1) == 0);
|
719
|
+
}
|
720
|
+
|
721
|
+
/*
 * Accessor definitions for lexer_state fields, generated by macros that are
 * defined elsewhere in this file.
 * NOTE(review): presumably each def_lexer_attribute(x) expands to the
 * lexer_get_x / lexer_set_x pair that Init_lexer registers, and
 * def_lexer_attr_reader(x) to lexer_get_x only -- confirm against the macro
 * definitions.
 */
def_lexer_attribute(diagnostics);
|
722
|
+
def_lexer_attribute(static_env);
|
723
|
+
def_lexer_attribute(tokens);
|
724
|
+
def_lexer_attribute(comments);
|
725
|
+
def_lexer_attribute(encoding);
|
726
|
+
|
727
|
+
def_lexer_attr_reader(source_buffer);
|
728
|
+
|
729
|
+
void Init_lexer()
|
730
|
+
{
|
731
|
+
init_symbol(k__ENCODING__);
|
732
|
+
init_symbol(k__FILE__);
|
733
|
+
init_symbol(k__LINE__);
|
734
|
+
init_symbol(kALIAS);
|
735
|
+
init_symbol(kAND);
|
736
|
+
init_symbol(kBEGIN);
|
737
|
+
init_symbol(klBEGIN);
|
738
|
+
init_symbol(kBREAK);
|
739
|
+
init_symbol(kCASE);
|
740
|
+
init_symbol(kCLASS);
|
741
|
+
init_symbol(kDEF);
|
742
|
+
init_symbol(kDEFINED);
|
743
|
+
init_symbol(kDO);
|
744
|
+
init_symbol(kDO_BLOCK);
|
745
|
+
init_symbol(kDO_COND);
|
746
|
+
init_symbol(kDO_LAMBDA);
|
747
|
+
init_symbol(kELSE);
|
748
|
+
init_symbol(kELSIF);
|
749
|
+
init_symbol(kEND);
|
750
|
+
init_symbol(klEND);
|
751
|
+
init_symbol(kENSURE);
|
752
|
+
init_symbol(kFALSE);
|
753
|
+
init_symbol(kFOR);
|
754
|
+
init_symbol(kIF);
|
755
|
+
init_symbol(kIF_MOD);
|
756
|
+
init_symbol(kIN);
|
757
|
+
init_symbol(kMODULE);
|
758
|
+
init_symbol(kNEXT);
|
759
|
+
init_symbol(kNIL);
|
760
|
+
init_symbol(kNOT);
|
761
|
+
init_symbol(kOR);
|
762
|
+
init_symbol(kREDO);
|
763
|
+
init_symbol(kRESCUE);
|
764
|
+
init_symbol(kRESCUE_MOD);
|
765
|
+
init_symbol(kRETRY);
|
766
|
+
init_symbol(kRETURN);
|
767
|
+
init_symbol(kSELF);
|
768
|
+
init_symbol(kSUPER);
|
769
|
+
init_symbol(kTHEN);
|
770
|
+
init_symbol(kTRUE);
|
771
|
+
init_symbol(kUNDEF);
|
772
|
+
init_symbol(kUNLESS);
|
773
|
+
init_symbol(kUNLESS_MOD);
|
774
|
+
init_symbol(kUNTIL);
|
775
|
+
init_symbol(kUNTIL_MOD);
|
776
|
+
init_symbol(kWHEN);
|
777
|
+
init_symbol(kWHILE);
|
778
|
+
init_symbol(kWHILE_MOD);
|
779
|
+
init_symbol(kYIELD);
|
780
|
+
|
781
|
+
init_symbol(tAMPER);
|
782
|
+
init_symbol(tAMPER2);
|
783
|
+
init_symbol(tANDDOT);
|
784
|
+
init_symbol(tANDOP);
|
785
|
+
init_symbol(tAREF);
|
786
|
+
init_symbol(tASET);
|
787
|
+
init_symbol(tASSOC);
|
788
|
+
init_symbol(tBACK_REF);
|
789
|
+
init_symbol(tBACK_REF2);
|
790
|
+
init_symbol(tBANG);
|
791
|
+
init_symbol(tCARET);
|
792
|
+
init_symbol(tCHARACTER);
|
793
|
+
init_symbol(tCMP);
|
794
|
+
init_symbol(tCOLON);
|
795
|
+
init_symbol(tCOLON2);
|
796
|
+
init_symbol(tCOLON3);
|
797
|
+
init_symbol(tCOMMA);
|
798
|
+
init_symbol(tCOMMENT);
|
799
|
+
init_symbol(tCONSTANT);
|
800
|
+
init_symbol(tCVAR);
|
801
|
+
init_symbol(tDIVIDE);
|
802
|
+
init_symbol(tDOT);
|
803
|
+
init_symbol(tDOT2);
|
804
|
+
init_symbol(tDOT3);
|
805
|
+
init_symbol(tDSTAR);
|
806
|
+
init_symbol(tEH);
|
807
|
+
init_symbol(tEQ);
|
808
|
+
init_symbol(tEQL);
|
809
|
+
init_symbol(tEQQ);
|
810
|
+
init_symbol(tFID);
|
811
|
+
init_symbol(tFLOAT);
|
812
|
+
init_symbol(tGEQ);
|
813
|
+
init_symbol(tGT);
|
814
|
+
init_symbol(tGVAR);
|
815
|
+
init_symbol(tIDENTIFIER);
|
816
|
+
init_symbol(tIMAGINARY);
|
817
|
+
init_symbol(tINTEGER);
|
818
|
+
init_symbol(tIVAR);
|
819
|
+
init_symbol(tLABEL);
|
820
|
+
init_symbol(tLABEL_END);
|
821
|
+
init_symbol(tLAMBDA);
|
822
|
+
init_symbol(tLAMBEG);
|
823
|
+
init_symbol(tLBRACE);
|
824
|
+
init_symbol(tLBRACE_ARG);
|
825
|
+
init_symbol(tLBRACK);
|
826
|
+
init_symbol(tLBRACK2);
|
827
|
+
init_symbol(tLCURLY);
|
828
|
+
init_symbol(tLEQ);
|
829
|
+
init_symbol(tLPAREN);
|
830
|
+
init_symbol(tLPAREN_ARG);
|
831
|
+
init_symbol(tLPAREN2);
|
832
|
+
init_symbol(tLSHFT);
|
833
|
+
init_symbol(tLT);
|
834
|
+
init_symbol(tMATCH);
|
835
|
+
init_symbol(tMINUS);
|
836
|
+
init_symbol(tNEQ);
|
837
|
+
init_symbol(tNL);
|
838
|
+
init_symbol(tNMATCH);
|
839
|
+
init_symbol(tNTH_REF);
|
840
|
+
init_symbol(tOP_ASGN);
|
841
|
+
init_symbol(tOROP);
|
842
|
+
init_symbol(tPERCENT);
|
843
|
+
init_symbol(tPIPE);
|
844
|
+
init_symbol(tPLUS);
|
845
|
+
init_symbol(tPOW);
|
846
|
+
init_symbol(tQWORDS_BEG);
|
847
|
+
init_symbol(tQSYMBOLS_BEG);
|
848
|
+
init_symbol(tRATIONAL);
|
849
|
+
init_symbol(tRBRACK);
|
850
|
+
init_symbol(tRCURLY);
|
851
|
+
init_symbol(tREGEXP_BEG);
|
852
|
+
init_symbol(tREGEXP_OPT);
|
853
|
+
init_symbol(tRPAREN);
|
854
|
+
init_symbol(tRSHFT);
|
855
|
+
init_symbol(tSEMI);
|
856
|
+
init_symbol(tSPACE);
|
857
|
+
init_symbol(tSTAR);
|
858
|
+
init_symbol(tSTAR2);
|
859
|
+
init_symbol(tSTRING);
|
860
|
+
init_symbol(tSTRING_BEG);
|
861
|
+
init_symbol(tSTRING_CONTENT);
|
862
|
+
init_symbol(tSTRING_DBEG);
|
863
|
+
init_symbol(tSTRING_DEND);
|
864
|
+
init_symbol(tSTRING_DVAR);
|
865
|
+
init_symbol(tSTRING_END);
|
866
|
+
init_symbol(tSYMBEG);
|
867
|
+
init_symbol(tSYMBOL);
|
868
|
+
init_symbol(tSYMBOLS_BEG);
|
869
|
+
init_symbol(tTILDE);
|
870
|
+
init_symbol(tUMINUS);
|
871
|
+
init_symbol(tUNARY_NUM);
|
872
|
+
init_symbol(tUPLUS);
|
873
|
+
init_symbol(tWORDS_BEG);
|
874
|
+
init_symbol(tXSTRING_BEG);
|
875
|
+
|
876
|
+
severity_error = ID2SYM(rb_intern("error"));
|
877
|
+
rb_gc_register_address(&severity_error);
|
878
|
+
init_symbol(fatal);
|
879
|
+
init_symbol(warning);
|
880
|
+
|
881
|
+
init_symbol(ambiguous_literal);
|
882
|
+
init_symbol(ambiguous_prefix);
|
883
|
+
init_symbol(bare_backslash);
|
884
|
+
init_symbol(character);
|
885
|
+
init_symbol(cvar_name);
|
886
|
+
init_symbol(embedded_document);
|
887
|
+
init_symbol(empty_numeric);
|
888
|
+
init_symbol(escape_eof);
|
889
|
+
init_symbol(incomplete_escape);
|
890
|
+
init_symbol(invalid_escape);
|
891
|
+
init_symbol(invalid_escape_use);
|
892
|
+
init_symbol(invalid_hex_escape);
|
893
|
+
init_symbol(invalid_octal);
|
894
|
+
init_symbol(invalid_unicode_escape);
|
895
|
+
init_symbol(ivar_name);
|
896
|
+
init_symbol(heredoc_id_ends_with_nl);
|
897
|
+
init_symbol(heredoc_id_has_newline);
|
898
|
+
init_symbol(no_dot_digit_literal);
|
899
|
+
init_symbol(prefix);
|
900
|
+
init_symbol(regexp_options);
|
901
|
+
init_symbol(string_eof);
|
902
|
+
init_symbol(trailing_in_number);
|
903
|
+
init_symbol(unexpected);
|
904
|
+
init_symbol(unexpected_percent_str);
|
905
|
+
init_symbol(unicode_point_too_large);
|
906
|
+
init_symbol(unterminated_unicode);
|
907
|
+
|
908
|
+
VALUE m_Parser = rb_define_module("Parser");
|
909
|
+
VALUE c_Lexer = rb_define_class_under(m_Parser, "CLexer", rb_cObject);
|
910
|
+
|
911
|
+
rb_define_alloc_func(c_Lexer, lexer_alloc);
|
912
|
+
|
913
|
+
rb_define_method(c_Lexer, "initialize", lexer_initialize, 1);
|
914
|
+
rb_define_method(c_Lexer, "advance", lexer_advance, 0);
|
915
|
+
rb_define_method(c_Lexer, "reset", lexer_reset, -1);
|
916
|
+
|
917
|
+
rb_define_method(c_Lexer, "push_cmdarg", lexer_push_cmdarg, 0);
|
918
|
+
rb_define_method(c_Lexer, "pop_cmdarg", lexer_pop_cmdarg, 0);
|
919
|
+
rb_define_method(c_Lexer, "push_cond", lexer_push_cond, 0);
|
920
|
+
rb_define_method(c_Lexer, "pop_cond", lexer_pop_cond, 0);
|
921
|
+
|
922
|
+
rb_define_method(c_Lexer, "push_cmdarg_state", lexer_push_cmdarg_state, 1);
|
923
|
+
rb_define_method(c_Lexer, "pop_cmdarg_state", lexer_pop_cmdarg_state, 0);
|
924
|
+
rb_define_method(c_Lexer, "lexpop_cmdarg_state", lexer_lexpop_cmdarg_state, 0);
|
925
|
+
rb_define_method(c_Lexer, "clear_cmdarg_state", lexer_clear_cmdarg_state, 0);
|
926
|
+
rb_define_method(c_Lexer, "cmdarg_state_empty?", lexer_cmdarg_state_empty_p, 0);
|
927
|
+
rb_define_method(c_Lexer, "cmdarg_state_value", lexer_cmdarg_state_value, 0);
|
928
|
+
rb_define_method(c_Lexer, "set_cmdarg_state", lexer_set_cmdarg_state, 1);
|
929
|
+
|
930
|
+
rb_define_method(c_Lexer, "push_cond_state", lexer_push_cond_state, 1);
|
931
|
+
rb_define_method(c_Lexer, "pop_cond_state", lexer_pop_cond_state, 0);
|
932
|
+
rb_define_method(c_Lexer, "lexpop_cond_state", lexer_lexpop_cond_state, 0);
|
933
|
+
rb_define_method(c_Lexer, "clear_cond_state", lexer_clear_cond_state, 0);
|
934
|
+
rb_define_method(c_Lexer, "cond_state_empty?", lexer_cond_state_empty_p, 0);
|
935
|
+
rb_define_method(c_Lexer, "cond_state_value", lexer_cond_state_value, 0);
|
936
|
+
rb_define_method(c_Lexer, "set_cond_state", lexer_set_cond_state, 1);
|
937
|
+
|
938
|
+
rb_define_method(c_Lexer, "state", lexer_get_state, 0);
|
939
|
+
rb_define_method(c_Lexer, "state=", lexer_set_state, 1);
|
940
|
+
rb_define_method(c_Lexer, "in_kwarg", lexer_get_in_kwarg, 0);
|
941
|
+
rb_define_method(c_Lexer, "in_kwarg=", lexer_set_in_kwarg, 1);
|
942
|
+
rb_define_method(c_Lexer, "diagnostics", lexer_get_diagnostics, 0);
|
943
|
+
rb_define_method(c_Lexer, "diagnostics=", lexer_set_diagnostics, 1);
|
944
|
+
rb_define_method(c_Lexer, "static_env", lexer_get_static_env, 0);
|
945
|
+
rb_define_method(c_Lexer, "static_env=", lexer_set_static_env, 1);
|
946
|
+
rb_define_method(c_Lexer, "tokens", lexer_get_tokens, 0);
|
947
|
+
rb_define_method(c_Lexer, "tokens=", lexer_set_tokens, 1);
|
948
|
+
rb_define_method(c_Lexer, "comments", lexer_get_comments, 0);
|
949
|
+
rb_define_method(c_Lexer, "comments=", lexer_set_comments, 1);
|
950
|
+
rb_define_method(c_Lexer, "encoding", lexer_get_encoding, 0);
|
951
|
+
rb_define_method(c_Lexer, "encoding=", lexer_set_encoding, 1);
|
952
|
+
rb_define_method(c_Lexer, "dedent_level", lexer_get_dedent_level, 0);
|
953
|
+
rb_define_method(c_Lexer, "source_buffer", lexer_get_source_buffer, 0);
|
954
|
+
rb_define_method(c_Lexer, "source_buffer=", lexer_set_source_buffer, 1);
|
955
|
+
rb_define_method(c_Lexer, "force_utf32=", lexer_set_force_utf32, 1);
|
956
|
+
|
957
|
+
VALUE m_Source = rb_const_get(m_Parser, rb_intern("Source"));
|
958
|
+
comment_klass = rb_const_get(m_Source, rb_intern("Comment"));
|
959
|
+
diagnostic_klass = rb_const_get(m_Parser, rb_intern("Diagnostic"));
|
960
|
+
range_klass = rb_const_get(m_Source, rb_intern("Range"));
|
961
|
+
|
962
|
+
empty_array = rb_obj_freeze(rb_ary_new2(0));
|
963
|
+
rb_gc_register_address(&empty_array);
|
964
|
+
blank_string = rb_obj_freeze(rb_str_new2(""));
|
965
|
+
rb_gc_register_address(&blank_string);
|
966
|
+
newline = rb_obj_freeze(rb_str_new2("\n"));
|
967
|
+
rb_gc_register_address(&newline);
|
968
|
+
escaped_newline = rb_obj_freeze(rb_str_new2("\\\n"));
|
969
|
+
rb_gc_register_address(&escaped_newline);
|
970
|
+
|
971
|
+
if (rb_const_defined(rb_cObject, rb_intern("Encoding"))) {
|
972
|
+
VALUE encoding = rb_const_get(rb_cObject, rb_intern("Encoding"));
|
973
|
+
utf8_encoding = rb_const_get(encoding, rb_intern("UTF_8"));
|
974
|
+
rb_gc_register_address(&utf8_encoding);
|
975
|
+
} else {
|
976
|
+
utf8_encoding = Qnil;
|
977
|
+
}
|
978
|
+
|
979
|
+
VALUE regex_str = rb_str_new2("\\r.*$");
|
980
|
+
cr_then_anything_to_eol = rb_class_new_instance(1, ®ex_str, rb_cRegexp);
|
981
|
+
rb_gc_register_address(&cr_then_anything_to_eol);
|
982
|
+
regex_str = rb_str_new2("\\r+$");
|
983
|
+
crs_to_eol = rb_class_new_instance(1, ®ex_str, rb_cRegexp);
|
984
|
+
rb_gc_register_address(&crs_to_eol);
|
985
|
+
}
|
986
|
+
|
987
|
+
%%{
|
988
|
+
alphtype int;
|
989
|
+
getkey (get_codepoint(state, p));
|
990
|
+
|
991
|
+
prepush {
  /*
   * Grow the state stack before Ragel pushes onto it.  When the stack is
   * full its capacity is doubled (or set to 1 if it was 0, so a zero-sized
   * stack can still grow).  REALLOC_N keeps the existing entries and checks
   * the element-count * element-size multiplication for overflow.
   * Ragel's local `stack` pointer is refreshed because the buffer may move.
   */
  if (state->cs_stack_top == state->cs_stack_size) {
    int grown_size = 1;

    if (state->cs_stack_size > 0) {
      grown_size = state->cs_stack_size * 2;
    }

    REALLOC_N(state->cs_stack, int, grown_size);
    stack = state->cs_stack;
    state->cs_stack_size = grown_size;
  }
}
|
1001
|
+
|
1002
|
+
action do_nl { state->newline_s = p; }
|
1003
|
+
|
1004
|
+
c_nl = '\n' $ do_nl;
|
1005
|
+
c_space = [ \t\r\f\v];
|
1006
|
+
c_space_nl = c_space | c_nl;
|
1007
|
+
|
1008
|
+
c_eof = 0x04 | 0x1a | 0 | zlen; # ^D, ^Z, \0, EOF
|
1009
|
+
c_eol = c_nl | c_eof;
|
1010
|
+
c_any = any - c_eof;
|
1011
|
+
|
1012
|
+
c_nl_zlen = c_nl | zlen;
|
1013
|
+
c_line = any - c_nl_zlen;
|
1014
|
+
|
1015
|
+
c_unicode = c_any - 0x00..0x7f;
|
1016
|
+
c_upper = [A-Z];
|
1017
|
+
c_lower = [a-z_] | c_unicode;
|
1018
|
+
c_alpha = c_lower | c_upper;
|
1019
|
+
c_alnum = c_alpha | [0-9];
|
1020
|
+
|
1021
|
+
action do_eof { fhold; fbreak; }
|
1022
|
+
|
1023
|
+
operator_fname = '[]' | '[]=' | '`' | '-@' | '+@' | '~@' | '!@' ;
|
1024
|
+
operator_arithmetic = '&' | '|' | '&&' | '||' | '^' | '+' | '-' |
|
1025
|
+
'*' | '/' | '**' | '~' | '<<' | '>>' | '%' ;
|
1026
|
+
operator_rest = '=~' | '!~' | '==' | '!=' | '!' | '===' |
|
1027
|
+
'<' | '<=' | '>' | '>=' | '<=>' | '=>' ;
|
1028
|
+
|
1029
|
+
punctuation_begin = '-' | '+' | '::' | '(' | '[' |
|
1030
|
+
'*' | '**' | '&' ;
|
1031
|
+
punctuation_end = ',' | '=' | '->' | '(' | '[' | ']' |
|
1032
|
+
'::' | '?' | ':' | '.' | '..' | '...' ;
|
1033
|
+
|
1034
|
+
keyword_modifier = 'if' | 'unless' | 'while' | 'until' | 'rescue' ;
|
1035
|
+
keyword_with_arg = 'yield' | 'super' | 'not' | 'defined?' ;
|
1036
|
+
keyword_with_fname = 'def' | 'undef' | 'alias' ;
|
1037
|
+
keyword_with_value = 'else' | 'case' | 'ensure' | 'module' | 'elsif' | 'then' |
|
1038
|
+
'for' | 'in' | 'do' | 'when' | 'begin' | 'class' |
|
1039
|
+
'and' | 'or' ;
|
1040
|
+
keyword_with_mid = 'rescue' | 'return' | 'break' | 'next' ;
|
1041
|
+
keyword_with_end = 'end' | 'self' | 'true' | 'false' | 'retry' |
|
1042
|
+
'redo' | 'nil' | 'BEGIN' | 'END' | '__FILE__' |
|
1043
|
+
'__LINE__' | '__ENCODING__';
|
1044
|
+
|
1045
|
+
keyword = keyword_with_value | keyword_with_mid |
|
1046
|
+
keyword_with_end | keyword_with_arg |
|
1047
|
+
keyword_with_fname | keyword_modifier ;
|
1048
|
+
|
1049
|
+
constant = c_upper c_alnum*;
|
1050
|
+
bareword = c_alpha c_alnum*;
|
1051
|
+
|
1052
|
+
call_or_var = c_lower c_alnum*;
|
1053
|
+
class_var = '@@' bareword;
|
1054
|
+
instance_var = '@' bareword;
|
1055
|
+
global_var = '$'
|
1056
|
+
( bareword | digit+
|
1057
|
+
| [`'+~*$&?!@/\\;,.=:<>"] # '
|
1058
|
+
| '-' c_alnum
|
1059
|
+
)
|
1060
|
+
;
|
1061
|
+
|
1062
|
+
class_var_v = '@@' c_alnum+;
|
1063
|
+
instance_var_v = '@' c_alnum+;
|
1064
|
+
|
1065
|
+
label = bareword [?!]? ':';
|
1066
|
+
|
1067
|
+
int_hex = ( xdigit+ '_' )* xdigit* '_'? ;
|
1068
|
+
int_dec = ( digit+ '_' )* digit* '_'? ;
|
1069
|
+
int_bin = ( [01]+ '_' )* [01]* '_'? ;
|
1070
|
+
|
1071
|
+
flo_int = [1-9] [0-9]* ( '_' digit+ )* | '0';
|
1072
|
+
flo_frac = '.' ( digit+ '_' )* digit+;
|
1073
|
+
flo_pow = [eE] [+\-]? ( digit+ '_' )* digit+;
|
1074
|
+
|
1075
|
+
int_suffix =
|
1076
|
+
'' % { num_xfrm = emit_int; numeric_s = ts; }
|
1077
|
+
| 'r' % { num_xfrm = emit_rational; numeric_s = ts; }
|
1078
|
+
| 'i' % { num_xfrm = emit_complex; numeric_s = ts; }
|
1079
|
+
| 'ri' % { num_xfrm = emit_complex_rational; numeric_s = ts; }
|
1080
|
+
| 'if' % { num_xfrm = emit_int_followed_by_if; numeric_s = ts; p -= 2; }
|
1081
|
+
| 'rescue' % { num_xfrm = emit_int_followed_by_rescue; numeric_s = ts; p -= 6; } ;
|
1082
|
+
|
1083
|
+
flo_pow_suffix =
|
1084
|
+
'' % { num_xfrm = emit_float; numeric_s = ts; }
|
1085
|
+
| 'i' % { num_xfrm = emit_complex_float; numeric_s = ts; }
|
1086
|
+
| 'if' % { num_xfrm = emit_float_followed_by_if; numeric_s = ts; p -= 2; };
|
1087
|
+
|
1088
|
+
flo_suffix =
|
1089
|
+
flo_pow_suffix
|
1090
|
+
| 'r' % { num_xfrm = emit_rational; numeric_s = ts; }
|
1091
|
+
| 'ri' % { num_xfrm = emit_complex_rational; numeric_s = ts; }
|
1092
|
+
| 'rescue' % { num_xfrm = emit_float_followed_by_rescue; numeric_s = ts; p -= 6; };
|
1093
|
+
|
1094
|
+
escaped_nl = "\\" c_nl;
|
1095
|
+
|
1096
|
+
action unicode_points {
|
1097
|
+
state->escape = rb_str_new2("");
|
1098
|
+
|
1099
|
+
VALUE codepoints = tok(state, state->escape_s + 2, p - 1);
|
1100
|
+
long codepoint_s = state->escape_s + 2;
|
1101
|
+
|
1102
|
+
VALUE regexp;
|
1103
|
+
|
1104
|
+
if (state->version < 24) {
|
1105
|
+
if (str_start_with_p(codepoints, " ") || str_start_with_p(codepoints, "\t")) {
|
1106
|
+
diagnostic(state, severity_error, invalid_unicode_escape, Qnil,
|
1107
|
+
range(state, state->escape_s + 2, state->escape_s + 3), empty_array);
|
1108
|
+
}
|
1109
|
+
|
1110
|
+
regexp = rb_reg_regcomp(rb_str_new2("[ \\t]{2}"));
|
1111
|
+
VALUE space_p = rb_funcall(codepoints, rb_intern("index"), 1, regexp);
|
1112
|
+
|
1113
|
+
if (RTEST(space_p)) {
|
1114
|
+
diagnostic(state, severity_error, invalid_unicode_escape, Qnil,
|
1115
|
+
range(state, codepoint_s + NUM2INT(space_p) + 1, codepoint_s + NUM2INT(space_p) + 1), empty_array);
|
1116
|
+
}
|
1117
|
+
|
1118
|
+
if (str_end_with_p(codepoints, " ") || str_end_with_p(codepoints, "\t")) {
|
1119
|
+
diagnostic(state, severity_error, invalid_unicode_escape, Qnil,
|
1120
|
+
range(state, p - 1, p), empty_array);
|
1121
|
+
}
|
1122
|
+
}
|
1123
|
+
|
1124
|
+
regexp = rb_reg_regcomp(rb_str_new2("([0-9a-fA-F]+)|([ \t]+)"));
|
1125
|
+
|
1126
|
+
VALUE matches = rb_funcall(codepoints, rb_intern("scan"), 1, regexp);
|
1127
|
+
long len = RARRAY_LEN(matches);
|
1128
|
+
|
1129
|
+
for (long i = 0; i < len; i++) {
|
1130
|
+
VALUE match = RARRAY_AREF(matches, i);
|
1131
|
+
VALUE codepoint_str = RARRAY_AREF(match, 0);
|
1132
|
+
VALUE spaces = RARRAY_AREF(match, 1);
|
1133
|
+
|
1134
|
+
if (RTEST(spaces)) {
|
1135
|
+
codepoint_s += RSTRING_LEN(spaces);
|
1136
|
+
} else {
|
1137
|
+
VALUE codepoint = rb_str_to_inum(codepoint_str, 16, 0);
|
1138
|
+
if (NUM2INT(codepoint) >= 0x110000) {
|
1139
|
+
diagnostic(state, severity_error, unicode_point_too_large, Qnil,
|
1140
|
+
range(state, codepoint_s, codepoint_s + RSTRING_LEN(codepoint_str)), empty_array);
|
1141
|
+
break;
|
1142
|
+
}
|
1143
|
+
|
1144
|
+
codepoint = rb_funcall(codepoint, rb_intern("chr"), 1, utf8_encoding);
|
1145
|
+
state->escape = rb_str_plus(state->escape, codepoint);
|
1146
|
+
codepoint_s += RSTRING_LEN(codepoint_str);
|
1147
|
+
}
|
1148
|
+
}
|
1149
|
+
}
|
1150
|
+
|
1151
|
+
action unescape_char {
  /*
   * Resolve the character just before `p` (the one following a backslash)
   * into state->escape.
   *
   * The codepoint is kept as an int and only handed to unescape_char() when
   * it is in the ASCII range.  Narrowing it to `char` first would let a
   * non-ASCII codepoint whose low byte happens to be an escape letter
   * (e.g. U+0161, low byte 'a') be treated as that escape.
   *
   * When there is no escape mapping, the character itself is taken from the
   * source buffer and tagged with the lexer's encoding.
   */
  int escaped_codepoint = NUM2INT(rb_ary_entry(state->source_pts, p - 1));

  if (escaped_codepoint >= 0 && escaped_codepoint <= 0x7f) {
    state->escape = unescape_char((char) escaped_codepoint);
  } else {
    state->escape = Qnil;
  }

  if (state->escape == Qnil) {
    VALUE codepoint = rb_funcall(state->source_buffer, rb_intern("slice"), 1, INT2NUM(p - 1));
    state->escape = codepoint;
    force_encoding(codepoint, state->encoding);
  }
}
|
1161
|
+
|
1162
|
+
action invalid_complex_escape {
|
1163
|
+
diagnostic(state, fatal, invalid_escape, Qnil, range(state, ts, te),
|
1164
|
+
empty_array);
|
1165
|
+
}
|
1166
|
+
|
1167
|
+
action slash_c_char {
|
1168
|
+
char c = *RSTRING_PTR(state->escape) & 0x9f;
|
1169
|
+
state->escape = rb_str_new(&c, 1);
|
1170
|
+
force_encoding(state->escape, state->encoding);
|
1171
|
+
}
|
1172
|
+
|
1173
|
+
action slash_m_char {
|
1174
|
+
char c = *RSTRING_PTR(state->escape) | 0x80;
|
1175
|
+
state->escape = rb_str_new(&c, 1);
|
1176
|
+
force_encoding(state->escape, state->encoding);
|
1177
|
+
}
|
1178
|
+
|
1179
|
+
maybe_escaped_char = (
|
1180
|
+
'\\' c_any %unescape_char
|
1181
|
+
| ( c_any - [\\] ) % { state->escape = rb_str_substr(state->source, p - 1, 1); }
|
1182
|
+
);
|
1183
|
+
|
1184
|
+
maybe_escaped_ctrl_char = (
|
1185
|
+
'\\' c_any %unescape_char %slash_c_char
|
1186
|
+
| '?' % { state->escape = rb_str_new2("\x7f"); }
|
1187
|
+
| ( c_any - [\\?] ) % { state->escape = rb_str_substr(state->source, p - 1, 1); } %slash_c_char
|
1188
|
+
);
|
1189
|
+
|
1190
|
+
escape = (
|
1191
|
+
[0-7]{1,3} % {
|
1192
|
+
VALUE token = tok(state, state->escape_s, p);
|
1193
|
+
char c = NUM2INT(rb_str_to_inum(token, 8, 0));
|
1194
|
+
c = c % 0x100;
|
1195
|
+
state->escape = rb_str_new(&c, 1);
|
1196
|
+
force_encoding(state->escape, state->encoding);
|
1197
|
+
}
|
1198
|
+
|
1199
|
+
| 'x' xdigit{1,2} % {
|
1200
|
+
VALUE token = tok(state, state->escape_s + 1, p);
|
1201
|
+
char c = NUM2INT(rb_str_to_inum(token, 16, 0));
|
1202
|
+
state->escape = rb_str_new(&c, 1);
|
1203
|
+
force_encoding(state->escape, state->encoding);
|
1204
|
+
}
|
1205
|
+
|
1206
|
+
| 'x' ( c_any - xdigit )
|
1207
|
+
% {
|
1208
|
+
diagnostic(state, fatal, invalid_hex_escape, Qnil,
|
1209
|
+
range(state, state->escape_s - 1, p + 2), empty_array);
|
1210
|
+
}
|
1211
|
+
|
1212
|
+
| 'u' xdigit{4} % {
|
1213
|
+
VALUE token = tok(state, state->escape_s + 1, p);
|
1214
|
+
int i = NUM2INT(rb_str_to_inum(token, 16, 0));
|
1215
|
+
state->escape = rb_enc_uint_chr(i, rb_to_encoding(utf8_encoding));
|
1216
|
+
}
|
1217
|
+
|
1218
|
+
| 'u' xdigit{0,3} % {
|
1219
|
+
diagnostic(state, fatal, invalid_unicode_escape, Qnil,
|
1220
|
+
range(state, state->escape_s - 1, p), empty_array);
|
1221
|
+
}
|
1222
|
+
|
1223
|
+
| 'u{' ( c_any - xdigit - [ \t}] )* '}' % {
|
1224
|
+
diagnostic(state, fatal, invalid_unicode_escape, Qnil,
|
1225
|
+
range(state, state->escape_s - 1, p), empty_array);
|
1226
|
+
}
|
1227
|
+
|
1228
|
+
| 'u{' [ \t]* ( xdigit{1,6} [ \t]+ )*
|
1229
|
+
(
|
1230
|
+
( xdigit{1,6} [ \t]* '}'
|
1231
|
+
%unicode_points
|
1232
|
+
)
|
1233
|
+
|
|
1234
|
+
( xdigit* ( c_any - xdigit - [ \t}] )+ '}'
|
1235
|
+
| ( c_any - [ \t}] )* c_eof
|
1236
|
+
| xdigit{7,}
|
1237
|
+
) % {
|
1238
|
+
diagnostic(state, fatal, invalid_unicode_escape, Qnil,
|
1239
|
+
range(state, p - 1, p), empty_array);
|
1240
|
+
}
|
1241
|
+
)
|
1242
|
+
|
1243
|
+
| ( 'C-' | 'c' ) escaped_nl?
|
1244
|
+
maybe_escaped_ctrl_char
|
1245
|
+
|
1246
|
+
| 'M-' escaped_nl?
|
1247
|
+
maybe_escaped_char
|
1248
|
+
%slash_m_char
|
1249
|
+
|
1250
|
+
| ( ( 'C-' | 'c' ) escaped_nl? '\\M-'
|
1251
|
+
| 'M-\\' escaped_nl? ( 'C-' | 'c' ) ) escaped_nl?
|
1252
|
+
maybe_escaped_ctrl_char
|
1253
|
+
%slash_m_char
|
1254
|
+
|
1255
|
+
| 'C' c_any %invalid_complex_escape
|
1256
|
+
| 'M' c_any %invalid_complex_escape
|
1257
|
+
| ( 'M-\\C' | 'C-\\M' ) c_any %invalid_complex_escape
|
1258
|
+
|
1259
|
+
| ( c_any - [0-7xuCMc] ) %unescape_char
|
1260
|
+
|
1261
|
+
| c_eof % {
|
1262
|
+
diagnostic(state, fatal, escape_eof, Qnil, range(state, p - 1, p),
|
1263
|
+
empty_array);
|
1264
|
+
}
|
1265
|
+
);
|
1266
|
+
|
1267
|
+
e_bs = '\\' % {
|
1268
|
+
state->escape_s = p;
|
1269
|
+
state->escape = Qnil;
|
1270
|
+
};
|
1271
|
+
|
1272
|
+
e_heredoc_nl = c_nl % {
|
1273
|
+
if (state->herebody_s) {
|
1274
|
+
p = state->herebody_s;
|
1275
|
+
state->herebody_s = 0;
|
1276
|
+
}
|
1277
|
+
};
|
1278
|
+
|
1279
|
+
action extend_string {
|
1280
|
+
VALUE string = tok(state, ts, te);
|
1281
|
+
VALUE lookahead = Qnil;
|
1282
|
+
|
1283
|
+
if (state->version >= 22 && !stack_state_active(&state->cond)) {
|
1284
|
+
lookahead = tok(state, te, te + 2);
|
1285
|
+
}
|
1286
|
+
|
1287
|
+
literal *current_literal = lit_stack_top(&state->literal_stack);
|
1288
|
+
|
1289
|
+
if (!current_literal->heredoc_e &&
|
1290
|
+
literal_nest_and_try_closing(current_literal, string, ts, te, lookahead)) {
|
1291
|
+
VALUE token = array_last(state->token_queue);
|
1292
|
+
if (rb_ary_entry(token, 0) == tLABEL_END) {
|
1293
|
+
p += 1;
|
1294
|
+
pop_literal(state);
|
1295
|
+
fnext expr_labelarg;
|
1296
|
+
} else {
|
1297
|
+
fnext *pop_literal(state);
|
1298
|
+
}
|
1299
|
+
|
1300
|
+
fbreak;
|
1301
|
+
} else {
|
1302
|
+
literal_extend_string(current_literal, string, ts, te);
|
1303
|
+
}
|
1304
|
+
}
|
1305
|
+
|
1306
|
+
action extend_string_escaped {
|
1307
|
+
literal *current_literal = lit_stack_top(&state->literal_stack);
|
1308
|
+
VALUE escaped_char = rb_str_substr(state->source, state->escape_s, 1);
|
1309
|
+
|
1310
|
+
if (literal_munge_escape_p(current_literal, escaped_char)) {
|
1311
|
+
if (literal_regexp_p(current_literal) && is_regexp_metachar(escaped_char)) {
|
1312
|
+
literal_extend_string(current_literal, tok(state, ts, te), ts, te);
|
1313
|
+
} else {
|
1314
|
+
literal_extend_string(current_literal, escaped_char, ts, te);
|
1315
|
+
}
|
1316
|
+
} else {
|
1317
|
+
if (literal_regexp_p(current_literal)) {
|
1318
|
+
VALUE token = tok(state, ts, te);
|
1319
|
+
rb_funcall(token, rb_intern("gsub!"), 2, escaped_newline, blank_string);
|
1320
|
+
literal_extend_string(current_literal, token, ts, te);
|
1321
|
+
} else if (literal_heredoc_p(current_literal) && newline_char_p(escaped_char)) {
|
1322
|
+
if (literal_squiggly_heredoc_p(current_literal)) {
|
1323
|
+
literal_extend_string(current_literal, tok(state, ts, te), ts, te);
|
1324
|
+
} else {
|
1325
|
+
VALUE token = tok(state, ts, te);
|
1326
|
+
rb_funcall(token, rb_intern("gsub!"), 2, escaped_newline, blank_string);
|
1327
|
+
literal_extend_string(current_literal, token, ts, te);
|
1328
|
+
}
|
1329
|
+
} else if (state->escape == Qnil) {
|
1330
|
+
literal_extend_string(current_literal, tok(state, ts, te), ts, te);
|
1331
|
+
} else {
|
1332
|
+
literal_extend_string(current_literal, state->escape, ts, te);
|
1333
|
+
}
|
1334
|
+
}
|
1335
|
+
}
|
1336
|
+
|
1337
|
+
action extend_string_eol {
|
1338
|
+
literal *current_literal = lit_stack_top(&state->literal_stack);
|
1339
|
+
long str_s = current_literal->str_s;
|
1340
|
+
|
1341
|
+
if (te == pe) {
|
1342
|
+
diagnostic(state, fatal, string_eof, Qnil,
|
1343
|
+
range(state, str_s, str_s + 1), empty_array);
|
1344
|
+
}
|
1345
|
+
|
1346
|
+
if (literal_heredoc_p(current_literal)) {
|
1347
|
+
VALUE line = tok(state, state->herebody_s, ts);
|
1348
|
+
rb_funcall(line, rb_intern("gsub!"), 2, crs_to_eol, blank_string);
|
1349
|
+
|
1350
|
+
if (state->version >= 18 && state->version <= 20) {
|
1351
|
+
rb_funcall(line, rb_intern("gsub!"), 2, cr_then_anything_to_eol, blank_string);
|
1352
|
+
}
|
1353
|
+
|
1354
|
+
if (literal_nest_and_try_closing(current_literal, line, state->herebody_s, ts, Qnil)) {
|
1355
|
+
state->herebody_s = te;
|
1356
|
+
p = current_literal->heredoc_e - 1;
|
1357
|
+
fnext *pop_literal(state); fbreak;
|
1358
|
+
} else {
|
1359
|
+
literal_infer_indent_level(current_literal, line);
|
1360
|
+
state->herebody_s = te;
|
1361
|
+
}
|
1362
|
+
} else {
|
1363
|
+
if (literal_nest_and_try_closing(current_literal, tok(state, ts, te), ts, te, Qnil)) {
|
1364
|
+
fnext *pop_literal(state); fbreak;
|
1365
|
+
}
|
1366
|
+
|
1367
|
+
if (state->herebody_s) {
|
1368
|
+
p = state->herebody_s - 1;
|
1369
|
+
state->herebody_s = 0;
|
1370
|
+
}
|
1371
|
+
}
|
1372
|
+
|
1373
|
+
if (literal_words_p(current_literal) && !eof_codepoint(get_codepoint(state, p))) {
|
1374
|
+
literal_extend_space(current_literal, ts, te);
|
1375
|
+
} else {
|
1376
|
+
literal_extend_string(current_literal, tok(state, ts, te), ts, te);
|
1377
|
+
literal_flush_string(current_literal);
|
1378
|
+
}
|
1379
|
+
}
|
1380
|
+
|
1381
|
+
action extend_string_space {
|
1382
|
+
literal *current_literal = lit_stack_top(&state->literal_stack);
|
1383
|
+
literal_extend_space(current_literal, ts, te);
|
1384
|
+
}
|
1385
|
+
|
1386
|
+
interp_var = '#' ( global_var | class_var_v | instance_var_v );
|
1387
|
+
|
1388
|
+
action extend_interp_var {
|
1389
|
+
literal *current_literal = lit_stack_top(&state->literal_stack);
|
1390
|
+
literal_flush_string(current_literal);
|
1391
|
+
literal_extend_content(current_literal);
|
1392
|
+
|
1393
|
+
emit_token(state, tSTRING_DVAR, Qnil, ts, ts + 1);
|
1394
|
+
|
1395
|
+
p = ts;
|
1396
|
+
fcall expr_variable;
|
1397
|
+
}
|
1398
|
+
|
1399
|
+
interp_code = '#{';
|
1400
|
+
|
1401
|
+
e_lbrace = '{' % {
|
1402
|
+
stack_state_push(&state->cond, 0);
|
1403
|
+
stack_state_push(&state->cmdarg, 0);
|
1404
|
+
|
1405
|
+
literal *current_literal = lit_stack_top(&state->literal_stack);
|
1406
|
+
if (current_literal != NULL) {
|
1407
|
+
literal_start_interp_brace(current_literal);
|
1408
|
+
}
|
1409
|
+
};
|
1410
|
+
|
1411
|
+
e_rbrace = '}' % {
|
1412
|
+
literal *current_literal = lit_stack_top(&state->literal_stack);
|
1413
|
+
if (current_literal != NULL) {
|
1414
|
+
if (literal_end_interp_brace_and_try_closing(current_literal)) {
|
1415
|
+
if (state->version == 18 || state->version == 19) {
|
1416
|
+
emit_token(state, tRCURLY, rb_str_new2("}"), p - 1, p);
|
1417
|
+
if (state->version < 24) {
|
1418
|
+
stack_state_lexpop(&state->cond);
|
1419
|
+
stack_state_lexpop(&state->cmdarg);
|
1420
|
+
} else {
|
1421
|
+
stack_state_pop(&state->cond);
|
1422
|
+
stack_state_pop(&state->cmdarg);
|
1423
|
+
}
|
1424
|
+
} else {
|
1425
|
+
emit_token(state, tSTRING_DEND, rb_str_new2("}"), p - 1, p);
|
1426
|
+
}
|
1427
|
+
|
1428
|
+
if (current_literal->herebody_s) {
|
1429
|
+
state->herebody_s = current_literal->herebody_s;
|
1430
|
+
}
|
1431
|
+
|
1432
|
+
fhold;
|
1433
|
+
fnext *next_state_for_literal(current_literal);
|
1434
|
+
fbreak;
|
1435
|
+
}
|
1436
|
+
}
|
1437
|
+
};
|
1438
|
+
|
1439
|
+
action extend_interp_code {
|
1440
|
+
literal *current_literal = lit_stack_top(&state->literal_stack);
|
1441
|
+
literal_flush_string(current_literal);
|
1442
|
+
literal_extend_content(current_literal);
|
1443
|
+
|
1444
|
+
emit_token(state, tSTRING_DBEG, rb_str_new2("#{"), ts, te);
|
1445
|
+
|
1446
|
+
if (current_literal->heredoc_e) {
|
1447
|
+
current_literal->herebody_s = state->herebody_s;
|
1448
|
+
state->herebody_s = 0;
|
1449
|
+
}
|
1450
|
+
|
1451
|
+
literal_start_interp_brace(current_literal);
|
1452
|
+
fnext expr_value;
|
1453
|
+
fbreak;
|
1454
|
+
}
|
1455
|
+
|
1456
|
+
interp_words := |*
|
1457
|
+
interp_code => extend_interp_code;
|
1458
|
+
interp_var => extend_interp_var;
|
1459
|
+
e_bs escape => extend_string_escaped;
|
1460
|
+
c_space+ => extend_string_space;
|
1461
|
+
c_eol => extend_string_eol;
|
1462
|
+
c_any => extend_string;
|
1463
|
+
*|;
|
1464
|
+
|
1465
|
+
interp_string := |*
|
1466
|
+
interp_code => extend_interp_code;
|
1467
|
+
interp_var => extend_interp_var;
|
1468
|
+
e_bs escape => extend_string_escaped;
|
1469
|
+
c_eol => extend_string_eol;
|
1470
|
+
c_any => extend_string;
|
1471
|
+
*|;
|
1472
|
+
|
1473
|
+
plain_words := |*
|
1474
|
+
e_bs c_any => extend_string_escaped;
|
1475
|
+
c_space+ => extend_string_space;
|
1476
|
+
c_eol => extend_string_eol;
|
1477
|
+
c_any => extend_string;
|
1478
|
+
*|;
|
1479
|
+
|
1480
|
+
plain_string := |*
|
1481
|
+
'\\' c_nl => extend_string_eol;
|
1482
|
+
e_bs c_any => extend_string_escaped;
|
1483
|
+
c_eol => extend_string_eol;
|
1484
|
+
c_any => extend_string;
|
1485
|
+
*|;
|
1486
|
+
|
1487
|
+
interp_backslash_delimited := |*
|
1488
|
+
interp_code => extend_interp_code;
|
1489
|
+
interp_var => extend_interp_var;
|
1490
|
+
c_eol => extend_string_eol;
|
1491
|
+
c_any => extend_string;
|
1492
|
+
*|;
|
1493
|
+
|
1494
|
+
plain_backslash_delimited := |*
|
1495
|
+
c_eol => extend_string_eol;
|
1496
|
+
c_any => extend_string;
|
1497
|
+
*|;
|
1498
|
+
|
1499
|
+
interp_backslash_delimited_words := |*
|
1500
|
+
interp_code => extend_interp_code;
|
1501
|
+
interp_var => extend_interp_var;
|
1502
|
+
c_space+ => extend_string_space;
|
1503
|
+
c_eol => extend_string_eol;
|
1504
|
+
c_any => extend_string;
|
1505
|
+
*|;
|
1506
|
+
|
1507
|
+
plain_backslash_delimited_words := |*
|
1508
|
+
c_space+ => extend_string_space;
|
1509
|
+
c_eol => extend_string_eol;
|
1510
|
+
c_any => extend_string;
|
1511
|
+
*|;
|
1512
|
+
|
1513
|
+
regexp_modifiers := |*
|
1514
|
+
[A-Za-z]+
|
1515
|
+
=> {
|
1516
|
+
/* A run of letters is taken as the regexp option string. Options rejected
 * by find_unknown_options are reported through an error diagnostic; the
 * tREGEXP_OPT token is emitted after that call either way. */
VALUE unknown_options = find_unknown_options(tok(state, ts, te));
|
1517
|
+
|
1518
|
+
if (unknown_options != Qnil) {
|
1519
|
+
VALUE hash = rb_hash_new();
|
1520
|
+
rb_hash_aset(hash, ID2SYM(rb_intern("options")), unknown_options);
|
1521
|
+
diagnostic(state, severity_error, regexp_options, hash,
|
1522
|
+
range(state, ts, te), empty_array);
|
1523
|
+
}
|
1524
|
+
|
1525
|
+
emit(tREGEXP_OPT);
|
1526
|
+
fnext expr_end;
|
1527
|
+
fbreak;
|
1528
|
+
};
|
1529
|
+
|
1530
|
+
any
|
1531
|
+
=> {
|
1532
|
+
/* Any other character: emit tREGEXP_OPT for the span ts to te - 1, which
 * excludes the character just matched, then back p up by one and jump to
 * expr_end so that character is scanned again there. */
emit_token(state, tREGEXP_OPT, tok(state, ts, te - 1), ts, te - 1);
|
1533
|
+
fhold;
|
1534
|
+
fgoto expr_end;
|
1535
|
+
};
|
1536
|
+
*|;
|
1537
|
+
|
1538
|
+
w_space =
|
1539
|
+
c_space+
|
1540
|
+
| '\\' e_heredoc_nl
|
1541
|
+
;
|
1542
|
+
|
1543
|
+
w_comment =
|
1544
|
+
'#' %{ sharp_s = p - 1; }
|
1545
|
+
c_line* %{ emit_comment(state, sharp_s, p == pe ? p - 2 : p); }
|
1546
|
+
;
|
1547
|
+
|
1548
|
+
w_space_comment =
|
1549
|
+
w_space
|
1550
|
+
| w_comment
|
1551
|
+
;
|
1552
|
+
|
1553
|
+
w_newline =
|
1554
|
+
e_heredoc_nl;
|
1555
|
+
|
1556
|
+
w_any =
|
1557
|
+
w_space
|
1558
|
+
| w_comment
|
1559
|
+
| w_newline
|
1560
|
+
;
|
1561
|
+
|
1562
|
+
ambiguous_fid_suffix =
|
1563
|
+
[?!] %{ tm = p; } |
|
1564
|
+
[?!]'=' %{ tm = p - 2; }
|
1565
|
+
;
|
1566
|
+
|
1567
|
+
ambiguous_ident_suffix =
|
1568
|
+
ambiguous_fid_suffix |
|
1569
|
+
'=' %{ tm = p; } |
|
1570
|
+
'==' %{ tm = p - 2; } |
|
1571
|
+
'=~' %{ tm = p - 2; } |
|
1572
|
+
'=>' %{ tm = p - 2; } |
|
1573
|
+
'===' %{ tm = p - 3; }
|
1574
|
+
;
|
1575
|
+
|
1576
|
+
ambiguous_symbol_suffix =
|
1577
|
+
ambiguous_ident_suffix |
|
1578
|
+
'==>' %{ tm = p - 2; }
|
1579
|
+
;
|
1580
|
+
|
1581
|
+
ambiguous_const_suffix =
|
1582
|
+
'::' %{ tm = p - 2; }
|
1583
|
+
;
|
1584
|
+
|
1585
|
+
e_lbrack = '[' % {
/* Leaving action for an opening square bracket: push 0 onto both the cond
 * and the cmdarg state stacks. */
|
1586
|
+
stack_state_push(&state->cond, 0);
|
1587
|
+
stack_state_push(&state->cmdarg, 0);
|
1588
|
+
};
|
1589
|
+
|
1590
|
+
e_lparen = '(' % {
/* Leaving action for an opening parenthesis: push 0 onto the cond and
 * cmdarg state stacks, then go one parenthesis level deeper. paren_nest is
 * compared with the top of lambda_stack elsewhere in this grammar. */
|
1591
|
+
stack_state_push(&state->cond, 0);
|
1592
|
+
stack_state_push(&state->cmdarg, 0);
|
1593
|
+
state->paren_nest++;
|
1594
|
+
};
|
1595
|
+
|
1596
|
+
e_rparen = ')' % {
/* Leaving action for a closing parenthesis: go one parenthesis level up. */
|
1597
|
+
state->paren_nest--;
|
1598
|
+
};
|
1599
|
+
|
1600
|
+
action local_ident {
/* Shared scanner action for a bare identifier: emit tIDENTIFIER for the
 * matched span, then choose the next state depending on whether
 * STATIC_ENV_DECLARED accepts the identifier text. */
|
1601
|
+
VALUE str = tok(state, ts, te);
|
1602
|
+
emit(tIDENTIFIER);
|
1603
|
+
|
1604
|
+
if (STATIC_ENV_DECLARED(str)) {
|
1605
|
+
/* Declared name: continue in expr_endfn. */
fnext expr_endfn; fbreak;
|
1606
|
+
} else {
|
1607
|
+
/* Otherwise the state is picked by arg_or_cmdarg from command_state. */
fnext *arg_or_cmdarg(command_state); fbreak;
|
1608
|
+
}
|
1609
|
+
}
|
1610
|
+
|
1611
|
+
expr_variable := |*
|
1612
|
+
global_var => {
|
1613
|
+
/* Classify the matched global variable text into tNTH_REF, tBACK_REF or
 * tGVAR using the is_nthref and is_backref helpers. */
VALUE str = tok(state, ts, te);
|
1614
|
+
|
1615
|
+
if (is_nthref(str)) {
|
1616
|
+
/* The token value is the text after the first character, parsed as a
 * base-10 integer. */
VALUE integer = rb_str_to_inum(tok(state, ts + 1, te), 10, 0);
|
1617
|
+
emit_token(state, tNTH_REF, integer, ts, te);
|
1618
|
+
} else if (is_backref(str)) {
|
1619
|
+
emit(tBACK_REF);
|
1620
|
+
} else {
|
1621
|
+
emit(tGVAR);
|
1622
|
+
}
|
1623
|
+
|
1624
|
+
/* Pop the Ragel call stack: the next state is the one saved on top of
 * stack, then leave the machine. */
fnext *stack[--top]; fbreak;
|
1625
|
+
};
|
1626
|
+
|
1627
|
+
class_var_v => {
|
1628
|
+
/* Class variable: report an error diagnostic when bad_cvar_name rejects
 * the text; tCVAR is emitted after that call either way. */
VALUE str = tok(state, ts, te);
|
1629
|
+
|
1630
|
+
if (bad_cvar_name(str)) {
|
1631
|
+
VALUE hash = rb_hash_new();
|
1632
|
+
rb_hash_aset(hash, ID2SYM(rb_intern("name")), str);
|
1633
|
+
diagnostic(state, severity_error, cvar_name, hash, range(state, ts, te), empty_array);
|
1634
|
+
}
|
1635
|
+
|
1636
|
+
emit(tCVAR);
|
1637
|
+
/* Return to the state saved on top of the Ragel call stack. */
fnext *stack[--top]; fbreak;
|
1638
|
+
};
|
1639
|
+
|
1640
|
+
instance_var_v => {
|
1641
|
+
/* Instance variable: same shape as the class variable case, using
 * bad_ivar_name, the ivar_name diagnostic and the tIVAR token. */
VALUE str = tok(state, ts, te);
|
1642
|
+
|
1643
|
+
if (bad_ivar_name(str)) {
|
1644
|
+
VALUE hash = rb_hash_new();
|
1645
|
+
rb_hash_aset(hash, ID2SYM(rb_intern("name")), str);
|
1646
|
+
diagnostic(state, severity_error, ivar_name, hash, range(state, ts, te), empty_array);
|
1647
|
+
}
|
1648
|
+
|
1649
|
+
emit(tIVAR);
|
1650
|
+
/* Return to the state saved on top of the Ragel call stack. */
fnext *stack[--top]; fbreak;
|
1651
|
+
};
|
1652
|
+
*|;
|
1653
|
+
|
1654
|
+
expr_fname := |*
|
1655
|
+
keyword
|
1656
|
+
=> { emit_table_KEYWORDS_BEGIN(state, tok(state, ts, te), ts, te);
|
1657
|
+
fnext expr_endfn; fbreak; };
|
1658
|
+
|
1659
|
+
constant => { emit(tCONSTANT); fnext expr_endfn; fbreak; };
|
1660
|
+
|
1661
|
+
bareword [?=!]? => { emit(tIDENTIFIER); fnext expr_endfn; fbreak; };
|
1662
|
+
|
1663
|
+
global_var => { p = ts - 1; fnext expr_end; fcall expr_variable; };
|
1664
|
+
|
1665
|
+
operator_fname |
|
1666
|
+
operator_arithmetic |
|
1667
|
+
operator_rest
|
1668
|
+
=> { emit_table_PUNCTUATION(state, tok(state, ts, te), ts, te);
|
1669
|
+
fnext expr_endfn; fbreak; };
|
1670
|
+
|
1671
|
+
'::' => { fhold; fhold; fgoto expr_end; };
|
1672
|
+
|
1673
|
+
':' => { fhold; fgoto expr_beg; };
|
1674
|
+
|
1675
|
+
'%s' c_any
|
1676
|
+
=> {
|
1677
|
+
if (state->version == 23) {
|
1678
|
+
VALUE type = rb_str_substr(state->source, ts, te - ts - 1);
|
1679
|
+
VALUE delimiter = rb_str_substr(state->source, te - 1, 1);
|
1680
|
+
if (delimiter == Qnil)
|
1681
|
+
delimiter = blank_string;
|
1682
|
+
|
1683
|
+
fgoto *push_literal(state, type, delimiter, ts, 0, 0, 0, 0);
|
1684
|
+
} else {
|
1685
|
+
p = ts - 1;
|
1686
|
+
fgoto expr_end;
|
1687
|
+
}
|
1688
|
+
};
|
1689
|
+
|
1690
|
+
w_any;
|
1691
|
+
|
1692
|
+
c_any => { fhold; fgoto expr_end; };
|
1693
|
+
|
1694
|
+
c_eof => do_eof;
|
1695
|
+
*|;
|
1696
|
+
|
1697
|
+
expr_endfn := |*
|
1698
|
+
label ( any - ':' ) => {
|
1699
|
+
emit_token(state, tLABEL, tok(state, ts, te - 2), ts, te - 1);
|
1700
|
+
fhold; fnext expr_labelarg; fbreak;
|
1701
|
+
};
|
1702
|
+
|
1703
|
+
w_space_comment;
|
1704
|
+
|
1705
|
+
c_any => { fhold; fgoto expr_end; };
|
1706
|
+
|
1707
|
+
c_eof => do_eof;
|
1708
|
+
*|;
|
1709
|
+
|
1710
|
+
expr_dot := |*
|
1711
|
+
constant => { emit(tCONSTANT); fnext *arg_or_cmdarg(command_state); fbreak; };
|
1712
|
+
|
1713
|
+
call_or_var => { emit(tIDENTIFIER); fnext *arg_or_cmdarg(command_state); fbreak; };
|
1714
|
+
|
1715
|
+
bareword ambiguous_fid_suffix
|
1716
|
+
=> { emit_token(state, tFID, tok(state, ts, tm), ts, tm);
|
1717
|
+
fnext *arg_or_cmdarg(command_state); p = tm - 1; fbreak; };
|
1718
|
+
|
1719
|
+
operator_fname |
|
1720
|
+
operator_arithmetic |
|
1721
|
+
operator_rest
|
1722
|
+
=> { emit_table_PUNCTUATION(state, tok(state, ts, te), ts, te);
|
1723
|
+
fnext expr_arg; fbreak; };
|
1724
|
+
|
1725
|
+
w_any;
|
1726
|
+
|
1727
|
+
c_any
|
1728
|
+
=> { fhold; fgoto expr_end; };
|
1729
|
+
|
1730
|
+
c_eof => do_eof;
|
1731
|
+
*|;
|
1732
|
+
|
1733
|
+
expr_arg := |*
|
1734
|
+
w_space+ e_lparen => {
|
1735
|
+
if (state->version == 18) {
|
1736
|
+
emit_token(state, tLPAREN2, rb_str_new2("("), te - 1, te);
|
1737
|
+
fnext expr_value; fbreak;
|
1738
|
+
} else {
|
1739
|
+
emit_token(state, tLPAREN_ARG, rb_str_new2("("), te - 1, te);
|
1740
|
+
fnext expr_beg; fbreak;
|
1741
|
+
}
|
1742
|
+
};
|
1743
|
+
|
1744
|
+
e_lparen => { emit(tLPAREN2); fnext expr_beg; fbreak; };
|
1745
|
+
|
1746
|
+
w_space+ e_lbrack => {
|
1747
|
+
emit_token(state, tLBRACK, rb_str_new2("["), te - 1, te);
|
1748
|
+
fnext expr_beg; fbreak;
|
1749
|
+
};
|
1750
|
+
|
1751
|
+
w_space* e_lbrace => {
|
1752
|
+
VALUE val = array_last(state->lambda_stack);
|
1753
|
+
if (val != Qnil && NUM2INT(val) == state->paren_nest) {
|
1754
|
+
rb_ary_pop(state->lambda_stack);
|
1755
|
+
emit_token(state, tLAMBEG, rb_str_new2("{"), te - 1, te);
|
1756
|
+
} else {
|
1757
|
+
emit_token(state, tLCURLY, rb_str_new2("{"), te - 1, te);
|
1758
|
+
}
|
1759
|
+
fnext expr_value; fbreak;
|
1760
|
+
};
|
1761
|
+
|
1762
|
+
'?' c_space_nl => { p = ts - 1; fgoto expr_end; };
|
1763
|
+
|
1764
|
+
w_space* '?' => { fhold; fgoto expr_beg; };
|
1765
|
+
|
1766
|
+
w_space+ %{ tm = p; }
|
1767
|
+
( [%/] ( c_any - c_space_nl - '=' ) | '<<' ) => {
|
1768
|
+
if (NUM2INT(rb_ary_entry(state->source_pts, tm)) == '/') {
|
1769
|
+
diagnostic(state, warning, ambiguous_literal, Qnil,
|
1770
|
+
range(state, tm, tm + 1), empty_array);
|
1771
|
+
}
|
1772
|
+
|
1773
|
+
p = tm - 1;
|
1774
|
+
fgoto expr_beg;
|
1775
|
+
};
|
1776
|
+
|
1777
|
+
w_space+ %{ tm = p; } ( '+' | '-' | '*' | '&' | '**' ) => {
|
1778
|
+
VALUE hash = rb_hash_new();
|
1779
|
+
VALUE str = tok(state, tm, te);
|
1780
|
+
rb_hash_aset(hash, prefix, str);
|
1781
|
+
diagnostic(state, warning, ambiguous_prefix, hash, range(state, tm, te),
|
1782
|
+
empty_array);
|
1783
|
+
|
1784
|
+
p = tm - 1;
|
1785
|
+
fgoto expr_beg;
|
1786
|
+
};
|
1787
|
+
|
1788
|
+
w_space+ '::' => { fhold; fhold; fgoto expr_beg; };
|
1789
|
+
|
1790
|
+
w_space* ':' => { fhold; fgoto expr_beg; };
|
1791
|
+
|
1792
|
+
w_space+ label => { p = ts - 1; fgoto expr_beg; };
|
1793
|
+
|
1794
|
+
w_space+ %{ tm = p; } '?' c_space_nl => { p = tm - 1; fgoto expr_end; };
|
1795
|
+
|
1796
|
+
w_space* operator_arithmetic
|
1797
|
+
( '=' | c_space_nl )? |
|
1798
|
+
w_space* keyword_modifier |
|
1799
|
+
w_space* '&.' |
|
1800
|
+
w_space* punctuation_end
|
1801
|
+
=> {
|
1802
|
+
p = ts - 1;
|
1803
|
+
fgoto expr_end;
|
1804
|
+
};
|
1805
|
+
|
1806
|
+
w_space;
|
1807
|
+
|
1808
|
+
w_comment => { fgoto expr_end; };
|
1809
|
+
|
1810
|
+
w_newline => { fhold; fgoto expr_end; };
|
1811
|
+
|
1812
|
+
c_any => { fhold; fgoto expr_beg; };
|
1813
|
+
|
1814
|
+
c_eof => do_eof;
|
1815
|
+
*|;
|
1816
|
+
|
1817
|
+
expr_cmdarg := |*
|
1818
|
+
w_space+ e_lparen
|
1819
|
+
=> {
|
1820
|
+
emit_token(state, tLPAREN_ARG, rb_str_new2("("), te - 1, te);
|
1821
|
+
if (state->version == 18) {
|
1822
|
+
fnext expr_value; fbreak;
|
1823
|
+
} else {
|
1824
|
+
fnext expr_beg; fbreak;
|
1825
|
+
}
|
1826
|
+
};
|
1827
|
+
|
1828
|
+
w_space* 'do'
|
1829
|
+
=> {
|
1830
|
+
if (stack_state_active(&state->cond)) {
|
1831
|
+
emit_token(state, kDO_COND, rb_str_new2("do"), te - 2, te);
|
1832
|
+
} else {
|
1833
|
+
emit_token(state, kDO, rb_str_new2("do"), te - 2, te);
|
1834
|
+
}
|
1835
|
+
fnext expr_value; fbreak;
|
1836
|
+
};
|
1837
|
+
|
1838
|
+
c_any |
|
1839
|
+
w_space* bareword |
|
1840
|
+
w_space* label
|
1841
|
+
=> { p = ts - 1;
|
1842
|
+
fgoto expr_arg; };
|
1843
|
+
|
1844
|
+
c_eof => do_eof;
|
1845
|
+
*|;
|
1846
|
+
|
1847
|
+
expr_endarg := |*
|
1848
|
+
e_lbrace => {
|
1849
|
+
VALUE val = array_last(state->lambda_stack);
|
1850
|
+
if (val != Qnil && NUM2INT(val) == state->paren_nest) {
|
1851
|
+
rb_ary_pop(state->lambda_stack);
|
1852
|
+
emit_token(state, tLAMBEG, rb_str_new2("{"), te - 1, te);
|
1853
|
+
} else {
|
1854
|
+
emit_token(state, tLBRACE_ARG, rb_str_new2("{"), te - 1, te);
|
1855
|
+
}
|
1856
|
+
fnext expr_value; fbreak;
|
1857
|
+
};
|
1858
|
+
|
1859
|
+
'do' => { emit_do(state, 1, ts, te); fnext expr_value; fbreak; };
|
1860
|
+
|
1861
|
+
w_space_comment;
|
1862
|
+
|
1863
|
+
c_any
|
1864
|
+
=> { fhold; fgoto expr_end; };
|
1865
|
+
|
1866
|
+
c_eof => do_eof;
|
1867
|
+
*|;
|
1868
|
+
|
1869
|
+
expr_mid := |*
|
1870
|
+
keyword_modifier
|
1871
|
+
=> { emit_table_KEYWORDS(state, tok(state, ts, te), ts, te);
|
1872
|
+
fnext expr_beg; fbreak; };
|
1873
|
+
|
1874
|
+
bareword => { p = ts - 1; fgoto expr_beg; };
|
1875
|
+
|
1876
|
+
w_space_comment;
|
1877
|
+
|
1878
|
+
w_newline => { fhold; fgoto expr_end; };
|
1879
|
+
|
1880
|
+
c_any => { fhold; fgoto expr_beg; };
|
1881
|
+
|
1882
|
+
c_eof => do_eof;
|
1883
|
+
*|;
|
1884
|
+
|
1885
|
+
expr_beg := |*
|
1886
|
+
[+\-] w_any* [0-9] => {
|
1887
|
+
emit_token(state, tUNARY_NUM, tok(state, ts, ts + 1), ts, ts + 1);
|
1888
|
+
fhold; fnext expr_end; fbreak;
|
1889
|
+
};
|
1890
|
+
|
1891
|
+
'*' => { emit(tSTAR); fbreak; };
|
1892
|
+
|
1893
|
+
'/' c_any => {
|
1894
|
+
VALUE delimiter = rb_str_substr(state->source, ts, 1);
|
1895
|
+
fhold; fgoto *push_literal(state, delimiter, delimiter, ts, 0, 0, 0, 0);
|
1896
|
+
};
|
1897
|
+
|
1898
|
+
'%' ( any - [A-Za-z] ) => {
|
1899
|
+
VALUE type = rb_str_substr(state->source, ts, 1);
|
1900
|
+
VALUE delimiter = rb_str_substr(state->source, te - 1, 1);
|
1901
|
+
if (delimiter == Qnil)
|
1902
|
+
delimiter = blank_string;
|
1903
|
+
|
1904
|
+
fgoto *push_literal(state, type, delimiter, ts, 0, 0, 0, 0);
|
1905
|
+
};
|
1906
|
+
|
1907
|
+
'%' [A-Za-z]+ c_any => {
|
1908
|
+
VALUE type = rb_str_substr(state->source, ts, te - ts - 1);
|
1909
|
+
VALUE delimiter = rb_str_substr(state->source, te - 1, 1);
|
1910
|
+
if (delimiter == Qnil)
|
1911
|
+
delimiter = blank_string;
|
1912
|
+
|
1913
|
+
fgoto *push_literal(state, type, delimiter, ts, 0, 0, 0, 0);
|
1914
|
+
};
|
1915
|
+
|
1916
|
+
'%' c_eof => {
|
1917
|
+
diagnostic(state, fatal, string_eof, Qnil,
|
1918
|
+
range(state, ts, ts + 1), empty_array);
|
1919
|
+
};
|
1920
|
+
|
1921
|
+
'<<' [~\-]?
|
1922
|
+
( '"' ( any - '"' )* '"'
|
1923
|
+
| "'" ( any - "'" )* "'"
|
1924
|
+
| "`" ( any - "`" )* "`"
|
1925
|
+
| bareword ) % { heredoc_e = p; }
|
1926
|
+
c_line* c_nl % { if (!state->herebody_s) state->herebody_s = p; } => {
|
1927
|
+
|
1928
|
+
VALUE heredoc = tok(state, ts, heredoc_e);
|
1929
|
+
VALUE type;
|
1930
|
+
char *cp = RSTRING_PTR(heredoc);
|
1931
|
+
int indent = 0, dedent_body = 0;
|
1932
|
+
long rng_s = ts, rng_e = heredoc_e;
|
1933
|
+
|
1934
|
+
if (cp[2] == '-') {
|
1935
|
+
indent = 1;
|
1936
|
+
cp += 3;
|
1937
|
+
rng_s += 3;
|
1938
|
+
} else if (cp[2] == '~') {
|
1939
|
+
dedent_body = indent = 1;
|
1940
|
+
cp += 3;
|
1941
|
+
rng_s += 3;
|
1942
|
+
} else {
|
1943
|
+
cp += 2;
|
1944
|
+
rng_s += 2;
|
1945
|
+
}
|
1946
|
+
|
1947
|
+
if (*cp == '"' || *cp == '\'' || *cp == '`') {
|
1948
|
+
char type_str[3];
|
1949
|
+
type_str[0] = '<';
|
1950
|
+
type_str[1] = '<';
|
1951
|
+
type_str[2] = *cp;
|
1952
|
+
|
1953
|
+
cp += 1;
|
1954
|
+
rng_s += 1;
|
1955
|
+
rng_e -= 1;
|
1956
|
+
|
1957
|
+
type = rb_str_new(type_str, 3);
|
1958
|
+
} else {
|
1959
|
+
type = rb_str_new2("<<\"");
|
1960
|
+
}
|
1961
|
+
|
1962
|
+
VALUE delimiter = tok(state, rng_s, rng_e);
|
1963
|
+
|
1964
|
+
if (state->version >= 24) {
|
1965
|
+
if (NUM2INT(rb_funcall(delimiter, rb_intern("count"), 1, newline)) > 0) {
|
1966
|
+
if (str_end_with_p(delimiter, "\n")) {
|
1967
|
+
diagnostic(state, warning, heredoc_id_ends_with_nl, Qnil,
|
1968
|
+
range(state, ts, ts + 1), empty_array);
|
1969
|
+
|
1970
|
+
delimiter = rb_funcall(delimiter, rb_intern("rstrip"), 0);
|
1971
|
+
} else {
|
1972
|
+
diagnostic(state, fatal, heredoc_id_has_newline, Qnil,
|
1973
|
+
range(state, ts, ts + 1), empty_array);
|
1974
|
+
}
|
1975
|
+
}
|
1976
|
+
}
|
1977
|
+
|
1978
|
+
if (dedent_body && state->version >= 18 && state->version <= 22) {
|
1979
|
+
emit_token(state, tLSHFT, rb_str_new2("<<"), ts, ts + 2);
|
1980
|
+
p = ts + 1;
|
1981
|
+
fnext expr_beg; fbreak;
|
1982
|
+
} else {
|
1983
|
+
fnext *push_literal(state, type, delimiter, ts, heredoc_e, indent,
|
1984
|
+
dedent_body, 0);
|
1985
|
+
p = state->herebody_s - 1;
|
1986
|
+
}
|
1987
|
+
};
|
1988
|
+
|
1989
|
+
':' ('&&' | '||') => {
|
1990
|
+
fhold; fhold;
|
1991
|
+
emit_token(state, tSYMBEG, tok(state, ts, ts + 1), ts, ts + 1);
|
1992
|
+
fgoto expr_fname;
|
1993
|
+
};
|
1994
|
+
|
1995
|
+
':' ['"] => { /* ' */
|
1996
|
+
VALUE type = tok(state, ts, te);
|
1997
|
+
VALUE delimiter = tok(state, te - 1, te);
|
1998
|
+
fgoto *push_literal(state, type, delimiter, ts, 0, 0, 0, 0);
|
1999
|
+
};
|
2000
|
+
|
2001
|
+
':' [!~] '@'
|
2002
|
+
=> {
|
2003
|
+
emit_token(state, tSYMBOL, tok(state, ts + 1, ts + 2), ts, te);
|
2004
|
+
fnext expr_end; fbreak;
|
2005
|
+
};
|
2006
|
+
|
2007
|
+
':' bareword ambiguous_symbol_suffix => {
|
2008
|
+
emit_token(state, tSYMBOL, tok(state, ts + 1, tm), ts, tm);
|
2009
|
+
p = tm - 1;
|
2010
|
+
fnext expr_end; fbreak;
|
2011
|
+
};
|
2012
|
+
|
2013
|
+
':' ( bareword | global_var | class_var | instance_var |
|
2014
|
+
operator_fname | operator_arithmetic | operator_rest ) => {
|
2015
|
+
emit_token(state, tSYMBOL, tok(state, ts + 1, te), ts, te);
|
2016
|
+
fnext expr_end; fbreak;
|
2017
|
+
};
|
2018
|
+
|
2019
|
+
'?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
|
2020
|
+
| (c_any - c_space_nl - e_bs) % { state->escape = Qnil; }
|
2021
|
+
) => {
|
2022
|
+
VALUE value = state->escape;
|
2023
|
+
if (value == Qnil)
|
2024
|
+
value = tok(state, ts + 1, te);
|
2025
|
+
|
2026
|
+
if (state->version == 18)
|
2027
|
+
emit_token(state, tINTEGER, rb_funcall(value, rb_intern("getbyte"), 1, INT2NUM(0)), ts, te);
|
2028
|
+
else
|
2029
|
+
emit_token(state, tCHARACTER, value, ts, te);
|
2030
|
+
|
2031
|
+
fnext expr_end; fbreak;
|
2032
|
+
};
|
2033
|
+
|
2034
|
+
'?' c_space_nl => {
/* A question mark followed by whitespace or a newline is not a character
 * literal. Warn with the escaped spelling of that whitespace character,
 * then rescan the whole match in expr_end.
 *
 * ts indexes characters (source_pts is a codepoint array), so the
 * character after the question mark is taken with the character-indexed
 * rb_str_substr, as every other slice of state->source in this grammar
 * does. The byte-indexed rb_str_subseq picks the wrong byte as soon as a
 * multibyte character precedes ts. */
|
2035
|
+
VALUE escape = escape_char(rb_str_substr(state->source, ts + 1, 1));
|
2036
|
+
VALUE hash = rb_hash_new();
|
2037
|
+
rb_hash_aset(hash, ID2SYM(rb_intern("escape")), escape);
|
2038
|
+
diagnostic(state, warning, invalid_escape_use, hash,
|
2039
|
+
range(state, ts, te), empty_array);
|
2040
|
+
|
2041
|
+
/* Rewind to just before the match so expr_end sees it again. */
p = ts - 1;
|
2042
|
+
fgoto expr_end;
|
2043
|
+
};
|
2044
|
+
|
2045
|
+
'?' c_eof => {
|
2046
|
+
diagnostic(state, fatal, incomplete_escape, Qnil,
|
2047
|
+
range(state, ts, ts + 1), empty_array);
|
2048
|
+
};
|
2049
|
+
|
2050
|
+
'?' [A-Za-z_] bareword => { p = ts - 1; fgoto expr_end; };
|
2051
|
+
|
2052
|
+
e_lbrace => {
|
2053
|
+
VALUE val = array_last(state->lambda_stack);
|
2054
|
+
if (val != Qnil && NUM2INT(val) == state->paren_nest) {
|
2055
|
+
rb_ary_pop(state->lambda_stack);
|
2056
|
+
emit(tLAMBEG);
|
2057
|
+
} else {
|
2058
|
+
emit(tLBRACE);
|
2059
|
+
}
|
2060
|
+
fbreak;
|
2061
|
+
};
|
2062
|
+
|
2063
|
+
e_lbrack => {
|
2064
|
+
emit_token(state, tLBRACK, tok(state, ts, te), ts, te);
|
2065
|
+
fbreak;
|
2066
|
+
};
|
2067
|
+
|
2068
|
+
e_lparen => {
|
2069
|
+
emit_token(state, tLPAREN, tok(state, ts, te), ts, te);
|
2070
|
+
fbreak;
|
2071
|
+
};
|
2072
|
+
|
2073
|
+
punctuation_begin
|
2074
|
+
=> { emit_table_PUNCTUATION_BEGIN(state, tok(state, ts, te), ts, te);
|
2075
|
+
fbreak; };
|
2076
|
+
|
2077
|
+
'rescue' %{ tm = p; } '=>'? => {
|
2078
|
+
emit_token(state, kRESCUE, tok(state, ts, tm), ts, tm);
|
2079
|
+
p = tm - 1;
|
2080
|
+
fnext expr_mid; fbreak;
|
2081
|
+
};
|
2082
|
+
|
2083
|
+
keyword_modifier
|
2084
|
+
=> { emit_table_KEYWORDS_BEGIN(state, tok(state, ts, te), ts, te);
|
2085
|
+
fnext expr_value; fbreak; };
|
2086
|
+
|
2087
|
+
label ( any - ':' )
|
2088
|
+
=> {
|
2089
|
+
fhold;
|
2090
|
+
|
2091
|
+
if (state->version == 18) {
|
2092
|
+
VALUE ident = tok(state, ts, te - 2);
|
2093
|
+
|
2094
|
+
emit_token(state, is_capitalized(ident) ? tCONSTANT : tIDENTIFIER,
|
2095
|
+
ident, ts, te - 2);
|
2096
|
+
fhold;
|
2097
|
+
|
2098
|
+
if (STATIC_ENV_DECLARED(ident)) {
|
2099
|
+
fnext expr_end;
|
2100
|
+
} else {
|
2101
|
+
fnext *arg_or_cmdarg(command_state);
|
2102
|
+
}
|
2103
|
+
} else {
|
2104
|
+
emit_token(state, tLABEL, tok(state, ts, te - 2), ts, te - 1);
|
2105
|
+
fnext expr_labelarg;
|
2106
|
+
}
|
2107
|
+
|
2108
|
+
fbreak;
|
2109
|
+
};
|
2110
|
+
|
2111
|
+
bareword ambiguous_ident_suffix | keyword => { p = ts - 1; fgoto expr_end; };
|
2112
|
+
|
2113
|
+
call_or_var => local_ident;
|
2114
|
+
|
2115
|
+
(call_or_var - keyword)
|
2116
|
+
% { ident_tok = tok(state, ts, te); ident_ts = ts; ident_te = te; }
|
2117
|
+
w_space+ '('
|
2118
|
+
=> {
|
2119
|
+
emit_token(state, tIDENTIFIER, ident_tok, ident_ts, ident_te);
|
2120
|
+
p = ident_te - 1;
|
2121
|
+
|
2122
|
+
if (STATIC_ENV_DECLARED(ident_tok) && state->version < 25) {
|
2123
|
+
fnext expr_endfn;
|
2124
|
+
} else {
|
2125
|
+
fnext expr_cmdarg;
|
2126
|
+
}
|
2127
|
+
|
2128
|
+
fbreak;
|
2129
|
+
};
|
2130
|
+
|
2131
|
+
w_any;
|
2132
|
+
|
2133
|
+
e_heredoc_nl '=begin' ( c_space | c_nl_zlen ) => {
|
2134
|
+
p = ts - 1;
|
2135
|
+
state->cs_before_block_comment = state->cs;
|
2136
|
+
fgoto line_begin;
|
2137
|
+
};
|
2138
|
+
|
2139
|
+
operator_arithmetic '=' |
|
2140
|
+
operator_rest |
|
2141
|
+
punctuation_end |
|
2142
|
+
c_any
|
2143
|
+
=> { p = ts - 1; fgoto expr_end; };
|
2144
|
+
|
2145
|
+
c_eof => do_eof;
|
2146
|
+
*|;
|
2147
|
+
|
2148
|
+
expr_labelarg := |*
|
2149
|
+
w_space_comment;
|
2150
|
+
|
2151
|
+
w_newline => {
|
2152
|
+
/* Newline handling depends on in_kwarg: when it is set, back p up by one
 * and jump to expr_end so the newline is scanned there; otherwise jump
 * straight to line_begin. */
if (state->in_kwarg) {
|
2153
|
+
fhold; fgoto expr_end;
|
2154
|
+
} else {
|
2155
|
+
fgoto line_begin;
|
2156
|
+
}
|
2157
|
+
};
|
2158
|
+
|
2159
|
+
c_any => { /* anything else is rescanned in expr_beg */ fhold; fgoto expr_beg; };
|
2160
|
+
|
2161
|
+
c_eof => do_eof;
|
2162
|
+
*|;
|
2163
|
+
|
2164
|
+
expr_value := |*
|
2165
|
+
label (any - ':') => { p = ts - 1; fgoto expr_end; };
|
2166
|
+
|
2167
|
+
['"] => { /* ' */
|
2168
|
+
VALUE type = tok(state, ts, te);
|
2169
|
+
fgoto *push_literal(state, type, type, ts, 0, 0, 0, 0);
|
2170
|
+
};
|
2171
|
+
|
2172
|
+
w_space_comment;
|
2173
|
+
|
2174
|
+
w_newline => { fgoto line_begin; };
|
2175
|
+
|
2176
|
+
c_any => { fhold; fgoto expr_beg; };
|
2177
|
+
|
2178
|
+
c_eof => do_eof;
|
2179
|
+
*|;
|
2180
|
+
|
2181
|
+
expr_end := |*
|
2182
|
+
'->' => {
|
2183
|
+
emit_token(state, tLAMBDA, tok(state, ts, ts + 2), ts, ts + 2);
|
2184
|
+
rb_ary_push(state->lambda_stack, INT2NUM(state->paren_nest));
|
2185
|
+
fnext expr_endfn; fbreak;
|
2186
|
+
};
|
2187
|
+
|
2188
|
+
e_lbrace => {
|
2189
|
+
VALUE val = array_last(state->lambda_stack);
|
2190
|
+
if (val != Qnil && NUM2INT(val) == state->paren_nest) {
|
2191
|
+
rb_ary_pop(state->lambda_stack);
|
2192
|
+
emit(tLAMBEG);
|
2193
|
+
} else {
|
2194
|
+
emit(tLCURLY);
|
2195
|
+
}
|
2196
|
+
fnext expr_value; fbreak;
|
2197
|
+
};
|
2198
|
+
|
2199
|
+
'do' => {
|
2200
|
+
VALUE val = array_last(state->lambda_stack);
|
2201
|
+
if (val != Qnil && NUM2INT(val) == state->paren_nest) {
|
2202
|
+
rb_ary_pop(state->lambda_stack);
|
2203
|
+
emit(kDO_LAMBDA);
|
2204
|
+
} else {
|
2205
|
+
emit_do(state, 0, ts, te);
|
2206
|
+
}
|
2207
|
+
fnext expr_value; fbreak;
|
2208
|
+
};
|
2209
|
+
|
2210
|
+
keyword_with_fname
|
2211
|
+
=> { emit_table_KEYWORDS(state, tok(state, ts, te), ts, te);
|
2212
|
+
fnext expr_fname; fbreak; };
|
2213
|
+
|
2214
|
+
'class' w_any* '<<'
|
2215
|
+
=> { emit_token(state, kCLASS, rb_str_new2("class"), ts, ts + 5);
|
2216
|
+
emit_token(state, tLSHFT, rb_str_new2("<<"), te - 2, te);
|
2217
|
+
fnext expr_value; fbreak; };
|
2218
|
+
|
2219
|
+
keyword_modifier
|
2220
|
+
=> { emit_table_KEYWORDS(state, tok(state, ts, te), ts, te);
|
2221
|
+
fnext expr_beg; fbreak; };
|
2222
|
+
|
2223
|
+
keyword_with_value
|
2224
|
+
=> { emit_table_KEYWORDS(state, tok(state, ts, te), ts, te);
|
2225
|
+
fnext expr_value; fbreak; };
|
2226
|
+
|
2227
|
+
keyword_with_mid
|
2228
|
+
=> { emit_table_KEYWORDS(state, tok(state, ts, te), ts, te);
|
2229
|
+
fnext expr_mid; fbreak; };
|
2230
|
+
|
2231
|
+
keyword_with_arg
|
2232
|
+
=> {
|
2233
|
+
/* Emit the keyword token through the KEYWORDS table, then pick the next
 * state: expr_beg for the keyword not under version 18, expr_arg in every
 * other case. */
VALUE keyword = tok(state, ts, te);
|
2234
|
+
emit_table_KEYWORDS(state, keyword, ts, te);
|
2235
|
+
|
2236
|
+
/* Compare by explicit length instead of strcmp: the buffer behind
 * RSTRING_PTR is not guaranteed to be NUL-terminated by the Ruby C API. */
if (state->version == 18 && RSTRING_LEN(keyword) == 3 && memcmp(RSTRING_PTR(keyword), "not", 3) == 0) {
|
2237
|
+
fnext expr_beg; fbreak;
|
2238
|
+
} else {
|
2239
|
+
fnext expr_arg; fbreak;
|
2240
|
+
}
|
2241
|
+
};
|
2242
|
+
|
2243
|
+
'__ENCODING__' => {
|
2244
|
+
if (state->version == 18) {
|
2245
|
+
VALUE str = tok(state, ts, te);
|
2246
|
+
emit(tIDENTIFIER);
|
2247
|
+
|
2248
|
+
if (STATIC_ENV_DECLARED(str)) {
|
2249
|
+
fnext expr_end;
|
2250
|
+
} else {
|
2251
|
+
fnext *arg_or_cmdarg(command_state);
|
2252
|
+
}
|
2253
|
+
} else {
|
2254
|
+
emit(k__ENCODING__);
|
2255
|
+
}
|
2256
|
+
fbreak;
|
2257
|
+
};
|
2258
|
+
|
2259
|
+
keyword_with_end
|
2260
|
+
=> { emit_table_KEYWORDS(state, tok(state, ts, te), ts, te);
|
2261
|
+
fbreak; };
|
2262
|
+
|
2263
|
+
( '0' [Xx] %{ num_base = 16; num_digits_s = p; } int_hex
|
2264
|
+
| '0' [Dd] %{ num_base = 10; num_digits_s = p; } int_dec
|
2265
|
+
| '0' [Oo] %{ num_base = 8; num_digits_s = p; } int_dec
|
2266
|
+
| '0' [Bb] %{ num_base = 2; num_digits_s = p; } int_bin
|
2267
|
+
| [1-9] digit* '_'? %{ num_base = 10; num_digits_s = ts; } int_dec
|
2268
|
+
| '0' digit* '_'? %{ num_base = 8; num_digits_s = ts; } int_dec
|
2269
|
+
) %{ num_suffix_s = p; } int_suffix
|
2270
|
+
=> {
|
2271
|
+
int invalid_idx;
|
2272
|
+
VALUE digits = tok(state, num_digits_s, num_suffix_s);
|
2273
|
+
|
2274
|
+
if (NUM2INT(rb_ary_entry(state->source_pts, num_suffix_s - 1)) == '_') {
|
2275
|
+
VALUE hash = rb_hash_new();
|
2276
|
+
rb_hash_aset(hash, character, rb_str_new2("_"));
|
2277
|
+
diagnostic(state, severity_error, trailing_in_number, hash,
|
2278
|
+
range(state, te - 1, te), empty_array);
|
2279
|
+
} else if (RSTRING_LEN(digits) == 0 && num_base == 8 && state->version == 18) {
|
2280
|
+
digits = rb_str_new2("0");
|
2281
|
+
} else if (RSTRING_LEN(digits) == 0) {
|
2282
|
+
diagnostic(state, severity_error, empty_numeric, Qnil,
|
2283
|
+
range(state, ts, te), empty_array);
|
2284
|
+
} else if (num_base == 8 && (invalid_idx = find_8_or_9(digits)) != -1) {
|
2285
|
+
long invalid_s = num_digits_s + invalid_idx;
|
2286
|
+
diagnostic(state, severity_error, invalid_octal, Qnil,
|
2287
|
+
range(state, invalid_s, invalid_s + 1), empty_array);
|
2288
|
+
}
|
2289
|
+
|
2290
|
+
VALUE integer = rb_str_to_inum(digits, num_base, 0);
|
2291
|
+
if (state->version >= 18 && state->version <= 20) {
|
2292
|
+
emit_token(state, tINTEGER, integer, numeric_s, num_suffix_s);
|
2293
|
+
p = num_suffix_s - 1;
|
2294
|
+
} else {
|
2295
|
+
num_xfrm(state, integer, numeric_s, te);
|
2296
|
+
}
|
2297
|
+
|
2298
|
+
fbreak;
|
2299
|
+
};
|
2300
|
+
|
2301
|
+
flo_frac flo_pow?
|
2302
|
+
=> {
|
2303
|
+
diagnostic(state, severity_error, no_dot_digit_literal, Qnil,
|
2304
|
+
range(state, ts, te), empty_array);
|
2305
|
+
};
|
2306
|
+
|
2307
|
+
flo_int [eE]
|
2308
|
+
=> {
|
2309
|
+
if (state->version >= 18 && state->version <= 20) {
|
2310
|
+
VALUE hash = rb_hash_new();
|
2311
|
+
rb_hash_aset(hash, character, tok(state, te - 1, te));
|
2312
|
+
diagnostic(state, severity_error, trailing_in_number, hash,
|
2313
|
+
range(state, te - 1, te), empty_array);
|
2314
|
+
} else {
|
2315
|
+
VALUE integer = rb_str_to_inum(tok(state, ts, te - 1), 10, 0);
|
2316
|
+
emit_token(state, tINTEGER, integer, ts, te - 1);
|
2317
|
+
fhold; fbreak;
|
2318
|
+
}
|
2319
|
+
};
|
2320
|
+
|
2321
|
+
flo_int flo_frac [eE]
|
2322
|
+
=> {
|
2323
|
+
if (state->version >= 18 && state->version <= 20) {
|
2324
|
+
VALUE hash = rb_hash_new();
|
2325
|
+
rb_hash_aset(hash, character, tok(state, te - 1, te));
|
2326
|
+
diagnostic(state, severity_error, trailing_in_number, hash,
|
2327
|
+
range(state, te - 1, te), empty_array);
|
2328
|
+
} else {
|
2329
|
+
VALUE fval = rb_funcall(tok(state, ts, te - 1), rb_intern("to_f"), 0);
|
2330
|
+
emit_token(state, tFLOAT, fval, ts, te - 1);
|
2331
|
+
fhold; fbreak;
|
2332
|
+
}
|
2333
|
+
};
|
2334
|
+
|
2335
|
+
flo_int
|
2336
|
+
( flo_frac? flo_pow %{ num_suffix_s = p; } flo_pow_suffix
|
2337
|
+
| flo_frac %{ num_suffix_s = p; } flo_suffix
|
2338
|
+
)
|
2339
|
+
=> {
|
2340
|
+
VALUE digits = tok(state, ts, num_suffix_s);
|
2341
|
+
|
2342
|
+
if (state->version >= 18 && state->version <= 20) {
|
2343
|
+
VALUE fval = rb_Float(digits);
|
2344
|
+
emit_token(state, tFLOAT, fval, ts, num_suffix_s);
|
2345
|
+
p = num_suffix_s - 1;
|
2346
|
+
} else {
|
2347
|
+
num_xfrm(state, digits, ts, te);
|
2348
|
+
}
|
2349
|
+
fbreak;
|
2350
|
+
};
|
2351
|
+
|
2352
|
+
'`' | ['"] => { /* ' */
|
2353
|
+
VALUE type = tok(state, ts, te);
|
2354
|
+
VALUE delimiter = tok(state, te - 1, te);
|
2355
|
+
fgoto *push_literal(state, type, delimiter, ts, 0, 0, 0, 1);
|
2356
|
+
};
|
2357
|
+
|
2358
|
+
constant => { emit(tCONSTANT); fnext *arg_or_cmdarg(command_state); fbreak; };
|
2359
|
+
|
2360
|
+
constant ambiguous_const_suffix => {
|
2361
|
+
emit_token(state, tCONSTANT, tok(state, ts, tm), ts, tm);
|
2362
|
+
p = tm - 1;
|
2363
|
+
fbreak;
|
2364
|
+
};
|
2365
|
+
|
2366
|
+
global_var | class_var_v | instance_var_v
|
2367
|
+
=> { p = ts - 1; fcall expr_variable; };
|
2368
|
+
|
2369
|
+
'.' | '&.' | '::'
|
2370
|
+
=> { emit_table_PUNCTUATION(state, tok(state, ts, te), ts, te);
|
2371
|
+
fnext expr_dot; fbreak; };
|
2372
|
+
|
2373
|
+
call_or_var => local_ident;
|
2374
|
+
|
2375
|
+
bareword ambiguous_fid_suffix => {
|
2376
|
+
if (tm == te) {
|
2377
|
+
emit(tFID);
|
2378
|
+
} else {
|
2379
|
+
emit_token(state, tIDENTIFIER, tok(state, ts, tm), ts, tm);
|
2380
|
+
p = tm - 1;
|
2381
|
+
}
|
2382
|
+
fnext expr_arg; fbreak;
|
2383
|
+
};
|
2384
|
+
|
2385
|
+
( operator_arithmetic | operator_rest ) - ( '|' | '~' | '!' )
|
2386
|
+
=> {
|
2387
|
+
emit_table_PUNCTUATION(state, tok(state, ts, te), ts, te);
|
2388
|
+
fnext expr_value; fbreak;
|
2389
|
+
};
|
2390
|
+
|
2391
|
+
( e_lparen | '|' | '~' | '!' )
|
2392
|
+
=> { emit_table_PUNCTUATION(state, tok(state, ts, te), ts, te);
|
2393
|
+
fnext expr_beg; fbreak; };
|
2394
|
+
|
2395
|
+
e_rbrace => {
|
2396
|
+
emit(tRCURLY);
|
2397
|
+
|
2398
|
+
if (state->version < 24) {
|
2399
|
+
stack_state_lexpop(&state->cond);
|
2400
|
+
stack_state_lexpop(&state->cmdarg);
|
2401
|
+
} else {
|
2402
|
+
stack_state_pop(&state->cond);
|
2403
|
+
stack_state_pop(&state->cmdarg);
|
2404
|
+
}
|
2405
|
+
|
2406
|
+
if (state->version >= 25) {
|
2407
|
+
fnext expr_end;
|
2408
|
+
} else {
|
2409
|
+
fnext expr_endarg;
|
2410
|
+
}
|
2411
|
+
|
2412
|
+
fbreak;
|
2413
|
+
};
|
2414
|
+
|
2415
|
+
e_rparen => {
|
2416
|
+
emit(tRPAREN);
|
2417
|
+
|
2418
|
+
if (state->version < 24) {
|
2419
|
+
stack_state_lexpop(&state->cond);
|
2420
|
+
stack_state_lexpop(&state->cmdarg);
|
2421
|
+
} else {
|
2422
|
+
stack_state_pop(&state->cond);
|
2423
|
+
stack_state_pop(&state->cmdarg);
|
2424
|
+
}
|
2425
|
+
|
2426
|
+
fbreak;
|
2427
|
+
};
|
2428
|
+
|
2429
|
+
']' => {
|
2430
|
+
emit(tRBRACK);
|
2431
|
+
|
2432
|
+
if (state->version < 24) {
|
2433
|
+
stack_state_lexpop(&state->cond);
|
2434
|
+
stack_state_lexpop(&state->cmdarg);
|
2435
|
+
} else {
|
2436
|
+
stack_state_pop(&state->cond);
|
2437
|
+
stack_state_pop(&state->cmdarg);
|
2438
|
+
}
|
2439
|
+
|
2440
|
+
if (state->version >= 25) {
|
2441
|
+
fnext expr_end;
|
2442
|
+
} else {
|
2443
|
+
fnext expr_endarg;
|
2444
|
+
}
|
2445
|
+
|
2446
|
+
fbreak;
|
2447
|
+
};
|
2448
|
+
|
2449
|
+
operator_arithmetic '='
|
2450
|
+
=> { emit_token(state, tOP_ASGN, tok(state, ts, te - 1), ts, te);
|
2451
|
+
fnext expr_beg; fbreak; };
|
2452
|
+
|
2453
|
+
'?' => { emit(tEH); fnext expr_value; fbreak; };
|
2454
|
+
|
2455
|
+
e_lbrack => { emit(tLBRACK2); fnext expr_beg; fbreak; };
|
2456
|
+
|
2457
|
+
punctuation_end
|
2458
|
+
=> { emit_table_PUNCTUATION(state, tok(state, ts, te), ts, te);
|
2459
|
+
fnext expr_beg; fbreak; };
|
2460
|
+
|
2461
|
+
w_space_comment;
|
2462
|
+
|
2463
|
+
w_newline => { fgoto leading_dot; };
|
2464
|
+
|
2465
|
+
';' => { emit(tSEMI); fnext expr_value; fbreak; };
|
2466
|
+
|
2467
|
+
'\\' c_line {
|
2468
|
+
diagnostic(state, severity_error, bare_backslash, Qnil,
|
2469
|
+
range(state, ts, ts + 1), empty_array);
|
2470
|
+
fhold;
|
2471
|
+
};
|
2472
|
+
|
2473
|
+
c_any
|
2474
|
+
=> {
|
2475
|
+
VALUE hash = rb_hash_new();
|
2476
|
+
VALUE str = rb_str_inspect(tok(state, ts, te));
|
2477
|
+
rb_hash_aset(hash, character, rb_str_substr(str, 1, NUM2INT(rb_str_length(str)) - 2));
|
2478
|
+
diagnostic(state, fatal, unexpected, hash, range(state, ts, te), empty_array);
|
2479
|
+
};
|
2480
|
+
|
2481
|
+
c_eof => do_eof;
|
2482
|
+
*|;
|
2483
|
+
|
2484
|
+
# Scanner `leading_dot`: looks at what follows a line break before deciding
# whether a tNL token is produced. A `w_newline` alternative earlier in this
# file transfers control here with `fgoto leading_dot`.
leading_dot := |*
|
2485
|
+
# Optional c_space run followed by `.` or `&.`. The leaving action stores p
# in tm when the c_space* run ends; the match action then sets p = tm - 1
# and jumps to expr_end. No tNL is emitted on this path.
# NOTE(review): presumably the rewind makes expr_end re-scan starting at the
# dot itself -- confirm against Ragel's fgoto semantics.
c_space* %{ tm = p; } ('.' | '&.') => { p = tm - 1; fgoto expr_end; };
|
2486
|
+
|
2487
|
+
# Any other character: emit tNL (value Qnil) covering
# [newline_s, newline_s + 1), give the character back with fhold, select
# line_begin as the next scanner and leave the machine with fbreak.
any => {
|
2488
|
+
/* state->newline_s is assigned outside this block; presumably the offset of the pending newline -- TODO confirm. */
emit_token(state, tNL, Qnil, state->newline_s, state->newline_s + 1);
|
2489
|
+
fhold; fnext line_begin; fbreak;
|
2490
|
+
};
|
2491
|
+
*|;
|
2492
|
+
|
2493
|
+
# Scanner `line_comment`: consumes the body of an `=begin` ... `=end` block
# line by line. The line_begin scanner jumps here with `fgoto line_comment`
# after recording the start offset in state->eq_begin_s.
line_comment := |*
|
2494
|
+
# Closing line: `=end`, any further c_line characters, then c_nl_zlen.
# Emits one comment spanning eq_begin_s .. te and resumes in the state held
# in state->cs_before_block_comment (stored outside this block).
'=end' c_line* c_nl_zlen => {
|
2495
|
+
emit_comment(state, state->eq_begin_s, te);
|
2496
|
+
fgoto *state->cs_before_block_comment;
|
2497
|
+
};
|
2498
|
+
|
2499
|
+
# A complete line terminated by c_nl: matched with no action.
c_line* c_nl;
|
2500
|
+
|
2501
|
+
# A run of c_line characters that is not terminated by c_nl.
# NOTE(review): presumably this means the input ended before `=end` was
# seen -- confirm against the definitions of c_line / c_nl.
# Reports a fatal embedded_document diagnostic.
c_line* zlen => {
|
2502
|
+
diagnostic(state, fatal, embedded_document, Qnil,
|
2503
|
+
/* 6 is the length of "=begin": assuming eq_begin_s points at that marker (as line_begin sets it), the range covers the marker only. */
range(state, state->eq_begin_s, state->eq_begin_s + 6),
|
2504
|
+
empty_array);
|
2505
|
+
};
|
2506
|
+
*|;
|
2507
|
+
|
2508
|
+
# Scanner `line_begin`: recognises `=begin` and `__END__` markers and
# otherwise hands over to expr_value. The leading_dot scanner selects it
# with `fnext line_begin` after emitting tNL.
line_begin := |*
|
2509
|
+
# Matched with no action. w_any is defined outside this chunk; presumably
# whitespace and/or comments -- TODO confirm.
w_any;
|
2510
|
+
|
2511
|
+
# `=begin` followed by c_space or c_nl_zlen: remember where the marker
# starts (ts) and continue in line_comment.
'=begin' ( c_space | c_nl_zlen ) => {
|
2512
|
+
state->eq_begin_s = ts;
|
2513
|
+
fgoto line_comment;
|
2514
|
+
};
|
2515
|
+
|
2516
|
+
# `__END__` followed by c_eol with the empty match excluded: move p to
# pe - 3.
# NOTE(review): presumably this skips the rest of the input so nothing after
# __END__ is lexed; the reason for the exact -3 offset is not visible here
# -- confirm.
'__END__' ( c_eol - zlen ) => { p = pe - 3; };
|
2517
|
+
|
2518
|
+
# Any other character: give it back with fhold and continue in expr_value.
c_any => { fhold; fgoto expr_value; };
|
2519
|
+
|
2520
|
+
# c_eof: run the named action do_eof (defined outside this chunk).
c_eof => do_eof;
|
2521
|
+
*|;
|
2522
|
+
}%%
|