rbs 1.7.0.beta.1 → 1.7.0.beta.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +20 -1
- data/.gitignore +9 -1
- data/CHANGELOG.md +20 -9
- data/Rakefile +16 -1
- data/core/binding.rbs +2 -0
- data/core/complex.rbs +0 -2
- data/core/float.rbs +0 -2
- data/core/integer.rbs +0 -2
- data/core/numeric.rbs +7 -0
- data/core/object.rbs +1 -1
- data/core/proc.rbs +2 -0
- data/core/rational.rbs +0 -2
- data/core/unbound_method.rbs +13 -0
- data/docs/rbs_by_example.md +2 -2
- data/docs/syntax.md +2 -3
- data/ext/{rbs/extension → rbs_extension}/constants.c +0 -1
- data/ext/{rbs/extension → rbs_extension}/constants.h +0 -0
- data/ext/{rbs/extension → rbs_extension}/extconf.rb +1 -1
- data/ext/rbs_extension/lexer.c +2533 -0
- data/ext/{rbs/extension → rbs_extension}/lexer.h +33 -17
- data/ext/rbs_extension/lexer.re +140 -0
- data/ext/rbs_extension/lexstate.c +139 -0
- data/ext/{rbs/extension → rbs_extension}/location.c +0 -0
- data/ext/{rbs/extension → rbs_extension}/location.h +0 -0
- data/ext/{rbs/extension → rbs_extension}/main.c +1 -1
- data/ext/{rbs/extension → rbs_extension}/parser.c +6 -32
- data/ext/{rbs/extension → rbs_extension}/parser.h +0 -5
- data/ext/{rbs/extension → rbs_extension}/parserstate.c +0 -1
- data/ext/{rbs/extension → rbs_extension}/parserstate.h +0 -0
- data/ext/{rbs/extension → rbs_extension}/rbs_extension.h +1 -1
- data/ext/{rbs/extension → rbs_extension}/ruby_objs.c +84 -148
- data/ext/{rbs/extension → rbs_extension}/ruby_objs.h +0 -2
- data/ext/{rbs/extension → rbs_extension}/unescape.c +0 -0
- data/lib/rbs/collection/installer.rb +1 -0
- data/lib/rbs/collection/sources/git.rb +6 -1
- data/lib/rbs/errors.rb +14 -0
- data/lib/rbs/location_aux.rb +13 -0
- data/lib/rbs/parser_aux.rb +39 -0
- data/lib/rbs/parser_compat/lexer_error.rb +4 -0
- data/lib/rbs/parser_compat/located_value.rb +5 -0
- data/lib/rbs/parser_compat/semantics_error.rb +4 -0
- data/lib/rbs/parser_compat/syntax_error.rb +4 -0
- data/lib/rbs/prototype/helpers.rb +113 -0
- data/lib/rbs/prototype/rb.rb +2 -105
- data/lib/rbs/prototype/runtime.rb +16 -0
- data/lib/rbs/types.rb +2 -2
- data/lib/rbs/version.rb +1 -1
- data/lib/rbs.rb +13 -1
- data/rbs.gemspec +1 -1
- data/sig/errors.rbs +10 -0
- data/sig/location.rbs +5 -0
- data/sig/parser.rbs +5 -0
- data/sig/rbs.rbs +4 -0
- data/stdlib/io-console/0/io-console.rbs +137 -0
- data/stdlib/net-http/0/net-http.rbs +2 -1
- data/stdlib/tempfile/0/tempfile.rbs +4 -6
- metadata +27 -19
- data/ext/rbs/extension/lexer.c +0 -1070
data/ext/rbs/extension/lexer.c
DELETED
@@ -1,1070 +0,0 @@
|
|
1
|
-
#include "rbs_extension.h"
|
2
|
-
|
3
|
-
/*
 * Expands to one complete `case` arm for a single-character token: when the
 * switched value equals `c`, `tok` is assigned next_token(state, t) and the
 * switch is left. Relies on `tok` and `state` being in scope where it is used.
 */
#define ONE_CHAR_PATTERN(c, t) case c: tok = next_token(state, t); break
|
4
|
-
|
5
|
-
/**
|
6
|
-
* Returns one character at current.
|
7
|
-
*
|
8
|
-
* ... A B C ...
|
9
|
-
* ^ current => A
|
10
|
-
* */
|
11
|
-
/* The argument is parenthesized so any pointer expression may be passed.
 * It is expanded more than once, so it must not have side effects. */
#define peek(state) rb_enc_mbc_to_codepoint(RSTRING_PTR((state)->string) + (state)->current.byte_pos, RSTRING_END((state)->string), rb_enc_get((state)->string))
|
12
|
-
|
13
|
-
/*
 * Printable names of the token types. token_type_str() indexes this table
 * with an `enum TokenType` value, so the entries are presumably required to
 * stay in the same order as the enum declaration (declared elsewhere --
 * verify when adding a token type).
 */
static const char *const RBS_TOKENTYPE_NAMES[] = {
  "NullType",
  "pEOF",
  "ErrorToken",

  "pLPAREN",          /* ( */
  "pRPAREN",          /* ) */
  "pCOLON",           /* : */
  "pCOLON2",          /* :: */
  "pLBRACKET",        /* [ */
  "pRBRACKET",        /* ] */
  "pLBRACE",          /* { */
  "pRBRACE",          /* } */
  "pHAT",             /* ^ */
  "pARROW",           /* -> */
  "pFATARROW",        /* => */
  "pCOMMA",           /* , */
  "pBAR",             /* | */
  "pAMP",             /* & */
  "pSTAR",            /* * */
  "pSTAR2",           /* ** */
  "pDOT",             /* . */
  "pDOT3",            /* ... */
  "pBANG",            /* ! */
  "pQUESTION",        /* ? */
  "pLT",              /* < */
  "pEQ",              /* = */

  "kBOOL",            /* bool */
  "kBOT",             /* bot */
  "kCLASS",           /* class */
  "kFALSE",           /* false */
  "kINSTANCE",        /* instance */
  "kINTERFACE",       /* interface */
  "kNIL",             /* nil */
  "kSELF",            /* self */
  "kSINGLETON",       /* singleton */
  "kTOP",             /* top */
  "kTRUE",            /* true */
  "kVOID",            /* void */
  "kTYPE",            /* type */
  "kUNCHECKED",       /* unchecked */
  "kIN",              /* in */
  "kOUT",             /* out */
  "kEND",             /* end */
  "kDEF",             /* def */
  "kINCLUDE",         /* include */
  "kEXTEND",          /* extend */
  "kPREPEND",         /* prepend */
  "kALIAS",           /* alias */
  "kMODULE",          /* module */
  "kATTRREADER",      /* attr_reader */
  "kATTRWRITER",      /* attr_writer */
  "kATTRACCESSOR",    /* attr_accessor */
  "kPUBLIC",          /* public */
  "kPRIVATE",         /* private */
  "kUNTYPED",         /* untyped */

  "tLIDENT",          /* Identifiers starting with lower case */
  "tUIDENT",          /* Identifiers starting with upper case */
  "tULIDENT",         /* Identifiers starting with `_` followed by upper case */
  "tULLIDENT",        /* `_` alone, or `_` followed by a non-upper-case identifier character */
  "tGIDENT",          /* Identifiers starting with `$` */
  "tAIDENT",          /* Identifiers starting with `@` */
  "tA2IDENT",         /* Identifiers starting with `@@` */
  "tBANGIDENT",       /* Identifiers ending with `!` */
  "tEQIDENT",         /* Identifiers ending with `=` */
  "tQIDENT",          /* Quoted identifier */
  "tOPERATOR",        /* Operator identifier */

  "tCOMMENT",         /* `#` comment that follows another token on its line */
  "tLINECOMMENT",     /* `#` comment that is the first token of its line */

  "tDQSTRING",        /* Double quoted string */
  "tSQSTRING",        /* Single quoted string */
  "tINTEGER",         /* Integer */
  "tSYMBOL",          /* Symbol */
  "tDQSYMBOL",        /* Double quoted symbol */
  "tSQSYMBOL",        /* Single quoted symbol */
  "tANNOTATION",      /* Annotation */
};
|
94
|
-
|
95
|
-
/* Token whose type is NullType; the remaining members are zero-initialized
 * by the partial initializer. */
token NullToken = { NullType };
|
96
|
-
/* Position initialized with four -1 values (presumably meaning "no position";
 * confirm against the `position` declaration). */
position NullPosition = { -1, -1, -1, -1 };
|
97
|
-
/* Range whose two positions are both initialized with four -1 values. */
range NULL_RANGE = { { -1, -1, -1, -1 }, { -1, -1, -1, -1 } };
|
98
|
-
|
99
|
-
const char *token_type_str(enum TokenType type) {
|
100
|
-
return RBS_TOKENTYPE_NAMES[type];
|
101
|
-
}
|
102
|
-
|
103
|
-
/**
 * Decodes up to `length` characters starting at the current position without
 * advancing the lexer.
 *
 * @param state   Lexer state; only read.
 * @param chars   Output array with room for `length` code points.
 * @param length  Number of characters to look ahead.
 * @return        Number of bytes occupied by the characters actually decoded.
 *
 * Slots that would lie at or beyond the end of the string are filled with 0
 * and contribute no bytes. Previously decoding continued past the end of the
 * string buffer once the terminator had been consumed.
 */
unsigned int peekn(lexstate *state, unsigned int chars[], size_t length) {
  rb_encoding *encoding = rb_enc_get(state->string);
  char *start = RSTRING_PTR(state->string) + state->current.byte_pos;
  char *end = RSTRING_END(state->string);
  long available = (long) (end - start);
  unsigned int byteoffset = 0;

  for (size_t i = 0; i < length; i++) {
    if ((long) byteoffset >= available) {
      /* Nothing left to decode: report end of input for this slot. */
      chars[i] = 0;
      continue;
    }

    chars[i] = rb_enc_mbc_to_codepoint(start + byteoffset, end, encoding);
    byteoffset += (unsigned int) rb_enc_codelen(chars[i], encoding);
  }

  return byteoffset;
}
|
118
|
-
|
119
|
-
/* Length of the token in characters: end char_pos minus start char_pos. */
int token_chars(token tok) {
  range span = tok.range;

  return span.end.char_pos - span.start.char_pos;
}
|
122
|
-
|
123
|
-
/* Length of the token in bytes, as computed by RANGE_BYTES on its range. */
int token_bytes(token tok) {
  range span = tok.range;

  return RANGE_BYTES(span);
}
|
126
|
-
|
127
|
-
/**
|
128
|
-
* ... token ...
|
129
|
-
* ^ start
|
130
|
-
* ^ current
|
131
|
-
*
|
132
|
-
* */
|
133
|
-
/*
 * Builds a token of the given type covering [state->start, state->current),
 * then moves state->start up to state->current and clears
 * first_token_of_line. Only the `type` and `range` members of the returned
 * token are assigned here.
 */
token next_token(lexstate *state, enum TokenType type) {
  token emitted;

  emitted.range.start = state->start;
  emitted.range.end = state->current;
  emitted.type = type;

  /* The next token begins where this one ended. */
  state->first_token_of_line = false;
  state->start = emitted.range.end;

  return emitted;
}
|
144
|
-
|
145
|
-
/*
 * Moves the current position past the character `c`.
 *
 * char_pos grows by one and byte_pos by the encoded length of `c`. A newline
 * starts a new line (line + 1, column 0, first_token_of_line set); any other
 * character advances the column. When `skip` is true the token start is
 * moved up to the new current position as well.
 */
void advance_skip(lexstate *state, unsigned int c, bool skip) {
  const int encoded_len = rb_enc_codelen(c, rb_enc_get(state->string));
  const bool is_newline = (c == '\n');

  state->current.char_pos += 1;
  state->current.byte_pos += encoded_len;

  if (!is_newline) {
    state->current.column += 1;
  } else {
    state->current.line += 1;
    state->current.column = 0;
    state->first_token_of_line = true;
  }

  if (skip) {
    state->start = state->current;
  }
}
|
163
|
-
|
164
|
-
/* Moves past `c` while keeping the token start where it is. */
void advance_char(lexstate *state, unsigned int c) {
  const bool move_start = false;

  advance_skip(state, c, move_start);
}
|
167
|
-
|
168
|
-
/* Moves past `c` and drags the token start along with the current position. */
void skip_char(lexstate *state, unsigned int c) {
  const bool move_start = true;

  advance_skip(state, c, move_start);
}
|
171
|
-
|
172
|
-
/*
 * Skips the character at the current position (the token start moves too).
 *
 * The code point is kept as `unsigned int`: storing it in an `unsigned char`
 * truncated every character above 0xFF, so skip_char() was handed the wrong
 * code point and advanced by the wrong number of bytes.
 */
void skip(lexstate *state) {
  unsigned int c = peek(state);
  skip_char(state, c);
}
|
176
|
-
|
177
|
-
/*
 * Advances past the character at the current position (the token start is
 * left alone).
 *
 * The code point is kept as `unsigned int`: storing it in an `unsigned char`
 * truncated every character above 0xFF, so advance_char() was handed the
 * wrong code point and advanced by the wrong number of bytes.
 */
void advance(lexstate *state) {
  unsigned int c = peek(state);
  advance_char(state, c);
}
|
181
|
-
|
182
|
-
/*
|
183
|
-
1. Peek one character from state
|
184
|
-
2. If the character read equals the given `c`, advance past it and return true.
|
185
|
-
3. Return false otherwise.
|
186
|
-
*/
|
187
|
-
/* Consumes the next character only when it is `c`; reports whether it did. */
static bool advance_next_character_if(lexstate *state, unsigned int c) {
  if (peek(state) != c) {
    return false;
  }

  advance_char(state, c);
  return true;
}
|
195
|
-
|
196
|
-
/*
|
197
|
-
... 0 1 ...
|
198
|
-
^ current
|
199
|
-
^ current (return)
|
200
|
-
*/
|
201
|
-
/* Consumes a run of digits and underscores and emits it as tINTEGER. */
static token lex_number(lexstate *state) {
  for (unsigned int ch = peek(state); rb_isdigit(ch) || ch == '_'; ch = peek(state)) {
    advance_char(state, ch);
  }

  return next_token(state, tINTEGER);
}
|
216
|
-
|
217
|
-
/*
|
218
|
-
lex_hyphen ::= - (tOPERATOR)
|
219
|
-
| - @ (tOPERATOR)
|
220
|
-
| - > (pARROW)
|
221
|
-
| - 1 ... (tINTEGER)
|
222
|
-
*/
|
223
|
-
/*
 * Called after a `-` has been consumed. Emits pARROW for `->`, tOPERATOR for
 * `-@` or a lone `-`, and a tINTEGER when a digit follows.
 */
static token lex_hyphen(lexstate* state) {
  if (advance_next_character_if(state, '>')) {
    return next_token(state, pARROW);
  }

  if (advance_next_character_if(state, '@')) {
    return next_token(state, tOPERATOR);
  }

  unsigned int following = peek(state);

  if (!rb_isdigit(following)) {
    return next_token(state, tOPERATOR);
  }

  advance_char(state, following);
  return lex_number(state);
}
|
239
|
-
|
240
|
-
/*
|
241
|
-
lex_plus ::= +
|
242
|
-
| + @
|
243
|
-
| + \d
|
244
|
-
*/
|
245
|
-
/*
 * Called after a `+` has been consumed. `+@` and a lone `+` are tOPERATOR;
 * a following digit turns the token into a number.
 */
static token lex_plus(lexstate *state) {
  const bool unary_name = advance_next_character_if(state, '@');

  if (!unary_name && rb_isdigit(peek(state))) {
    return lex_number(state);
  }

  return next_token(state, tOPERATOR);
}
|
254
|
-
|
255
|
-
/*
|
256
|
-
lex_dot ::= . pDOT
|
257
|
-
| . . . pDOT3
|
258
|
-
*/
|
259
|
-
/* Called after a `.` has been consumed: emits pDOT3 for `...`, else pDOT. */
static token lex_dot(lexstate *state) {
  unsigned int ahead[2];

  peekn(state, ahead, 2);

  if (ahead[0] != '.' || ahead[1] != '.') {
    return next_token(state, pDOT);
  }

  advance_char(state, '.');
  advance_char(state, '.');
  return next_token(state, pDOT3);
}
|
272
|
-
|
273
|
-
/*
|
274
|
-
lex_eq ::= =
|
275
|
-
| ==
|
276
|
-
| ===
|
277
|
-
| =~
|
278
|
-
| =>
|
279
|
-
*/
|
280
|
-
/*
 * Called after a `=` has been consumed.
 *
 *   ==, ===, =~  -> tOPERATOR
 *   =>           -> pFATARROW
 *   =            -> pEQ
 */
static token lex_eq(lexstate *state) {
  unsigned int ahead[2];

  peekn(state, ahead, 2);

  switch (ahead[0]) {
  case '=':
    advance_char(state, ahead[0]);
    if (ahead[1] == '=') {
      /* third `=` of `===` */
      advance_char(state, ahead[1]);
    }
    return next_token(state, tOPERATOR);

  case '~':
    advance_char(state, ahead[0]);
    return next_token(state, tOPERATOR);

  case '>':
    advance_char(state, ahead[0]);
    return next_token(state, pFATARROW);

  default:
    return next_token(state, pEQ);
  }
}
|
305
|
-
|
306
|
-
/*
|
307
|
-
underscore ::= _A tULIDENT
|
308
|
-
| _a tULLIDENT
|
309
|
-
| _ tULLIDENT
|
310
|
-
*/
|
311
|
-
/*
 * Called after a `_` has been consumed.
 *
 *   `_` + upper-case letter + [A-Za-z0-9_]*   -> tULIDENT
 *   `_` + other alnum or `_` + [A-Za-z0-9_]*  -> tULLIDENT, or tBANGIDENT /
 *                                                tEQIDENT when `!` / `=` follows
 *   `_` alone                                 -> tULLIDENT
 */
static token lex_underscore(lexstate *state) {
  unsigned int c = peek(state);
  const bool starts_upper = ('A' <= c && c <= 'Z');

  if (!starts_upper && !(rb_isalnum(c) || c == '_')) {
    return next_token(state, tULLIDENT);
  }

  /* `c` is a valid identifier character here; consume the whole run. */
  do {
    advance_char(state, c);
    c = peek(state);
  } while (rb_isalnum(c) || c == '_');

  if (starts_upper) {
    return next_token(state, tULIDENT);
  }

  if (c == '!' || c == '=') {
    advance_char(state, c);
    return next_token(state, c == '!' ? tBANGIDENT : tEQIDENT);
  }

  return next_token(state, tULLIDENT);
}
|
358
|
-
|
359
|
-
/* Tells whether `c` is one of the punctuation characters that end a global
 * variable name in lex_global(). */
static bool is_opr(unsigned int c) {
  static const char punctuation[] = ":;=.,!\"$%&()-+~|\\'[]{}*/<>^";

  /* sizeof - 1: the string terminator is not part of the set. */
  for (size_t i = 0; i < sizeof(punctuation) - 1; i++) {
    if (c == (unsigned int) (unsigned char) punctuation[i]) {
      return true;
    }
  }

  return false;
}
|
393
|
-
|
394
|
-
/*
 * Called after a `$` has been consumed. Emits tGIDENT for:
 *
 *   `$` digits
 *   `$-` followed by one alphanumeric character or `_`
 *   `$` followed by one of the special characters listed in the switch
 *   `$` followed by a run of characters up to whitespace, an is_opr()
 *       character or NUL
 *
 * Anything else yields ErrorToken.
 */
static token lex_global(lexstate *state) {
  unsigned int c = peek(state);

  if (c == 0 || rb_isspace(c)) {
    return next_token(state, ErrorToken);
  }

  if (rb_isdigit(c)) {
    /* `$` [0-9]+ */
    do {
      advance_char(state, c);
      c = peek(state);
    } while (rb_isdigit(c));

    return next_token(state, tGIDENT);
  }

  if (c == '-') {
    /* `$` `-` [a-zA-Z0-9_] */
    advance_char(state, c);
    c = peek(state);

    if (!(rb_isalnum(c) || c == '_')) {
      return next_token(state, ErrorToken);
    }

    advance_char(state, c);
    return next_token(state, tGIDENT);
  }

  switch (c) {
  case '~': case '*': case '$': case '?': case '!':
  case '@': case '\\': case '/': case ';': case ',':
  case '.': case '=': case ':': case '<': case '>':
  case '"': case '&': case '\'': case '`': case '+':
    advance_char(state, c);
    return next_token(state, tGIDENT);

  default:
    break;
  }

  if (is_opr(c) || c == 0) {
    return next_token(state, ErrorToken);
  }

  /* Ordinary name: take characters until a delimiter shows up. */
  do {
    advance_char(state, c);
    c = peek(state);
  } while (!(rb_isspace(c) || is_opr(c) || c == 0));

  return next_token(state, tGIDENT);
}
|
471
|
-
|
472
|
-
/*
 * Debug helper: prints `object.inspect` to stdout, prefixed with "pp >> ".
 *
 * The text is written with an explicit length instead of relying on the
 * string buffer being NUL-terminated.
 */
void pp(VALUE object) {
  VALUE inspect = rb_funcall(object, rb_intern("inspect"), 0);
  printf("pp >> %.*s\n", (int) RSTRING_LEN(inspect), RSTRING_PTR(inspect));
}
|
476
|
-
|
477
|
-
/*
 * Lexes the rest of an identifier whose first character has already been
 * consumed.
 *
 * A trailing `!` or `=` is included and yields tBANGIDENT or tEQIDENT;
 * otherwise the token gets `default_type`. A tLIDENT token is then looked up
 * in RBS_Parser_KEYWORDS and, when the lookup returns a Fixnum, takes that
 * value as its type.
 */
static token lex_ident(lexstate *state, enum TokenType default_type) {
  unsigned int c = peek(state);

  while (rb_isalnum(c) || c == '_') {
    advance_char(state, c);
    c = peek(state);
  }

  enum TokenType type = default_type;

  if (c == '!') {
    advance_char(state, c);
    type = tBANGIDENT;
  } else if (c == '=') {
    advance_char(state, c);
    type = tEQIDENT;
  }

  token tok = next_token(state, type);

  if (tok.type != tLIDENT) {
    return tok;
  }

  VALUE text = rb_enc_str_new(
    RSTRING_PTR(state->string) + tok.range.start.byte_pos,
    RANGE_BYTES(tok.range),
    rb_enc_get(state->string)
  );
  VALUE keyword = rb_hash_aref(RBS_Parser_KEYWORDS, text);

  if (FIXNUM_P(keyword)) {
    tok.type = FIX2INT(keyword);
  }

  return tok;
}
|
514
|
-
|
515
|
-
/*
 * Lexes a comment; the leading `#` has already been consumed.
 *
 * One optional space after the `#` is consumed, then everything up to (not
 * including) the next newline or NUL. The newline that ends the comment is
 * skipped after the token has been built, so it is not part of the token.
 *
 * @param type  tCOMMENT or tLINECOMMENT, chosen by the caller.
 *
 * A NUL is left in place. The previous code skipped it as well, which moved
 * the current position past the end of the string when a comment was the
 * last thing in the input, so the following peek read outside the buffer.
 */
static token lex_comment(lexstate *state, enum TokenType type) {
  unsigned int c = peek(state);

  if (c == ' ') {
    advance_char(state, c);
  }

  while (true) {
    c = peek(state);

    if (c == '\n' || c == '\0') {
      break;
    }

    advance_char(state, c);
  }

  token tok = next_token(state, type);

  if (c == '\n') {
    skip_char(state, c);
  }

  return tok;
}
|
539
|
-
|
540
|
-
/*
|
541
|
-
... " ... " ...
|
542
|
-
^ start
|
543
|
-
^ current
|
544
|
-
^ current (after)
|
545
|
-
*/
|
546
|
-
/*
 * Lexes a double quoted string; the opening `"` has already been consumed.
 * Consumes up to and including the closing `"` and emits tDQSTRING.
 *
 * Fixes over the previous version:
 *  - Reaching NUL before the closing quote yields ErrorToken instead of
 *    advancing past the end of the string buffer.
 *  - A backslash always escapes the character after it, so a literal ending
 *    in an escaped backslash (`"a\\"`) is terminated by its closing quote
 *    rather than having that quote taken as escaped.
 */
static token lex_dqstring(lexstate *state) {
  while (true) {
    unsigned int c = peek(state);

    if (c == '\0') {
      /* unterminated literal */
      return next_token(state, ErrorToken);
    }

    advance_char(state, c);

    if (c == '"') {
      break;
    }

    if (c == '\\') {
      unsigned int escaped = peek(state);

      if (escaped != '\0') {
        advance_char(state, escaped);
      }
    }
  }

  return next_token(state, tDQSTRING);
}
|
565
|
-
|
566
|
-
/*
|
567
|
-
... @ foo ...
|
568
|
-
^ start
|
569
|
-
^ current
|
570
|
-
^ current (return)
|
571
|
-
|
572
|
-
... @ @ foo ...
|
573
|
-
^ start
|
574
|
-
^ current
|
575
|
-
^ current (return)
|
576
|
-
*/
|
577
|
-
/*
 * Called after one `@` has been consumed. A second `@` selects tA2IDENT
 * instead of tAIDENT. The name must start with a letter or `_` (otherwise
 * ErrorToken) and continues over letters, digits and `_`.
 */
static token lex_ivar(lexstate *state) {
  enum TokenType type = tAIDENT;
  unsigned int c = peek(state);

  if (c == '@') {
    type = tA2IDENT;
    advance_char(state, c);
    c = peek(state);
  }

  if (!(rb_isalpha(c) || c == '_')) {
    return next_token(state, ErrorToken);
  }

  do {
    advance_char(state, c);
    c = peek(state);
  } while (rb_isalnum(c) || c == '_');

  return next_token(state, type);
}
|
604
|
-
|
605
|
-
/*
|
606
|
-
... ' ... ' ...
|
607
|
-
^ start
|
608
|
-
^ current
|
609
|
-
^ current (after)
|
610
|
-
*/
|
611
|
-
/*
 * Lexes a single quoted string; the opening `'` has already been consumed.
 * Consumes up to and including the closing `'` and emits tSQSTRING.
 *
 * Fixes over the previous version:
 *  - Reaching NUL before the closing quote yields ErrorToken instead of
 *    advancing past the end of the string buffer.
 *  - A backslash always escapes the character after it, so a literal ending
 *    in an escaped backslash (`'a\\'`) is terminated by its closing quote
 *    rather than having that quote taken as escaped.
 */
static token lex_sqstring(lexstate *state) {
  while (true) {
    unsigned int c = peek(state);

    if (c == '\0') {
      /* unterminated literal */
      return next_token(state, ErrorToken);
    }

    advance_char(state, c);

    if (c == '\'') {
      break;
    }

    if (c == '\\') {
      unsigned int escaped = peek(state);

      if (escaped != '\0') {
        advance_char(state, escaped);
      }
    }
  }

  return next_token(state, tSQSTRING);
}
|
632
|
-
|
633
|
-
/*
 * Compare two / three code points against the leading characters of the
 * string `s`. Every argument is parenthesized so that expressions can be
 * passed without being regrouped by operator precedence.
 */
#define EQPOINTS2(c0, c1, s) ((c0) == (s)[0] && (c1) == (s)[1])
#define EQPOINTS3(c0, c1, c2, s) ((c0) == (s)[0] && (c1) == (s)[1] && (c2) == (s)[2])
|
635
|
-
|
636
|
-
/*
|
637
|
-
... : @ ...
|
638
|
-
^ start
|
639
|
-
^ current
|
640
|
-
^ current (return)
|
641
|
-
*/
|
642
|
-
/*
 * Called after a single `:` has been consumed; decides whether a symbol
 * literal follows.
 *
 *   operator symbols (`:+`, `:<=>`, `:[]=`, ...)  -> tSYMBOL
 *   `:@name`, `:@@name`, `:$name`                 -> tSYMBOL (ErrorToken is
 *                                                    passed through)
 *   `:'...'` / `:"..."`                           -> tSQSYMBOL / tDQSYMBOL
 *   `:name`, `:name?`, `:name!`, `:name=`         -> tSYMBOL
 *   anything else                                 -> pCOLON
 */
static token lex_colon_symbol(lexstate *state) {
  unsigned int c[3];
  /* How many of the peeked characters form an operator symbol (0 = none). */
  int operator_len = 0;

  peekn(state, c, 3);

  switch (c[0]) {
  case '|': case '&': case '/': case '%':
  case '~': case '`': case '^':
    operator_len = 1;
    break;

  case '=':
    /* :=~  :===  :==   (a lone `:=` is not a symbol) */
    if (c[1] == '~') {
      operator_len = 2;
    } else if (c[1] == '=') {
      operator_len = (c[2] == '=') ? 3 : 2;
    }
    break;

  case '<':
    /* :<=>  :<=  :<<  :< */
    if (c[1] == '=') {
      operator_len = (c[2] == '>') ? 3 : 2;
    } else {
      operator_len = (c[1] == '<') ? 2 : 1;
    }
    break;

  case '>':
    /* :>=  :>>  :> */
    operator_len = (c[1] == '=' || c[1] == '>') ? 2 : 1;
    break;

  case '-':
  case '+':
    /* :-@  :+@  :-  :+ */
    operator_len = (c[1] == '@') ? 2 : 1;
    break;

  case '*':
    /* :**  :* */
    operator_len = (c[1] == '*') ? 2 : 1;
    break;

  case '[':
    /* :[]=  :[]   (a lone `:[` is not a symbol) */
    if (c[1] == ']') {
      operator_len = (c[2] == '=') ? 3 : 2;
    }
    break;

  case '!':
    /* :!=  :!~  :! */
    operator_len = (c[1] == '=' || c[1] == '~') ? 2 : 1;
    break;

  case '@': {
    advance_char(state, '@');
    token tok = lex_ivar(state);
    if (tok.type != ErrorToken) {
      tok.type = tSYMBOL;
    }
    return tok;
  }

  case '$': {
    advance_char(state, '$');
    token tok = lex_global(state);
    if (tok.type != ErrorToken) {
      tok.type = tSYMBOL;
    }
    return tok;
  }

  case '\'': {
    position start = state->start;
    advance_char(state, '\'');
    token tok = lex_sqstring(state);
    tok.type = tSQSYMBOL;
    tok.range.start = start;
    return tok;
  }

  case '"': {
    position start = state->start;
    advance_char(state, '"');
    token tok = lex_dqstring(state);
    tok.type = tDQSYMBOL;
    tok.range.start = start;
    return tok;
  }

  default:
    if (rb_isalpha(c[0]) || c[0] == '_') {
      position start = state->start;
      token tok = lex_ident(state, NullType);
      tok.range.start = start;

      /* A trailing `?` belongs to the symbol unless `!` or `=` already ended it. */
      if (peek(state) == '?' && tok.type != tBANGIDENT && tok.type != tEQIDENT) {
        skip_char(state, '?');
        tok.range.end = state->current;
      }

      tok.type = tSYMBOL;
      return tok;
    }
    break;
  }

  for (int i = 0; i < operator_len; i++) {
    advance_char(state, c[i]);
  }

  return next_token(state, operator_len > 0 ? tSYMBOL : pCOLON);
}
|
784
|
-
|
785
|
-
/*
|
786
|
-
... : : ...
|
787
|
-
^ start
|
788
|
-
^ current
|
789
|
-
^ current (return)
|
790
|
-
|
791
|
-
... : ...
|
792
|
-
^ start
|
793
|
-
^ current (lex_colon_symbol)
|
794
|
-
*/
|
795
|
-
/* Called after a `:` has been consumed: `::` is pCOLON2, everything else is
 * handed to lex_colon_symbol(). */
static token lex_colon(lexstate *state) {
  if (peek(state) != ':') {
    return lex_colon_symbol(state);
  }

  advance_char(state, ':');
  return next_token(state, pCOLON2);
}
|
805
|
-
|
806
|
-
/*
|
807
|
-
lex_lt ::= < (pLT)
|
808
|
-
| < < (tOPERATOR)
|
809
|
-
| < = (tOPERATOR)
|
810
|
-
| < = > (tOPERATOR)
|
811
|
-
*/
|
812
|
-
/* Called after a `<` has been consumed: `<<`, `<=` and `<=>` are tOPERATOR,
 * a lone `<` is pLT. */
static token lex_lt(lexstate *state) {
  if (advance_next_character_if(state, '<')) {
    return next_token(state, tOPERATOR);
  }

  if (!advance_next_character_if(state, '=')) {
    return next_token(state, pLT);
  }

  /* `<=` so far; take the `>` of `<=>` when it is there. */
  (void) advance_next_character_if(state, '>');
  return next_token(state, tOPERATOR);
}
|
822
|
-
|
823
|
-
/*
|
824
|
-
lex_gt ::= >
|
825
|
-
| > =
|
826
|
-
| > >
|
827
|
-
*/
|
828
|
-
/* Called after a `>` has been consumed: `>`, `>=` and `>>` are all tOPERATOR. */
static token lex_gt(lexstate *state) {
  if (!advance_next_character_if(state, '=')) {
    (void) advance_next_character_if(state, '>');
  }

  return next_token(state, tOPERATOR);
}
|
832
|
-
|
833
|
-
/*
|
834
|
-
... `%` `a` `{` ... `}` ...
|
835
|
-
^ start
|
836
|
-
^ current
|
837
|
-
^ current (exit)
|
838
|
-
--- token
|
839
|
-
*/
|
840
|
-
/*
 * Called after a `%` has been consumed.
 *
 * `%a` followed by one of `{ ( [ | <` opens an annotation that runs to the
 * matching `} ) ] | >` and is emitted as tANNOTATION; if NUL is reached
 * first the result is ErrorToken. Any other `%` is tOPERATOR.
 */
static token lex_percent(lexstate *state) {
  unsigned int ahead[2];
  unsigned int closer = 0;

  peekn(state, ahead, 2);

  if (ahead[0] == 'a') {
    switch (ahead[1]) {
    case '{': closer = '}'; break;
    case '(': closer = ')'; break;
    case '[': closer = ']'; break;
    case '|': closer = '|'; break;
    case '<': closer = '>'; break;
    default:  break;
    }
  }

  if (closer == 0) {
    return next_token(state, tOPERATOR);
  }

  advance_char(state, ahead[0]);
  advance_char(state, ahead[1]);

  for (unsigned int c = peek(state); c != 0; c = peek(state)) {
    advance_char(state, c);

    if (c == closer) {
      return next_token(state, tANNOTATION);
    }
  }

  return next_token(state, ErrorToken);
}
|
886
|
-
|
887
|
-
/*
|
888
|
-
bracket ::= [ (pLBRACKET)
|
889
|
-
* ^
|
890
|
-
| [ ] (tOPERATOR)
|
891
|
-
* ^ $
|
892
|
-
| [ ] = (tOPERATOR)
|
893
|
-
* ^ $
|
894
|
-
*/
|
895
|
-
/* Called after a `[` has been consumed: `[]` and `[]=` are tOPERATOR, a lone
 * `[` is pLBRACKET. */
static token lex_bracket(lexstate *state) {
  if (!advance_next_character_if(state, ']')) {
    return next_token(state, pLBRACKET);
  }

  (void) advance_next_character_if(state, '=');
  return next_token(state, tOPERATOR);
}
|
903
|
-
|
904
|
-
/*
|
905
|
-
star ::= *
|
906
|
-
| * *
|
907
|
-
*/
|
908
|
-
/* Called after a `*` has been consumed: `**` is pSTAR2, a lone `*` is pSTAR. */
static token lex_star(lexstate *state) {
  const bool doubled = advance_next_character_if(state, '*');

  return next_token(state, doubled ? pSTAR2 : pSTAR);
}
|
915
|
-
|
916
|
-
/*
|
917
|
-
bang ::= !
|
918
|
-
| ! =
|
919
|
-
| ! ~
|
920
|
-
*/
|
921
|
-
/* Called after a `!` has been consumed: `!`, `!=` and `!~` are all tOPERATOR. */
static token lex_bang(lexstate *state) {
  if (!advance_next_character_if(state, '=')) {
    (void) advance_next_character_if(state, '~');
  }

  return next_token(state, tOPERATOR);
}
|
925
|
-
|
926
|
-
/*
|
927
|
-
backquote ::= ` (tOPERATOR)
|
928
|
-
| `[^ :][^`]` (tQIDENT)
|
929
|
-
*/
|
930
|
-
/*
 * Called after a backquote has been consumed.
 *
 * A backquote followed by a space, `:` or end of input is the backquote
 * operator (tOPERATOR). Otherwise everything up to and including the next
 * backquote is a quoted identifier (tQIDENT).
 *
 * Reaching NUL before the closing backquote yields ErrorToken. The previous
 * loop had no end-of-input check and kept advancing past the end of the
 * string buffer when the closing backquote was missing.
 */
static token lex_backquote(lexstate *state) {
  unsigned int c = peek(state);

  if (c == ' ' || c == ':' || c == '\0') {
    return next_token(state, tOPERATOR);
  }

  while (c != '`') {
    c = peek(state);

    if (c == '\0') {
      /* unterminated quoted identifier */
      return next_token(state, ErrorToken);
    }

    advance_char(state, c);
  }

  return next_token(state, tQIDENT);
}
|
948
|
-
|
949
|
-
/*
 * Returns the next token of the input.
 *
 * Spaces, tabs and newlines before the token are skipped. NUL yields pEOF
 * without being consumed. Otherwise the first character is consumed and the
 * token is completed by the matching lex_* helper; when nothing recognizes
 * the character, an ErrorToken covering it is returned.
 *
 * Changes over the previous version: the `case '\0'` arm of the dispatch
 * switch lacked a `break` and fell through into the `(` arm (it cannot be
 * reached today because the skip loop returns on NUL, but it would have
 * produced pLPAREN); the mutually exclusive checks in `default` are now an
 * `else if` chain.
 */
token rbsparser_next_token(lexstate *state) {
  token tok = NullToken;
  unsigned int c;

  /* Skip leading whitespace, then consume the token's first character. */
  for (;;) {
    c = peek(state);

    if (c == '\0') {
      return next_token(state, pEOF);
    }

    if (c != ' ' && c != '\t' && c != '\n') {
      advance_char(state, c);
      break;
    }

    skip_char(state, c);
  }

  /* ... c d ..                 */
  /*     ^ start                */
  /*       ^ state->current     */
  switch (c) {
    case '\0':
      tok = next_token(state, pEOF);
      break;
    ONE_CHAR_PATTERN('(', pLPAREN);
    ONE_CHAR_PATTERN(')', pRPAREN);
    ONE_CHAR_PATTERN(']', pRBRACKET);
    ONE_CHAR_PATTERN('{', pLBRACE);
    ONE_CHAR_PATTERN('}', pRBRACE);
    ONE_CHAR_PATTERN(',', pCOMMA);
    ONE_CHAR_PATTERN('|', pBAR);
    ONE_CHAR_PATTERN('^', pHAT);
    ONE_CHAR_PATTERN('&', pAMP);
    ONE_CHAR_PATTERN('?', pQUESTION);
    ONE_CHAR_PATTERN('/', tOPERATOR);
    ONE_CHAR_PATTERN('~', tOPERATOR);
    case '[':
      tok = lex_bracket(state);
      break;
    case '-':
      tok = lex_hyphen(state);
      break;
    case '+':
      tok = lex_plus(state);
      break;
    case '*':
      tok = lex_star(state);
      break;
    case '<':
      tok = lex_lt(state);
      break;
    case '=':
      tok = lex_eq(state);
      break;
    case '>':
      tok = lex_gt(state);
      break;
    case '!':
      tok = lex_bang(state);
      break;
    case '#':
      /* A comment that is the first token of its line is a line comment. */
      if (state->first_token_of_line) {
        tok = lex_comment(state, tLINECOMMENT);
      } else {
        tok = lex_comment(state, tCOMMENT);
      }
      break;
    case ':':
      tok = lex_colon(state);
      break;
    case '.':
      tok = lex_dot(state);
      break;
    case '_':
      tok = lex_underscore(state);
      break;
    case '$':
      tok = lex_global(state);
      break;
    case '@':
      tok = lex_ivar(state);
      break;
    case '"':
      tok = lex_dqstring(state);
      break;
    case '\'':
      tok = lex_sqstring(state);
      break;
    case '%':
      tok = lex_percent(state);
      break;
    case '`':
      tok = lex_backquote(state);
      break;
    default:
      if (rb_isalpha(c) && rb_isupper(c)) {
        tok = lex_ident(state, tUIDENT);
      } else if (rb_isalpha(c) && rb_islower(c)) {
        tok = lex_ident(state, tLIDENT);
      } else if (rb_isdigit(c)) {
        tok = lex_number(state);
      }
      break;
  }

  /* Nothing recognized the character: report it as an error token. */
  if (tok.type == NullType) {
    tok = next_token(state, ErrorToken);
  }

  return tok;
}
|
1067
|
-
|
1068
|
-
/* Returns a pointer into the lexer's string buffer at the first byte of
 * `tok`. The pointer is not a NUL-terminated copy of the token text. */
char *peek_token(lexstate *state, token tok) {
  char *buffer = RSTRING_PTR(state->string);

  return &buffer[tok.range.start.byte_pos];
}
|