rbs 1.7.0.beta.1 → 1.7.0.beta.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +20 -1
- data/.gitignore +9 -1
- data/CHANGELOG.md +20 -9
- data/Rakefile +16 -1
- data/core/binding.rbs +2 -0
- data/core/complex.rbs +0 -2
- data/core/float.rbs +0 -2
- data/core/integer.rbs +0 -2
- data/core/numeric.rbs +7 -0
- data/core/object.rbs +1 -1
- data/core/proc.rbs +2 -0
- data/core/rational.rbs +0 -2
- data/core/unbound_method.rbs +13 -0
- data/docs/rbs_by_example.md +2 -2
- data/docs/syntax.md +2 -3
- data/ext/{rbs/extension → rbs_extension}/constants.c +0 -1
- data/ext/{rbs/extension → rbs_extension}/constants.h +0 -0
- data/ext/{rbs/extension → rbs_extension}/extconf.rb +1 -1
- data/ext/rbs_extension/lexer.c +2533 -0
- data/ext/{rbs/extension → rbs_extension}/lexer.h +33 -17
- data/ext/rbs_extension/lexer.re +140 -0
- data/ext/rbs_extension/lexstate.c +139 -0
- data/ext/{rbs/extension → rbs_extension}/location.c +0 -0
- data/ext/{rbs/extension → rbs_extension}/location.h +0 -0
- data/ext/{rbs/extension → rbs_extension}/main.c +1 -1
- data/ext/{rbs/extension → rbs_extension}/parser.c +6 -32
- data/ext/{rbs/extension → rbs_extension}/parser.h +0 -5
- data/ext/{rbs/extension → rbs_extension}/parserstate.c +0 -1
- data/ext/{rbs/extension → rbs_extension}/parserstate.h +0 -0
- data/ext/{rbs/extension → rbs_extension}/rbs_extension.h +1 -1
- data/ext/{rbs/extension → rbs_extension}/ruby_objs.c +84 -148
- data/ext/{rbs/extension → rbs_extension}/ruby_objs.h +0 -2
- data/ext/{rbs/extension → rbs_extension}/unescape.c +0 -0
- data/lib/rbs/collection/installer.rb +1 -0
- data/lib/rbs/collection/sources/git.rb +6 -1
- data/lib/rbs/errors.rb +14 -0
- data/lib/rbs/location_aux.rb +13 -0
- data/lib/rbs/parser_aux.rb +39 -0
- data/lib/rbs/parser_compat/lexer_error.rb +4 -0
- data/lib/rbs/parser_compat/located_value.rb +5 -0
- data/lib/rbs/parser_compat/semantics_error.rb +4 -0
- data/lib/rbs/parser_compat/syntax_error.rb +4 -0
- data/lib/rbs/prototype/helpers.rb +113 -0
- data/lib/rbs/prototype/rb.rb +2 -105
- data/lib/rbs/prototype/runtime.rb +16 -0
- data/lib/rbs/types.rb +2 -2
- data/lib/rbs/version.rb +1 -1
- data/lib/rbs.rb +13 -1
- data/rbs.gemspec +1 -1
- data/sig/errors.rbs +10 -0
- data/sig/location.rbs +5 -0
- data/sig/parser.rbs +5 -0
- data/sig/rbs.rbs +4 -0
- data/stdlib/io-console/0/io-console.rbs +137 -0
- data/stdlib/net-http/0/net-http.rbs +2 -1
- data/stdlib/tempfile/0/tempfile.rbs +4 -6
- metadata +27 -19
- data/ext/rbs/extension/lexer.c +0 -1070
data/ext/rbs/extension/lexer.c
DELETED
@@ -1,1070 +0,0 @@
|
|
1
|
-
#include "rbs_extension.h"
|
2
|
-
|
3
|
-
/* Expands to a full `case` arm for a one-character token; `tok` and `state` must be in scope at the expansion site. */
#define ONE_CHAR_PATTERN(c, t) case c: tok = next_token(state, t); break
|
4
|
-
|
5
|
-
/**
 * Returns the code point at the current position without consuming it.
 *
 *    ... A B C ...
 *        ^ current => A
 *
 * The argument is parenthesized so any pointer-valued expression may be
 * passed. It is evaluated several times, so it must have no side effects.
 * */
#define peek(state) rb_enc_mbc_to_codepoint(RSTRING_PTR((state)->string) + (state)->current.byte_pos, RSTRING_END((state)->string), rb_enc_get((state)->string))
|
12
|
-
|
13
|
-
/*
 * Printable name of every token type; token_type_str() indexes this table
 * with the `enum TokenType` value.
 * NOTE(review): the entry order must match the enum declaration, which is
 * not visible in this file -- confirm against the header.
 *
 * Both the strings and the table itself are read-only.
 */
static const char *const RBS_TOKENTYPE_NAMES[] = {
  "NullType",
  "pEOF",
  "ErrorToken",

  "pLPAREN",          /* ( */
  "pRPAREN",          /* ) */
  "pCOLON",           /* : */
  "pCOLON2",          /* :: */
  "pLBRACKET",        /* [ */
  "pRBRACKET",        /* ] */
  "pLBRACE",          /* { */
  "pRBRACE",          /* } */
  "pHAT",             /* ^ */
  "pARROW",           /* -> */
  "pFATARROW",        /* => */
  "pCOMMA",           /* , */
  "pBAR",             /* | */
  "pAMP",             /* & */
  "pSTAR",            /* * */
  "pSTAR2",           /* ** */
  "pDOT",             /* . */
  "pDOT3",            /* ... */
  "pBANG",            /* ! */
  "pQUESTION",        /* ? */
  "pLT",              /* < */
  "pEQ",              /* = */

  "kBOOL",            /* bool */
  "kBOT",             /* bot */
  "kCLASS",           /* class */
  "kFALSE",           /* false */
  "kINSTANCE",        /* instance */
  "kINTERFACE",       /* interface */
  "kNIL",             /* nil */
  "kSELF",            /* self */
  "kSINGLETON",       /* singleton */
  "kTOP",             /* top */
  "kTRUE",            /* true */
  "kVOID",            /* void */
  "kTYPE",            /* type */
  "kUNCHECKED",       /* unchecked */
  "kIN",              /* in */
  "kOUT",             /* out */
  "kEND",             /* end */
  "kDEF",             /* def */
  "kINCLUDE",         /* include */
  "kEXTEND",          /* extend */
  "kPREPEND",         /* prepend */
  "kALIAS",           /* alias */
  "kMODULE",          /* module */
  "kATTRREADER",      /* attr_reader */
  "kATTRWRITER",      /* attr_writer */
  "kATTRACCESSOR",    /* attr_accessor */
  "kPUBLIC",          /* public */
  "kPRIVATE",         /* private */
  "kUNTYPED",         /* untyped */

  "tLIDENT",          /* Identifiers starting with lower case */
  "tUIDENT",          /* Identifiers starting with upper case */
  "tULIDENT",         /* Identifiers starting with `_` */
  "tULLIDENT",
  "tGIDENT",          /* Identifiers starting with `$` */
  "tAIDENT",          /* Identifiers starting with `@` */
  "tA2IDENT",         /* Identifiers starting with `@@` */
  "tBANGIDENT",
  "tEQIDENT",
  "tQIDENT",          /* Quoted identifier */
  "tOPERATOR",        /* Operator identifier */

  "tCOMMENT",
  "tLINECOMMENT",

  "tDQSTRING",        /* Double quoted string */
  "tSQSTRING",        /* Single quoted string */
  "tINTEGER",         /* Integer */
  "tSYMBOL",          /* Symbol */
  "tDQSYMBOL",
  "tSQSYMBOL",
  "tANNOTATION",      /* Annotation */
};
|
94
|
-
|
95
|
-
/* Token whose type is NullType; rbsparser_next_token() starts from it to mean "nothing lexed yet". */
token NullToken = { NullType };
|
96
|
-
/* Position whose four initialized fields are all -1. */
position NullPosition = { -1, -1, -1, -1 };
|
97
|
-
/* Range whose start and end positions are both initialized to all -1. */
range NULL_RANGE = { { -1, -1, -1, -1 }, { -1, -1, -1, -1 } };
|
98
|
-
|
99
|
-
const char *token_type_str(enum TokenType type) {
|
100
|
-
return RBS_TOKENTYPE_NAMES[type];
|
101
|
-
}
|
102
|
-
|
103
|
-
/*
 * Decodes up to `length` code points starting at the current position into
 * `chars`, without consuming anything.
 *
 * Slots that would lie at or beyond the end of the source string are filled
 * with 0 and do not advance the read offset, so the decoder is never handed
 * a pointer past the end of the buffer.
 *
 * Returns the number of bytes occupied by the code points actually decoded.
 */
unsigned int peekn(lexstate *state, unsigned int chars[], size_t length) {
  rb_encoding *encoding = rb_enc_get(state->string);
  char *start = RSTRING_PTR(state->string) + state->current.byte_pos;
  char *end = RSTRING_END(state->string);
  long remaining = RSTRING_LEN(state->string) - state->current.byte_pos;
  unsigned int byteoffset = 0;

  for (size_t i = 0; i < length; i++) {
    if ((long) byteoffset >= remaining) {
      /* End of input: report NUL, the same value the lexer treats as EOF. */
      chars[i] = 0;
      continue;
    }

    chars[i] = rb_enc_mbc_to_codepoint(start + byteoffset, end, encoding);
    byteoffset += rb_enc_codelen(chars[i], encoding);
  }

  return byteoffset;
}
|
118
|
-
|
119
|
-
/* Number of characters covered by `tok` (end char_pos minus start char_pos). */
int token_chars(token tok) {
  range span = tok.range;

  return span.end.char_pos - span.start.char_pos;
}
|
122
|
-
|
123
|
-
/* Size of `tok` in bytes, as computed by RANGE_BYTES on its range. */
int token_bytes(token tok) {
  range span = tok.range;

  return RANGE_BYTES(span);
}
|
126
|
-
|
127
|
-
/**
|
128
|
-
* ... token ...
|
129
|
-
* ^ start
|
130
|
-
* ^ current
|
131
|
-
*
|
132
|
-
* */
|
133
|
-
token next_token(lexstate *state, enum TokenType type) {
|
134
|
-
token t;
|
135
|
-
|
136
|
-
t.type = type;
|
137
|
-
t.range.start = state->start;
|
138
|
-
t.range.end = state->current;
|
139
|
-
state->start = state->current;
|
140
|
-
state->first_token_of_line = false;
|
141
|
-
|
142
|
-
return t;
|
143
|
-
}
|
144
|
-
|
145
|
-
/*
 * Moves `current` past the code point `c`.
 *
 * Byte position grows by the encoded length of `c`, character position by
 * one. A newline starts a new line (column reset to 0, and
 * `first_token_of_line` set); any other character bumps the column.
 * When `skip` is true, `start` follows `current`, so the character does not
 * become part of the next token.
 */
void advance_skip(lexstate *state, unsigned int c, bool skip) {
  state->current.byte_pos += rb_enc_codelen(c, rb_enc_get(state->string));
  state->current.char_pos += 1;

  if (c != '\n') {
    state->current.column += 1;
  } else {
    state->current.line += 1;
    state->current.column = 0;
    state->first_token_of_line = true;
  }

  if (skip) {
    state->start = state->current;
  }
}
|
163
|
-
|
164
|
-
/* Consumes `c` as part of the token being built (`start` stays where it is). */
void advance_char(lexstate *state, unsigned int c) {
  const bool drop_from_token = false;

  advance_skip(state, c, drop_from_token);
}
|
167
|
-
|
168
|
-
/* Consumes `c` and discards it: `start` is moved up to the new `current`. */
void skip_char(lexstate *state, unsigned int c) {
  const bool drop_from_token = true;

  advance_skip(state, c, drop_from_token);
}
|
171
|
-
|
172
|
-
/*
 * Discards the character at the current position.
 *
 * The code point is kept in an `unsigned int`: narrowing it to
 * `unsigned char` would corrupt any character above 0xFF and make
 * advance_skip() compute the wrong byte length.
 */
void skip(lexstate *state) {
  unsigned int c = peek(state);
  skip_char(state, c);
}
|
176
|
-
|
177
|
-
/*
 * Consumes the character at the current position into the current token.
 *
 * The code point is kept in an `unsigned int`: narrowing it to
 * `unsigned char` would corrupt any character above 0xFF and make
 * advance_skip() compute the wrong byte length.
 */
void advance(lexstate *state) {
  unsigned int c = peek(state);
  advance_char(state, c);
}
|
181
|
-
|
182
|
-
/*
|
183
|
-
1. Peek one character from state
|
184
|
-
2. If read characetr equals to given `c`, skip the character and return true.
|
185
|
-
3. Return false otherwise.
|
186
|
-
*/
|
187
|
-
static bool advance_next_character_if(lexstate *state, unsigned int c) {
|
188
|
-
if (peek(state) == c) {
|
189
|
-
advance_char(state, c);
|
190
|
-
return true;
|
191
|
-
} else {
|
192
|
-
return false;
|
193
|
-
}
|
194
|
-
}
|
195
|
-
|
196
|
-
/*
|
197
|
-
... 0 1 ...
|
198
|
-
^ current
|
199
|
-
^ current (return)
|
200
|
-
*/
|
201
|
-
static token lex_number(lexstate *state) {
|
202
|
-
unsigned int c;
|
203
|
-
|
204
|
-
while (true) {
|
205
|
-
c = peek(state);
|
206
|
-
|
207
|
-
if (rb_isdigit(c) || c == '_') {
|
208
|
-
advance_char(state, c);
|
209
|
-
} else {
|
210
|
-
break;
|
211
|
-
}
|
212
|
-
}
|
213
|
-
|
214
|
-
return next_token(state, tINTEGER);
|
215
|
-
}
|
216
|
-
|
217
|
-
/*
|
218
|
-
lex_hyphen ::= - (tOPERATOR)
|
219
|
-
| - @ (tOPERATOR)
|
220
|
-
| - > (pARROW)
|
221
|
-
| - 1 ... (tINTEGER)
|
222
|
-
*/
|
223
|
-
static token lex_hyphen(lexstate* state) {
|
224
|
-
if (advance_next_character_if(state, '>')) {
|
225
|
-
return next_token(state, pARROW);
|
226
|
-
} else if (advance_next_character_if(state, '@')) {
|
227
|
-
return next_token(state, tOPERATOR);
|
228
|
-
} else {
|
229
|
-
unsigned int c = peek(state);
|
230
|
-
|
231
|
-
if (rb_isdigit(c)) {
|
232
|
-
advance_char(state, c);
|
233
|
-
return lex_number(state);
|
234
|
-
} else {
|
235
|
-
return next_token(state, tOPERATOR);
|
236
|
-
}
|
237
|
-
}
|
238
|
-
}
|
239
|
-
|
240
|
-
/*
|
241
|
-
lex_plus ::= +
|
242
|
-
| + @
|
243
|
-
| + \d
|
244
|
-
*/
|
245
|
-
static token lex_plus(lexstate *state) {
|
246
|
-
if (advance_next_character_if(state, '@')) {
|
247
|
-
return next_token(state, tOPERATOR);
|
248
|
-
} else if (rb_isdigit(peek(state))) {
|
249
|
-
return lex_number(state);
|
250
|
-
} else {
|
251
|
-
return next_token(state, tOPERATOR);
|
252
|
-
}
|
253
|
-
}
|
254
|
-
|
255
|
-
/*
|
256
|
-
lex_dot ::= . pDOT
|
257
|
-
| . . . pDOT3
|
258
|
-
*/
|
259
|
-
static token lex_dot(lexstate *state) {
|
260
|
-
unsigned int cs[2];
|
261
|
-
|
262
|
-
peekn(state, cs, 2);
|
263
|
-
|
264
|
-
if (cs[0] == '.' && cs[1] == '.') {
|
265
|
-
advance_char(state, '.');
|
266
|
-
advance_char(state, '.');
|
267
|
-
return next_token(state, pDOT3);
|
268
|
-
} else {
|
269
|
-
return next_token(state, pDOT);
|
270
|
-
}
|
271
|
-
}
|
272
|
-
|
273
|
-
/*
|
274
|
-
lex_eq ::= =
|
275
|
-
| ==
|
276
|
-
| ===
|
277
|
-
| =~
|
278
|
-
| =>
|
279
|
-
*/
|
280
|
-
static token lex_eq(lexstate *state) {
|
281
|
-
unsigned int cs[2];
|
282
|
-
peekn(state, cs, 2);
|
283
|
-
|
284
|
-
if (cs[0] == '=' && cs[1] == '=') {
|
285
|
-
// ===
|
286
|
-
advance_char(state, cs[0]);
|
287
|
-
advance_char(state, cs[1]);
|
288
|
-
return next_token(state, tOPERATOR);
|
289
|
-
} else if (cs[0] == '=') {
|
290
|
-
// ==
|
291
|
-
advance_char(state, cs[0]);
|
292
|
-
return next_token(state, tOPERATOR);
|
293
|
-
} else if (cs[0] == '~') {
|
294
|
-
// =~
|
295
|
-
advance_char(state, cs[0]);
|
296
|
-
return next_token(state, tOPERATOR);
|
297
|
-
} else if (cs[0] == '>') {
|
298
|
-
// =>
|
299
|
-
advance_char(state, cs[0]);
|
300
|
-
return next_token(state, pFATARROW);
|
301
|
-
} else {
|
302
|
-
return next_token(state, pEQ);
|
303
|
-
}
|
304
|
-
}
|
305
|
-
|
306
|
-
/*
|
307
|
-
underscore ::= _A tULIDENT
|
308
|
-
| _a tULLIDENT
|
309
|
-
| _ tULLIDENT
|
310
|
-
*/
|
311
|
-
static token lex_underscore(lexstate *state) {
|
312
|
-
unsigned int c;
|
313
|
-
|
314
|
-
c = peek(state);
|
315
|
-
|
316
|
-
if ('A' <= c && c <= 'Z') {
|
317
|
-
advance_char(state, c);
|
318
|
-
|
319
|
-
while (true) {
|
320
|
-
c = peek(state);
|
321
|
-
|
322
|
-
if (rb_isalnum(c) || c == '_') {
|
323
|
-
// ok
|
324
|
-
advance_char(state, c);
|
325
|
-
} else {
|
326
|
-
break;
|
327
|
-
}
|
328
|
-
}
|
329
|
-
|
330
|
-
return next_token(state, tULIDENT);
|
331
|
-
} else if (rb_isalnum(c) || c == '_') {
|
332
|
-
advance_char(state, c);
|
333
|
-
|
334
|
-
while (true) {
|
335
|
-
c = peek(state);
|
336
|
-
|
337
|
-
if (rb_isalnum(c) || c == '_') {
|
338
|
-
// ok
|
339
|
-
advance_char(state, c);
|
340
|
-
} else {
|
341
|
-
break;
|
342
|
-
}
|
343
|
-
}
|
344
|
-
|
345
|
-
if (c == '!') {
|
346
|
-
advance_char(state, c);
|
347
|
-
return next_token(state, tBANGIDENT);
|
348
|
-
} else if (c == '=') {
|
349
|
-
advance_char(state, c);
|
350
|
-
return next_token(state, tEQIDENT);
|
351
|
-
} else {
|
352
|
-
return next_token(state, tULLIDENT);
|
353
|
-
}
|
354
|
-
} else {
|
355
|
-
return next_token(state, tULLIDENT);
|
356
|
-
}
|
357
|
-
}
|
358
|
-
|
359
|
-
/*
 * Returns true when `c` is one of the punctuation characters that end a
 * global-variable name in lex_global().
 */
static bool is_opr(unsigned int c) {
  static const char punctuation[] = ":;=.,!\"$%&()-+~|\\'[]{}*/<>^";

  /* The scan stops at the terminating NUL, so c == 0 never matches. */
  for (const char *p = punctuation; *p != '\0'; p++) {
    if ((unsigned int) (unsigned char) *p == c) {
      return true;
    }
  }

  return false;
}
|
393
|
-
|
394
|
-
/*
 * Lexes the name of a global variable; the leading `$` has already been
 * consumed.
 *
 *   `$` [0-9]+              numbered global
 *   `$` `-` [a-zA-Z0-9_]    option global such as `$-w`
 *   `$` <one special char>  punctuation global such as `$~` or `$!`
 *   `$` <other chars>       up to the next space, operator char or NUL
 *
 * Returns tGIDENT, or ErrorToken when no valid name follows the `$`.
 */
static token lex_global(lexstate *state) {
  unsigned int ch = peek(state);

  if (ch == 0 || rb_isspace(ch)) {
    return next_token(state, ErrorToken);
  }

  if (rb_isdigit(ch)) {
    do {
      advance_char(state, ch);
      ch = peek(state);
    } while (rb_isdigit(ch));

    return next_token(state, tGIDENT);
  }

  if (ch == '-') {
    advance_char(state, ch);
    ch = peek(state);

    if (!rb_isalnum(ch) && ch != '_') {
      return next_token(state, ErrorToken);
    }

    advance_char(state, ch);
    return next_token(state, tGIDENT);
  }

  switch (ch) {
    case '~': case '*': case '$': case '?': case '!':
    case '@': case '\\': case '/': case ';': case ',':
    case '.': case '=': case ':': case '<': case '>':
    case '"': case '&': case '\'': case '`': case '+':
      advance_char(state, ch);
      return next_token(state, tGIDENT);

    default:
      break;
  }

  /* Remaining operator characters cannot start a global name. */
  if (is_opr(ch) || ch == 0) {
    return next_token(state, ErrorToken);
  }

  do {
    advance_char(state, ch);
    ch = peek(state);
  } while (!(rb_isspace(ch) || is_opr(ch) || ch == 0));

  return next_token(state, tGIDENT);
}
|
471
|
-
|
472
|
-
/*
 * Debug helper: prints `object.inspect` to stdout, prefixed with "pp >> ".
 *
 * The text is written with an explicit length because a Ruby string buffer
 * may contain embedded NUL bytes and should not be treated as a C string.
 */
void pp(VALUE object) {
  VALUE inspect = rb_funcall(object, rb_intern("inspect"), 0);

  printf("pp >> %.*s\n", (int) RSTRING_LEN(inspect), RSTRING_PTR(inspect));

  /* Keep the string reachable while its raw buffer is in use. */
  RB_GC_GUARD(inspect);
}
|
476
|
-
|
477
|
-
/*
 * Lexes an identifier made of [A-Za-z0-9_] characters.
 *
 * A trailing `!` or `=` is taken into the token and yields tBANGIDENT or
 * tEQIDENT; otherwise the token gets `default_type`. A tLIDENT whose text is
 * a key of RBS_Parser_KEYWORDS mapping to a Fixnum is retyped to that value.
 */
static token lex_ident(lexstate *state, enum TokenType default_type) {
  token tok;

  for (;;) {
    unsigned int ch = peek(state);

    if (rb_isalnum(ch) || ch == '_') {
      advance_char(state, ch);
      continue;
    }

    if (ch == '!') {
      advance_char(state, ch);
      tok = next_token(state, tBANGIDENT);
    } else if (ch == '=') {
      advance_char(state, ch);
      tok = next_token(state, tEQIDENT);
    } else {
      tok = next_token(state, default_type);
    }
    break;
  }

  if (tok.type != tLIDENT) {
    return tok;
  }

  /* Lower-case identifiers may really be keywords: look the text up. */
  VALUE text = rb_enc_str_new(
    RSTRING_PTR(state->string) + tok.range.start.byte_pos,
    RANGE_BYTES(tok.range),
    rb_enc_get(state->string)
  );
  VALUE keyword = rb_hash_aref(RBS_Parser_KEYWORDS, text);

  if (FIXNUM_P(keyword)) {
    tok.type = FIX2INT(keyword);
  }

  return tok;
}
|
514
|
-
|
515
|
-
/*
 * Lexes a comment up to (not including) the end of the line; the leading `#`
 * has already been consumed. `type` is the token type to emit.
 *
 * The newline that ends the comment is skipped after the token is built.
 * When the comment is ended by the NUL at end of input, nothing is skipped:
 * stepping over it would move the cursor past the end of the buffer and hide
 * the EOF from the next rbsparser_next_token() call.
 */
static token lex_comment(lexstate *state, enum TokenType type) {
  unsigned int c = peek(state);

  /* One space right after `#` is consumed before the scan starts. */
  if (c == ' ') {
    advance_char(state, c);
  }

  for (c = peek(state); c != '\n' && c != '\0'; c = peek(state)) {
    advance_char(state, c);
  }

  token tok = next_token(state, type);

  if (c == '\n') {
    skip_char(state, c);
  }

  return tok;
}
|
539
|
-
|
540
|
-
/*
|
541
|
-
... " ... " ...
|
542
|
-
^ start
|
543
|
-
^ current
|
544
|
-
^ current (after)
|
545
|
-
*/
|
546
|
-
static token lex_dqstring(lexstate *state) {
|
547
|
-
unsigned int c;
|
548
|
-
|
549
|
-
while (true) {
|
550
|
-
c = peek(state);
|
551
|
-
advance_char(state, c);
|
552
|
-
|
553
|
-
if (c == '\\') {
|
554
|
-
if (peek(state) == '"') {
|
555
|
-
advance_char(state, c);
|
556
|
-
c = peek(state);
|
557
|
-
}
|
558
|
-
} else if (c == '"') {
|
559
|
-
break;
|
560
|
-
}
|
561
|
-
}
|
562
|
-
|
563
|
-
return next_token(state, tDQSTRING);
|
564
|
-
}
|
565
|
-
|
566
|
-
/*
|
567
|
-
... @ foo ...
|
568
|
-
^ start
|
569
|
-
^ current
|
570
|
-
^ current (return)
|
571
|
-
|
572
|
-
... @ @ foo ...
|
573
|
-
^ start
|
574
|
-
^ current
|
575
|
-
^ current (return)
|
576
|
-
*/
|
577
|
-
static token lex_ivar(lexstate *state) {
|
578
|
-
unsigned int c;
|
579
|
-
|
580
|
-
enum TokenType type = tAIDENT;
|
581
|
-
|
582
|
-
c = peek(state);
|
583
|
-
|
584
|
-
if (c == '@') {
|
585
|
-
type = tA2IDENT;
|
586
|
-
advance_char(state, c);
|
587
|
-
c = peek(state);
|
588
|
-
}
|
589
|
-
|
590
|
-
if (rb_isalpha(c) || c == '_') {
|
591
|
-
advance_char(state, c);
|
592
|
-
c = peek(state);
|
593
|
-
} else {
|
594
|
-
return next_token(state, ErrorToken);
|
595
|
-
}
|
596
|
-
|
597
|
-
while (rb_isalnum(c) || c == '_') {
|
598
|
-
advance_char(state, c);
|
599
|
-
c = peek(state);
|
600
|
-
}
|
601
|
-
|
602
|
-
return next_token(state, type);
|
603
|
-
}
|
604
|
-
|
605
|
-
/*
|
606
|
-
... ' ... ' ...
|
607
|
-
^ start
|
608
|
-
^ current
|
609
|
-
^ current (after)
|
610
|
-
*/
|
611
|
-
static token lex_sqstring(lexstate *state) {
|
612
|
-
unsigned int c;
|
613
|
-
|
614
|
-
c = peek(state);
|
615
|
-
|
616
|
-
while (true) {
|
617
|
-
c = peek(state);
|
618
|
-
advance_char(state, c);
|
619
|
-
|
620
|
-
if (c == '\\') {
|
621
|
-
if (peek(state) == '\'') {
|
622
|
-
advance_char(state, c);
|
623
|
-
c = peek(state);
|
624
|
-
}
|
625
|
-
} else if (c == '\'') {
|
626
|
-
break;
|
627
|
-
}
|
628
|
-
}
|
629
|
-
|
630
|
-
return next_token(state, tSQSTRING);
|
631
|
-
}
|
632
|
-
|
633
|
-
/*
 * True when the code points c0, c1 (and c2) equal the first two (three)
 * characters of the string `s`. Every argument is parenthesized so that
 * expressions such as `ptr + 1` expand correctly.
 */
#define EQPOINTS2(c0, c1, s) ((c0) == (s)[0] && (c1) == (s)[1])
#define EQPOINTS3(c0, c1, c2, s) ((c0) == (s)[0] && (c1) == (s)[1] && (c2) == (s)[2])
|
635
|
-
|
636
|
-
/*
|
637
|
-
... : @ ...
|
638
|
-
^ start
|
639
|
-
^ current
|
640
|
-
^ current (return)
|
641
|
-
*/
|
642
|
-
static token lex_colon_symbol(lexstate *state) {
|
643
|
-
unsigned int c[3];
|
644
|
-
peekn(state, c, 3);
|
645
|
-
|
646
|
-
switch (c[0]) {
|
647
|
-
case '|':
|
648
|
-
case '&':
|
649
|
-
case '/':
|
650
|
-
case '%':
|
651
|
-
case '~':
|
652
|
-
case '`':
|
653
|
-
case '^':
|
654
|
-
advance_char(state, c[0]);
|
655
|
-
return next_token(state, tSYMBOL);
|
656
|
-
case '=':
|
657
|
-
if (EQPOINTS2(c[0], c[1], "=~")) {
|
658
|
-
// :=~
|
659
|
-
advance_char(state, c[0]);
|
660
|
-
advance_char(state, c[1]);
|
661
|
-
return next_token(state, tSYMBOL);
|
662
|
-
} else if (EQPOINTS3(c[0], c[1], c[2], "===")) {
|
663
|
-
// :===
|
664
|
-
advance_char(state, c[0]);
|
665
|
-
advance_char(state, c[1]);
|
666
|
-
advance_char(state, c[2]);
|
667
|
-
return next_token(state, tSYMBOL);
|
668
|
-
} else if (EQPOINTS2(c[0], c[1], "==")) {
|
669
|
-
// :==
|
670
|
-
advance_char(state, c[0]);
|
671
|
-
advance_char(state, c[1]);
|
672
|
-
return next_token(state, tSYMBOL);
|
673
|
-
}
|
674
|
-
break;
|
675
|
-
case '<':
|
676
|
-
if (EQPOINTS3(c[0], c[1], c[2], "<=>")) {
|
677
|
-
advance_char(state, c[0]);
|
678
|
-
advance_char(state, c[1]);
|
679
|
-
advance_char(state, c[2]);
|
680
|
-
} else if (EQPOINTS2(c[0], c[1], "<=") || EQPOINTS2(c[0], c[1], "<<")) {
|
681
|
-
advance_char(state, c[0]);
|
682
|
-
advance_char(state, c[1]);
|
683
|
-
} else {
|
684
|
-
advance_char(state, c[0]);
|
685
|
-
}
|
686
|
-
return next_token(state, tSYMBOL);
|
687
|
-
case '>':
|
688
|
-
if (EQPOINTS2(c[0], c[1], ">=") || EQPOINTS2(c[0], c[1], ">>")) {
|
689
|
-
advance_char(state, c[0]);
|
690
|
-
advance_char(state, c[1]);
|
691
|
-
} else {
|
692
|
-
advance_char(state, c[0]);
|
693
|
-
}
|
694
|
-
return next_token(state, tSYMBOL);
|
695
|
-
case '-':
|
696
|
-
case '+':
|
697
|
-
if (EQPOINTS2(c[0], c[1], "+@") || EQPOINTS2(c[0], c[1], "-@")) {
|
698
|
-
advance_char(state, c[0]);
|
699
|
-
advance_char(state, c[1]);
|
700
|
-
} else {
|
701
|
-
advance_char(state, c[0]);
|
702
|
-
}
|
703
|
-
return next_token(state, tSYMBOL);
|
704
|
-
case '*':
|
705
|
-
if (EQPOINTS2(c[0], c[1], "**")) {
|
706
|
-
advance_char(state, c[0]);
|
707
|
-
advance_char(state, c[1]);
|
708
|
-
} else {
|
709
|
-
advance_char(state, c[0]);
|
710
|
-
}
|
711
|
-
return next_token(state, tSYMBOL);
|
712
|
-
case '[':
|
713
|
-
if (EQPOINTS3(c[0], c[1], c[2], "[]=")) {
|
714
|
-
advance_char(state, c[0]);
|
715
|
-
advance_char(state, c[1]);
|
716
|
-
advance_char(state, c[2]);
|
717
|
-
} else if (EQPOINTS2(c[0], c[1], "[]")) {
|
718
|
-
advance_char(state, c[0]);
|
719
|
-
advance_char(state, c[1]);
|
720
|
-
} else {
|
721
|
-
break;
|
722
|
-
}
|
723
|
-
return next_token(state, tSYMBOL);
|
724
|
-
case '!':
|
725
|
-
if (EQPOINTS2(c[0], c[1], "!=") || EQPOINTS2(c[0], c[1], "!~")) {
|
726
|
-
advance_char(state, c[0]);
|
727
|
-
advance_char(state, c[1]);
|
728
|
-
} else {
|
729
|
-
advance_char(state, c[0]);
|
730
|
-
}
|
731
|
-
return next_token(state, tSYMBOL);
|
732
|
-
case '@': {
|
733
|
-
advance_char(state, '@');
|
734
|
-
token tok = lex_ivar(state);
|
735
|
-
if (tok.type != ErrorToken) {
|
736
|
-
tok.type = tSYMBOL;
|
737
|
-
}
|
738
|
-
return tok;
|
739
|
-
}
|
740
|
-
case '$': {
|
741
|
-
advance_char(state, '$');
|
742
|
-
token tok = lex_global(state);
|
743
|
-
if (tok.type != ErrorToken) {
|
744
|
-
tok.type = tSYMBOL;
|
745
|
-
}
|
746
|
-
return tok;
|
747
|
-
}
|
748
|
-
case '\'': {
|
749
|
-
position start = state->start;
|
750
|
-
advance_char(state, '\'');
|
751
|
-
token tok = lex_sqstring(state);
|
752
|
-
tok.type = tSQSYMBOL;
|
753
|
-
tok.range.start = start;
|
754
|
-
return tok;
|
755
|
-
}
|
756
|
-
case '"': {
|
757
|
-
position start = state->start;
|
758
|
-
advance_char(state, '"');
|
759
|
-
token tok = lex_dqstring(state);
|
760
|
-
tok.type = tDQSYMBOL;
|
761
|
-
tok.range.start = start;
|
762
|
-
return tok;
|
763
|
-
}
|
764
|
-
default:
|
765
|
-
if (rb_isalpha(c[0]) || c[0] == '_') {
|
766
|
-
position start = state->start;
|
767
|
-
token tok = lex_ident(state, NullType);
|
768
|
-
tok.range.start = start;
|
769
|
-
|
770
|
-
if (peek(state) == '?') {
|
771
|
-
if (tok.type != tBANGIDENT && tok.type != tEQIDENT) {
|
772
|
-
skip_char(state, '?');
|
773
|
-
tok.range.end = state->current;
|
774
|
-
}
|
775
|
-
}
|
776
|
-
|
777
|
-
tok.type = tSYMBOL;
|
778
|
-
return tok;
|
779
|
-
}
|
780
|
-
}
|
781
|
-
|
782
|
-
return next_token(state, pCOLON);
|
783
|
-
}
|
784
|
-
|
785
|
-
/*
|
786
|
-
... : : ...
|
787
|
-
^ start
|
788
|
-
^ current
|
789
|
-
^ current (return)
|
790
|
-
|
791
|
-
... : ...
|
792
|
-
^ start
|
793
|
-
^ current (lex_colon_symbol)
|
794
|
-
*/
|
795
|
-
static token lex_colon(lexstate *state) {
|
796
|
-
unsigned int c = peek(state);
|
797
|
-
|
798
|
-
if (c == ':') {
|
799
|
-
advance_char(state, c);
|
800
|
-
return next_token(state, pCOLON2);
|
801
|
-
} else {
|
802
|
-
return lex_colon_symbol(state);
|
803
|
-
}
|
804
|
-
}
|
805
|
-
|
806
|
-
/*
|
807
|
-
lex_lt ::= < (pLT)
|
808
|
-
| < < (tOPERATOR)
|
809
|
-
| < = (tOPERATOR)
|
810
|
-
| < = > (tOPERATOR)
|
811
|
-
*/
|
812
|
-
static token lex_lt(lexstate *state) {
|
813
|
-
if (advance_next_character_if(state, '<')) {
|
814
|
-
return next_token(state, tOPERATOR);
|
815
|
-
} else if (advance_next_character_if(state, '=')) {
|
816
|
-
advance_next_character_if(state, '>');
|
817
|
-
return next_token(state, tOPERATOR);
|
818
|
-
} else {
|
819
|
-
return next_token(state, pLT);
|
820
|
-
}
|
821
|
-
}
|
822
|
-
|
823
|
-
/*
|
824
|
-
lex_gt ::= >
|
825
|
-
| > =
|
826
|
-
| > >
|
827
|
-
*/
|
828
|
-
static token lex_gt(lexstate *state) {
|
829
|
-
advance_next_character_if(state, '=') || advance_next_character_if(state, '>');
|
830
|
-
return next_token(state, tOPERATOR);
|
831
|
-
}
|
832
|
-
|
833
|
-
/*
|
834
|
-
... `%` `a` `{` ... `}` ...
|
835
|
-
^ start
|
836
|
-
^ current
|
837
|
-
^ current (exit)
|
838
|
-
--- token
|
839
|
-
*/
|
840
|
-
static token lex_percent(lexstate *state) {
|
841
|
-
unsigned int cs[2];
|
842
|
-
unsigned int end_char;
|
843
|
-
|
844
|
-
peekn(state, cs, 2);
|
845
|
-
|
846
|
-
if (cs[0] != 'a') {
|
847
|
-
return next_token(state, tOPERATOR);
|
848
|
-
}
|
849
|
-
|
850
|
-
switch (cs[1])
|
851
|
-
{
|
852
|
-
case '{':
|
853
|
-
end_char = '}';
|
854
|
-
break;
|
855
|
-
case '(':
|
856
|
-
end_char = ')';
|
857
|
-
break;
|
858
|
-
case '[':
|
859
|
-
end_char = ']';
|
860
|
-
break;
|
861
|
-
case '|':
|
862
|
-
end_char = '|';
|
863
|
-
break;
|
864
|
-
case '<':
|
865
|
-
end_char = '>';
|
866
|
-
break;
|
867
|
-
default:
|
868
|
-
return next_token(state, tOPERATOR);
|
869
|
-
}
|
870
|
-
|
871
|
-
advance_char(state, cs[0]);
|
872
|
-
advance_char(state, cs[1]);
|
873
|
-
|
874
|
-
unsigned int c;
|
875
|
-
|
876
|
-
while ((c = peek(state))) {
|
877
|
-
if (c == end_char) {
|
878
|
-
advance_char(state, c);
|
879
|
-
return next_token(state, tANNOTATION);
|
880
|
-
}
|
881
|
-
advance_char(state, c);
|
882
|
-
}
|
883
|
-
|
884
|
-
return next_token(state, ErrorToken);
|
885
|
-
}
|
886
|
-
|
887
|
-
/*
|
888
|
-
bracket ::= [ (pLBRACKET)
|
889
|
-
* ^
|
890
|
-
| [ ] (tOPERATOR)
|
891
|
-
* ^ $
|
892
|
-
| [ ] = (tOPERATOR)
|
893
|
-
* ^ $
|
894
|
-
*/
|
895
|
-
static token lex_bracket(lexstate *state) {
|
896
|
-
if (advance_next_character_if(state, ']')) {
|
897
|
-
advance_next_character_if(state, '=');
|
898
|
-
return next_token(state, tOPERATOR);
|
899
|
-
} else {
|
900
|
-
return next_token(state, pLBRACKET);
|
901
|
-
}
|
902
|
-
}
|
903
|
-
|
904
|
-
/*
|
905
|
-
bracket ::= *
|
906
|
-
| * *
|
907
|
-
*/
|
908
|
-
static token lex_star(lexstate *state) {
|
909
|
-
if (advance_next_character_if(state, '*')) {
|
910
|
-
return next_token(state, pSTAR2);
|
911
|
-
} else {
|
912
|
-
return next_token(state, pSTAR);
|
913
|
-
}
|
914
|
-
}
|
915
|
-
|
916
|
-
/*
|
917
|
-
bang ::= !
|
918
|
-
| ! =
|
919
|
-
| ! ~
|
920
|
-
*/
|
921
|
-
static token lex_bang(lexstate *state) {
|
922
|
-
advance_next_character_if(state, '=') || advance_next_character_if(state, '~');
|
923
|
-
return next_token(state, tOPERATOR);
|
924
|
-
}
|
925
|
-
|
926
|
-
/*
|
927
|
-
backquote ::= ` (tOPERATOR)
|
928
|
-
| `[^ :][^`]` (tQIDENT)
|
929
|
-
*/
|
930
|
-
static token lex_backquote(lexstate *state) {
|
931
|
-
unsigned int c = peek(state);
|
932
|
-
|
933
|
-
if (c == ' ' || c == ':') {
|
934
|
-
return next_token(state, tOPERATOR);
|
935
|
-
} else {
|
936
|
-
while (true) {
|
937
|
-
if (c == '`') {
|
938
|
-
break;
|
939
|
-
}
|
940
|
-
|
941
|
-
c = peek(state);
|
942
|
-
advance_char(state, c);
|
943
|
-
}
|
944
|
-
|
945
|
-
return next_token(state, tQIDENT);
|
946
|
-
}
|
947
|
-
}
|
948
|
-
|
949
|
-
/*
 * Lexer entry point: skips spaces, tabs and newlines, then returns the next
 * token from `state`.
 *
 * Returns pEOF at the NUL that ends the input, and ErrorToken when the
 * character consumed does not start any known token.
 */
token rbsparser_next_token(lexstate *state) {
  token tok = NullToken;

  unsigned int c;
  bool skipping = true;

  while (skipping) {
    c = peek(state);

    switch (c) {
    case ' ':
    case '\t':
    case '\n':
      skip_char(state, c);
      break;
    case '\0':
      return next_token(state, pEOF);
    default:
      /* First character of the token: consume it and dispatch on it below. */
      advance_char(state, c);
      skipping = false;
      break;
    }
  }

  /* ... c d ..                */
  /*       ^ state->current    */
  /*     ^   start             */
  switch (c) {
    case '\0':
      /* The skipping loop already returns on NUL; kept as a safeguard, with
         an explicit break so it cannot fall through into the `(` arm. */
      tok = next_token(state, pEOF);
      break;
    ONE_CHAR_PATTERN('(', pLPAREN);
    ONE_CHAR_PATTERN(')', pRPAREN);
    ONE_CHAR_PATTERN(']', pRBRACKET);
    ONE_CHAR_PATTERN('{', pLBRACE);
    ONE_CHAR_PATTERN('}', pRBRACE);
    ONE_CHAR_PATTERN(',', pCOMMA);
    ONE_CHAR_PATTERN('|', pBAR);
    ONE_CHAR_PATTERN('^', pHAT);
    ONE_CHAR_PATTERN('&', pAMP);
    ONE_CHAR_PATTERN('?', pQUESTION);
    ONE_CHAR_PATTERN('/', tOPERATOR);
    ONE_CHAR_PATTERN('~', tOPERATOR);
    case '[':
      tok = lex_bracket(state);
      break;
    case '-':
      tok = lex_hyphen(state);
      break;
    case '+':
      tok = lex_plus(state);
      break;
    case '*':
      tok = lex_star(state);
      break;
    case '<':
      tok = lex_lt(state);
      break;
    case '=':
      tok = lex_eq(state);
      break;
    case '>':
      tok = lex_gt(state);
      break;
    case '!':
      tok = lex_bang(state);
      break;
    case '#':
      /* A comment that is the first token of its line is a line comment. */
      if (state->first_token_of_line) {
        tok = lex_comment(state, tLINECOMMENT);
      } else {
        tok = lex_comment(state, tCOMMENT);
      }
      break;
    case ':':
      tok = lex_colon(state);
      break;
    case '.':
      tok = lex_dot(state);
      break;
    case '_':
      tok = lex_underscore(state);
      break;
    case '$':
      tok = lex_global(state);
      break;
    case '@':
      tok = lex_ivar(state);
      break;
    case '"':
      tok = lex_dqstring(state);
      break;
    case '\'':
      tok = lex_sqstring(state);
      break;
    case '%':
      tok = lex_percent(state);
      break;
    case '`':
      tok = lex_backquote(state);
      break;
    default:
      /* The three classes are mutually exclusive, so test them as a chain. */
      if (rb_isalpha(c) && rb_isupper(c)) {
        tok = lex_ident(state, tUIDENT);
      } else if (rb_isalpha(c) && rb_islower(c)) {
        tok = lex_ident(state, tLIDENT);
      } else if (rb_isdigit(c)) {
        tok = lex_number(state);
      }
      break;
  }

  /* Nothing matched: report the consumed character as an error token. */
  if (tok.type == NullType) {
    tok = next_token(state, ErrorToken);
  }

  return tok;
}
|
1067
|
-
|
1068
|
-
/* Returns a pointer into the source buffer at the first byte of `tok`. */
char *peek_token(lexstate *state, token tok) {
  char *source = RSTRING_PTR(state->string);

  return source + tok.range.start.byte_pos;
}
|