rbs 1.5.1 → 1.7.0.beta.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +10 -0
  3. data/.github/workflows/ruby.yml +0 -4
  4. data/.gitignore +1 -0
  5. data/CHANGELOG.md +51 -0
  6. data/Gemfile +2 -0
  7. data/Rakefile +7 -22
  8. data/Steepfile +9 -1
  9. data/core/enumerator.rbs +1 -0
  10. data/core/io.rbs +3 -1
  11. data/core/kernel.rbs +4 -4
  12. data/core/trace_point.rbs +1 -1
  13. data/docs/collection.md +116 -0
  14. data/ext/rbs/extension/constants.c +140 -0
  15. data/ext/rbs/extension/constants.h +72 -0
  16. data/ext/rbs/extension/extconf.rb +3 -0
  17. data/ext/rbs/extension/lexer.c +1070 -0
  18. data/ext/rbs/extension/lexer.h +145 -0
  19. data/ext/rbs/extension/location.c +295 -0
  20. data/ext/rbs/extension/location.h +59 -0
  21. data/ext/rbs/extension/main.c +9 -0
  22. data/ext/rbs/extension/parser.c +2418 -0
  23. data/ext/rbs/extension/parser.h +23 -0
  24. data/ext/rbs/extension/parserstate.c +313 -0
  25. data/ext/rbs/extension/parserstate.h +141 -0
  26. data/ext/rbs/extension/rbs_extension.h +40 -0
  27. data/ext/rbs/extension/ruby_objs.c +585 -0
  28. data/ext/rbs/extension/ruby_objs.h +46 -0
  29. data/ext/rbs/extension/unescape.c +65 -0
  30. data/goodcheck.yml +1 -1
  31. data/lib/rbs/ast/comment.rb +0 -12
  32. data/lib/rbs/buffer.rb +4 -0
  33. data/lib/rbs/builtin_names.rb +1 -0
  34. data/lib/rbs/cli.rb +98 -10
  35. data/lib/rbs/collection/cleaner.rb +29 -0
  36. data/lib/rbs/collection/config/lockfile_generator.rb +95 -0
  37. data/lib/rbs/collection/config.rb +85 -0
  38. data/lib/rbs/collection/installer.rb +27 -0
  39. data/lib/rbs/collection/sources/git.rb +162 -0
  40. data/lib/rbs/collection/sources/rubygems.rb +40 -0
  41. data/lib/rbs/collection/sources/stdlib.rb +38 -0
  42. data/lib/rbs/collection/sources.rb +22 -0
  43. data/lib/rbs/collection.rb +13 -0
  44. data/lib/rbs/environment_loader.rb +12 -0
  45. data/lib/rbs/errors.rb +16 -1
  46. data/lib/rbs/location.rb +221 -217
  47. data/lib/rbs/location_aux.rb +108 -0
  48. data/lib/rbs/locator.rb +10 -7
  49. data/lib/rbs/parser_aux.rb +24 -0
  50. data/lib/rbs/repository.rb +13 -7
  51. data/lib/rbs/types.rb +2 -3
  52. data/lib/rbs/validator.rb +4 -1
  53. data/lib/rbs/version.rb +1 -1
  54. data/lib/rbs/writer.rb +4 -2
  55. data/lib/rbs.rb +4 -7
  56. data/rbs.gemspec +2 -1
  57. data/sig/ancestor_builder.rbs +2 -2
  58. data/sig/annotation.rbs +2 -2
  59. data/sig/builtin_names.rbs +1 -0
  60. data/sig/cli.rbs +5 -0
  61. data/sig/collection/cleaner.rbs +13 -0
  62. data/sig/collection/collections.rbs +112 -0
  63. data/sig/collection/config.rbs +69 -0
  64. data/sig/collection/installer.rbs +15 -0
  65. data/sig/collection.rbs +4 -0
  66. data/sig/comment.rbs +7 -7
  67. data/sig/constant_table.rbs +1 -1
  68. data/sig/declarations.rbs +9 -9
  69. data/sig/definition.rbs +1 -1
  70. data/sig/definition_builder.rbs +2 -2
  71. data/sig/environment_loader.rbs +3 -0
  72. data/sig/errors.rbs +30 -25
  73. data/sig/location.rbs +42 -79
  74. data/sig/locator.rbs +2 -2
  75. data/sig/members.rbs +7 -7
  76. data/sig/method_types.rbs +3 -3
  77. data/sig/parser.rbs +11 -21
  78. data/sig/polyfill.rbs +12 -3
  79. data/sig/repository.rbs +4 -0
  80. data/sig/types.rbs +45 -27
  81. data/sig/writer.rbs +1 -1
  82. data/stdlib/json/0/json.rbs +3 -3
  83. data/stdlib/objspace/0/objspace.rbs +406 -0
  84. data/stdlib/openssl/0/openssl.rbs +1 -1
  85. data/stdlib/tempfile/0/tempfile.rbs +270 -0
  86. data/steep/Gemfile.lock +10 -10
  87. metadata +43 -7
  88. data/lib/rbs/parser.rb +0 -3614
@@ -0,0 +1,72 @@
1
+ #ifndef RBS__CONSTANTS_H
2
+ #define RBS__CONSTANTS_H
3
+
4
+ extern VALUE RBS;
5
+
6
+ extern VALUE RBS_AST;
7
+ extern VALUE RBS_AST_Annotation;
8
+ extern VALUE RBS_AST_Comment;
9
+
10
+ extern VALUE RBS_AST_Declarations;
11
+ extern VALUE RBS_AST_Declarations_Alias;
12
+ extern VALUE RBS_AST_Declarations_Class_Super;
13
+ extern VALUE RBS_AST_Declarations_Class;
14
+ extern VALUE RBS_AST_Declarations_Constant;
15
+ extern VALUE RBS_AST_Declarations_Global;
16
+ extern VALUE RBS_AST_Declarations_Interface;
17
+ extern VALUE RBS_AST_Declarations_Module_Self;
18
+ extern VALUE RBS_AST_Declarations_Module;
19
+ extern VALUE RBS_AST_Declarations_ModuleTypeParams_TypeParam;
20
+ extern VALUE RBS_AST_Declarations_ModuleTypeParams;
21
+
22
+ extern VALUE RBS_AST_Members;
23
+ extern VALUE RBS_AST_Members_Alias;
24
+ extern VALUE RBS_AST_Members_AttrAccessor;
25
+ extern VALUE RBS_AST_Members_AttrReader;
26
+ extern VALUE RBS_AST_Members_AttrWriter;
27
+ extern VALUE RBS_AST_Members_ClassInstanceVariable;
28
+ extern VALUE RBS_AST_Members_ClassVariable;
29
+ extern VALUE RBS_AST_Members_Extend;
30
+ extern VALUE RBS_AST_Members_Include;
31
+ extern VALUE RBS_AST_Members_InstanceVariable;
32
+ extern VALUE RBS_AST_Members_MethodDefinition;
33
+ extern VALUE RBS_AST_Members_Prepend;
34
+ extern VALUE RBS_AST_Members_Private;
35
+ extern VALUE RBS_AST_Members_Public;
36
+
37
+ extern VALUE RBS_MethodType;
38
+ extern VALUE RBS_Namespace;
39
+
40
+ extern VALUE RBS_ParsingError;
41
+ extern VALUE RBS_TypeName;
42
+
43
+ extern VALUE RBS_Types;
44
+ extern VALUE RBS_Types_Alias;
45
+ extern VALUE RBS_Types_Bases;
46
+ extern VALUE RBS_Types_Bases_Any;
47
+ extern VALUE RBS_Types_Bases_Bool;
48
+ extern VALUE RBS_Types_Bases_Bottom;
49
+ extern VALUE RBS_Types_Bases_Class;
50
+ extern VALUE RBS_Types_Bases_Instance;
51
+ extern VALUE RBS_Types_Bases_Nil;
52
+ extern VALUE RBS_Types_Bases_Self;
53
+ extern VALUE RBS_Types_Bases_Top;
54
+ extern VALUE RBS_Types_Bases_Void;
55
+ extern VALUE RBS_Types_Block;
56
+ extern VALUE RBS_Types_ClassInstance;
57
+ extern VALUE RBS_Types_ClassSingleton;
58
+ extern VALUE RBS_Types_Function_Param;
59
+ extern VALUE RBS_Types_Function;
60
+ extern VALUE RBS_Types_Interface;
61
+ extern VALUE RBS_Types_Intersection;
62
+ extern VALUE RBS_Types_Literal;
63
+ extern VALUE RBS_Types_Optional;
64
+ extern VALUE RBS_Types_Proc;
65
+ extern VALUE RBS_Types_Record;
66
+ extern VALUE RBS_Types_Tuple;
67
+ extern VALUE RBS_Types_Union;
68
+ extern VALUE RBS_Types_Variable;
69
+
70
/*
 * Declared with an explicit `(void)` parameter list: in C an empty `()`
 * declares a function with unspecified arguments, so calls would not be
 * checked against the prototype.
 */
void rbs__init_constants(void);
71
+
72
+ #endif
@@ -0,0 +1,3 @@
1
# Generates the Makefile for the `extension` native library.
require 'mkmf'

# Only when mkmf's $extmk flag is set, add the top source directory to the
# include search path.
if $extmk
  $INCFLAGS << " -I$(top_srcdir)"
end

create_makefile 'extension'
@@ -0,0 +1,1070 @@
1
+ #include "rbs_extension.h"
2
+
3
+ #define ONE_CHAR_PATTERN(c, t) case c: tok = next_token(state, t); break
4
+
5
+ /**
6
+ * Returns one character at current.
7
+ *
8
+ * ... A B C ...
9
+ * ^ current => A
10
+ * */
11
+ #define peek(state) rb_enc_mbc_to_codepoint(RSTRING_PTR(state->string) + state->current.byte_pos, RSTRING_END(state->string), rb_enc_get(state->string))
12
+
13
+ static const char *RBS_TOKENTYPE_NAMES[] = {
14
+ "NullType",
15
+ "pEOF",
16
+ "ErrorToken",
17
+
18
+ "pLPAREN", /* ( */
19
+ "pRPAREN", /* ) */
20
+ "pCOLON", /* : */
21
+ "pCOLON2", /* :: */
22
+ "pLBRACKET", /* [ */
23
+ "pRBRACKET", /* ] */
24
+ "pLBRACE", /* { */
25
+ "pRBRACE", /* } */
26
+ "pHAT", /* ^ */
27
+ "pARROW", /* -> */
28
+ "pFATARROW", /* => */
29
+ "pCOMMA", /* , */
30
+ "pBAR", /* | */
31
+ "pAMP", /* & */
32
+ "pSTAR", /* * */
33
+ "pSTAR2", /* ** */
34
+ "pDOT", /* . */
35
+ "pDOT3", /* ... */
36
+ "pBANG", /* ! */
37
+ "pQUESTION", /* ? */
38
+ "pLT", /* < */
39
+ "pEQ", /* = */
40
+
41
+ "kBOOL", /* bool */
42
+ "kBOT", /* bot */
43
+ "kCLASS", /* class */
44
+ "kFALSE", /* false */
45
+ "kINSTANCE", /* instance */
46
+ "kINTERFACE", /* interface */
47
+ "kNIL", /* nil */
48
+ "kSELF", /* self */
49
+ "kSINGLETON", /* singleton */
50
+ "kTOP", /* top */
51
+ "kTRUE", /* true */
52
+ "kVOID", /* void */
53
+ "kTYPE", /* type */
54
+ "kUNCHECKED", /* unchecked */
55
+ "kIN", /* in */
56
+ "kOUT", /* out */
57
+ "kEND", /* end */
58
+ "kDEF", /* def */
59
+ "kINCLUDE", /* include */
60
+ "kEXTEND", /* extend */
61
+ "kPREPEND", /* prepend */
62
+ "kALIAS", /* alias */
63
+ "kMODULE", /* module */
64
+ "kATTRREADER", /* attr_reader */
65
+ "kATTRWRITER", /* attr_writer */
66
+ "kATTRACCESSOR", /* attr_accessor */
67
+ "kPUBLIC", /* public */
68
+ "kPRIVATE", /* private */
69
+ "kUNTYPED", /* untyped */
70
+
71
+ "tLIDENT", /* Identifiers starting with lower case */
72
+ "tUIDENT", /* Identifiers starting with upper case */
73
+ "tULIDENT", /* Identifiers starting with `_` */
74
+ "tULLIDENT",
75
+ "tGIDENT", /* Identifiers starting with `$` */
76
+ "tAIDENT", /* Identifiers starting with `@` */
77
+ "tA2IDENT", /* Identifiers starting with `@@` */
78
+ "tBANGIDENT",
79
+ "tEQIDENT",
80
+ "tQIDENT", /* Quoted identifier */
81
+ "tOPERATOR", /* Operator identifier */
82
+
83
+ "tCOMMENT",
84
+ "tLINECOMMENT",
85
+
86
+ "tDQSTRING", /* Double quoted string */
87
+ "tSQSTRING", /* Single quoted string */
88
+ "tINTEGER", /* Integer */
89
+ "tSYMBOL", /* Symbol */
90
+ "tDQSYMBOL",
91
+ "tSQSYMBOL",
92
+ "tANNOTATION", /* Annotation */
93
+ };
94
+
95
+ token NullToken = { NullType };
96
+ position NullPosition = { -1, -1, -1, -1 };
97
+ range NULL_RANGE = { { -1, -1, -1, -1 }, { -1, -1, -1, -1 } };
98
+
99
+ const char *token_type_str(enum TokenType type) {
100
+ return RBS_TOKENTYPE_NAMES[type];
101
+ }
102
+
103
+ unsigned int peekn(lexstate *state, unsigned int chars[], size_t length) {
104
+ int byteoffset = 0;
105
+
106
+ rb_encoding *encoding = rb_enc_get(state->string);
107
+ char *start = RSTRING_PTR(state->string) + state->current.byte_pos;
108
+ char *end = RSTRING_END(state->string);
109
+
110
+ for (size_t i = 0; i < length; i++)
111
+ {
112
+ chars[i] = rb_enc_mbc_to_codepoint(start + byteoffset, end, encoding);
113
+ byteoffset += rb_enc_codelen(chars[i], rb_enc_get(state->string));
114
+ }
115
+
116
+ return byteoffset;
117
+ }
118
+
119
+ int token_chars(token tok) {
120
+ return tok.range.end.char_pos - tok.range.start.char_pos;
121
+ }
122
+
123
+ int token_bytes(token tok) {
124
+ return RANGE_BYTES(tok.range);
125
+ }
126
+
127
+ /**
128
+ * ... token ...
129
+ * ^ start
130
+ * ^ current
131
+ *
132
+ * */
133
+ token next_token(lexstate *state, enum TokenType type) {
134
+ token t;
135
+
136
+ t.type = type;
137
+ t.range.start = state->start;
138
+ t.range.end = state->current;
139
+ state->start = state->current;
140
+ state->first_token_of_line = false;
141
+
142
+ return t;
143
+ }
144
+
145
+ void advance_skip(lexstate *state, unsigned int c, bool skip) {
146
+ int len = rb_enc_codelen(c, rb_enc_get(state->string));
147
+
148
+ state->current.char_pos += 1;
149
+ state->current.byte_pos += len;
150
+
151
+ if (c == '\n') {
152
+ state->current.line += 1;
153
+ state->current.column = 0;
154
+ state->first_token_of_line = true;
155
+ } else {
156
+ state->current.column += 1;
157
+ }
158
+
159
+ if (skip) {
160
+ state->start = state->current;
161
+ }
162
+ }
163
+
164
+ void advance_char(lexstate *state, unsigned int c) {
165
+ advance_skip(state, c, false);
166
+ }
167
+
168
+ void skip_char(lexstate *state, unsigned int c) {
169
+ advance_skip(state, c, true);
170
+ }
171
+
172
+ void skip(lexstate *state) {
173
+ unsigned char c = peek(state);
174
+ skip_char(state, c);
175
+ }
176
+
177
+ void advance(lexstate *state) {
178
+ unsigned char c = peek(state);
179
+ advance_char(state, c);
180
+ }
181
+
182
+ /*
183
+ 1. Peek one character from state
184
+ 2. If the character read equals the given `c`, advance past it and return true.
185
+ 3. Return false otherwise.
186
+ */
187
+ static bool advance_next_character_if(lexstate *state, unsigned int c) {
188
+ if (peek(state) == c) {
189
+ advance_char(state, c);
190
+ return true;
191
+ } else {
192
+ return false;
193
+ }
194
+ }
195
+
196
+ /*
197
+ ... 0 1 ...
198
+ ^ current
199
+ ^ current (return)
200
+ */
201
+ static token lex_number(lexstate *state) {
202
+ unsigned int c;
203
+
204
+ while (true) {
205
+ c = peek(state);
206
+
207
+ if (rb_isdigit(c) || c == '_') {
208
+ advance_char(state, c);
209
+ } else {
210
+ break;
211
+ }
212
+ }
213
+
214
+ return next_token(state, tINTEGER);
215
+ }
216
+
217
+ /*
218
+ lex_hyphen ::= - (tOPERATOR)
219
+ | - @ (tOPERATOR)
220
+ | - > (pARROW)
221
+ | - 1 ... (tINTEGER)
222
+ */
223
+ static token lex_hyphen(lexstate* state) {
224
+ if (advance_next_character_if(state, '>')) {
225
+ return next_token(state, pARROW);
226
+ } else if (advance_next_character_if(state, '@')) {
227
+ return next_token(state, tOPERATOR);
228
+ } else {
229
+ unsigned int c = peek(state);
230
+
231
+ if (rb_isdigit(c)) {
232
+ advance_char(state, c);
233
+ return lex_number(state);
234
+ } else {
235
+ return next_token(state, tOPERATOR);
236
+ }
237
+ }
238
+ }
239
+
240
+ /*
241
+ lex_plus ::= +
242
+ | + @
243
+ | + \d
244
+ */
245
+ static token lex_plus(lexstate *state) {
246
+ if (advance_next_character_if(state, '@')) {
247
+ return next_token(state, tOPERATOR);
248
+ } else if (rb_isdigit(peek(state))) {
249
+ return lex_number(state);
250
+ } else {
251
+ return next_token(state, tOPERATOR);
252
+ }
253
+ }
254
+
255
+ /*
256
+ lex_dot ::= . pDOT
257
+ | . . . pDOT3
258
+ */
259
+ static token lex_dot(lexstate *state) {
260
+ unsigned int cs[2];
261
+
262
+ peekn(state, cs, 2);
263
+
264
+ if (cs[0] == '.' && cs[1] == '.') {
265
+ advance_char(state, '.');
266
+ advance_char(state, '.');
267
+ return next_token(state, pDOT3);
268
+ } else {
269
+ return next_token(state, pDOT);
270
+ }
271
+ }
272
+
273
+ /*
274
+ lex_eq ::= =
275
+ | ==
276
+ | ===
277
+ | =~
278
+ | =>
279
+ */
280
+ static token lex_eq(lexstate *state) {
281
+ unsigned int cs[2];
282
+ peekn(state, cs, 2);
283
+
284
+ if (cs[0] == '=' && cs[1] == '=') {
285
+ // ===
286
+ advance_char(state, cs[0]);
287
+ advance_char(state, cs[1]);
288
+ return next_token(state, tOPERATOR);
289
+ } else if (cs[0] == '=') {
290
+ // ==
291
+ advance_char(state, cs[0]);
292
+ return next_token(state, tOPERATOR);
293
+ } else if (cs[0] == '~') {
294
+ // =~
295
+ advance_char(state, cs[0]);
296
+ return next_token(state, tOPERATOR);
297
+ } else if (cs[0] == '>') {
298
+ // =>
299
+ advance_char(state, cs[0]);
300
+ return next_token(state, pFATARROW);
301
+ } else {
302
+ return next_token(state, pEQ);
303
+ }
304
+ }
305
+
306
+ /*
307
+ underscore ::= _A tULIDENT
308
+ | _a tULLIDENT
309
+ | _ tULLIDENT
310
+ */
311
+ static token lex_underscore(lexstate *state) {
312
+ unsigned int c;
313
+
314
+ c = peek(state);
315
+
316
+ if ('A' <= c && c <= 'Z') {
317
+ advance_char(state, c);
318
+
319
+ while (true) {
320
+ c = peek(state);
321
+
322
+ if (rb_isalnum(c) || c == '_') {
323
+ // ok
324
+ advance_char(state, c);
325
+ } else {
326
+ break;
327
+ }
328
+ }
329
+
330
+ return next_token(state, tULIDENT);
331
+ } else if (rb_isalnum(c) || c == '_') {
332
+ advance_char(state, c);
333
+
334
+ while (true) {
335
+ c = peek(state);
336
+
337
+ if (rb_isalnum(c) || c == '_') {
338
+ // ok
339
+ advance_char(state, c);
340
+ } else {
341
+ break;
342
+ }
343
+ }
344
+
345
+ if (c == '!') {
346
+ advance_char(state, c);
347
+ return next_token(state, tBANGIDENT);
348
+ } else if (c == '=') {
349
+ advance_char(state, c);
350
+ return next_token(state, tEQIDENT);
351
+ } else {
352
+ return next_token(state, tULLIDENT);
353
+ }
354
+ } else {
355
+ return next_token(state, tULLIDENT);
356
+ }
357
+ }
358
+
359
/* Tells whether `c` is one of the ASCII punctuation characters listed below. */
static bool is_opr(unsigned int c) {
  static const char operators[] = ":;=.,!\"$%&()-+~|\\'[]{}*/<>^";

  // sizeof includes the terminating NUL, which must not be matched.
  for (size_t i = 0; i < sizeof(operators) - 1; i++) {
    if (c == (unsigned int) operators[i]) {
      return true;
    }
  }

  return false;
}
393
+
394
+ static token lex_global(lexstate *state) {
395
+ unsigned int c;
396
+
397
+ c = peek(state);
398
+
399
+ if (rb_isspace(c) || c == 0) {
400
+ return next_token(state, ErrorToken);
401
+ }
402
+
403
+ if (rb_isdigit(c)) {
404
+ // `$` [`0`-`9`]+
405
+ advance_char(state, c);
406
+
407
+ while (true) {
408
+ c = peek(state);
409
+ if (rb_isdigit(c)) {
410
+ advance_char(state, c);
411
+ } else {
412
+ return next_token(state, tGIDENT);
413
+ }
414
+ }
415
+ }
416
+
417
+ if (c == '-') {
418
+ // `$` `-` [a-zA-Z0-9_]
419
+ advance_char(state, c);
420
+ c = peek(state);
421
+
422
+ if (rb_isalnum(c) || c == '_') {
423
+ advance_char(state, c);
424
+ return next_token(state, tGIDENT);
425
+ } else {
426
+ return next_token(state, ErrorToken);
427
+ }
428
+ }
429
+
430
+ switch (c) {
431
+ case '~':
432
+ case '*':
433
+ case '$':
434
+ case '?':
435
+ case '!':
436
+ case '@':
437
+ case '\\':
438
+ case '/':
439
+ case ';':
440
+ case ',':
441
+ case '.':
442
+ case '=':
443
+ case ':':
444
+ case '<':
445
+ case '>':
446
+ case '"':
447
+ case '&':
448
+ case '\'':
449
+ case '`':
450
+ case '+':
451
+ advance_char(state, c);
452
+ return next_token(state, tGIDENT);
453
+
454
+ default:
455
+ if (is_opr(c) || c == 0) {
456
+ return next_token(state, ErrorToken);
457
+ }
458
+
459
+ while (true) {
460
+ advance_char(state, c);
461
+ c = peek(state);
462
+
463
+ if (rb_isspace(c) || is_opr(c) || c == 0) {
464
+ break;
465
+ }
466
+ }
467
+
468
+ return next_token(state, tGIDENT);
469
+ }
470
+ }
471
+
472
+ void pp(VALUE object) {
473
+ VALUE inspect = rb_funcall(object, rb_intern("inspect"), 0);
474
+ printf("pp >> %s\n", RSTRING_PTR(inspect));
475
+ }
476
+
477
+ static token lex_ident(lexstate *state, enum TokenType default_type) {
478
+ unsigned int c;
479
+ token tok;
480
+
481
+ while (true) {
482
+ c = peek(state);
483
+ if (rb_isalnum(c) || c == '_') {
484
+ advance_char(state, c);
485
+ } else if (c == '!') {
486
+ advance_char(state, c);
487
+ tok = next_token(state, tBANGIDENT);
488
+ break;
489
+ } else if (c == '=') {
490
+ advance_char(state, c);
491
+ tok = next_token(state, tEQIDENT);
492
+ break;
493
+ } else {
494
+ tok = next_token(state, default_type);
495
+ break;
496
+ }
497
+ }
498
+
499
+ if (tok.type == tLIDENT) {
500
+ VALUE string = rb_enc_str_new(
501
+ RSTRING_PTR(state->string) + tok.range.start.byte_pos,
502
+ RANGE_BYTES(tok.range),
503
+ rb_enc_get(state->string)
504
+ );
505
+
506
+ VALUE type = rb_hash_aref(RBS_Parser_KEYWORDS, string);
507
+ if (FIXNUM_P(type)) {
508
+ tok.type = FIX2INT(type);
509
+ }
510
+ }
511
+
512
+ return tok;
513
+ }
514
+
515
+ static token lex_comment(lexstate *state, enum TokenType type) {
516
+ unsigned int c;
517
+
518
+ c = peek(state);
519
+ if (c == ' ') {
520
+ advance_char(state, c);
521
+ }
522
+
523
+ while (true) {
524
+ c = peek(state);
525
+
526
+ if (c == '\n' || c == '\0') {
527
+ break;
528
+ } else {
529
+ advance_char(state, c);
530
+ }
531
+ }
532
+
533
+ token tok = next_token(state, type);
534
+
535
+ skip_char(state, c);
536
+
537
+ return tok;
538
+ }
539
+
540
+ /*
541
+ ... " ... " ...
542
+ ^ start
543
+ ^ current
544
+ ^ current (after)
545
+ */
546
+ static token lex_dqstring(lexstate *state) {
547
+ unsigned int c;
548
+
549
+ while (true) {
550
+ c = peek(state);
551
+ advance_char(state, c);
552
+
553
+ if (c == '\\') {
554
+ if (peek(state) == '"') {
555
+ advance_char(state, c);
556
+ c = peek(state);
557
+ }
558
+ } else if (c == '"') {
559
+ break;
560
+ }
561
+ }
562
+
563
+ return next_token(state, tDQSTRING);
564
+ }
565
+
566
+ /*
567
+ ... @ foo ...
568
+ ^ start
569
+ ^ current
570
+ ^ current (return)
571
+
572
+ ... @ @ foo ...
573
+ ^ start
574
+ ^ current
575
+ ^ current (return)
576
+ */
577
+ static token lex_ivar(lexstate *state) {
578
+ unsigned int c;
579
+
580
+ enum TokenType type = tAIDENT;
581
+
582
+ c = peek(state);
583
+
584
+ if (c == '@') {
585
+ type = tA2IDENT;
586
+ advance_char(state, c);
587
+ c = peek(state);
588
+ }
589
+
590
+ if (rb_isalpha(c) || c == '_') {
591
+ advance_char(state, c);
592
+ c = peek(state);
593
+ } else {
594
+ return next_token(state, ErrorToken);
595
+ }
596
+
597
+ while (rb_isalnum(c) || c == '_') {
598
+ advance_char(state, c);
599
+ c = peek(state);
600
+ }
601
+
602
+ return next_token(state, type);
603
+ }
604
+
605
+ /*
606
+ ... ' ... ' ...
607
+ ^ start
608
+ ^ current
609
+ ^ current (after)
610
+ */
611
+ static token lex_sqstring(lexstate *state) {
612
+ unsigned int c;
613
+
614
+ c = peek(state);
615
+
616
+ while (true) {
617
+ c = peek(state);
618
+ advance_char(state, c);
619
+
620
+ if (c == '\\') {
621
+ if (peek(state) == '\'') {
622
+ advance_char(state, c);
623
+ c = peek(state);
624
+ }
625
+ } else if (c == '\'') {
626
+ break;
627
+ }
628
+ }
629
+
630
+ return next_token(state, tSQSTRING);
631
+ }
632
+
633
+ #define EQPOINTS2(c0, c1, s) (c0 == s[0] && c1 == s[1])
634
+ #define EQPOINTS3(c0, c1, c2, s) (c0 == s[0] && c1 == s[1] && c2 == s[2])
635
+
636
+ /*
637
+ ... : @ ...
638
+ ^ start
639
+ ^ current
640
+ ^ current (return)
641
+ */
642
+ static token lex_colon_symbol(lexstate *state) {
643
+ unsigned int c[3];
644
+ peekn(state, c, 3);
645
+
646
+ switch (c[0]) {
647
+ case '|':
648
+ case '&':
649
+ case '/':
650
+ case '%':
651
+ case '~':
652
+ case '`':
653
+ case '^':
654
+ advance_char(state, c[0]);
655
+ return next_token(state, tSYMBOL);
656
+ case '=':
657
+ if (EQPOINTS2(c[0], c[1], "=~")) {
658
+ // :=~
659
+ advance_char(state, c[0]);
660
+ advance_char(state, c[1]);
661
+ return next_token(state, tSYMBOL);
662
+ } else if (EQPOINTS3(c[0], c[1], c[2], "===")) {
663
+ // :===
664
+ advance_char(state, c[0]);
665
+ advance_char(state, c[1]);
666
+ advance_char(state, c[2]);
667
+ return next_token(state, tSYMBOL);
668
+ } else if (EQPOINTS2(c[0], c[1], "==")) {
669
+ // :==
670
+ advance_char(state, c[0]);
671
+ advance_char(state, c[1]);
672
+ return next_token(state, tSYMBOL);
673
+ }
674
+ break;
675
+ case '<':
676
+ if (EQPOINTS3(c[0], c[1], c[2], "<=>")) {
677
+ advance_char(state, c[0]);
678
+ advance_char(state, c[1]);
679
+ advance_char(state, c[2]);
680
+ } else if (EQPOINTS2(c[0], c[1], "<=") || EQPOINTS2(c[0], c[1], "<<")) {
681
+ advance_char(state, c[0]);
682
+ advance_char(state, c[1]);
683
+ } else {
684
+ advance_char(state, c[0]);
685
+ }
686
+ return next_token(state, tSYMBOL);
687
+ case '>':
688
+ if (EQPOINTS2(c[0], c[1], ">=") || EQPOINTS2(c[0], c[1], ">>")) {
689
+ advance_char(state, c[0]);
690
+ advance_char(state, c[1]);
691
+ } else {
692
+ advance_char(state, c[0]);
693
+ }
694
+ return next_token(state, tSYMBOL);
695
+ case '-':
696
+ case '+':
697
+ if (EQPOINTS2(c[0], c[1], "+@") || EQPOINTS2(c[0], c[1], "-@")) {
698
+ advance_char(state, c[0]);
699
+ advance_char(state, c[1]);
700
+ } else {
701
+ advance_char(state, c[0]);
702
+ }
703
+ return next_token(state, tSYMBOL);
704
+ case '*':
705
+ if (EQPOINTS2(c[0], c[1], "**")) {
706
+ advance_char(state, c[0]);
707
+ advance_char(state, c[1]);
708
+ } else {
709
+ advance_char(state, c[0]);
710
+ }
711
+ return next_token(state, tSYMBOL);
712
+ case '[':
713
+ if (EQPOINTS3(c[0], c[1], c[2], "[]=")) {
714
+ advance_char(state, c[0]);
715
+ advance_char(state, c[1]);
716
+ advance_char(state, c[2]);
717
+ } else if (EQPOINTS2(c[0], c[1], "[]")) {
718
+ advance_char(state, c[0]);
719
+ advance_char(state, c[1]);
720
+ } else {
721
+ break;
722
+ }
723
+ return next_token(state, tSYMBOL);
724
+ case '!':
725
+ if (EQPOINTS2(c[0], c[1], "!=") || EQPOINTS2(c[0], c[1], "!~")) {
726
+ advance_char(state, c[0]);
727
+ advance_char(state, c[1]);
728
+ } else {
729
+ advance_char(state, c[0]);
730
+ }
731
+ return next_token(state, tSYMBOL);
732
+ case '@': {
733
+ advance_char(state, '@');
734
+ token tok = lex_ivar(state);
735
+ if (tok.type != ErrorToken) {
736
+ tok.type = tSYMBOL;
737
+ }
738
+ return tok;
739
+ }
740
+ case '$': {
741
+ advance_char(state, '$');
742
+ token tok = lex_global(state);
743
+ if (tok.type != ErrorToken) {
744
+ tok.type = tSYMBOL;
745
+ }
746
+ return tok;
747
+ }
748
+ case '\'': {
749
+ position start = state->start;
750
+ advance_char(state, '\'');
751
+ token tok = lex_sqstring(state);
752
+ tok.type = tSQSYMBOL;
753
+ tok.range.start = start;
754
+ return tok;
755
+ }
756
+ case '"': {
757
+ position start = state->start;
758
+ advance_char(state, '"');
759
+ token tok = lex_dqstring(state);
760
+ tok.type = tDQSYMBOL;
761
+ tok.range.start = start;
762
+ return tok;
763
+ }
764
+ default:
765
+ if (rb_isalpha(c[0]) || c[0] == '_') {
766
+ position start = state->start;
767
+ token tok = lex_ident(state, NullType);
768
+ tok.range.start = start;
769
+
770
+ if (peek(state) == '?') {
771
+ if (tok.type != tBANGIDENT && tok.type != tEQIDENT) {
772
+ skip_char(state, '?');
773
+ tok.range.end = state->current;
774
+ }
775
+ }
776
+
777
+ tok.type = tSYMBOL;
778
+ return tok;
779
+ }
780
+ }
781
+
782
+ return next_token(state, pCOLON);
783
+ }
784
+
785
+ /*
786
+ ... : : ...
787
+ ^ start
788
+ ^ current
789
+ ^ current (return)
790
+
791
+ ... : ...
792
+ ^ start
793
+ ^ current (lex_colon_symbol)
794
+ */
795
+ static token lex_colon(lexstate *state) {
796
+ unsigned int c = peek(state);
797
+
798
+ if (c == ':') {
799
+ advance_char(state, c);
800
+ return next_token(state, pCOLON2);
801
+ } else {
802
+ return lex_colon_symbol(state);
803
+ }
804
+ }
805
+
806
+ /*
807
+ lex_lt ::= < (pLT)
808
+ | < < (tOPERATOR)
809
+ | < = (tOPERATOR)
810
+ | < = > (tOPERATOR)
811
+ */
812
+ static token lex_lt(lexstate *state) {
813
+ if (advance_next_character_if(state, '<')) {
814
+ return next_token(state, tOPERATOR);
815
+ } else if (advance_next_character_if(state, '=')) {
816
+ advance_next_character_if(state, '>');
817
+ return next_token(state, tOPERATOR);
818
+ } else {
819
+ return next_token(state, pLT);
820
+ }
821
+ }
822
+
823
+ /*
824
+ lex_gt ::= >
825
+ | > =
826
+ | > >
827
+ */
828
+ static token lex_gt(lexstate *state) {
829
+ advance_next_character_if(state, '=') || advance_next_character_if(state, '>');
830
+ return next_token(state, tOPERATOR);
831
+ }
832
+
833
+ /*
834
+ ... `%` `a` `{` ... `}` ...
835
+ ^ start
836
+ ^ current
837
+ ^ current (exit)
838
+ --- token
839
+ */
840
+ static token lex_percent(lexstate *state) {
841
+ unsigned int cs[2];
842
+ unsigned int end_char;
843
+
844
+ peekn(state, cs, 2);
845
+
846
+ if (cs[0] != 'a') {
847
+ return next_token(state, tOPERATOR);
848
+ }
849
+
850
+ switch (cs[1])
851
+ {
852
+ case '{':
853
+ end_char = '}';
854
+ break;
855
+ case '(':
856
+ end_char = ')';
857
+ break;
858
+ case '[':
859
+ end_char = ']';
860
+ break;
861
+ case '|':
862
+ end_char = '|';
863
+ break;
864
+ case '<':
865
+ end_char = '>';
866
+ break;
867
+ default:
868
+ return next_token(state, tOPERATOR);
869
+ }
870
+
871
+ advance_char(state, cs[0]);
872
+ advance_char(state, cs[1]);
873
+
874
+ unsigned int c;
875
+
876
+ while ((c = peek(state))) {
877
+ if (c == end_char) {
878
+ advance_char(state, c);
879
+ return next_token(state, tANNOTATION);
880
+ }
881
+ advance_char(state, c);
882
+ }
883
+
884
+ return next_token(state, ErrorToken);
885
+ }
886
+
887
+ /*
888
+ bracket ::= [ (pLBRACKET)
889
+ * ^
890
+ | [ ] (tOPERATOR)
891
+ * ^ $
892
+ | [ ] = (tOPERATOR)
893
+ * ^ $
894
+ */
895
+ static token lex_bracket(lexstate *state) {
896
+ if (advance_next_character_if(state, ']')) {
897
+ advance_next_character_if(state, '=');
898
+ return next_token(state, tOPERATOR);
899
+ } else {
900
+ return next_token(state, pLBRACKET);
901
+ }
902
+ }
903
+
904
+ /*
905
+ star ::= *
906
+ | * *
907
+ */
908
+ static token lex_star(lexstate *state) {
909
+ if (advance_next_character_if(state, '*')) {
910
+ return next_token(state, pSTAR2);
911
+ } else {
912
+ return next_token(state, pSTAR);
913
+ }
914
+ }
915
+
916
+ /*
917
+ bang ::= !
918
+ | ! =
919
+ | ! ~
920
+ */
921
+ static token lex_bang(lexstate *state) {
922
+ advance_next_character_if(state, '=') || advance_next_character_if(state, '~');
923
+ return next_token(state, tOPERATOR);
924
+ }
925
+
926
+ /*
927
+ backquote ::= ` (tOPERATOR)
928
+ | `[^ :][^`]` (tQIDENT)
929
+ */
930
+ static token lex_backquote(lexstate *state) {
931
+ unsigned int c = peek(state);
932
+
933
+ if (c == ' ' || c == ':') {
934
+ return next_token(state, tOPERATOR);
935
+ } else {
936
+ while (true) {
937
+ if (c == '`') {
938
+ break;
939
+ }
940
+
941
+ c = peek(state);
942
+ advance_char(state, c);
943
+ }
944
+
945
+ return next_token(state, tQIDENT);
946
+ }
947
+ }
948
+
949
+ token rbsparser_next_token(lexstate *state) {
950
+ token tok = NullToken;
951
+
952
+ unsigned int c;
953
+ bool skipping = true;
954
+
955
+ while (skipping) {
956
+ c = peek(state);
957
+
958
+ switch (c) {
959
+ case ' ':
960
+ case '\t':
961
+ case '\n':
962
+ // nop
963
+ skip_char(state, c);
964
+ break;
965
+ case '\0':
966
+ return next_token(state, pEOF);
967
+ default:
968
+ advance_char(state, c);
969
+ skipping = false;
970
+ break;
971
+ }
972
+ }
973
+
974
+ /* ... c d .. */
975
+ /* ^ state->current */
976
+ /* ^ start */
977
+ switch (c) {
978
+ case '\0': tok = next_token(state, pEOF);
979
+ ONE_CHAR_PATTERN('(', pLPAREN);
980
+ ONE_CHAR_PATTERN(')', pRPAREN);
981
+ ONE_CHAR_PATTERN(']', pRBRACKET);
982
+ ONE_CHAR_PATTERN('{', pLBRACE);
983
+ ONE_CHAR_PATTERN('}', pRBRACE);
984
+ ONE_CHAR_PATTERN(',', pCOMMA);
985
+ ONE_CHAR_PATTERN('|', pBAR);
986
+ ONE_CHAR_PATTERN('^', pHAT);
987
+ ONE_CHAR_PATTERN('&', pAMP);
988
+ ONE_CHAR_PATTERN('?', pQUESTION);
989
+ ONE_CHAR_PATTERN('/', tOPERATOR);
990
+ ONE_CHAR_PATTERN('~', tOPERATOR);
991
+ case '[':
992
+ tok = lex_bracket(state);
993
+ break;
994
+ case '-':
995
+ tok = lex_hyphen(state);
996
+ break;
997
+ case '+':
998
+ tok = lex_plus(state);
999
+ break;
1000
+ case '*':
1001
+ tok = lex_star(state);
1002
+ break;
1003
+ case '<':
1004
+ tok = lex_lt(state);
1005
+ break;
1006
+ case '=':
1007
+ tok = lex_eq(state);
1008
+ break;
1009
+ case '>':
1010
+ tok = lex_gt(state);
1011
+ break;
1012
+ case '!':
1013
+ tok = lex_bang(state);
1014
+ break;
1015
+ case '#':
1016
+ if (state->first_token_of_line) {
1017
+ tok = lex_comment(state, tLINECOMMENT);
1018
+ } else {
1019
+ tok = lex_comment(state, tCOMMENT);
1020
+ }
1021
+ break;
1022
+ case ':':
1023
+ tok = lex_colon(state);
1024
+ break;
1025
+ case '.':
1026
+ tok = lex_dot(state);
1027
+ break;
1028
+ case '_':
1029
+ tok = lex_underscore(state);
1030
+ break;
1031
+ case '$':
1032
+ tok = lex_global(state);
1033
+ break;
1034
+ case '@':
1035
+ tok = lex_ivar(state);
1036
+ break;
1037
+ case '"':
1038
+ tok = lex_dqstring(state);
1039
+ break;
1040
+ case '\'':
1041
+ tok = lex_sqstring(state);
1042
+ break;
1043
+ case '%':
1044
+ tok = lex_percent(state);
1045
+ break;
1046
+ case '`':
1047
+ tok = lex_backquote(state);
1048
+ break;
1049
+ default:
1050
+ if (rb_isalpha(c) && rb_isupper(c)) {
1051
+ tok = lex_ident(state, tUIDENT);
1052
+ }
1053
+ if (rb_isalpha(c) && rb_islower(c)) {
1054
+ tok = lex_ident(state, tLIDENT);
1055
+ }
1056
+ if (rb_isdigit(c)) {
1057
+ tok = lex_number(state);
1058
+ }
1059
+ }
1060
+
1061
+ if (tok.type == NullType) {
1062
+ tok = next_token(state, ErrorToken);
1063
+ }
1064
+
1065
+ return tok;
1066
+ }
1067
+
1068
+ char *peek_token(lexstate *state, token tok) {
1069
+ return RSTRING_PTR(state->string) + tok.range.start.byte_pos;
1070
+ }