rbs 1.5.1 → 1.7.0.beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +10 -0
  3. data/.github/workflows/ruby.yml +0 -4
  4. data/.gitignore +1 -0
  5. data/CHANGELOG.md +51 -0
  6. data/Gemfile +2 -0
  7. data/Rakefile +7 -22
  8. data/Steepfile +9 -1
  9. data/core/enumerator.rbs +1 -0
  10. data/core/io.rbs +3 -1
  11. data/core/kernel.rbs +4 -4
  12. data/core/trace_point.rbs +1 -1
  13. data/docs/collection.md +116 -0
  14. data/ext/rbs/extension/constants.c +140 -0
  15. data/ext/rbs/extension/constants.h +72 -0
  16. data/ext/rbs/extension/extconf.rb +3 -0
  17. data/ext/rbs/extension/lexer.c +1070 -0
  18. data/ext/rbs/extension/lexer.h +145 -0
  19. data/ext/rbs/extension/location.c +295 -0
  20. data/ext/rbs/extension/location.h +59 -0
  21. data/ext/rbs/extension/main.c +9 -0
  22. data/ext/rbs/extension/parser.c +2418 -0
  23. data/ext/rbs/extension/parser.h +23 -0
  24. data/ext/rbs/extension/parserstate.c +313 -0
  25. data/ext/rbs/extension/parserstate.h +141 -0
  26. data/ext/rbs/extension/rbs_extension.h +40 -0
  27. data/ext/rbs/extension/ruby_objs.c +585 -0
  28. data/ext/rbs/extension/ruby_objs.h +46 -0
  29. data/ext/rbs/extension/unescape.c +65 -0
  30. data/goodcheck.yml +1 -1
  31. data/lib/rbs/ast/comment.rb +0 -12
  32. data/lib/rbs/buffer.rb +4 -0
  33. data/lib/rbs/builtin_names.rb +1 -0
  34. data/lib/rbs/cli.rb +98 -10
  35. data/lib/rbs/collection/cleaner.rb +29 -0
  36. data/lib/rbs/collection/config/lockfile_generator.rb +95 -0
  37. data/lib/rbs/collection/config.rb +85 -0
  38. data/lib/rbs/collection/installer.rb +27 -0
  39. data/lib/rbs/collection/sources/git.rb +162 -0
  40. data/lib/rbs/collection/sources/rubygems.rb +40 -0
  41. data/lib/rbs/collection/sources/stdlib.rb +38 -0
  42. data/lib/rbs/collection/sources.rb +22 -0
  43. data/lib/rbs/collection.rb +13 -0
  44. data/lib/rbs/environment_loader.rb +12 -0
  45. data/lib/rbs/errors.rb +16 -1
  46. data/lib/rbs/location.rb +221 -217
  47. data/lib/rbs/location_aux.rb +108 -0
  48. data/lib/rbs/locator.rb +10 -7
  49. data/lib/rbs/parser_aux.rb +24 -0
  50. data/lib/rbs/repository.rb +13 -7
  51. data/lib/rbs/types.rb +2 -3
  52. data/lib/rbs/validator.rb +4 -1
  53. data/lib/rbs/version.rb +1 -1
  54. data/lib/rbs/writer.rb +4 -2
  55. data/lib/rbs.rb +4 -7
  56. data/rbs.gemspec +2 -1
  57. data/sig/ancestor_builder.rbs +2 -2
  58. data/sig/annotation.rbs +2 -2
  59. data/sig/builtin_names.rbs +1 -0
  60. data/sig/cli.rbs +5 -0
  61. data/sig/collection/cleaner.rbs +13 -0
  62. data/sig/collection/collections.rbs +112 -0
  63. data/sig/collection/config.rbs +69 -0
  64. data/sig/collection/installer.rbs +15 -0
  65. data/sig/collection.rbs +4 -0
  66. data/sig/comment.rbs +7 -7
  67. data/sig/constant_table.rbs +1 -1
  68. data/sig/declarations.rbs +9 -9
  69. data/sig/definition.rbs +1 -1
  70. data/sig/definition_builder.rbs +2 -2
  71. data/sig/environment_loader.rbs +3 -0
  72. data/sig/errors.rbs +30 -25
  73. data/sig/location.rbs +42 -79
  74. data/sig/locator.rbs +2 -2
  75. data/sig/members.rbs +7 -7
  76. data/sig/method_types.rbs +3 -3
  77. data/sig/parser.rbs +11 -21
  78. data/sig/polyfill.rbs +12 -3
  79. data/sig/repository.rbs +4 -0
  80. data/sig/types.rbs +45 -27
  81. data/sig/writer.rbs +1 -1
  82. data/stdlib/json/0/json.rbs +3 -3
  83. data/stdlib/objspace/0/objspace.rbs +406 -0
  84. data/stdlib/openssl/0/openssl.rbs +1 -1
  85. data/stdlib/tempfile/0/tempfile.rbs +270 -0
  86. data/steep/Gemfile.lock +10 -10
  87. metadata +43 -7
  88. data/lib/rbs/parser.rb +0 -3614
@@ -0,0 +1,72 @@
1
+ #ifndef RBS__CONSTANTS_H
2
+ #define RBS__CONSTANTS_H
3
+
4
+ extern VALUE RBS;
5
+
6
+ extern VALUE RBS_AST;
7
+ extern VALUE RBS_AST_Annotation;
8
+ extern VALUE RBS_AST_Comment;
9
+
10
+ extern VALUE RBS_AST_Declarations;
11
+ extern VALUE RBS_AST_Declarations_Alias;
12
+ extern VALUE RBS_AST_Declarations_Class_Super;
13
+ extern VALUE RBS_AST_Declarations_Class;
14
+ extern VALUE RBS_AST_Declarations_Constant;
15
+ extern VALUE RBS_AST_Declarations_Global;
16
+ extern VALUE RBS_AST_Declarations_Interface;
17
+ extern VALUE RBS_AST_Declarations_Module_Self;
18
+ extern VALUE RBS_AST_Declarations_Module;
19
+ extern VALUE RBS_AST_Declarations_ModuleTypeParams_TypeParam;
20
+ extern VALUE RBS_AST_Declarations_ModuleTypeParams;
21
+
22
+ extern VALUE RBS_AST_Members;
23
+ extern VALUE RBS_AST_Members_Alias;
24
+ extern VALUE RBS_AST_Members_AttrAccessor;
25
+ extern VALUE RBS_AST_Members_AttrReader;
26
+ extern VALUE RBS_AST_Members_AttrWriter;
27
+ extern VALUE RBS_AST_Members_ClassInstanceVariable;
28
+ extern VALUE RBS_AST_Members_ClassVariable;
29
+ extern VALUE RBS_AST_Members_Extend;
30
+ extern VALUE RBS_AST_Members_Include;
31
+ extern VALUE RBS_AST_Members_InstanceVariable;
32
+ extern VALUE RBS_AST_Members_MethodDefinition;
33
+ extern VALUE RBS_AST_Members_Prepend;
34
+ extern VALUE RBS_AST_Members_Private;
35
+ extern VALUE RBS_AST_Members_Public;
36
+
37
+ extern VALUE RBS_MethodType;
38
+ extern VALUE RBS_Namespace;
39
+
40
+ extern VALUE RBS_ParsingError;
41
+ extern VALUE RBS_TypeName;
42
+
43
+ extern VALUE RBS_Types;
44
+ extern VALUE RBS_Types_Alias;
45
+ extern VALUE RBS_Types_Bases;
46
+ extern VALUE RBS_Types_Bases_Any;
47
+ extern VALUE RBS_Types_Bases_Bool;
48
+ extern VALUE RBS_Types_Bases_Bottom;
49
+ extern VALUE RBS_Types_Bases_Class;
50
+ extern VALUE RBS_Types_Bases_Instance;
51
+ extern VALUE RBS_Types_Bases_Nil;
52
+ extern VALUE RBS_Types_Bases_Self;
53
+ extern VALUE RBS_Types_Bases_Top;
54
+ extern VALUE RBS_Types_Bases_Void;
55
+ extern VALUE RBS_Types_Block;
56
+ extern VALUE RBS_Types_ClassInstance;
57
+ extern VALUE RBS_Types_ClassSingleton;
58
+ extern VALUE RBS_Types_Function_Param;
59
+ extern VALUE RBS_Types_Function;
60
+ extern VALUE RBS_Types_Interface;
61
+ extern VALUE RBS_Types_Intersection;
62
+ extern VALUE RBS_Types_Literal;
63
+ extern VALUE RBS_Types_Optional;
64
+ extern VALUE RBS_Types_Proc;
65
+ extern VALUE RBS_Types_Record;
66
+ extern VALUE RBS_Types_Tuple;
67
+ extern VALUE RBS_Types_Union;
68
+ extern VALUE RBS_Types_Variable;
69
+
70
/* Declared with an explicit `void` parameter list so that calls passing arguments are rejected at compile time. */
void rbs__init_constants(void);
71
+
72
+ #endif
@@ -0,0 +1,3 @@
1
# Generates the Makefile for the native extension named `extension`.
require 'mkmf'

# When $extmk is set, add the top source directory to the include path.
if $extmk
  $INCFLAGS << " -I$(top_srcdir)"
end

create_makefile('extension')
@@ -0,0 +1,1070 @@
1
+ #include "rbs_extension.h"
2
+
3
/* Expands to a `case` arm that emits a token of type `kind` for the character `ch`.
   Expects `tok` and `state` to be in scope where it is expanded. */
#define ONE_CHAR_PATTERN(ch, kind) case ch: tok = next_token(state, kind); break
4
+
5
/**
 * Returns the codepoint at the current position without consuming it.
 *
 *   ... A B C ...
 *       ^ current  => A
 *
 * The argument is expanded several times, so pass an expression without side effects.
 * It is parenthesized here so that any pointer-valued expression can be used.
 */
#define peek(state) rb_enc_mbc_to_codepoint(RSTRING_PTR((state)->string) + (state)->current.byte_pos, RSTRING_END((state)->string), rb_enc_get((state)->string))
12
+
13
+ static const char *RBS_TOKENTYPE_NAMES[] = {
14
+ "NullType",
15
+ "pEOF",
16
+ "ErrorToken",
17
+
18
+ "pLPAREN", /* ( */
19
+ "pRPAREN", /* ) */
20
+ "pCOLON", /* : */
21
+ "pCOLON2", /* :: */
22
+ "pLBRACKET", /* [ */
23
+ "pRBRACKET", /* ] */
24
+ "pLBRACE", /* { */
25
+ "pRBRACE", /* } */
26
+ "pHAT", /* ^ */
27
+ "pARROW", /* -> */
28
+ "pFATARROW", /* => */
29
+ "pCOMMA", /* , */
30
+ "pBAR", /* | */
31
+ "pAMP", /* & */
32
+ "pSTAR", /* * */
33
+ "pSTAR2", /* ** */
34
+ "pDOT", /* . */
35
+ "pDOT3", /* ... */
36
+ "pBANG", /* ! */
37
+ "pQUESTION", /* ? */
38
+ "pLT", /* < */
39
+ "pEQ", /* = */
40
+
41
+ "kBOOL", /* bool */
42
+ "kBOT", /* bot */
43
+ "kCLASS", /* class */
44
+ "kFALSE", /* false */
45
+ "kINSTANCE", /* instance */
46
+ "kINTERFACE", /* interface */
47
+ "kNIL", /* nil */
48
+ "kSELF", /* self */
49
+ "kSINGLETON", /* singleton */
50
+ "kTOP", /* top */
51
+ "kTRUE", /* true */
52
+ "kVOID", /* void */
53
+ "kTYPE", /* type */
54
+ "kUNCHECKED", /* unchecked */
55
+ "kIN", /* in */
56
+ "kOUT", /* out */
57
+ "kEND", /* end */
58
+ "kDEF", /* def */
59
+ "kINCLUDE", /* include */
60
+ "kEXTEND", /* extend */
61
+ "kPREPEND", /* prepend */
62
+ "kALIAS", /* alias */
63
+ "kMODULE", /* module */
64
+ "kATTRREADER", /* attr_reader */
65
+ "kATTRWRITER", /* attr_writer */
66
+ "kATTRACCESSOR", /* attr_accessor */
67
+ "kPUBLIC", /* public */
68
+ "kPRIVATE", /* private */
69
+ "kUNTYPED", /* untyped */
70
+
71
+ "tLIDENT", /* Identifiers starting with lower case */
72
+ "tUIDENT", /* Identifiers starting with upper case */
73
+ "tULIDENT", /* Identifiers starting with `_` */
74
+ "tULLIDENT",
75
+ "tGIDENT", /* Identifiers starting with `$` */
76
+ "tAIDENT", /* Identifiers starting with `@` */
77
+ "tA2IDENT", /* Identifiers starting with `@@` */
78
+ "tBANGIDENT",
79
+ "tEQIDENT",
80
+ "tQIDENT", /* Quoted identifier */
81
+ "tOPERATOR", /* Operator identifier */
82
+
83
+ "tCOMMENT",
84
+ "tLINECOMMENT",
85
+
86
+ "tDQSTRING", /* Double quoted string */
87
+ "tSQSTRING", /* Single quoted string */
88
+ "tINTEGER", /* Integer */
89
+ "tSYMBOL", /* Symbol */
90
+ "tDQSYMBOL",
91
+ "tSQSYMBOL",
92
+ "tANNOTATION", /* Annotation */
93
+ };
94
+
95
+ token NullToken = { NullType };
96
+ position NullPosition = { -1, -1, -1, -1 };
97
+ range NULL_RANGE = { { -1, -1, -1, -1 }, { -1, -1, -1, -1 } };
98
+
99
+ const char *token_type_str(enum TokenType type) {
100
+ return RBS_TOKENTYPE_NAMES[type];
101
+ }
102
+
103
+ unsigned int peekn(lexstate *state, unsigned int chars[], size_t length) {
104
+ int byteoffset = 0;
105
+
106
+ rb_encoding *encoding = rb_enc_get(state->string);
107
+ char *start = RSTRING_PTR(state->string) + state->current.byte_pos;
108
+ char *end = RSTRING_END(state->string);
109
+
110
+ for (size_t i = 0; i < length; i++)
111
+ {
112
+ chars[i] = rb_enc_mbc_to_codepoint(start + byteoffset, end, encoding);
113
+ byteoffset += rb_enc_codelen(chars[i], rb_enc_get(state->string));
114
+ }
115
+
116
+ return byteoffset;
117
+ }
118
+
119
+ int token_chars(token tok) {
120
+ return tok.range.end.char_pos - tok.range.start.char_pos;
121
+ }
122
+
123
+ int token_bytes(token tok) {
124
+ return RANGE_BYTES(tok.range);
125
+ }
126
+
127
+ /**
128
+ * ... token ...
129
+ * ^ start
130
+ * ^ current
131
+ *
132
+ * */
133
+ token next_token(lexstate *state, enum TokenType type) {
134
+ token t;
135
+
136
+ t.type = type;
137
+ t.range.start = state->start;
138
+ t.range.end = state->current;
139
+ state->start = state->current;
140
+ state->first_token_of_line = false;
141
+
142
+ return t;
143
+ }
144
+
145
+ void advance_skip(lexstate *state, unsigned int c, bool skip) {
146
+ int len = rb_enc_codelen(c, rb_enc_get(state->string));
147
+
148
+ state->current.char_pos += 1;
149
+ state->current.byte_pos += len;
150
+
151
+ if (c == '\n') {
152
+ state->current.line += 1;
153
+ state->current.column = 0;
154
+ state->first_token_of_line = true;
155
+ } else {
156
+ state->current.column += 1;
157
+ }
158
+
159
+ if (skip) {
160
+ state->start = state->current;
161
+ }
162
+ }
163
+
164
+ void advance_char(lexstate *state, unsigned int c) {
165
+ advance_skip(state, c, false);
166
+ }
167
+
168
+ void skip_char(lexstate *state, unsigned int c) {
169
+ advance_skip(state, c, true);
170
+ }
171
+
172
+ void skip(lexstate *state) {
173
+ unsigned char c = peek(state);
174
+ skip_char(state, c);
175
+ }
176
+
177
+ void advance(lexstate *state) {
178
+ unsigned char c = peek(state);
179
+ advance_char(state, c);
180
+ }
181
+
182
+ /*
183
+ 1. Peek one character from state
184
+ 2. If read characetr equals to given `c`, skip the character and return true.
185
+ 3. Return false otherwise.
186
+ */
187
+ static bool advance_next_character_if(lexstate *state, unsigned int c) {
188
+ if (peek(state) == c) {
189
+ advance_char(state, c);
190
+ return true;
191
+ } else {
192
+ return false;
193
+ }
194
+ }
195
+
196
+ /*
197
+ ... 0 1 ...
198
+ ^ current
199
+ ^ current (return)
200
+ */
201
+ static token lex_number(lexstate *state) {
202
+ unsigned int c;
203
+
204
+ while (true) {
205
+ c = peek(state);
206
+
207
+ if (rb_isdigit(c) || c == '_') {
208
+ advance_char(state, c);
209
+ } else {
210
+ break;
211
+ }
212
+ }
213
+
214
+ return next_token(state, tINTEGER);
215
+ }
216
+
217
+ /*
218
+ lex_hyphen ::= - (tOPERATOR)
219
+ | - @ (tOPERATOR)
220
+ | - > (pARROW)
221
+ | - 1 ... (tINTEGER)
222
+ */
223
+ static token lex_hyphen(lexstate* state) {
224
+ if (advance_next_character_if(state, '>')) {
225
+ return next_token(state, pARROW);
226
+ } else if (advance_next_character_if(state, '@')) {
227
+ return next_token(state, tOPERATOR);
228
+ } else {
229
+ unsigned int c = peek(state);
230
+
231
+ if (rb_isdigit(c)) {
232
+ advance_char(state, c);
233
+ return lex_number(state);
234
+ } else {
235
+ return next_token(state, tOPERATOR);
236
+ }
237
+ }
238
+ }
239
+
240
+ /*
241
+ lex_plus ::= +
242
+ | + @
243
+ | + \d
244
+ */
245
+ static token lex_plus(lexstate *state) {
246
+ if (advance_next_character_if(state, '@')) {
247
+ return next_token(state, tOPERATOR);
248
+ } else if (rb_isdigit(peek(state))) {
249
+ return lex_number(state);
250
+ } else {
251
+ return next_token(state, tOPERATOR);
252
+ }
253
+ }
254
+
255
+ /*
256
+ lex_dot ::= . pDOT
257
+ | . . . pDOT3
258
+ */
259
+ static token lex_dot(lexstate *state) {
260
+ unsigned int cs[2];
261
+
262
+ peekn(state, cs, 2);
263
+
264
+ if (cs[0] == '.' && cs[1] == '.') {
265
+ advance_char(state, '.');
266
+ advance_char(state, '.');
267
+ return next_token(state, pDOT3);
268
+ } else {
269
+ return next_token(state, pDOT);
270
+ }
271
+ }
272
+
273
+ /*
274
+ lex_eq ::= =
275
+ | ==
276
+ | ===
277
+ | =~
278
+ | =>
279
+ */
280
+ static token lex_eq(lexstate *state) {
281
+ unsigned int cs[2];
282
+ peekn(state, cs, 2);
283
+
284
+ if (cs[0] == '=' && cs[1] == '=') {
285
+ // ===
286
+ advance_char(state, cs[0]);
287
+ advance_char(state, cs[1]);
288
+ return next_token(state, tOPERATOR);
289
+ } else if (cs[0] == '=') {
290
+ // ==
291
+ advance_char(state, cs[0]);
292
+ return next_token(state, tOPERATOR);
293
+ } else if (cs[0] == '~') {
294
+ // =~
295
+ advance_char(state, cs[0]);
296
+ return next_token(state, tOPERATOR);
297
+ } else if (cs[0] == '>') {
298
+ // =>
299
+ advance_char(state, cs[0]);
300
+ return next_token(state, pFATARROW);
301
+ } else {
302
+ return next_token(state, pEQ);
303
+ }
304
+ }
305
+
306
+ /*
307
+ underscore ::= _A tULIDENT
308
+ | _a tULLIDENT
309
+ | _ tULLIDENT
310
+ */
311
+ static token lex_underscore(lexstate *state) {
312
+ unsigned int c;
313
+
314
+ c = peek(state);
315
+
316
+ if ('A' <= c && c <= 'Z') {
317
+ advance_char(state, c);
318
+
319
+ while (true) {
320
+ c = peek(state);
321
+
322
+ if (rb_isalnum(c) || c == '_') {
323
+ // ok
324
+ advance_char(state, c);
325
+ } else {
326
+ break;
327
+ }
328
+ }
329
+
330
+ return next_token(state, tULIDENT);
331
+ } else if (rb_isalnum(c) || c == '_') {
332
+ advance_char(state, c);
333
+
334
+ while (true) {
335
+ c = peek(state);
336
+
337
+ if (rb_isalnum(c) || c == '_') {
338
+ // ok
339
+ advance_char(state, c);
340
+ } else {
341
+ break;
342
+ }
343
+ }
344
+
345
+ if (c == '!') {
346
+ advance_char(state, c);
347
+ return next_token(state, tBANGIDENT);
348
+ } else if (c == '=') {
349
+ advance_char(state, c);
350
+ return next_token(state, tEQIDENT);
351
+ } else {
352
+ return next_token(state, tULLIDENT);
353
+ }
354
+ } else {
355
+ return next_token(state, tULLIDENT);
356
+ }
357
+ }
358
+
359
/* Returns true when `c` is one of the ASCII punctuation characters listed below. */
static bool is_opr(unsigned int c) {
  bool result = false;

  switch (c) {
  case '!': case '"': case '$': case '%': case '&': case '\'':
  case '(': case ')': case '*': case '+': case ',': case '-':
  case '.': case '/': case ':': case ';': case '<': case '=':
  case '>': case '[': case '\\': case ']': case '^': case '{':
  case '|': case '}': case '~':
    result = true;
    break;
  default:
    break;
  }

  return result;
}
393
+
394
+ static token lex_global(lexstate *state) {
395
+ unsigned int c;
396
+
397
+ c = peek(state);
398
+
399
+ if (rb_isspace(c) || c == 0) {
400
+ return next_token(state, ErrorToken);
401
+ }
402
+
403
+ if (rb_isdigit(c)) {
404
+ // `$` [`0`-`9`]+
405
+ advance_char(state, c);
406
+
407
+ while (true) {
408
+ c = peek(state);
409
+ if (rb_isdigit(c)) {
410
+ advance_char(state, c);
411
+ } else {
412
+ return next_token(state, tGIDENT);
413
+ }
414
+ }
415
+ }
416
+
417
+ if (c == '-') {
418
+ // `$` `-` [a-zA-Z0-9_]
419
+ advance_char(state, c);
420
+ c = peek(state);
421
+
422
+ if (rb_isalnum(c) || c == '_') {
423
+ advance_char(state, c);
424
+ return next_token(state, tGIDENT);
425
+ } else {
426
+ return next_token(state, ErrorToken);
427
+ }
428
+ }
429
+
430
+ switch (c) {
431
+ case '~':
432
+ case '*':
433
+ case '$':
434
+ case '?':
435
+ case '!':
436
+ case '@':
437
+ case '\\':
438
+ case '/':
439
+ case ';':
440
+ case ',':
441
+ case '.':
442
+ case '=':
443
+ case ':':
444
+ case '<':
445
+ case '>':
446
+ case '"':
447
+ case '&':
448
+ case '\'':
449
+ case '`':
450
+ case '+':
451
+ advance_char(state, c);
452
+ return next_token(state, tGIDENT);
453
+
454
+ default:
455
+ if (is_opr(c) || c == 0) {
456
+ return next_token(state, ErrorToken);
457
+ }
458
+
459
+ while (true) {
460
+ advance_char(state, c);
461
+ c = peek(state);
462
+
463
+ if (rb_isspace(c) || is_opr(c) || c == 0) {
464
+ break;
465
+ }
466
+ }
467
+
468
+ return next_token(state, tGIDENT);
469
+ }
470
+ }
471
+
472
+ void pp(VALUE object) {
473
+ VALUE inspect = rb_funcall(object, rb_intern("inspect"), 0);
474
+ printf("pp >> %s\n", RSTRING_PTR(inspect));
475
+ }
476
+
477
+ static token lex_ident(lexstate *state, enum TokenType default_type) {
478
+ unsigned int c;
479
+ token tok;
480
+
481
+ while (true) {
482
+ c = peek(state);
483
+ if (rb_isalnum(c) || c == '_') {
484
+ advance_char(state, c);
485
+ } else if (c == '!') {
486
+ advance_char(state, c);
487
+ tok = next_token(state, tBANGIDENT);
488
+ break;
489
+ } else if (c == '=') {
490
+ advance_char(state, c);
491
+ tok = next_token(state, tEQIDENT);
492
+ break;
493
+ } else {
494
+ tok = next_token(state, default_type);
495
+ break;
496
+ }
497
+ }
498
+
499
+ if (tok.type == tLIDENT) {
500
+ VALUE string = rb_enc_str_new(
501
+ RSTRING_PTR(state->string) + tok.range.start.byte_pos,
502
+ RANGE_BYTES(tok.range),
503
+ rb_enc_get(state->string)
504
+ );
505
+
506
+ VALUE type = rb_hash_aref(RBS_Parser_KEYWORDS, string);
507
+ if (FIXNUM_P(type)) {
508
+ tok.type = FIX2INT(type);
509
+ }
510
+ }
511
+
512
+ return tok;
513
+ }
514
+
515
+ static token lex_comment(lexstate *state, enum TokenType type) {
516
+ unsigned int c;
517
+
518
+ c = peek(state);
519
+ if (c == ' ') {
520
+ advance_char(state, c);
521
+ }
522
+
523
+ while (true) {
524
+ c = peek(state);
525
+
526
+ if (c == '\n' || c == '\0') {
527
+ break;
528
+ } else {
529
+ advance_char(state, c);
530
+ }
531
+ }
532
+
533
+ token tok = next_token(state, type);
534
+
535
+ skip_char(state, c);
536
+
537
+ return tok;
538
+ }
539
+
540
+ /*
541
+ ... " ... " ...
542
+ ^ start
543
+ ^ current
544
+ ^ current (after)
545
+ */
546
+ static token lex_dqstring(lexstate *state) {
547
+ unsigned int c;
548
+
549
+ while (true) {
550
+ c = peek(state);
551
+ advance_char(state, c);
552
+
553
+ if (c == '\\') {
554
+ if (peek(state) == '"') {
555
+ advance_char(state, c);
556
+ c = peek(state);
557
+ }
558
+ } else if (c == '"') {
559
+ break;
560
+ }
561
+ }
562
+
563
+ return next_token(state, tDQSTRING);
564
+ }
565
+
566
+ /*
567
+ ... @ foo ...
568
+ ^ start
569
+ ^ current
570
+ ^ current (return)
571
+
572
+ ... @ @ foo ...
573
+ ^ start
574
+ ^ current
575
+ ^ current (return)
576
+ */
577
+ static token lex_ivar(lexstate *state) {
578
+ unsigned int c;
579
+
580
+ enum TokenType type = tAIDENT;
581
+
582
+ c = peek(state);
583
+
584
+ if (c == '@') {
585
+ type = tA2IDENT;
586
+ advance_char(state, c);
587
+ c = peek(state);
588
+ }
589
+
590
+ if (rb_isalpha(c) || c == '_') {
591
+ advance_char(state, c);
592
+ c = peek(state);
593
+ } else {
594
+ return next_token(state, ErrorToken);
595
+ }
596
+
597
+ while (rb_isalnum(c) || c == '_') {
598
+ advance_char(state, c);
599
+ c = peek(state);
600
+ }
601
+
602
+ return next_token(state, type);
603
+ }
604
+
605
+ /*
606
+ ... ' ... ' ...
607
+ ^ start
608
+ ^ current
609
+ ^ current (after)
610
+ */
611
+ static token lex_sqstring(lexstate *state) {
612
+ unsigned int c;
613
+
614
+ c = peek(state);
615
+
616
+ while (true) {
617
+ c = peek(state);
618
+ advance_char(state, c);
619
+
620
+ if (c == '\\') {
621
+ if (peek(state) == '\'') {
622
+ advance_char(state, c);
623
+ c = peek(state);
624
+ }
625
+ } else if (c == '\'') {
626
+ break;
627
+ }
628
+ }
629
+
630
+ return next_token(state, tSQSTRING);
631
+ }
632
+
633
/* True when the codepoints c0, c1 (and c2) equal the first two (three) characters of the string `s`.
   Every argument is parenthesized so that arbitrary expressions can be passed. */
#define EQPOINTS2(c0, c1, s) ((c0) == (s)[0] && (c1) == (s)[1])
#define EQPOINTS3(c0, c1, c2, s) ((c0) == (s)[0] && (c1) == (s)[1] && (c2) == (s)[2])
635
+
636
+ /*
637
+ ... : @ ...
638
+ ^ start
639
+ ^ current
640
+ ^ current (return)
641
+ */
642
+ static token lex_colon_symbol(lexstate *state) {
643
+ unsigned int c[3];
644
+ peekn(state, c, 3);
645
+
646
+ switch (c[0]) {
647
+ case '|':
648
+ case '&':
649
+ case '/':
650
+ case '%':
651
+ case '~':
652
+ case '`':
653
+ case '^':
654
+ advance_char(state, c[0]);
655
+ return next_token(state, tSYMBOL);
656
+ case '=':
657
+ if (EQPOINTS2(c[0], c[1], "=~")) {
658
+ // :=~
659
+ advance_char(state, c[0]);
660
+ advance_char(state, c[1]);
661
+ return next_token(state, tSYMBOL);
662
+ } else if (EQPOINTS3(c[0], c[1], c[2], "===")) {
663
+ // :===
664
+ advance_char(state, c[0]);
665
+ advance_char(state, c[1]);
666
+ advance_char(state, c[2]);
667
+ return next_token(state, tSYMBOL);
668
+ } else if (EQPOINTS2(c[0], c[1], "==")) {
669
+ // :==
670
+ advance_char(state, c[0]);
671
+ advance_char(state, c[1]);
672
+ return next_token(state, tSYMBOL);
673
+ }
674
+ break;
675
+ case '<':
676
+ if (EQPOINTS3(c[0], c[1], c[2], "<=>")) {
677
+ advance_char(state, c[0]);
678
+ advance_char(state, c[1]);
679
+ advance_char(state, c[2]);
680
+ } else if (EQPOINTS2(c[0], c[1], "<=") || EQPOINTS2(c[0], c[1], "<<")) {
681
+ advance_char(state, c[0]);
682
+ advance_char(state, c[1]);
683
+ } else {
684
+ advance_char(state, c[0]);
685
+ }
686
+ return next_token(state, tSYMBOL);
687
+ case '>':
688
+ if (EQPOINTS2(c[0], c[1], ">=") || EQPOINTS2(c[0], c[1], ">>")) {
689
+ advance_char(state, c[0]);
690
+ advance_char(state, c[1]);
691
+ } else {
692
+ advance_char(state, c[0]);
693
+ }
694
+ return next_token(state, tSYMBOL);
695
+ case '-':
696
+ case '+':
697
+ if (EQPOINTS2(c[0], c[1], "+@") || EQPOINTS2(c[0], c[1], "-@")) {
698
+ advance_char(state, c[0]);
699
+ advance_char(state, c[1]);
700
+ } else {
701
+ advance_char(state, c[0]);
702
+ }
703
+ return next_token(state, tSYMBOL);
704
+ case '*':
705
+ if (EQPOINTS2(c[0], c[1], "**")) {
706
+ advance_char(state, c[0]);
707
+ advance_char(state, c[1]);
708
+ } else {
709
+ advance_char(state, c[0]);
710
+ }
711
+ return next_token(state, tSYMBOL);
712
+ case '[':
713
+ if (EQPOINTS3(c[0], c[1], c[2], "[]=")) {
714
+ advance_char(state, c[0]);
715
+ advance_char(state, c[1]);
716
+ advance_char(state, c[2]);
717
+ } else if (EQPOINTS2(c[0], c[1], "[]")) {
718
+ advance_char(state, c[0]);
719
+ advance_char(state, c[1]);
720
+ } else {
721
+ break;
722
+ }
723
+ return next_token(state, tSYMBOL);
724
+ case '!':
725
+ if (EQPOINTS2(c[0], c[1], "!=") || EQPOINTS2(c[0], c[1], "!~")) {
726
+ advance_char(state, c[0]);
727
+ advance_char(state, c[1]);
728
+ } else {
729
+ advance_char(state, c[0]);
730
+ }
731
+ return next_token(state, tSYMBOL);
732
+ case '@': {
733
+ advance_char(state, '@');
734
+ token tok = lex_ivar(state);
735
+ if (tok.type != ErrorToken) {
736
+ tok.type = tSYMBOL;
737
+ }
738
+ return tok;
739
+ }
740
+ case '$': {
741
+ advance_char(state, '$');
742
+ token tok = lex_global(state);
743
+ if (tok.type != ErrorToken) {
744
+ tok.type = tSYMBOL;
745
+ }
746
+ return tok;
747
+ }
748
+ case '\'': {
749
+ position start = state->start;
750
+ advance_char(state, '\'');
751
+ token tok = lex_sqstring(state);
752
+ tok.type = tSQSYMBOL;
753
+ tok.range.start = start;
754
+ return tok;
755
+ }
756
+ case '"': {
757
+ position start = state->start;
758
+ advance_char(state, '"');
759
+ token tok = lex_dqstring(state);
760
+ tok.type = tDQSYMBOL;
761
+ tok.range.start = start;
762
+ return tok;
763
+ }
764
+ default:
765
+ if (rb_isalpha(c[0]) || c[0] == '_') {
766
+ position start = state->start;
767
+ token tok = lex_ident(state, NullType);
768
+ tok.range.start = start;
769
+
770
+ if (peek(state) == '?') {
771
+ if (tok.type != tBANGIDENT && tok.type != tEQIDENT) {
772
+ skip_char(state, '?');
773
+ tok.range.end = state->current;
774
+ }
775
+ }
776
+
777
+ tok.type = tSYMBOL;
778
+ return tok;
779
+ }
780
+ }
781
+
782
+ return next_token(state, pCOLON);
783
+ }
784
+
785
+ /*
786
+ ... : : ...
787
+ ^ start
788
+ ^ current
789
+ ^ current (return)
790
+
791
+ ... : ...
792
+ ^ start
793
+ ^ current (lex_colon_symbol)
794
+ */
795
+ static token lex_colon(lexstate *state) {
796
+ unsigned int c = peek(state);
797
+
798
+ if (c == ':') {
799
+ advance_char(state, c);
800
+ return next_token(state, pCOLON2);
801
+ } else {
802
+ return lex_colon_symbol(state);
803
+ }
804
+ }
805
+
806
+ /*
807
+ lex_lt ::= < (pLT)
808
+ | < < (tOPERATOR)
809
+ | < = (tOPERATOR)
810
+ | < = > (tOPERATOR)
811
+ */
812
+ static token lex_lt(lexstate *state) {
813
+ if (advance_next_character_if(state, '<')) {
814
+ return next_token(state, tOPERATOR);
815
+ } else if (advance_next_character_if(state, '=')) {
816
+ advance_next_character_if(state, '>');
817
+ return next_token(state, tOPERATOR);
818
+ } else {
819
+ return next_token(state, pLT);
820
+ }
821
+ }
822
+
823
+ /*
824
+ lex_gt ::= >
825
+ | > =
826
+ | > >
827
+ */
828
+ static token lex_gt(lexstate *state) {
829
+ advance_next_character_if(state, '=') || advance_next_character_if(state, '>');
830
+ return next_token(state, tOPERATOR);
831
+ }
832
+
833
+ /*
834
+ ... `%` `a` `{` ... `}` ...
835
+ ^ start
836
+ ^ current
837
+ ^ current (exit)
838
+ --- token
839
+ */
840
+ static token lex_percent(lexstate *state) {
841
+ unsigned int cs[2];
842
+ unsigned int end_char;
843
+
844
+ peekn(state, cs, 2);
845
+
846
+ if (cs[0] != 'a') {
847
+ return next_token(state, tOPERATOR);
848
+ }
849
+
850
+ switch (cs[1])
851
+ {
852
+ case '{':
853
+ end_char = '}';
854
+ break;
855
+ case '(':
856
+ end_char = ')';
857
+ break;
858
+ case '[':
859
+ end_char = ']';
860
+ break;
861
+ case '|':
862
+ end_char = '|';
863
+ break;
864
+ case '<':
865
+ end_char = '>';
866
+ break;
867
+ default:
868
+ return next_token(state, tOPERATOR);
869
+ }
870
+
871
+ advance_char(state, cs[0]);
872
+ advance_char(state, cs[1]);
873
+
874
+ unsigned int c;
875
+
876
+ while ((c = peek(state))) {
877
+ if (c == end_char) {
878
+ advance_char(state, c);
879
+ return next_token(state, tANNOTATION);
880
+ }
881
+ advance_char(state, c);
882
+ }
883
+
884
+ return next_token(state, ErrorToken);
885
+ }
886
+
887
+ /*
888
+ bracket ::= [ (pLBRACKET)
889
+ * ^
890
+ | [ ] (tOPERATOR)
891
+ * ^ $
892
+ | [ ] = (tOPERATOR)
893
+ * ^ $
894
+ */
895
+ static token lex_bracket(lexstate *state) {
896
+ if (advance_next_character_if(state, ']')) {
897
+ advance_next_character_if(state, '=');
898
+ return next_token(state, tOPERATOR);
899
+ } else {
900
+ return next_token(state, pLBRACKET);
901
+ }
902
+ }
903
+
904
+ /*
905
+ bracket ::= *
906
+ | * *
907
+ */
908
+ static token lex_star(lexstate *state) {
909
+ if (advance_next_character_if(state, '*')) {
910
+ return next_token(state, pSTAR2);
911
+ } else {
912
+ return next_token(state, pSTAR);
913
+ }
914
+ }
915
+
916
+ /*
917
+ bang ::= !
918
+ | ! =
919
+ | ! ~
920
+ */
921
+ static token lex_bang(lexstate *state) {
922
+ advance_next_character_if(state, '=') || advance_next_character_if(state, '~');
923
+ return next_token(state, tOPERATOR);
924
+ }
925
+
926
+ /*
927
+ backquote ::= ` (tOPERATOR)
928
+ | `[^ :][^`]` (tQIDENT)
929
+ */
930
+ static token lex_backquote(lexstate *state) {
931
+ unsigned int c = peek(state);
932
+
933
+ if (c == ' ' || c == ':') {
934
+ return next_token(state, tOPERATOR);
935
+ } else {
936
+ while (true) {
937
+ if (c == '`') {
938
+ break;
939
+ }
940
+
941
+ c = peek(state);
942
+ advance_char(state, c);
943
+ }
944
+
945
+ return next_token(state, tQIDENT);
946
+ }
947
+ }
948
+
949
+ token rbsparser_next_token(lexstate *state) {
950
+ token tok = NullToken;
951
+
952
+ unsigned int c;
953
+ bool skipping = true;
954
+
955
+ while (skipping) {
956
+ c = peek(state);
957
+
958
+ switch (c) {
959
+ case ' ':
960
+ case '\t':
961
+ case '\n':
962
+ // nop
963
+ skip_char(state, c);
964
+ break;
965
+ case '\0':
966
+ return next_token(state, pEOF);
967
+ default:
968
+ advance_char(state, c);
969
+ skipping = false;
970
+ break;
971
+ }
972
+ }
973
+
974
+ /* ... c d .. */
975
+ /* ^ state->current */
976
+ /* ^ start */
977
+ switch (c) {
978
+ case '\0': tok = next_token(state, pEOF);
979
+ ONE_CHAR_PATTERN('(', pLPAREN);
980
+ ONE_CHAR_PATTERN(')', pRPAREN);
981
+ ONE_CHAR_PATTERN(']', pRBRACKET);
982
+ ONE_CHAR_PATTERN('{', pLBRACE);
983
+ ONE_CHAR_PATTERN('}', pRBRACE);
984
+ ONE_CHAR_PATTERN(',', pCOMMA);
985
+ ONE_CHAR_PATTERN('|', pBAR);
986
+ ONE_CHAR_PATTERN('^', pHAT);
987
+ ONE_CHAR_PATTERN('&', pAMP);
988
+ ONE_CHAR_PATTERN('?', pQUESTION);
989
+ ONE_CHAR_PATTERN('/', tOPERATOR);
990
+ ONE_CHAR_PATTERN('~', tOPERATOR);
991
+ case '[':
992
+ tok = lex_bracket(state);
993
+ break;
994
+ case '-':
995
+ tok = lex_hyphen(state);
996
+ break;
997
+ case '+':
998
+ tok = lex_plus(state);
999
+ break;
1000
+ case '*':
1001
+ tok = lex_star(state);
1002
+ break;
1003
+ case '<':
1004
+ tok = lex_lt(state);
1005
+ break;
1006
+ case '=':
1007
+ tok = lex_eq(state);
1008
+ break;
1009
+ case '>':
1010
+ tok = lex_gt(state);
1011
+ break;
1012
+ case '!':
1013
+ tok = lex_bang(state);
1014
+ break;
1015
+ case '#':
1016
+ if (state->first_token_of_line) {
1017
+ tok = lex_comment(state, tLINECOMMENT);
1018
+ } else {
1019
+ tok = lex_comment(state, tCOMMENT);
1020
+ }
1021
+ break;
1022
+ case ':':
1023
+ tok = lex_colon(state);
1024
+ break;
1025
+ case '.':
1026
+ tok = lex_dot(state);
1027
+ break;
1028
+ case '_':
1029
+ tok = lex_underscore(state);
1030
+ break;
1031
+ case '$':
1032
+ tok = lex_global(state);
1033
+ break;
1034
+ case '@':
1035
+ tok = lex_ivar(state);
1036
+ break;
1037
+ case '"':
1038
+ tok = lex_dqstring(state);
1039
+ break;
1040
+ case '\'':
1041
+ tok = lex_sqstring(state);
1042
+ break;
1043
+ case '%':
1044
+ tok = lex_percent(state);
1045
+ break;
1046
+ case '`':
1047
+ tok = lex_backquote(state);
1048
+ break;
1049
+ default:
1050
+ if (rb_isalpha(c) && rb_isupper(c)) {
1051
+ tok = lex_ident(state, tUIDENT);
1052
+ }
1053
+ if (rb_isalpha(c) && rb_islower(c)) {
1054
+ tok = lex_ident(state, tLIDENT);
1055
+ }
1056
+ if (rb_isdigit(c)) {
1057
+ tok = lex_number(state);
1058
+ }
1059
+ }
1060
+
1061
+ if (tok.type == NullType) {
1062
+ tok = next_token(state, ErrorToken);
1063
+ }
1064
+
1065
+ return tok;
1066
+ }
1067
+
1068
+ char *peek_token(lexstate *state, token tok) {
1069
+ return RSTRING_PTR(state->string) + tok.range.start.byte_pos;
1070
+ }