rbs 1.7.0.beta.1 → 1.7.0.beta.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +20 -1
  3. data/.gitignore +9 -1
  4. data/CHANGELOG.md +20 -9
  5. data/Rakefile +16 -1
  6. data/core/binding.rbs +2 -0
  7. data/core/complex.rbs +0 -2
  8. data/core/float.rbs +0 -2
  9. data/core/integer.rbs +0 -2
  10. data/core/numeric.rbs +7 -0
  11. data/core/object.rbs +1 -1
  12. data/core/proc.rbs +2 -0
  13. data/core/rational.rbs +0 -2
  14. data/core/unbound_method.rbs +13 -0
  15. data/docs/rbs_by_example.md +2 -2
  16. data/docs/syntax.md +2 -3
  17. data/ext/{rbs/extension → rbs_extension}/constants.c +0 -1
  18. data/ext/{rbs/extension → rbs_extension}/constants.h +0 -0
  19. data/ext/{rbs/extension → rbs_extension}/extconf.rb +1 -1
  20. data/ext/rbs_extension/lexer.c +2533 -0
  21. data/ext/{rbs/extension → rbs_extension}/lexer.h +33 -17
  22. data/ext/rbs_extension/lexer.re +140 -0
  23. data/ext/rbs_extension/lexstate.c +139 -0
  24. data/ext/{rbs/extension → rbs_extension}/location.c +0 -0
  25. data/ext/{rbs/extension → rbs_extension}/location.h +0 -0
  26. data/ext/{rbs/extension → rbs_extension}/main.c +1 -1
  27. data/ext/{rbs/extension → rbs_extension}/parser.c +6 -32
  28. data/ext/{rbs/extension → rbs_extension}/parser.h +0 -5
  29. data/ext/{rbs/extension → rbs_extension}/parserstate.c +0 -1
  30. data/ext/{rbs/extension → rbs_extension}/parserstate.h +0 -0
  31. data/ext/{rbs/extension → rbs_extension}/rbs_extension.h +1 -1
  32. data/ext/{rbs/extension → rbs_extension}/ruby_objs.c +84 -148
  33. data/ext/{rbs/extension → rbs_extension}/ruby_objs.h +0 -2
  34. data/ext/{rbs/extension → rbs_extension}/unescape.c +0 -0
  35. data/lib/rbs/collection/installer.rb +1 -0
  36. data/lib/rbs/collection/sources/git.rb +6 -1
  37. data/lib/rbs/errors.rb +14 -0
  38. data/lib/rbs/location_aux.rb +13 -0
  39. data/lib/rbs/parser_aux.rb +39 -0
  40. data/lib/rbs/parser_compat/lexer_error.rb +4 -0
  41. data/lib/rbs/parser_compat/located_value.rb +5 -0
  42. data/lib/rbs/parser_compat/semantics_error.rb +4 -0
  43. data/lib/rbs/parser_compat/syntax_error.rb +4 -0
  44. data/lib/rbs/prototype/helpers.rb +113 -0
  45. data/lib/rbs/prototype/rb.rb +2 -105
  46. data/lib/rbs/prototype/runtime.rb +16 -0
  47. data/lib/rbs/types.rb +2 -2
  48. data/lib/rbs/version.rb +1 -1
  49. data/lib/rbs.rb +13 -1
  50. data/rbs.gemspec +1 -1
  51. data/sig/errors.rbs +10 -0
  52. data/sig/location.rbs +5 -0
  53. data/sig/parser.rbs +5 -0
  54. data/sig/rbs.rbs +4 -0
  55. data/stdlib/io-console/0/io-console.rbs +137 -0
  56. data/stdlib/net-http/0/net-http.rbs +2 -1
  57. data/stdlib/tempfile/0/tempfile.rbs +4 -6
  58. metadata +27 -19
  59. data/ext/rbs/extension/lexer.c +0 -1070
@@ -1,1070 +0,0 @@
1
- #include "rbs_extension.h"
2
-
3
- #define ONE_CHAR_PATTERN(c, t) case c: tok = next_token(state, t); break
4
-
5
- /**
6
- * Returns one character at current.
7
- *
8
- * ... A B C ...
9
- * ^ current => A
10
- * */
11
- #define peek(state) rb_enc_mbc_to_codepoint(RSTRING_PTR(state->string) + state->current.byte_pos, RSTRING_END(state->string), rb_enc_get(state->string))
12
-
13
- static const char *RBS_TOKENTYPE_NAMES[] = {
14
- "NullType",
15
- "pEOF",
16
- "ErrorToken",
17
-
18
- "pLPAREN", /* ( */
19
- "pRPAREN", /* ) */
20
- "pCOLON", /* : */
21
- "pCOLON2", /* :: */
22
- "pLBRACKET", /* [ */
23
- "pRBRACKET", /* ] */
24
- "pLBRACE", /* { */
25
- "pRBRACE", /* } */
26
- "pHAT", /* ^ */
27
- "pARROW", /* -> */
28
- "pFATARROW", /* => */
29
- "pCOMMA", /* , */
30
- "pBAR", /* | */
31
- "pAMP", /* & */
32
- "pSTAR", /* * */
33
- "pSTAR2", /* ** */
34
- "pDOT", /* . */
35
- "pDOT3", /* ... */
36
- "pBANG", /* ! */
37
- "pQUESTION", /* ? */
38
- "pLT", /* < */
39
- "pEQ", /* = */
40
-
41
- "kBOOL", /* bool */
42
- "kBOT", /* bot */
43
- "kCLASS", /* class */
44
- "kFALSE", /* kFALSE */
45
- "kINSTANCE", /* instance */
46
- "kINTERFACE", /* interface */
47
- "kNIL", /* nil */
48
- "kSELF", /* self */
49
- "kSINGLETON", /* singleton */
50
- "kTOP", /* top */
51
- "kTRUE", /* true */
52
- "kVOID", /* void */
53
- "kTYPE", /* type */
54
- "kUNCHECKED", /* unchecked */
55
- "kIN", /* in */
56
- "kOUT", /* out */
57
- "kEND", /* end */
58
- "kDEF", /* def */
59
- "kINCLUDE", /* include */
60
- "kEXTEND", /* extend */
61
- "kPREPEND", /* prepend */
62
- "kALIAS", /* alias */
63
- "kMODULE", /* module */
64
- "kATTRREADER", /* attr_reader */
65
- "kATTRWRITER", /* attr_writer */
66
- "kATTRACCESSOR", /* attr_accessor */
67
- "kPUBLIC", /* public */
68
- "kPRIVATE", /* private */
69
- "kUNTYPED", /* untyped */
70
-
71
- "tLIDENT", /* Identifiers starting with lower case */
72
- "tUIDENT", /* Identifiers starting with upper case */
73
- "tULIDENT", /* Identifiers starting with `_` */
74
- "tULLIDENT",
75
- "tGIDENT", /* Identifiers starting with `$` */
76
- "tAIDENT", /* Identifiers starting with `@` */
77
- "tA2IDENT", /* Identifiers starting with `@@` */
78
- "tBANGIDENT",
79
- "tEQIDENT",
80
- "tQIDENT", /* Quoted identifier */
81
- "tOPERATOR", /* Operator identifier */
82
-
83
- "tCOMMENT",
84
- "tLINECOMMENT",
85
-
86
- "tDQSTRING", /* Double quoted string */
87
- "tSQSTRING", /* Single quoted string */
88
- "tINTEGER", /* Integer */
89
- "tSYMBOL", /* Symbol */
90
- "tDQSYMBOL",
91
- "tSQSYMBOL",
92
- "tANNOTATION", /* Annotation */
93
- };
94
-
95
- token NullToken = { NullType };
96
- position NullPosition = { -1, -1, -1, -1 };
97
- range NULL_RANGE = { { -1, -1, -1, -1 }, { -1, -1, -1, -1 } };
98
-
99
- const char *token_type_str(enum TokenType type) {
100
- return RBS_TOKENTYPE_NAMES[type];
101
- }
102
-
103
- unsigned int peekn(lexstate *state, unsigned int chars[], size_t length) {
104
- int byteoffset = 0;
105
-
106
- rb_encoding *encoding = rb_enc_get(state->string);
107
- char *start = RSTRING_PTR(state->string) + state->current.byte_pos;
108
- char *end = RSTRING_END(state->string);
109
-
110
- for (size_t i = 0; i < length; i++)
111
- {
112
- chars[i] = rb_enc_mbc_to_codepoint(start + byteoffset, end, encoding);
113
- byteoffset += rb_enc_codelen(chars[i], rb_enc_get(state->string));
114
- }
115
-
116
- return byteoffset;
117
- }
118
-
119
- int token_chars(token tok) {
120
- return tok.range.end.char_pos - tok.range.start.char_pos;
121
- }
122
-
123
- int token_bytes(token tok) {
124
- return RANGE_BYTES(tok.range);
125
- }
126
-
127
- /**
128
- * ... token ...
129
- * ^ start
130
- * ^ current
131
- *
132
- * */
133
- token next_token(lexstate *state, enum TokenType type) {
134
- token t;
135
-
136
- t.type = type;
137
- t.range.start = state->start;
138
- t.range.end = state->current;
139
- state->start = state->current;
140
- state->first_token_of_line = false;
141
-
142
- return t;
143
- }
144
-
145
- void advance_skip(lexstate *state, unsigned int c, bool skip) {
146
- int len = rb_enc_codelen(c, rb_enc_get(state->string));
147
-
148
- state->current.char_pos += 1;
149
- state->current.byte_pos += len;
150
-
151
- if (c == '\n') {
152
- state->current.line += 1;
153
- state->current.column = 0;
154
- state->first_token_of_line = true;
155
- } else {
156
- state->current.column += 1;
157
- }
158
-
159
- if (skip) {
160
- state->start = state->current;
161
- }
162
- }
163
-
164
- void advance_char(lexstate *state, unsigned int c) {
165
- advance_skip(state, c, false);
166
- }
167
-
168
- void skip_char(lexstate *state, unsigned int c) {
169
- advance_skip(state, c, true);
170
- }
171
-
172
- void skip(lexstate *state) {
173
- unsigned char c = peek(state);
174
- skip_char(state, c);
175
- }
176
-
177
- void advance(lexstate *state) {
178
- unsigned char c = peek(state);
179
- advance_char(state, c);
180
- }
181
-
182
- /*
183
- 1. Peek one character from state
184
- 2. If read characetr equals to given `c`, skip the character and return true.
185
- 3. Return false otherwise.
186
- */
187
- static bool advance_next_character_if(lexstate *state, unsigned int c) {
188
- if (peek(state) == c) {
189
- advance_char(state, c);
190
- return true;
191
- } else {
192
- return false;
193
- }
194
- }
195
-
196
- /*
197
- ... 0 1 ...
198
- ^ current
199
- ^ current (return)
200
- */
201
- static token lex_number(lexstate *state) {
202
- unsigned int c;
203
-
204
- while (true) {
205
- c = peek(state);
206
-
207
- if (rb_isdigit(c) || c == '_') {
208
- advance_char(state, c);
209
- } else {
210
- break;
211
- }
212
- }
213
-
214
- return next_token(state, tINTEGER);
215
- }
216
-
217
- /*
218
- lex_hyphen ::= - (tOPERATOR)
219
- | - @ (tOPERATOR)
220
- | - > (pARROW)
221
- | - 1 ... (tINTEGER)
222
- */
223
- static token lex_hyphen(lexstate* state) {
224
- if (advance_next_character_if(state, '>')) {
225
- return next_token(state, pARROW);
226
- } else if (advance_next_character_if(state, '@')) {
227
- return next_token(state, tOPERATOR);
228
- } else {
229
- unsigned int c = peek(state);
230
-
231
- if (rb_isdigit(c)) {
232
- advance_char(state, c);
233
- return lex_number(state);
234
- } else {
235
- return next_token(state, tOPERATOR);
236
- }
237
- }
238
- }
239
-
240
- /*
241
- lex_plus ::= +
242
- | + @
243
- | + \d
244
- */
245
- static token lex_plus(lexstate *state) {
246
- if (advance_next_character_if(state, '@')) {
247
- return next_token(state, tOPERATOR);
248
- } else if (rb_isdigit(peek(state))) {
249
- return lex_number(state);
250
- } else {
251
- return next_token(state, tOPERATOR);
252
- }
253
- }
254
-
255
- /*
256
- lex_dot ::= . pDOT
257
- | . . . pDOT3
258
- */
259
- static token lex_dot(lexstate *state) {
260
- unsigned int cs[2];
261
-
262
- peekn(state, cs, 2);
263
-
264
- if (cs[0] == '.' && cs[1] == '.') {
265
- advance_char(state, '.');
266
- advance_char(state, '.');
267
- return next_token(state, pDOT3);
268
- } else {
269
- return next_token(state, pDOT);
270
- }
271
- }
272
-
273
- /*
274
- lex_eq ::= =
275
- | ==
276
- | ===
277
- | =~
278
- | =>
279
- */
280
- static token lex_eq(lexstate *state) {
281
- unsigned int cs[2];
282
- peekn(state, cs, 2);
283
-
284
- if (cs[0] == '=' && cs[1] == '=') {
285
- // ===
286
- advance_char(state, cs[0]);
287
- advance_char(state, cs[1]);
288
- return next_token(state, tOPERATOR);
289
- } else if (cs[0] == '=') {
290
- // ==
291
- advance_char(state, cs[0]);
292
- return next_token(state, tOPERATOR);
293
- } else if (cs[0] == '~') {
294
- // =~
295
- advance_char(state, cs[0]);
296
- return next_token(state, tOPERATOR);
297
- } else if (cs[0] == '>') {
298
- // =>
299
- advance_char(state, cs[0]);
300
- return next_token(state, pFATARROW);
301
- } else {
302
- return next_token(state, pEQ);
303
- }
304
- }
305
-
306
- /*
307
- underscore ::= _A tULIDENT
308
- | _a tULLIDENT
309
- | _ tULLIDENT
310
- */
311
- static token lex_underscore(lexstate *state) {
312
- unsigned int c;
313
-
314
- c = peek(state);
315
-
316
- if ('A' <= c && c <= 'Z') {
317
- advance_char(state, c);
318
-
319
- while (true) {
320
- c = peek(state);
321
-
322
- if (rb_isalnum(c) || c == '_') {
323
- // ok
324
- advance_char(state, c);
325
- } else {
326
- break;
327
- }
328
- }
329
-
330
- return next_token(state, tULIDENT);
331
- } else if (rb_isalnum(c) || c == '_') {
332
- advance_char(state, c);
333
-
334
- while (true) {
335
- c = peek(state);
336
-
337
- if (rb_isalnum(c) || c == '_') {
338
- // ok
339
- advance_char(state, c);
340
- } else {
341
- break;
342
- }
343
- }
344
-
345
- if (c == '!') {
346
- advance_char(state, c);
347
- return next_token(state, tBANGIDENT);
348
- } else if (c == '=') {
349
- advance_char(state, c);
350
- return next_token(state, tEQIDENT);
351
- } else {
352
- return next_token(state, tULLIDENT);
353
- }
354
- } else {
355
- return next_token(state, tULLIDENT);
356
- }
357
- }
358
-
359
- static bool is_opr(unsigned int c) {
360
- switch (c) {
361
- case ':':
362
- case ';':
363
- case '=':
364
- case '.':
365
- case ',':
366
- case '!':
367
- case '"':
368
- case '$':
369
- case '%':
370
- case '&':
371
- case '(':
372
- case ')':
373
- case '-':
374
- case '+':
375
- case '~':
376
- case '|':
377
- case '\\':
378
- case '\'':
379
- case '[':
380
- case ']':
381
- case '{':
382
- case '}':
383
- case '*':
384
- case '/':
385
- case '<':
386
- case '>':
387
- case '^':
388
- return true;
389
- default:
390
- return false;
391
- }
392
- }
393
-
394
- static token lex_global(lexstate *state) {
395
- unsigned int c;
396
-
397
- c = peek(state);
398
-
399
- if (rb_isspace(c) || c == 0) {
400
- return next_token(state, ErrorToken);
401
- }
402
-
403
- if (rb_isdigit(c)) {
404
- // `$` [`0`-`9`]+
405
- advance_char(state, c);
406
-
407
- while (true) {
408
- c = peek(state);
409
- if (rb_isdigit(c)) {
410
- advance_char(state, c);
411
- } else {
412
- return next_token(state, tGIDENT);
413
- }
414
- }
415
- }
416
-
417
- if (c == '-') {
418
- // `$` `-` [a-zA-Z0-9_]
419
- advance_char(state, c);
420
- c = peek(state);
421
-
422
- if (rb_isalnum(c) || c == '_') {
423
- advance_char(state, c);
424
- return next_token(state, tGIDENT);
425
- } else {
426
- return next_token(state, ErrorToken);
427
- }
428
- }
429
-
430
- switch (c) {
431
- case '~':
432
- case '*':
433
- case '$':
434
- case '?':
435
- case '!':
436
- case '@':
437
- case '\\':
438
- case '/':
439
- case ';':
440
- case ',':
441
- case '.':
442
- case '=':
443
- case ':':
444
- case '<':
445
- case '>':
446
- case '"':
447
- case '&':
448
- case '\'':
449
- case '`':
450
- case '+':
451
- advance_char(state, c);
452
- return next_token(state, tGIDENT);
453
-
454
- default:
455
- if (is_opr(c) || c == 0) {
456
- return next_token(state, ErrorToken);
457
- }
458
-
459
- while (true) {
460
- advance_char(state, c);
461
- c = peek(state);
462
-
463
- if (rb_isspace(c) || is_opr(c) || c == 0) {
464
- break;
465
- }
466
- }
467
-
468
- return next_token(state, tGIDENT);
469
- }
470
- }
471
-
472
- void pp(VALUE object) {
473
- VALUE inspect = rb_funcall(object, rb_intern("inspect"), 0);
474
- printf("pp >> %s\n", RSTRING_PTR(inspect));
475
- }
476
-
477
- static token lex_ident(lexstate *state, enum TokenType default_type) {
478
- unsigned int c;
479
- token tok;
480
-
481
- while (true) {
482
- c = peek(state);
483
- if (rb_isalnum(c) || c == '_') {
484
- advance_char(state, c);
485
- } else if (c == '!') {
486
- advance_char(state, c);
487
- tok = next_token(state, tBANGIDENT);
488
- break;
489
- } else if (c == '=') {
490
- advance_char(state, c);
491
- tok = next_token(state, tEQIDENT);
492
- break;
493
- } else {
494
- tok = next_token(state, default_type);
495
- break;
496
- }
497
- }
498
-
499
- if (tok.type == tLIDENT) {
500
- VALUE string = rb_enc_str_new(
501
- RSTRING_PTR(state->string) + tok.range.start.byte_pos,
502
- RANGE_BYTES(tok.range),
503
- rb_enc_get(state->string)
504
- );
505
-
506
- VALUE type = rb_hash_aref(RBS_Parser_KEYWORDS, string);
507
- if (FIXNUM_P(type)) {
508
- tok.type = FIX2INT(type);
509
- }
510
- }
511
-
512
- return tok;
513
- }
514
-
515
- static token lex_comment(lexstate *state, enum TokenType type) {
516
- unsigned int c;
517
-
518
- c = peek(state);
519
- if (c == ' ') {
520
- advance_char(state, c);
521
- }
522
-
523
- while (true) {
524
- c = peek(state);
525
-
526
- if (c == '\n' || c == '\0') {
527
- break;
528
- } else {
529
- advance_char(state, c);
530
- }
531
- }
532
-
533
- token tok = next_token(state, type);
534
-
535
- skip_char(state, c);
536
-
537
- return tok;
538
- }
539
-
540
- /*
541
- ... " ... " ...
542
- ^ start
543
- ^ current
544
- ^ current (after)
545
- */
546
- static token lex_dqstring(lexstate *state) {
547
- unsigned int c;
548
-
549
- while (true) {
550
- c = peek(state);
551
- advance_char(state, c);
552
-
553
- if (c == '\\') {
554
- if (peek(state) == '"') {
555
- advance_char(state, c);
556
- c = peek(state);
557
- }
558
- } else if (c == '"') {
559
- break;
560
- }
561
- }
562
-
563
- return next_token(state, tDQSTRING);
564
- }
565
-
566
- /*
567
- ... @ foo ...
568
- ^ start
569
- ^ current
570
- ^ current (return)
571
-
572
- ... @ @ foo ...
573
- ^ start
574
- ^ current
575
- ^ current (return)
576
- */
577
- static token lex_ivar(lexstate *state) {
578
- unsigned int c;
579
-
580
- enum TokenType type = tAIDENT;
581
-
582
- c = peek(state);
583
-
584
- if (c == '@') {
585
- type = tA2IDENT;
586
- advance_char(state, c);
587
- c = peek(state);
588
- }
589
-
590
- if (rb_isalpha(c) || c == '_') {
591
- advance_char(state, c);
592
- c = peek(state);
593
- } else {
594
- return next_token(state, ErrorToken);
595
- }
596
-
597
- while (rb_isalnum(c) || c == '_') {
598
- advance_char(state, c);
599
- c = peek(state);
600
- }
601
-
602
- return next_token(state, type);
603
- }
604
-
605
- /*
606
- ... ' ... ' ...
607
- ^ start
608
- ^ current
609
- ^ current (after)
610
- */
611
- static token lex_sqstring(lexstate *state) {
612
- unsigned int c;
613
-
614
- c = peek(state);
615
-
616
- while (true) {
617
- c = peek(state);
618
- advance_char(state, c);
619
-
620
- if (c == '\\') {
621
- if (peek(state) == '\'') {
622
- advance_char(state, c);
623
- c = peek(state);
624
- }
625
- } else if (c == '\'') {
626
- break;
627
- }
628
- }
629
-
630
- return next_token(state, tSQSTRING);
631
- }
632
-
633
- #define EQPOINTS2(c0, c1, s) (c0 == s[0] && c1 == s[1])
634
- #define EQPOINTS3(c0, c1, c2, s) (c0 == s[0] && c1 == s[1] && c2 == s[2])
635
-
636
- /*
637
- ... : @ ...
638
- ^ start
639
- ^ current
640
- ^ current (return)
641
- */
642
- static token lex_colon_symbol(lexstate *state) {
643
- unsigned int c[3];
644
- peekn(state, c, 3);
645
-
646
- switch (c[0]) {
647
- case '|':
648
- case '&':
649
- case '/':
650
- case '%':
651
- case '~':
652
- case '`':
653
- case '^':
654
- advance_char(state, c[0]);
655
- return next_token(state, tSYMBOL);
656
- case '=':
657
- if (EQPOINTS2(c[0], c[1], "=~")) {
658
- // :=~
659
- advance_char(state, c[0]);
660
- advance_char(state, c[1]);
661
- return next_token(state, tSYMBOL);
662
- } else if (EQPOINTS3(c[0], c[1], c[2], "===")) {
663
- // :===
664
- advance_char(state, c[0]);
665
- advance_char(state, c[1]);
666
- advance_char(state, c[2]);
667
- return next_token(state, tSYMBOL);
668
- } else if (EQPOINTS2(c[0], c[1], "==")) {
669
- // :==
670
- advance_char(state, c[0]);
671
- advance_char(state, c[1]);
672
- return next_token(state, tSYMBOL);
673
- }
674
- break;
675
- case '<':
676
- if (EQPOINTS3(c[0], c[1], c[2], "<=>")) {
677
- advance_char(state, c[0]);
678
- advance_char(state, c[1]);
679
- advance_char(state, c[2]);
680
- } else if (EQPOINTS2(c[0], c[1], "<=") || EQPOINTS2(c[0], c[1], "<<")) {
681
- advance_char(state, c[0]);
682
- advance_char(state, c[1]);
683
- } else {
684
- advance_char(state, c[0]);
685
- }
686
- return next_token(state, tSYMBOL);
687
- case '>':
688
- if (EQPOINTS2(c[0], c[1], ">=") || EQPOINTS2(c[0], c[1], ">>")) {
689
- advance_char(state, c[0]);
690
- advance_char(state, c[1]);
691
- } else {
692
- advance_char(state, c[0]);
693
- }
694
- return next_token(state, tSYMBOL);
695
- case '-':
696
- case '+':
697
- if (EQPOINTS2(c[0], c[1], "+@") || EQPOINTS2(c[0], c[1], "-@")) {
698
- advance_char(state, c[0]);
699
- advance_char(state, c[1]);
700
- } else {
701
- advance_char(state, c[0]);
702
- }
703
- return next_token(state, tSYMBOL);
704
- case '*':
705
- if (EQPOINTS2(c[0], c[1], "**")) {
706
- advance_char(state, c[0]);
707
- advance_char(state, c[1]);
708
- } else {
709
- advance_char(state, c[0]);
710
- }
711
- return next_token(state, tSYMBOL);
712
- case '[':
713
- if (EQPOINTS3(c[0], c[1], c[2], "[]=")) {
714
- advance_char(state, c[0]);
715
- advance_char(state, c[1]);
716
- advance_char(state, c[2]);
717
- } else if (EQPOINTS2(c[0], c[1], "[]")) {
718
- advance_char(state, c[0]);
719
- advance_char(state, c[1]);
720
- } else {
721
- break;
722
- }
723
- return next_token(state, tSYMBOL);
724
- case '!':
725
- if (EQPOINTS2(c[0], c[1], "!=") || EQPOINTS2(c[0], c[1], "!~")) {
726
- advance_char(state, c[0]);
727
- advance_char(state, c[1]);
728
- } else {
729
- advance_char(state, c[0]);
730
- }
731
- return next_token(state, tSYMBOL);
732
- case '@': {
733
- advance_char(state, '@');
734
- token tok = lex_ivar(state);
735
- if (tok.type != ErrorToken) {
736
- tok.type = tSYMBOL;
737
- }
738
- return tok;
739
- }
740
- case '$': {
741
- advance_char(state, '$');
742
- token tok = lex_global(state);
743
- if (tok.type != ErrorToken) {
744
- tok.type = tSYMBOL;
745
- }
746
- return tok;
747
- }
748
- case '\'': {
749
- position start = state->start;
750
- advance_char(state, '\'');
751
- token tok = lex_sqstring(state);
752
- tok.type = tSQSYMBOL;
753
- tok.range.start = start;
754
- return tok;
755
- }
756
- case '"': {
757
- position start = state->start;
758
- advance_char(state, '"');
759
- token tok = lex_dqstring(state);
760
- tok.type = tDQSYMBOL;
761
- tok.range.start = start;
762
- return tok;
763
- }
764
- default:
765
- if (rb_isalpha(c[0]) || c[0] == '_') {
766
- position start = state->start;
767
- token tok = lex_ident(state, NullType);
768
- tok.range.start = start;
769
-
770
- if (peek(state) == '?') {
771
- if (tok.type != tBANGIDENT && tok.type != tEQIDENT) {
772
- skip_char(state, '?');
773
- tok.range.end = state->current;
774
- }
775
- }
776
-
777
- tok.type = tSYMBOL;
778
- return tok;
779
- }
780
- }
781
-
782
- return next_token(state, pCOLON);
783
- }
784
-
785
- /*
786
- ... : : ...
787
- ^ start
788
- ^ current
789
- ^ current (return)
790
-
791
- ... : ...
792
- ^ start
793
- ^ current (lex_colon_symbol)
794
- */
795
- static token lex_colon(lexstate *state) {
796
- unsigned int c = peek(state);
797
-
798
- if (c == ':') {
799
- advance_char(state, c);
800
- return next_token(state, pCOLON2);
801
- } else {
802
- return lex_colon_symbol(state);
803
- }
804
- }
805
-
806
- /*
807
- lex_lt ::= < (pLT)
808
- | < < (tOPERATOR)
809
- | < = (tOPERATOR)
810
- | < = > (tOPERATOR)
811
- */
812
- static token lex_lt(lexstate *state) {
813
- if (advance_next_character_if(state, '<')) {
814
- return next_token(state, tOPERATOR);
815
- } else if (advance_next_character_if(state, '=')) {
816
- advance_next_character_if(state, '>');
817
- return next_token(state, tOPERATOR);
818
- } else {
819
- return next_token(state, pLT);
820
- }
821
- }
822
-
823
- /*
824
- lex_gt ::= >
825
- | > =
826
- | > >
827
- */
828
- static token lex_gt(lexstate *state) {
829
- advance_next_character_if(state, '=') || advance_next_character_if(state, '>');
830
- return next_token(state, tOPERATOR);
831
- }
832
-
833
- /*
834
- ... `%` `a` `{` ... `}` ...
835
- ^ start
836
- ^ current
837
- ^ current (exit)
838
- --- token
839
- */
840
- static token lex_percent(lexstate *state) {
841
- unsigned int cs[2];
842
- unsigned int end_char;
843
-
844
- peekn(state, cs, 2);
845
-
846
- if (cs[0] != 'a') {
847
- return next_token(state, tOPERATOR);
848
- }
849
-
850
- switch (cs[1])
851
- {
852
- case '{':
853
- end_char = '}';
854
- break;
855
- case '(':
856
- end_char = ')';
857
- break;
858
- case '[':
859
- end_char = ']';
860
- break;
861
- case '|':
862
- end_char = '|';
863
- break;
864
- case '<':
865
- end_char = '>';
866
- break;
867
- default:
868
- return next_token(state, tOPERATOR);
869
- }
870
-
871
- advance_char(state, cs[0]);
872
- advance_char(state, cs[1]);
873
-
874
- unsigned int c;
875
-
876
- while ((c = peek(state))) {
877
- if (c == end_char) {
878
- advance_char(state, c);
879
- return next_token(state, tANNOTATION);
880
- }
881
- advance_char(state, c);
882
- }
883
-
884
- return next_token(state, ErrorToken);
885
- }
886
-
887
- /*
888
- bracket ::= [ (pLBRACKET)
889
- * ^
890
- | [ ] (tOPERATOR)
891
- * ^ $
892
- | [ ] = (tOPERATOR)
893
- * ^ $
894
- */
895
- static token lex_bracket(lexstate *state) {
896
- if (advance_next_character_if(state, ']')) {
897
- advance_next_character_if(state, '=');
898
- return next_token(state, tOPERATOR);
899
- } else {
900
- return next_token(state, pLBRACKET);
901
- }
902
- }
903
-
904
- /*
905
- bracket ::= *
906
- | * *
907
- */
908
- static token lex_star(lexstate *state) {
909
- if (advance_next_character_if(state, '*')) {
910
- return next_token(state, pSTAR2);
911
- } else {
912
- return next_token(state, pSTAR);
913
- }
914
- }
915
-
916
- /*
917
- bang ::= !
918
- | ! =
919
- | ! ~
920
- */
921
- static token lex_bang(lexstate *state) {
922
- advance_next_character_if(state, '=') || advance_next_character_if(state, '~');
923
- return next_token(state, tOPERATOR);
924
- }
925
-
926
- /*
927
- backquote ::= ` (tOPERATOR)
928
- | `[^ :][^`]` (tQIDENT)
929
- */
930
- static token lex_backquote(lexstate *state) {
931
- unsigned int c = peek(state);
932
-
933
- if (c == ' ' || c == ':') {
934
- return next_token(state, tOPERATOR);
935
- } else {
936
- while (true) {
937
- if (c == '`') {
938
- break;
939
- }
940
-
941
- c = peek(state);
942
- advance_char(state, c);
943
- }
944
-
945
- return next_token(state, tQIDENT);
946
- }
947
- }
948
-
949
- token rbsparser_next_token(lexstate *state) {
950
- token tok = NullToken;
951
-
952
- unsigned int c;
953
- bool skipping = true;
954
-
955
- while (skipping) {
956
- c = peek(state);
957
-
958
- switch (c) {
959
- case ' ':
960
- case '\t':
961
- case '\n':
962
- // nop
963
- skip_char(state, c);
964
- break;
965
- case '\0':
966
- return next_token(state, pEOF);
967
- default:
968
- advance_char(state, c);
969
- skipping = false;
970
- break;
971
- }
972
- }
973
-
974
- /* ... c d .. */
975
- /* ^ state->current */
976
- /* ^ start */
977
- switch (c) {
978
- case '\0': tok = next_token(state, pEOF);
979
- ONE_CHAR_PATTERN('(', pLPAREN);
980
- ONE_CHAR_PATTERN(')', pRPAREN);
981
- ONE_CHAR_PATTERN(']', pRBRACKET);
982
- ONE_CHAR_PATTERN('{', pLBRACE);
983
- ONE_CHAR_PATTERN('}', pRBRACE);
984
- ONE_CHAR_PATTERN(',', pCOMMA);
985
- ONE_CHAR_PATTERN('|', pBAR);
986
- ONE_CHAR_PATTERN('^', pHAT);
987
- ONE_CHAR_PATTERN('&', pAMP);
988
- ONE_CHAR_PATTERN('?', pQUESTION);
989
- ONE_CHAR_PATTERN('/', tOPERATOR);
990
- ONE_CHAR_PATTERN('~', tOPERATOR);
991
- case '[':
992
- tok = lex_bracket(state);
993
- break;
994
- case '-':
995
- tok = lex_hyphen(state);
996
- break;
997
- case '+':
998
- tok = lex_plus(state);
999
- break;
1000
- case '*':
1001
- tok = lex_star(state);
1002
- break;
1003
- case '<':
1004
- tok = lex_lt(state);
1005
- break;
1006
- case '=':
1007
- tok = lex_eq(state);
1008
- break;
1009
- case '>':
1010
- tok = lex_gt(state);
1011
- break;
1012
- case '!':
1013
- tok = lex_bang(state);
1014
- break;
1015
- case '#':
1016
- if (state->first_token_of_line) {
1017
- tok = lex_comment(state, tLINECOMMENT);
1018
- } else {
1019
- tok = lex_comment(state, tCOMMENT);
1020
- }
1021
- break;
1022
- case ':':
1023
- tok = lex_colon(state);
1024
- break;
1025
- case '.':
1026
- tok = lex_dot(state);
1027
- break;
1028
- case '_':
1029
- tok = lex_underscore(state);
1030
- break;
1031
- case '$':
1032
- tok = lex_global(state);
1033
- break;
1034
- case '@':
1035
- tok = lex_ivar(state);
1036
- break;
1037
- case '"':
1038
- tok = lex_dqstring(state);
1039
- break;
1040
- case '\'':
1041
- tok = lex_sqstring(state);
1042
- break;
1043
- case '%':
1044
- tok = lex_percent(state);
1045
- break;
1046
- case '`':
1047
- tok = lex_backquote(state);
1048
- break;
1049
- default:
1050
- if (rb_isalpha(c) && rb_isupper(c)) {
1051
- tok = lex_ident(state, tUIDENT);
1052
- }
1053
- if (rb_isalpha(c) && rb_islower(c)) {
1054
- tok = lex_ident(state, tLIDENT);
1055
- }
1056
- if (rb_isdigit(c)) {
1057
- tok = lex_number(state);
1058
- }
1059
- }
1060
-
1061
- if (tok.type == NullType) {
1062
- tok = next_token(state, ErrorToken);
1063
- }
1064
-
1065
- return tok;
1066
- }
1067
-
1068
- char *peek_token(lexstate *state, token tok) {
1069
- return RSTRING_PTR(state->string) + tok.range.start.byte_pos;
1070
- }