rbs 1.6.2 → 1.7.0.beta.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +0 -4
  3. data/.gitignore +1 -0
  4. data/CHANGELOG.md +6 -0
  5. data/Gemfile +1 -0
  6. data/Rakefile +7 -22
  7. data/core/kernel.rbs +4 -4
  8. data/core/trace_point.rbs +1 -1
  9. data/ext/rbs/extension/constants.c +140 -0
  10. data/ext/rbs/extension/constants.h +72 -0
  11. data/ext/rbs/extension/extconf.rb +3 -0
  12. data/ext/rbs/extension/lexer.c +1070 -0
  13. data/ext/rbs/extension/lexer.h +145 -0
  14. data/ext/rbs/extension/location.c +295 -0
  15. data/ext/rbs/extension/location.h +59 -0
  16. data/ext/rbs/extension/main.c +9 -0
  17. data/ext/rbs/extension/parser.c +2418 -0
  18. data/ext/rbs/extension/parser.h +23 -0
  19. data/ext/rbs/extension/parserstate.c +313 -0
  20. data/ext/rbs/extension/parserstate.h +141 -0
  21. data/ext/rbs/extension/rbs_extension.h +40 -0
  22. data/ext/rbs/extension/ruby_objs.c +585 -0
  23. data/ext/rbs/extension/ruby_objs.h +46 -0
  24. data/ext/rbs/extension/unescape.c +65 -0
  25. data/goodcheck.yml +1 -1
  26. data/lib/rbs/ast/comment.rb +0 -12
  27. data/lib/rbs/buffer.rb +4 -0
  28. data/lib/rbs/cli.rb +5 -8
  29. data/lib/rbs/collection/sources/git.rb +18 -3
  30. data/lib/rbs/errors.rb +14 -1
  31. data/lib/rbs/location.rb +221 -217
  32. data/lib/rbs/location_aux.rb +108 -0
  33. data/lib/rbs/locator.rb +10 -7
  34. data/lib/rbs/parser_aux.rb +24 -0
  35. data/lib/rbs/types.rb +2 -3
  36. data/lib/rbs/version.rb +1 -1
  37. data/lib/rbs/writer.rb +4 -2
  38. data/lib/rbs.rb +3 -7
  39. data/rbs.gemspec +2 -1
  40. data/sig/ancestor_builder.rbs +2 -2
  41. data/sig/annotation.rbs +2 -2
  42. data/sig/comment.rbs +7 -7
  43. data/sig/constant_table.rbs +1 -1
  44. data/sig/declarations.rbs +9 -9
  45. data/sig/definition.rbs +1 -1
  46. data/sig/definition_builder.rbs +2 -2
  47. data/sig/errors.rbs +30 -25
  48. data/sig/location.rbs +42 -79
  49. data/sig/locator.rbs +2 -2
  50. data/sig/members.rbs +7 -7
  51. data/sig/method_types.rbs +3 -3
  52. data/sig/parser.rbs +11 -21
  53. data/sig/types.rbs +45 -27
  54. data/sig/writer.rbs +1 -1
  55. data/stdlib/json/0/json.rbs +3 -3
  56. metadata +24 -6
  57. data/lib/rbs/parser.rb +0 -3614
@@ -0,0 +1,23 @@
1
+ #ifndef RBS__PARSER_H
2
+ #define RBS__PARSER_H
3
+
4
+ #include "ruby.h"
5
+ #include "parserstate.h"
6
+
7
+ /**
8
+ * RBS::Parser class
9
+ * */
10
+ extern VALUE RBS_Parser;
11
+
12
+ /**
13
+ * RBS::Parser::KEYWORDS constant, which stores a hash from keyword string to token type fixnum
14
+ * */
15
+ extern VALUE RBS_Parser_KEYWORDS;
16
+
17
+ VALUE parse_type(parserstate *state);
18
+ VALUE parse_method_type(parserstate *state);
19
+ VALUE parse_signature(parserstate *state);
20
+
21
+ void rbs__init_parser();
22
+
23
+ #endif
@@ -0,0 +1,313 @@
1
+ #include "rbs_extension.h"
2
+
3
/* True when `table` is a reset marker, i.e. a table whose `size` is 0.
 * The argument is parenthesized so that expressions such as `p + 1` or `&t`
 * expand correctly. */
#define RESET_TABLE_P(table) ((table)->size == 0)
4
+
5
+ id_table *alloc_empty_table() {
6
+ id_table *table = malloc(sizeof(id_table));
7
+ table->size = 10;
8
+ table->count = 0;
9
+ table->ids = calloc(10, sizeof(ID));
10
+
11
+ return table;
12
+ }
13
+
14
+ id_table *alloc_reset_table() {
15
+ id_table *table = malloc(sizeof(id_table));
16
+ table->size = 0;
17
+
18
+ return table;
19
+ }
20
+
21
+ id_table *parser_push_typevar_table(parserstate *state, bool reset) {
22
+ if (reset) {
23
+ id_table *table = alloc_reset_table();
24
+ table->next = state->vars;
25
+ state->vars = table;
26
+ }
27
+
28
+ id_table *table = alloc_empty_table();
29
+ table->next = state->vars;
30
+ state->vars = table;
31
+
32
+ return table;
33
+ }
34
+
35
+ void parser_pop_typevar_table(parserstate *state) {
36
+ id_table *table;
37
+
38
+ if (state->vars) {
39
+ table = state->vars;
40
+ state->vars = table->next;
41
+ free(table->ids);
42
+ free(table);
43
+ } else {
44
+ rb_raise(rb_eRuntimeError, "Cannot pop empty table");
45
+ }
46
+
47
+ if (state->vars && RESET_TABLE_P(state->vars)) {
48
+ table = state->vars;
49
+ state->vars = table->next;
50
+ free(table);
51
+ }
52
+ }
53
+
54
+ void parser_insert_typevar(parserstate *state, ID id) {
55
+ id_table *table = state->vars;
56
+
57
+ if (RESET_TABLE_P(table)) {
58
+ rb_raise(rb_eRuntimeError, "Cannot insert to reset table");
59
+ }
60
+
61
+ if (table->size == table->count) {
62
+ // expand
63
+ ID *ptr = table->ids;
64
+ table->size += 10;
65
+ table->ids = calloc(table->size, sizeof(ID));
66
+ memcpy(table->ids, ptr, sizeof(ID) * table->count);
67
+ free(ptr);
68
+ }
69
+
70
+ table->ids[table->count++] = id;
71
+ }
72
+
73
+ bool parser_typevar_member(parserstate *state, ID id) {
74
+ id_table *table = state->vars;
75
+
76
+ while (table && !RESET_TABLE_P(table)) {
77
+ for (size_t i = 0; i < table->count; i++) {
78
+ if (table->ids[i] == id) {
79
+ return true;
80
+ }
81
+ }
82
+
83
+ table = table->next;
84
+ }
85
+
86
+ return false;
87
+ }
88
+
89
+ void print_parser(parserstate *state) {
90
+ pp(state->buffer);
91
+ printf(" current_token = %s (%d...%d)\n", token_type_str(state->current_token.type), state->current_token.range.start.char_pos, state->current_token.range.end.char_pos);
92
+ printf(" next_token = %s (%d...%d)\n", token_type_str(state->next_token.type), state->next_token.range.start.char_pos, state->next_token.range.end.char_pos);
93
+ printf(" next_token2 = %s (%d...%d)\n", token_type_str(state->next_token2.type), state->next_token2.range.start.char_pos, state->next_token2.range.end.char_pos);
94
+ printf(" next_token3 = %s (%d...%d)\n", token_type_str(state->next_token3.type), state->next_token3.range.start.char_pos, state->next_token3.range.end.char_pos);
95
+ }
96
+
97
+ void parser_advance(parserstate *state) {
98
+ state->current_token = state->next_token;
99
+ state->next_token = state->next_token2;
100
+ state->next_token2 = state->next_token3;
101
+
102
+ while (true) {
103
+ if (state->next_token3.type == pEOF) {
104
+ break;
105
+ }
106
+
107
+ state->next_token3 = rbsparser_next_token(state->lexstate);
108
+
109
+ if (state->next_token3.type == tCOMMENT) {
110
+ // skip
111
+ } else if (state->next_token3.type == tLINECOMMENT) {
112
+ insert_comment_line(state, state->next_token3);
113
+ } else {
114
+ break;
115
+ }
116
+ }
117
+ }
118
+
119
+ /**
120
+ * Advance token if _next_ token is `type`.
121
+ * Ensures one token advance and `state->current_token.type == type`, or current token not changed.
122
+ *
123
+ * @returns true if token advances, false otherwise.
124
+ **/
125
+ bool parser_advance_if(parserstate *state, enum TokenType type) {
126
+ if (state->next_token.type == type) {
127
+ parser_advance(state);
128
+ return true;
129
+ } else {
130
+ return false;
131
+ }
132
+ }
133
+
134
+ void parser_advance_assert(parserstate *state, enum TokenType type) {
135
+ parser_advance(state);
136
+ if (state->current_token.type != type) {
137
+ raise_syntax_error(
138
+ state,
139
+ state->current_token,
140
+ "expected a token `%s`",
141
+ token_type_str(type)
142
+ );
143
+ }
144
+ }
145
+
146
+ void print_token(token tok) {
147
+ printf(
148
+ "%s char=%d...%d\n",
149
+ token_type_str(tok.type),
150
+ tok.range.start.char_pos,
151
+ tok.range.end.char_pos
152
+ );
153
+ }
154
+
155
+ void insert_comment_line(parserstate *state, token tok) {
156
+ int prev_line = tok.range.start.line - 1;
157
+
158
+ comment *com = comment_get_comment(state->last_comment, prev_line);
159
+
160
+ if (com) {
161
+ comment_insert_new_line(com, tok);
162
+ } else {
163
+ state->last_comment = alloc_comment(tok, state->last_comment);
164
+ }
165
+ }
166
+
167
+ VALUE get_comment(parserstate *state, int subject_line) {
168
+ int comment_line = subject_line - 1;
169
+
170
+ comment *com = comment_get_comment(state->last_comment, comment_line);
171
+
172
+ if (com) {
173
+ return comment_to_ruby(com, state->buffer);
174
+ } else {
175
+ return Qnil;
176
+ }
177
+ }
178
+
179
+ comment *alloc_comment(token comment_token, comment *last_comment) {
180
+ comment *new_comment = calloc(1, sizeof(comment));
181
+
182
+ new_comment->next_comment = last_comment;
183
+
184
+ new_comment->start = comment_token.range.start;
185
+ new_comment->end = comment_token.range.end;
186
+
187
+ new_comment->line_size = 0;
188
+ new_comment->line_count = 0;
189
+
190
+ comment_insert_new_line(new_comment, comment_token);
191
+
192
+ return new_comment;
193
+ }
194
+
195
+ void free_comment(comment *com) {
196
+ if (com->next_comment) {
197
+ free_comment(com->next_comment);
198
+ }
199
+
200
+ free(com->tokens);
201
+ free(com);
202
+ }
203
+
204
+ void comment_insert_new_line(comment *com, token comment_token) {
205
+ if (com->line_count == 0) {
206
+ com->start = comment_token.range.start;
207
+ }
208
+
209
+ if (com->line_count == com->line_size) {
210
+ com->line_size += 10;
211
+
212
+ if (com->tokens) {
213
+ token *p = com->tokens;
214
+ com->tokens = calloc(com->line_size, sizeof(token));
215
+ memcpy(com->tokens, p, sizeof(token) * com->line_count);
216
+ free(p);
217
+ } else {
218
+ com->tokens = calloc(com->line_size, sizeof(token));
219
+ }
220
+ }
221
+
222
+ com->tokens[com->line_count++] = comment_token;
223
+ com->end = comment_token.range.end;
224
+ }
225
+
226
+ comment *comment_get_comment(comment *com, int line) {
227
+ if (com == NULL) {
228
+ return NULL;
229
+ }
230
+
231
+ if (com->end.line < line) {
232
+ return NULL;
233
+ }
234
+
235
+ if (com->end.line == line) {
236
+ return com;
237
+ }
238
+
239
+ return comment_get_comment(com->next_comment, line);
240
+ }
241
+
242
+ VALUE comment_to_ruby(comment *com, VALUE buffer) {
243
+ VALUE content = rb_funcall(buffer, rb_intern("content"), 0);
244
+ rb_encoding *enc = rb_enc_get(content);
245
+ VALUE string = rb_enc_str_new_cstr("", enc);
246
+
247
+ int hash_bytes = rb_enc_codelen('#', enc);
248
+ int space_bytes = rb_enc_codelen(' ', enc);
249
+
250
+ for (size_t i = 0; i < com->line_count; i++) {
251
+ token tok = com->tokens[i];
252
+
253
+ char *comment_start = RSTRING_PTR(content) + tok.range.start.byte_pos + hash_bytes;
254
+ int comment_bytes = RANGE_BYTES(tok.range) - hash_bytes;
255
+ unsigned char c = rb_enc_mbc_to_codepoint(comment_start, RSTRING_END(content), enc);
256
+
257
+ if (c == ' ') {
258
+ comment_start += space_bytes;
259
+ comment_bytes -= space_bytes;
260
+ }
261
+
262
+ rb_str_cat(string, comment_start, comment_bytes);
263
+ rb_str_cat_cstr(string, "\n");
264
+ }
265
+
266
+ return rbs_ast_comment(
267
+ string,
268
+ rbs_location_pp(buffer, &com->start, &com->end)
269
+ );
270
+ }
271
+
272
+ parserstate *alloc_parser(VALUE buffer, int line, int column, VALUE variables) {
273
+ VALUE string = rb_funcall(buffer, rb_intern("content"), 0);
274
+
275
+ lexstate *lexer = calloc(1, sizeof(lexstate));
276
+ lexer->string = string;
277
+ lexer->current.line = line;
278
+ lexer->current.column = column;
279
+ lexer->start = lexer->current;
280
+ lexer->first_token_of_line = lexer->current.column == 0;
281
+
282
+ parserstate *parser = calloc(1, sizeof(parserstate));
283
+ parser->lexstate = lexer;
284
+ parser->buffer = buffer;
285
+ parser->current_token = NullToken;
286
+ parser->next_token = NullToken;
287
+ parser->next_token2 = NullToken;
288
+ parser->next_token3 = NullToken;
289
+
290
+ parser_advance(parser);
291
+ parser_advance(parser);
292
+ parser_advance(parser);
293
+
294
+ if (!NIL_P(variables)) {
295
+ parser_push_typevar_table(parser, true);
296
+
297
+ for (long i = 0; i < rb_array_len(variables); i++) {
298
+ VALUE index = INT2FIX(i);
299
+ VALUE symbol = rb_ary_aref(1, &index, variables);
300
+ parser_insert_typevar(parser, SYM2ID(symbol));
301
+ }
302
+ }
303
+
304
+ return parser;
305
+ }
306
+
307
+ void free_parser(parserstate *parser) {
308
+ free(parser->lexstate);
309
+ if (parser->last_comment) {
310
+ free_comment(parser->last_comment);
311
+ }
312
+ free(parser);
313
+ }
@@ -0,0 +1,141 @@
1
+ #ifndef RBS__PARSERSTATE_H
2
+ #define RBS__PARSERSTATE_H
3
+
4
+ #include <stdbool.h>
5
+
6
+ #include "lexer.h"
7
+ #include "location.h"
8
+
9
+ /**
10
+ * id_table represents a set of IDs.
11
+ * This is used to manage the set of bound variables.
+ *
+ * Tables are linked through `next` to form a stack. A table whose `size`
+ * is 0 is a _reset_ marker, which stops the lookup of type variables.
12
+ * */
13
+ typedef struct id_table {
14
+ size_t size; /* Number of slots allocated in `ids`; 0 marks a reset table */
15
+ size_t count; /* Number of IDs currently stored in `ids` */
16
+ ID *ids; /* Array of the stored IDs */
17
+ struct id_table *next; /* The table pushed before this one */
18
+ } id_table;
19
+
20
+ /**
21
+ * comment represents a sequence of comment lines.
22
+ *
23
+ * # Comment for the method.
24
+ * #
25
+ * # ```rb
26
+ * # object.foo() # Do something
27
+ * # ```
28
+ * #
29
+ * def foo: () -> void
30
+ *
31
+ * A comment object represents the six lines of comments.
+ * Comments are linked through `next_comment`, newest first.
32
+ * */
33
+ typedef struct comment {
34
+ position start; /* Start of the first comment line */
35
+ position end; /* End of the last comment line inserted so far */
36
+
37
+ size_t line_size; /* Number of slots allocated in `tokens` */
38
+ size_t line_count; /* Number of comment line tokens stored in `tokens` */
39
+ token *tokens; /* One token per comment line */
40
+
41
+ struct comment *next_comment; /* The comment that was read before this one */
42
+ } comment;
43
+
44
+ /**
45
+ * An RBS parser is an LL(3) parser.
+ * It keeps the current token and three lookahead tokens.
46
+ * */
47
+ typedef struct {
48
+ lexstate *lexstate; /* Lexer state the tokens are read from */
49
+
50
+ token current_token; /* The token being processed */
51
+ token next_token; /* The first lookahead token */
52
+ token next_token2; /* The second lookahead token */
53
+ token next_token3; /* The third lookahead token */
54
+ VALUE buffer; /* Ruby object that responds to `content` with the source string */
55
+
56
+ id_table *vars; /* Known type variables */
57
+ comment *last_comment; /* Last read comment */
58
+ } parserstate;
59
+
60
+ comment *alloc_comment(token comment_token, comment *last_comment);
61
+ void free_comment(comment *com);
62
+ void comment_insert_new_line(comment *com, token comment_token);
63
+ comment *comment_get_comment(comment *com, int line);
64
+ VALUE comment_to_ruby(comment *com, VALUE buffer);
65
+
66
+ /**
67
+ * Insert new table entry.
68
+ * Setting `reset` inserts a _reset_ entry, which stops searching.
69
+ *
70
+ * ```
71
+ * class Foo[A]
72
+ * ^^^ <= push new table with reset
73
+ * def foo: [B] () -> [A, B]
74
+ * ^^^ <= push new table without reset
75
+ *
76
+ * class Baz[C]
77
+ * ^^^ <= push new table with reset
78
+ * end
79
+ * end
80
+ * ```
81
+ * */
82
+ id_table *parser_push_typevar_table(parserstate *state, bool reset);
83
+ void parser_pop_typevar_table(parserstate *state);
84
+ /**
85
+ * Insert new type variable into the latest table.
86
+ * */
87
+ void parser_insert_typevar(parserstate *state, ID id);
88
+
89
+ /**
90
+ * Returns true if given type variable is recorded in the table.
91
+ * If not found, it goes one table up, if it's not a reset table.
92
+ * Or returns false, if it's a reset table.
93
+ * */
94
+ bool parser_typevar_member(parserstate *state, ID id);
95
+
96
+ /**
97
+ * Allocate new parserstate object.
98
+ *
99
+ * ```
100
+ * alloc_parser(buffer, 0, 1, variables) // New parserstate with variables
101
+ * alloc_parser(buffer, 3, 5, Qnil) // New parserstate without variables
102
+ * ```
103
+ * */
104
+ parserstate *alloc_parser(VALUE buffer, int line, int column, VALUE variables);
105
+ void free_parser(parserstate *parser);
106
+ /**
107
+ * Advance one token.
108
+ * */
109
+ void parser_advance(parserstate *state);
110
+ /**
111
+ * Advance one token, and assert the current token type.
112
+ * Raises an exception if `state->current_token.type != type`.
113
+ * */
114
+ void parser_advance_assert(parserstate *state, enum TokenType type);
115
+ /**
116
+ * Advance one token if the next_token is a token of the type.
117
+ * */
118
+ bool parser_advance_if(parserstate *state, enum TokenType type);
119
+ void print_parser(parserstate *state);
120
+
121
+ /**
122
+ * Insert new comment line token.
123
+ * */
124
+ void insert_comment_line(parserstate *state, token token);
125
+
126
+ /**
127
+ * Returns an RBS::Comment object associated with a subject at `subject_line`.
128
+ *
129
+ * ```rbs
130
+ * # Comment1
131
+ * class Foo # This is the subject line for Comment1
132
+ *
133
+ * # Comment2
134
+ * %a{annotation} # This is the subject line for Comment2
135
+ * def foo: () -> void
136
+ * end
137
+ * ```
138
+ * */
139
+ VALUE get_comment(parserstate *state, int subject_line);
140
+
141
+ #endif