rbs 1.6.2 → 1.7.0.beta.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +18 -3
  3. data/.gitignore +10 -1
  4. data/CHANGELOG.md +21 -6
  5. data/Gemfile +1 -0
  6. data/Rakefile +22 -22
  7. data/core/kernel.rbs +4 -4
  8. data/core/trace_point.rbs +1 -1
  9. data/ext/rbs_extension/constants.c +139 -0
  10. data/ext/rbs_extension/constants.h +72 -0
  11. data/ext/rbs_extension/extconf.rb +3 -0
  12. data/ext/rbs_extension/lexer.c +2533 -0
  13. data/ext/rbs_extension/lexer.h +161 -0
  14. data/ext/rbs_extension/lexer.re +140 -0
  15. data/ext/rbs_extension/lexstate.c +139 -0
  16. data/ext/rbs_extension/location.c +295 -0
  17. data/ext/rbs_extension/location.h +59 -0
  18. data/ext/rbs_extension/main.c +9 -0
  19. data/ext/rbs_extension/parser.c +2390 -0
  20. data/ext/rbs_extension/parser.h +18 -0
  21. data/ext/rbs_extension/parserstate.c +313 -0
  22. data/ext/rbs_extension/parserstate.h +141 -0
  23. data/ext/rbs_extension/rbs_extension.h +40 -0
  24. data/ext/rbs_extension/ruby_objs.c +521 -0
  25. data/ext/rbs_extension/ruby_objs.h +46 -0
  26. data/ext/rbs_extension/unescape.c +65 -0
  27. data/goodcheck.yml +1 -1
  28. data/lib/rbs/ast/comment.rb +0 -12
  29. data/lib/rbs/buffer.rb +4 -0
  30. data/lib/rbs/cli.rb +5 -8
  31. data/lib/rbs/collection/installer.rb +1 -0
  32. data/lib/rbs/collection/sources/git.rb +18 -3
  33. data/lib/rbs/errors.rb +28 -1
  34. data/lib/rbs/location.rb +221 -217
  35. data/lib/rbs/location_aux.rb +121 -0
  36. data/lib/rbs/locator.rb +10 -7
  37. data/lib/rbs/parser_aux.rb +63 -0
  38. data/lib/rbs/parser_compat/lexer_error.rb +4 -0
  39. data/lib/rbs/parser_compat/located_value.rb +5 -0
  40. data/lib/rbs/parser_compat/semantics_error.rb +4 -0
  41. data/lib/rbs/parser_compat/syntax_error.rb +4 -0
  42. data/lib/rbs/types.rb +2 -3
  43. data/lib/rbs/version.rb +1 -1
  44. data/lib/rbs/writer.rb +4 -2
  45. data/lib/rbs.rb +14 -7
  46. data/rbs.gemspec +2 -1
  47. data/sig/ancestor_builder.rbs +2 -2
  48. data/sig/annotation.rbs +2 -2
  49. data/sig/comment.rbs +7 -7
  50. data/sig/constant_table.rbs +1 -1
  51. data/sig/declarations.rbs +9 -9
  52. data/sig/definition.rbs +1 -1
  53. data/sig/definition_builder.rbs +2 -2
  54. data/sig/errors.rbs +40 -25
  55. data/sig/location.rbs +46 -78
  56. data/sig/locator.rbs +2 -2
  57. data/sig/members.rbs +7 -7
  58. data/sig/method_types.rbs +3 -3
  59. data/sig/parser.rbs +15 -20
  60. data/sig/rbs.rbs +4 -0
  61. data/sig/types.rbs +45 -27
  62. data/sig/writer.rbs +1 -1
  63. data/stdlib/io-console/0/io-console.rbs +137 -0
  64. data/stdlib/json/0/json.rbs +3 -3
  65. data/stdlib/net-http/0/net-http.rbs +2 -1
  66. data/stdlib/tempfile/0/tempfile.rbs +4 -6
  67. metadata +31 -6
  68. data/lib/rbs/parser.rb +0 -3614
@@ -0,0 +1,161 @@
1
+ #ifndef RBS__LEXER_H
2
+ #define RBS__LEXER_H
3
+
/**
 * Kinds of token the lexer can produce.
 *
 * Enumerators are grouped as punctuation (`p` prefix), keywords (`k` prefix)
 * and identifiers, comments and literals (`t` prefix).
 *
 * The numeric values are used by `token_type_str` as indexes into the
 * `RBS_TOKENTYPE_NAMES` table, so the order here and the order of that
 * table must be kept in sync.
 * */
enum TokenType {
  NullType,         /* (Nothing) */
  pEOF,             /* EOF */
  ErrorToken,       /* Error */

  pLPAREN,          /* ( */
  pRPAREN,          /* ) */
  pCOLON,           /* : */
  pCOLON2,          /* :: */
  pLBRACKET,        /* [ */
  pRBRACKET,        /* ] */
  pLBRACE,          /* { */
  pRBRACE,          /* } */
  pHAT,             /* ^ */
  pARROW,           /* -> */
  pFATARROW,        /* => */
  pCOMMA,           /* , */
  pBAR,             /* | */
  pAMP,             /* & */
  pSTAR,            /* * */
  pSTAR2,           /* ** */
  pDOT,             /* . */
  pDOT3,            /* ... */
  pBANG,            /* ! */
  pQUESTION,        /* ? */
  pLT,              /* < */
  pEQ,              /* = */

  kALIAS,           /* alias */
  kATTRACCESSOR,    /* attr_accessor */
  kATTRREADER,      /* attr_reader */
  kATTRWRITER,      /* attr_writer */
  kBOOL,            /* bool */
  kBOT,             /* bot */
  kCLASS,           /* class */
  kDEF,             /* def */
  kEND,             /* end */
  kEXTEND,          /* extend */
  kFALSE,           /* false */
  kIN,              /* in */
  kINCLUDE,         /* include */
  kINSTANCE,        /* instance */
  kINTERFACE,       /* interface */
  kMODULE,          /* module */
  kNIL,             /* nil */
  kOUT,             /* out */
  kPREPEND,         /* prepend */
  kPRIVATE,         /* private */
  kPUBLIC,          /* public */
  kSELF,            /* self */
  kSINGLETON,       /* singleton */
  kTOP,             /* top */
  kTRUE,            /* true */
  kTYPE,            /* type */
  kUNCHECKED,       /* unchecked */
  kUNTYPED,         /* untyped */
  kVOID,            /* void */

  tLIDENT,          /* Identifiers starting with lower case */
  tUIDENT,          /* Identifiers starting with upper case */
  tULIDENT,         /* Identifiers starting with `_` followed by upper case */
  tULLIDENT,        /* A bare `_`, or `_` followed by lower case, a digit or `_` */
  tGIDENT,          /* Identifiers starting with `$` */
  tAIDENT,          /* Identifiers starting with `@` */
  tA2IDENT,         /* Identifiers starting with `@@` */
  tBANGIDENT,       /* Identifiers ending with `!` */
  tEQIDENT,         /* Identifiers ending with `=` */
  tQIDENT,          /* Quoted identifier */
  tOPERATOR,        /* Operator identifier */

  tCOMMENT,         /* Comment that follows another token on the same line */
  tLINECOMMENT,     /* Comment that is the first token of its line */

  tDQSTRING,        /* Double quoted string */
  tSQSTRING,        /* Single quoted string */
  tINTEGER,         /* Integer */
  tSYMBOL,          /* Symbol */
  tDQSYMBOL,        /* Double quoted symbol */
  tSQSYMBOL,        /* Single quoted symbol */
  tANNOTATION,      /* Annotation */
};
85
+
/**
 * A location inside the source string.
 *
 * The `byte_pos` (or `char_pos`) is the primary data; `line` and `column`
 * are cached values.
 *
 * The cached values can be recomputed from `byte_pos` (or `char_pos`), but
 * doing so needs a full scan from the beginning of the string (depending on
 * the encoding).
 *
 * A `byte_pos` of -1 marks a null position (see `null_position_p`).
 * */
typedef struct {
  int byte_pos;   /* Offset from the start of the string, in bytes */
  int char_pos;   /* Offset from the start of the string, in characters */
  int line;       /* Line counter, incremented for each newline skipped */
  int column;     /* Characters since the last newline; reset to 0 after one */
} position;
98
+
99
+ typedef struct {
100
+ position start;
101
+ position end;
102
+ } range;
103
+
104
+ typedef struct {
105
+ enum TokenType type;
106
+ range range;
107
+ } token;
108
+
109
+ /**
110
+ * The lexer state is the curren token.
111
+ *
112
+ * ```
113
+ * ... "a string token"
114
+ * ^ start position
115
+ * ^ current position
116
+ * ~~~~~~ Token => "a str
117
+ * ```
118
+ * */
119
+ typedef struct {
120
+ VALUE string;
121
+ position current; /* The current position */
122
+ position start; /* The start position of the current token */
123
+ bool first_token_of_line; /* This flag is used for tLINECOMMENT */
124
+ unsigned int last_char; /* Last peeked character */
125
+ } lexstate;
126
+
127
+ extern token NullToken;
128
+ extern position NullPosition;
129
+ extern range NULL_RANGE;
130
+
131
+ char *peek_token(lexstate *state, token tok);
132
+ int token_chars(token tok);
133
+ int token_bytes(token tok);
134
+
/*
 * Helpers for null positions/ranges (those with a `byte_pos` of -1).
 *
 * Every argument and every expansion is parenthesized so that the macros
 * expand correctly for arbitrary expressions such as `*ptr` or `c ? a : b`.
 * Note that `nonnull_pos_or` evaluates `pos1` twice.
 */
#define null_position_p(pos) ((pos).byte_pos == -1)
#define null_range_p(range) ((range).start.byte_pos == -1)
#define nonnull_pos_or(pos1, pos2) (null_position_p(pos1) ? (pos2) : (pos1))
#define RANGE_BYTES(range) ((range).end.byte_pos - (range).start.byte_pos)
139
+
140
+ const char *token_type_str(enum TokenType type);
141
+
142
+ /**
143
+ * Read next character.
144
+ * */
145
+ unsigned int peek(lexstate *state);
146
+
147
+ /**
148
+ * Skip one character.
149
+ * */
150
+ void skip(lexstate *state);
151
+
152
+ /**
153
+ * Return new token with given type.
154
+ * */
155
+ token next_token(lexstate *state, enum TokenType type);
156
+
157
+ token rbsparser_next_token(lexstate *state);
158
+
159
+ void print_token(token tok);
160
+
161
+ #endif
@@ -0,0 +1,140 @@
1
+ #include "rbs_extension.h"
2
+
3
+ token rbsparser_next_token(lexstate *state) {
4
+ lexstate backup;
5
+
6
+ start:
7
+ backup = *state;
8
+
9
+ /*!re2c
10
+ re2c:flags:u = 1;
11
+ re2c:api:style = free-form;
12
+ re2c:flags:input = custom;
13
+ re2c:define:YYCTYPE = "unsigned int";
14
+ re2c:define:YYPEEK = "peek(state)";
15
+ re2c:define:YYSKIP = "skip(state);";
16
+ re2c:define:YYBACKUP = "backup = *state;";
17
+ re2c:define:YYRESTORE = "*state = backup;";
18
+ re2c:yyfill:enable = 0;
19
+
20
+ word = [a-zA-Z0-9_];
21
+
22
+ operator = "/" | "~" | "[]" | "[]=" | "!" | "!=" | "!~" | "-" | "-@" | "+" | "+@"
23
+ | "==" | "===" | "=~" | "<<" | "<=" | "<=>" | ">" | ">=" | ">>" | "%";
24
+
25
+ "(" { return next_token(state, pLPAREN); }
26
+ ")" { return next_token(state, pRPAREN); }
27
+ "[" { return next_token(state, pLBRACKET); }
28
+ "]" { return next_token(state, pRBRACKET); }
29
+ "{" { return next_token(state, pLBRACE); }
30
+ "}" { return next_token(state, pRBRACE); }
31
+ "," { return next_token(state, pCOMMA); }
32
+ "|" { return next_token(state, pBAR); }
33
+ "^" { return next_token(state, pHAT); }
34
+ "&" { return next_token(state, pAMP); }
35
+ "?" { return next_token(state, pQUESTION); }
36
+ "*" { return next_token(state, pSTAR); }
37
+ "**" { return next_token(state, pSTAR2); }
38
+ "." { return next_token(state, pDOT); }
39
+ "..." { return next_token(state, pDOT3); }
40
+ "`" { return next_token(state, tOPERATOR); }
41
+ "`" [^ :\x00] [^`\x00]* "`" { return next_token(state, tQIDENT); }
42
+ "->" { return next_token(state, pARROW); }
43
+ "=>" { return next_token(state, pFATARROW); }
44
+ "=" { return next_token(state, pEQ); }
45
+ ":" { return next_token(state, pCOLON); }
46
+ "::" { return next_token(state, pCOLON2); }
47
+ "<" { return next_token(state, pLT); }
48
+ operator { return next_token(state, tOPERATOR); }
49
+
50
+ number = [0-9] [0-9_]*;
51
+ ("-"|"+")? number { return next_token(state, tINTEGER); }
52
+
53
+ "%a{" [^}\x00]* "}" { return next_token(state, tANNOTATION); }
54
+ "%a(" [^)\x00]* ")" { return next_token(state, tANNOTATION); }
55
+ "%a[" [^\]\x00]* "]" { return next_token(state, tANNOTATION); }
56
+ "%a|" [^|\x00]* "|" { return next_token(state, tANNOTATION); }
57
+ "%a<" [^>\x00]* ">" { return next_token(state, tANNOTATION); }
58
+
59
+ "#" (. \ [\x00])* {
60
+ return next_token(
61
+ state,
62
+ state->first_token_of_line ? tLINECOMMENT : tCOMMENT
63
+ );
64
+ }
65
+
66
+ "alias" { return next_token(state, kALIAS); }
67
+ "attr_accessor" { return next_token(state, kATTRACCESSOR); }
68
+ "attr_reader" { return next_token(state, kATTRREADER); }
69
+ "attr_writer" { return next_token(state, kATTRWRITER); }
70
+ "bool" { return next_token(state, kBOOL); }
71
+ "bot" { return next_token(state, kBOT); }
72
+ "class" { return next_token(state, kCLASS); }
73
+ "def" { return next_token(state, kDEF); }
74
+ "end" { return next_token(state, kEND); }
75
+ "extend" { return next_token(state, kEXTEND); }
76
+ "false" { return next_token(state, kFALSE); }
77
+ "in" { return next_token(state, kIN); }
78
+ "include" { return next_token(state, kINCLUDE); }
79
+ "instance" { return next_token(state, kINSTANCE); }
80
+ "interface" { return next_token(state, kINTERFACE); }
81
+ "module" { return next_token(state, kMODULE); }
82
+ "nil" { return next_token(state, kNIL); }
83
+ "out" { return next_token(state, kOUT); }
84
+ "prepend" { return next_token(state, kPREPEND); }
85
+ "private" { return next_token(state, kPRIVATE); }
86
+ "public" { return next_token(state, kPUBLIC); }
87
+ "self" { return next_token(state, kSELF); }
88
+ "singleton" { return next_token(state, kSINGLETON); }
89
+ "top" { return next_token(state, kTOP); }
90
+ "true" { return next_token(state, kTRUE); }
91
+ "type" { return next_token(state, kTYPE); }
92
+ "unchecked" { return next_token(state, kUNCHECKED); }
93
+ "untyped" { return next_token(state, kUNTYPED); }
94
+ "void" { return next_token(state, kVOID); }
95
+
96
+ dqstring = ["] ("\\"["] | [^"\x00])* ["];
97
+ sqstring = ['] ("\\"['] | [^'\x00])* ['];
98
+
99
+ dqstring { return next_token(state, tDQSTRING); }
100
+ sqstring { return next_token(state, tSQSTRING); }
101
+ ":" dqstring { return next_token(state, tDQSYMBOL); }
102
+ ":" sqstring { return next_token(state, tSQSYMBOL); }
103
+
104
+ identifier = [a-zA-Z_] word* [!?=]?;
105
+ symbol_opr = ":|" | ":&" | ":/" | ":%" | ":~" | ":`" | ":^"
106
+ | ":==" | ":=~" | ":===" | ":!" | ":!=" | ":!~"
107
+ | ":<" | ":<=" | ":<<" | ":<=>" | ":>" | ":>=" | ":>>"
108
+ | ":-" | ":-@" | ":+" | ":+@" | ":*" | ":**" | ":[]" | ":[]=";
109
+
110
+ global_ident = [0-9]+
111
+ | "-" [a-zA-Z0-9_]
112
+ | [~*$?!@\\/;,.=:<>"&'`+]
113
+ | [^ \t\r\n:;=.,!"$%&()-+~|\\'[\]{}*/<>^\x00]+;
114
+
115
+ ":" identifier { return next_token(state, tSYMBOL); }
116
+ ":@" identifier { return next_token(state, tSYMBOL); }
117
+ ":@@" identifier { return next_token(state, tSYMBOL); }
118
+ ":$" global_ident { return next_token(state, tSYMBOL); }
119
+ symbol_opr { return next_token(state, tSYMBOL); }
120
+
121
+ [a-z] word* { return next_token(state, tLIDENT); }
122
+ [A-Z] word* { return next_token(state, tUIDENT); }
123
+ "_" [a-z0-9_] word* { return next_token(state, tULLIDENT); }
124
+ "_" [A-Z] word* { return next_token(state, tULIDENT); }
125
+ "_" { return next_token(state, tULLIDENT); }
126
+ [a-zA-Z_] word* "!" { return next_token(state, tBANGIDENT); }
127
+ [a-zA-Z_] word* "=" { return next_token(state, tEQIDENT); }
128
+
129
+ "@" [a-zA-Z_] word* { return next_token(state, tAIDENT); }
130
+ "@@" [a-zA-Z_] word* { return next_token(state, tA2IDENT); }
131
+
132
+ "$" global_ident { return next_token(state, tGIDENT); }
133
+
134
+ skip = [ \t\n]+;
135
+
136
+ skip { state->start = state->current; goto start; }
137
+ "\x00" { return next_token(state, pEOF); }
138
+ * { return next_token(state, ErrorToken); }
139
+ */
140
+ }
@@ -0,0 +1,139 @@
1
+ #include "rbs_extension.h"
2
+
/*
 * Printable name of every token type.
 *
 * `token_type_str` indexes this table with the numeric value of an
 * `enum TokenType`, so the entries must stay in exactly the same order as
 * the enumerators. Both the strings and the table itself are read-only.
 */
static const char *const RBS_TOKENTYPE_NAMES[] = {
  "NullType",
  "pEOF",
  "ErrorToken",

  "pLPAREN",          /* ( */
  "pRPAREN",          /* ) */
  "pCOLON",           /* : */
  "pCOLON2",          /* :: */
  "pLBRACKET",        /* [ */
  "pRBRACKET",        /* ] */
  "pLBRACE",          /* { */
  "pRBRACE",          /* } */
  "pHAT",             /* ^ */
  "pARROW",           /* -> */
  "pFATARROW",        /* => */
  "pCOMMA",           /* , */
  "pBAR",             /* | */
  "pAMP",             /* & */
  "pSTAR",            /* * */
  "pSTAR2",           /* ** */
  "pDOT",             /* . */
  "pDOT3",            /* ... */
  "pBANG",            /* ! */
  "pQUESTION",        /* ? */
  "pLT",              /* < */
  "pEQ",              /* = */

  "kALIAS",           /* alias */
  "kATTRACCESSOR",    /* attr_accessor */
  "kATTRREADER",      /* attr_reader */
  "kATTRWRITER",      /* attr_writer */
  "kBOOL",            /* bool */
  "kBOT",             /* bot */
  "kCLASS",           /* class */
  "kDEF",             /* def */
  "kEND",             /* end */
  "kEXTEND",          /* extend */
  "kFALSE",           /* false */
  "kIN",              /* in */
  "kINCLUDE",         /* include */
  "kINSTANCE",        /* instance */
  "kINTERFACE",       /* interface */
  "kMODULE",          /* module */
  "kNIL",             /* nil */
  "kOUT",             /* out */
  "kPREPEND",         /* prepend */
  "kPRIVATE",         /* private */
  "kPUBLIC",          /* public */
  "kSELF",            /* self */
  "kSINGLETON",       /* singleton */
  "kTOP",             /* top */
  "kTRUE",            /* true */
  "kTYPE",            /* type */
  "kUNCHECKED",       /* unchecked */
  "kUNTYPED",         /* untyped */
  "kVOID",            /* void */

  "tLIDENT",          /* Identifiers starting with lower case */
  "tUIDENT",          /* Identifiers starting with upper case */
  "tULIDENT",         /* Identifiers starting with `_` followed by upper case */
  "tULLIDENT",        /* A bare `_`, or `_` followed by lower case, a digit or `_` */
  "tGIDENT",          /* Identifiers starting with `$` */
  "tAIDENT",          /* Identifiers starting with `@` */
  "tA2IDENT",         /* Identifiers starting with `@@` */
  "tBANGIDENT",       /* Identifiers ending with `!` */
  "tEQIDENT",         /* Identifiers ending with `=` */
  "tQIDENT",          /* Quoted identifier */
  "tOPERATOR",        /* Operator identifier */

  "tCOMMENT",         /* Comment that follows another token on the same line */
  "tLINECOMMENT",     /* Comment that is the first token of its line */

  "tDQSTRING",        /* Double quoted string */
  "tSQSTRING",        /* Single quoted string */
  "tINTEGER",         /* Integer */
  "tSYMBOL",          /* Symbol */
  "tDQSYMBOL",        /* Double quoted symbol */
  "tSQSYMBOL",        /* Single quoted symbol */
  "tANNOTATION",      /* Annotation */
};
84
+
85
+ token NullToken = { NullType };
86
+ position NullPosition = { -1, -1, -1, -1 };
87
+ range NULL_RANGE = { { -1, -1, -1, -1 }, { -1, -1, -1, -1 } };
88
+
89
+ const char *token_type_str(enum TokenType type) {
90
+ return RBS_TOKENTYPE_NAMES[type];
91
+ }
92
+
93
+ int token_chars(token tok) {
94
+ return tok.range.end.char_pos - tok.range.start.char_pos;
95
+ }
96
+
97
+ int token_bytes(token tok) {
98
+ return RANGE_BYTES(tok.range);
99
+ }
100
+
101
+ unsigned int peek(lexstate *state) {
102
+ unsigned int c = rb_enc_mbc_to_codepoint(RSTRING_PTR(state->string) + state->current.byte_pos, RSTRING_END(state->string), rb_enc_get(state->string));
103
+ state->last_char = c;
104
+ return c;
105
+ }
106
+
107
+ token next_token(lexstate *state, enum TokenType type) {
108
+ token t;
109
+
110
+ t.type = type;
111
+ t.range.start = state->start;
112
+ t.range.end = state->current;
113
+ state->start = state->current;
114
+ state->first_token_of_line = false;
115
+
116
+ return t;
117
+ }
118
+
119
+ void skip(lexstate *state) {
120
+ if (!state->last_char) {
121
+ peek(state);
122
+ }
123
+ int byte_len = rb_enc_codelen(state->last_char, rb_enc_get(state->string));
124
+
125
+ state->current.char_pos += 1;
126
+ state->current.byte_pos += byte_len;
127
+
128
+ if (state->last_char == '\n') {
129
+ state->current.line += 1;
130
+ state->current.column = 0;
131
+ state->first_token_of_line = true;
132
+ } else {
133
+ state->current.column += 1;
134
+ }
135
+ }
136
+
137
+ char *peek_token(lexstate *state, token tok) {
138
+ return RSTRING_PTR(state->string) + tok.range.start.byte_pos;
139
+ }