rbs 3.9.4 → 3.10.0.pre.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. checksums.yaml +4 -4
  2. data/.clang-format +74 -0
  3. data/.clangd +2 -0
  4. data/.github/workflows/c-check.yml +54 -0
  5. data/.github/workflows/comments.yml +3 -3
  6. data/.github/workflows/ruby.yml +34 -19
  7. data/.github/workflows/typecheck.yml +1 -1
  8. data/.github/workflows/windows.yml +1 -1
  9. data/.gitignore +4 -0
  10. data/CHANGELOG.md +13 -0
  11. data/README.md +38 -1
  12. data/Rakefile +152 -23
  13. data/config.yml +190 -62
  14. data/core/array.rbs +44 -43
  15. data/core/dir.rbs +2 -2
  16. data/core/encoding.rbs +3 -2
  17. data/core/enumerable.rbs +89 -2
  18. data/core/errno.rbs +8 -0
  19. data/core/errors.rbs +28 -1
  20. data/core/exception.rbs +2 -2
  21. data/core/fiber.rbs +3 -3
  22. data/core/file.rbs +26 -11
  23. data/core/float.rbs +1 -1
  24. data/core/gc.rbs +422 -281
  25. data/core/hash.rbs +1024 -727
  26. data/core/io/wait.rbs +11 -33
  27. data/core/io.rbs +6 -4
  28. data/core/kernel.rbs +49 -43
  29. data/core/marshal.rbs +1 -1
  30. data/core/match_data.rbs +1 -1
  31. data/core/math.rbs +42 -3
  32. data/core/method.rbs +14 -6
  33. data/core/module.rbs +71 -11
  34. data/core/nil_class.rbs +3 -3
  35. data/core/numeric.rbs +8 -8
  36. data/core/object.rbs +3 -3
  37. data/core/object_space.rbs +13 -0
  38. data/{stdlib/pathname/0 → core}/pathname.rbs +253 -352
  39. data/core/proc.rbs +15 -8
  40. data/core/process.rbs +2 -2
  41. data/core/ractor.rbs +278 -437
  42. data/core/range.rbs +6 -7
  43. data/core/rbs/unnamed/argf.rbs +1 -1
  44. data/core/rbs/unnamed/env_class.rbs +1 -1
  45. data/core/rbs/unnamed/random.rbs +4 -2
  46. data/core/regexp.rbs +22 -17
  47. data/core/ruby_vm.rbs +6 -4
  48. data/core/rubygems/errors.rbs +3 -70
  49. data/core/rubygems/rubygems.rbs +11 -79
  50. data/core/set.rbs +439 -332
  51. data/core/string.rbs +2909 -1118
  52. data/core/struct.rbs +1 -1
  53. data/core/symbol.rbs +4 -4
  54. data/core/thread.rbs +83 -20
  55. data/core/time.rbs +35 -9
  56. data/core/unbound_method.rbs +14 -6
  57. data/docs/aliases.md +79 -0
  58. data/docs/collection.md +2 -2
  59. data/docs/gem.md +0 -1
  60. data/docs/sigs.md +3 -3
  61. data/ext/rbs_extension/ast_translation.c +1016 -0
  62. data/ext/rbs_extension/ast_translation.h +37 -0
  63. data/ext/rbs_extension/class_constants.c +157 -0
  64. data/{include/rbs/constants.h → ext/rbs_extension/class_constants.h} +7 -1
  65. data/ext/rbs_extension/compat.h +10 -0
  66. data/ext/rbs_extension/extconf.rb +25 -1
  67. data/ext/rbs_extension/legacy_location.c +317 -0
  68. data/ext/rbs_extension/legacy_location.h +45 -0
  69. data/ext/rbs_extension/main.c +375 -23
  70. data/ext/rbs_extension/rbs_extension.h +6 -21
  71. data/ext/rbs_extension/rbs_string_bridging.c +9 -0
  72. data/ext/rbs_extension/rbs_string_bridging.h +24 -0
  73. data/include/rbs/ast.h +687 -0
  74. data/include/rbs/defines.h +86 -0
  75. data/include/rbs/lexer.h +199 -0
  76. data/include/rbs/location.h +59 -0
  77. data/include/rbs/parser.h +135 -0
  78. data/include/rbs/string.h +49 -0
  79. data/include/rbs/util/rbs_allocator.h +59 -0
  80. data/include/rbs/util/rbs_assert.h +20 -0
  81. data/include/rbs/util/rbs_buffer.h +83 -0
  82. data/include/rbs/util/rbs_constant_pool.h +6 -67
  83. data/include/rbs/util/rbs_encoding.h +282 -0
  84. data/include/rbs/util/rbs_unescape.h +23 -0
  85. data/include/rbs.h +1 -2
  86. data/lib/rbs/annotate/formatter.rb +3 -13
  87. data/lib/rbs/annotate/rdoc_annotator.rb +3 -1
  88. data/lib/rbs/annotate/rdoc_source.rb +1 -1
  89. data/lib/rbs/cli/validate.rb +2 -2
  90. data/lib/rbs/cli.rb +1 -1
  91. data/lib/rbs/collection/config/lockfile_generator.rb +1 -0
  92. data/lib/rbs/collection.rb +1 -0
  93. data/lib/rbs/definition_builder/ancestor_builder.rb +5 -5
  94. data/lib/rbs/environment.rb +64 -59
  95. data/lib/rbs/environment_loader.rb +1 -1
  96. data/lib/rbs/errors.rb +1 -1
  97. data/lib/rbs/parser_aux.rb +5 -0
  98. data/lib/rbs/resolver/constant_resolver.rb +2 -2
  99. data/lib/rbs/resolver/type_name_resolver.rb +124 -38
  100. data/lib/rbs/test/type_check.rb +13 -0
  101. data/lib/rbs/types.rb +3 -1
  102. data/lib/rbs/version.rb +1 -1
  103. data/lib/rbs.rb +1 -1
  104. data/lib/rdoc/discover.rb +1 -1
  105. data/lib/rdoc_plugin/parser.rb +3 -3
  106. data/sig/annotate/formatter.rbs +2 -2
  107. data/sig/annotate/rdoc_annotater.rbs +1 -1
  108. data/sig/environment.rbs +57 -6
  109. data/sig/manifest.yaml +0 -1
  110. data/sig/parser.rbs +20 -0
  111. data/sig/resolver/type_name_resolver.rbs +38 -7
  112. data/sig/types.rbs +4 -1
  113. data/src/ast.c +1256 -0
  114. data/src/lexer.c +2956 -0
  115. data/src/lexer.re +147 -0
  116. data/src/lexstate.c +205 -0
  117. data/src/location.c +71 -0
  118. data/src/parser.c +3495 -0
  119. data/src/string.c +90 -0
  120. data/src/util/rbs_allocator.c +152 -0
  121. data/src/util/rbs_assert.c +21 -0
  122. data/src/util/rbs_buffer.c +54 -0
  123. data/src/util/rbs_constant_pool.c +16 -86
  124. data/src/util/rbs_encoding.c +21308 -0
  125. data/src/util/rbs_unescape.c +131 -0
  126. data/stdlib/cgi/0/core.rbs +2 -396
  127. data/stdlib/cgi/0/manifest.yaml +1 -0
  128. data/stdlib/cgi-escape/0/escape.rbs +153 -0
  129. data/stdlib/coverage/0/coverage.rbs +3 -1
  130. data/stdlib/delegate/0/delegator.rbs +10 -7
  131. data/stdlib/erb/0/erb.rbs +737 -347
  132. data/stdlib/fileutils/0/fileutils.rbs +18 -13
  133. data/stdlib/forwardable/0/forwardable.rbs +3 -0
  134. data/stdlib/json/0/json.rbs +67 -48
  135. data/stdlib/net-http/0/net-http.rbs +3 -0
  136. data/stdlib/objspace/0/objspace.rbs +8 -3
  137. data/stdlib/open-uri/0/open-uri.rbs +40 -0
  138. data/stdlib/openssl/0/openssl.rbs +182 -149
  139. data/stdlib/optparse/0/optparse.rbs +3 -3
  140. data/stdlib/rdoc/0/code_object.rbs +2 -2
  141. data/stdlib/rdoc/0/comment.rbs +2 -0
  142. data/stdlib/rdoc/0/options.rbs +76 -0
  143. data/stdlib/rdoc/0/rdoc.rbs +7 -5
  144. data/stdlib/rdoc/0/store.rbs +1 -1
  145. data/stdlib/resolv/0/resolv.rbs +25 -68
  146. data/stdlib/ripper/0/ripper.rbs +5 -2
  147. data/stdlib/singleton/0/singleton.rbs +3 -0
  148. data/stdlib/socket/0/socket.rbs +13 -1
  149. data/stdlib/socket/0/tcp_socket.rbs +10 -2
  150. data/stdlib/stringio/0/stringio.rbs +412 -80
  151. data/stdlib/strscan/0/string_scanner.rbs +31 -31
  152. data/stdlib/tempfile/0/tempfile.rbs +1 -1
  153. data/stdlib/tsort/0/cyclic.rbs +3 -0
  154. data/stdlib/uri/0/common.rbs +11 -2
  155. data/stdlib/uri/0/file.rbs +1 -1
  156. data/stdlib/uri/0/generic.rbs +16 -15
  157. data/stdlib/uri/0/rfc2396_parser.rbs +6 -7
  158. data/stdlib/zlib/0/zstream.rbs +1 -0
  159. metadata +42 -19
  160. data/ext/rbs_extension/lexer.c +0 -2728
  161. data/ext/rbs_extension/lexer.h +0 -179
  162. data/ext/rbs_extension/lexer.re +0 -147
  163. data/ext/rbs_extension/lexstate.c +0 -175
  164. data/ext/rbs_extension/location.c +0 -325
  165. data/ext/rbs_extension/location.h +0 -85
  166. data/ext/rbs_extension/parser.c +0 -2982
  167. data/ext/rbs_extension/parser.h +0 -18
  168. data/ext/rbs_extension/parserstate.c +0 -411
  169. data/ext/rbs_extension/parserstate.h +0 -163
  170. data/ext/rbs_extension/unescape.c +0 -32
  171. data/include/rbs/ruby_objs.h +0 -72
  172. data/src/constants.c +0 -153
  173. data/src/ruby_objs.c +0 -799
data/src/lexer.re ADDED
@@ -0,0 +1,147 @@
1
+ #include "rbs/lexer.h"
2
+
3
+ rbs_token_t rbs_lexer_next_token(rbs_lexer_t *lexer) {
4
+ rbs_lexer_t backup;
5
+
6
+ backup = *lexer;
7
+
8
+ /*!re2c
9
+ re2c:flags:u = 1;
10
+ re2c:api:style = free-form;
11
+ re2c:flags:input = custom;
12
+ re2c:define:YYCTYPE = "unsigned int";
13
+ re2c:define:YYPEEK = "rbs_peek(lexer)";
14
+ re2c:define:YYSKIP = "rbs_skip(lexer);";
15
+ re2c:define:YYBACKUP = "backup = *lexer;";
16
+ re2c:define:YYRESTORE = "*lexer = backup;";
17
+ re2c:yyfill:enable = 0;
18
+
19
+ word = [a-zA-Z0-9_];
20
+
21
+ operator = "/" | "~" | "[]=" | "!" | "!=" | "!~" | "-" | "-@" | "+" | "+@"
22
+ | "==" | "===" | "=~" | "<<" | "<=" | "<=>" | ">" | ">=" | ">>" | "%";
23
+
24
+ "(" { return rbs_next_token(lexer, pLPAREN); }
25
+ ")" { return rbs_next_token(lexer, pRPAREN); }
26
+ "[" { return rbs_next_token(lexer, pLBRACKET); }
27
+ "]" { return rbs_next_token(lexer, pRBRACKET); }
28
+ "{" { return rbs_next_token(lexer, pLBRACE); }
29
+ "}" { return rbs_next_token(lexer, pRBRACE); }
30
+ "," { return rbs_next_token(lexer, pCOMMA); }
31
+ "|" { return rbs_next_token(lexer, pBAR); }
32
+ "^" { return rbs_next_token(lexer, pHAT); }
33
+ "&" { return rbs_next_token(lexer, pAMP); }
34
+ "?" { return rbs_next_token(lexer, pQUESTION); }
35
+ "*" { return rbs_next_token(lexer, pSTAR); }
36
+ "**" { return rbs_next_token(lexer, pSTAR2); }
37
+ "." { return rbs_next_token(lexer, pDOT); }
38
+ "..." { return rbs_next_token(lexer, pDOT3); }
39
+ "`" { return rbs_next_token(lexer, tOPERATOR); }
40
+ "`" [^ :\x00] [^`\x00]* "`" { return rbs_next_token(lexer, tQIDENT); }
41
+ "->" { return rbs_next_token(lexer, pARROW); }
42
+ "=>" { return rbs_next_token(lexer, pFATARROW); }
43
+ "=" { return rbs_next_token(lexer, pEQ); }
44
+ ":" { return rbs_next_token(lexer, pCOLON); }
45
+ "::" { return rbs_next_token(lexer, pCOLON2); }
46
+ "<" { return rbs_next_token(lexer, pLT); }
47
+ "[]" { return rbs_next_token(lexer, pAREF_OPR); }
48
+ operator { return rbs_next_token(lexer, tOPERATOR); }
49
+
50
+ number = [0-9] [0-9_]*;
51
+ ("-"|"+")? number { return rbs_next_token(lexer, tINTEGER); }
52
+
53
+ "%a{" [^}\x00]* "}" { return rbs_next_token(lexer, tANNOTATION); }
54
+ "%a(" [^)\x00]* ")" { return rbs_next_token(lexer, tANNOTATION); }
55
+ "%a[" [^\]\x00]* "]" { return rbs_next_token(lexer, tANNOTATION); }
56
+ "%a|" [^|\x00]* "|" { return rbs_next_token(lexer, tANNOTATION); }
57
+ "%a<" [^>\x00]* ">" { return rbs_next_token(lexer, tANNOTATION); }
58
+
59
+ "#" (. \ [\x00])* {
60
+ return rbs_next_token(
61
+ lexer,
62
+ lexer->first_token_of_line ? tLINECOMMENT : tCOMMENT
63
+ );
64
+ }
65
+
66
+ "alias" { return rbs_next_token(lexer, kALIAS); }
67
+ "attr_accessor" { return rbs_next_token(lexer, kATTRACCESSOR); }
68
+ "attr_reader" { return rbs_next_token(lexer, kATTRREADER); }
69
+ "attr_writer" { return rbs_next_token(lexer, kATTRWRITER); }
70
+ "bool" { return rbs_next_token(lexer, kBOOL); }
71
+ "bot" { return rbs_next_token(lexer, kBOT); }
72
+ "class" { return rbs_next_token(lexer, kCLASS); }
73
+ "def" { return rbs_next_token(lexer, kDEF); }
74
+ "end" { return rbs_next_token(lexer, kEND); }
75
+ "extend" { return rbs_next_token(lexer, kEXTEND); }
76
+ "false" { return rbs_next_token(lexer, kFALSE); }
77
+ "in" { return rbs_next_token(lexer, kIN); }
78
+ "include" { return rbs_next_token(lexer, kINCLUDE); }
79
+ "instance" { return rbs_next_token(lexer, kINSTANCE); }
80
+ "interface" { return rbs_next_token(lexer, kINTERFACE); }
81
+ "module" { return rbs_next_token(lexer, kMODULE); }
82
+ "nil" { return rbs_next_token(lexer, kNIL); }
83
+ "out" { return rbs_next_token(lexer, kOUT); }
84
+ "prepend" { return rbs_next_token(lexer, kPREPEND); }
85
+ "private" { return rbs_next_token(lexer, kPRIVATE); }
86
+ "public" { return rbs_next_token(lexer, kPUBLIC); }
87
+ "self" { return rbs_next_token(lexer, kSELF); }
88
+ "singleton" { return rbs_next_token(lexer, kSINGLETON); }
89
+ "top" { return rbs_next_token(lexer, kTOP); }
90
+ "true" { return rbs_next_token(lexer, kTRUE); }
91
+ "type" { return rbs_next_token(lexer, kTYPE); }
92
+ "unchecked" { return rbs_next_token(lexer, kUNCHECKED); }
93
+ "untyped" { return rbs_next_token(lexer, kUNTYPED); }
94
+ "void" { return rbs_next_token(lexer, kVOID); }
95
+ "use" { return rbs_next_token(lexer, kUSE); }
96
+ "as" { return rbs_next_token(lexer, kAS); }
97
+ "__todo__" { return rbs_next_token(lexer, k__TODO__); }
98
+
99
+ unicode_char = "\\u" [0-9a-fA-F]{4};
100
+ oct_char = "\\x" [0-9a-f]{1,2};
101
+ hex_char = "\\" [0-7]{1,3};
102
+
103
+ dqstring = ["] (unicode_char | oct_char | hex_char | "\\" [^xu] | [^\\"\x00])* ["];
104
+ sqstring = ['] ("\\"['\\] | [^'\x00])* ['];
105
+
106
+ dqstring { return rbs_next_token(lexer, tDQSTRING); }
107
+ sqstring { return rbs_next_token(lexer, tSQSTRING); }
108
+ ":" dqstring { return rbs_next_token(lexer, tDQSYMBOL); }
109
+ ":" sqstring { return rbs_next_token(lexer, tSQSYMBOL); }
110
+
111
+ identifier = [a-zA-Z_] word* [!?=]?;
112
+ symbol_opr = ":|" | ":&" | ":/" | ":%" | ":~" | ":`" | ":^"
113
+ | ":==" | ":=~" | ":===" | ":!" | ":!=" | ":!~"
114
+ | ":<" | ":<=" | ":<<" | ":<=>" | ":>" | ":>=" | ":>>"
115
+ | ":-" | ":-@" | ":+" | ":+@" | ":*" | ":**" | ":[]" | ":[]=";
116
+
117
+ global_ident = [0-9]+
118
+ | "-" [a-zA-Z0-9_]
119
+ | [~*$?!@\\/;,.=:<>"&'`+]
120
+ | [^ \t\r\n:;=.,!"$%&()-+~|\\'[\]{}*/<>^\x00]+;
121
+
122
+ ":" identifier { return rbs_next_token(lexer, tSYMBOL); }
123
+ ":@" identifier { return rbs_next_token(lexer, tSYMBOL); }
124
+ ":@@" identifier { return rbs_next_token(lexer, tSYMBOL); }
125
+ ":$" global_ident { return rbs_next_token(lexer, tSYMBOL); }
126
+ symbol_opr { return rbs_next_token(lexer, tSYMBOL); }
127
+
128
+ [a-z] word* { return rbs_next_token(lexer, tLIDENT); }
129
+ [A-Z] word* { return rbs_next_token(lexer, tUIDENT); }
130
+ "_" [a-z0-9_] word* { return rbs_next_token(lexer, tULLIDENT); }
131
+ "_" [A-Z] word* { return rbs_next_token(lexer, tULIDENT); }
132
+ "_" { return rbs_next_token(lexer, tULLIDENT); }
133
+ [a-zA-Z_] word* "!" { return rbs_next_token(lexer, tBANGIDENT); }
134
+ [a-zA-Z_] word* "=" { return rbs_next_token(lexer, tEQIDENT); }
135
+
136
+ "@" [a-zA-Z_] word* { return rbs_next_token(lexer, tAIDENT); }
137
+ "@@" [a-zA-Z_] word* { return rbs_next_token(lexer, tA2IDENT); }
138
+
139
+ "$" global_ident { return rbs_next_token(lexer, tGIDENT); }
140
+
141
+ skip = ([ \t]+|[\r\n]);
142
+
143
+ skip { return rbs_next_token(lexer, tTRIVIA); }
144
+ "\x00" { return rbs_next_eof_token(lexer); }
145
+ * { return rbs_next_token(lexer, ErrorToken); }
146
+ */
147
+ }
data/src/lexstate.c ADDED
@@ -0,0 +1,205 @@
1
+ #include "rbs/defines.h"
2
+ #include "rbs/lexer.h"
3
+ #include "rbs/util/rbs_assert.h"
4
+
5
/**
 * Printable names for token types, indexed by the numeric value of
 * `enum RBSTokenType` (see `rbs_token_type_str`). The entries must stay in the
 * same order as the enumerators of that enum.
 *
 * Both the strings and the table slots are read-only.
 */
static const char *const RBS_TOKENTYPE_NAMES[] = {
    "NullType",
    "pEOF",
    "ErrorToken",

    "pLPAREN",   /* ( */
    "pRPAREN",   /* ) */
    "pCOLON",    /* : */
    "pCOLON2",   /* :: */
    "pLBRACKET", /* [ */
    "pRBRACKET", /* ] */
    "pLBRACE",   /* { */
    "pRBRACE",   /* } */
    "pHAT",      /* ^ */
    "pARROW",    /* -> */
    "pFATARROW", /* => */
    "pCOMMA",    /* , */
    "pBAR",      /* | */
    "pAMP",      /* & */
    "pSTAR",     /* * */
    "pSTAR2",    /* ** */
    "pDOT",      /* . */
    "pDOT3",     /* ... */
    "pBANG",     /* ! */
    "pQUESTION", /* ? */
    "pLT",       /* < */
    "pEQ",       /* = */

    "kALIAS",        /* alias */
    "kATTRACCESSOR", /* attr_accessor */
    "kATTRREADER",   /* attr_reader */
    "kATTRWRITER",   /* attr_writer */
    "kBOOL",         /* bool */
    "kBOT",          /* bot */
    "kCLASS",        /* class */
    "kDEF",          /* def */
    "kEND",          /* end */
    "kEXTEND",       /* extend */
    "kFALSE",        /* false */
    "kIN",           /* in */
    "kINCLUDE",      /* include */
    "kINSTANCE",     /* instance */
    "kINTERFACE",    /* interface */
    "kMODULE",       /* module */
    "kNIL",          /* nil */
    "kOUT",          /* out */
    "kPREPEND",      /* prepend */
    "kPRIVATE",      /* private */
    "kPUBLIC",       /* public */
    "kSELF",         /* self */
    "kSINGLETON",    /* singleton */
    "kTOP",          /* top */
    "kTRUE",         /* true */
    "kTYPE",         /* type */
    "kUNCHECKED",    /* unchecked */
    "kUNTYPED",      /* untyped */
    "kVOID",         /* void */
    "kUSE",          /* use */
    "kAS",           /* as */
    "k__TODO__",     /* __todo__ */

    "tLIDENT",    /* Identifiers starting with lower case */
    "tUIDENT",    /* Identifiers starting with upper case */
    "tULIDENT",   /* Identifiers starting with `_` followed by upper case */
    "tULLIDENT",  /* A lone `_`, or `_` followed by lower case, a digit, or `_` */
    "tGIDENT",    /* Identifiers starting with `$` */
    "tAIDENT",    /* Identifiers starting with `@` */
    "tA2IDENT",   /* Identifiers starting with `@@` */
    "tBANGIDENT", /* Identifiers ending with `!` */
    "tEQIDENT",   /* Identifiers ending with `=` */
    "tQIDENT",    /* Quoted identifier */
    "pAREF_OPR",  /* [] */
    "tOPERATOR",  /* Operator identifier */

    "tCOMMENT",     /* Comment that is not the first token of its line */
    "tLINECOMMENT", /* Comment that is the first token of its line */

    "tTRIVIA", /* Spaces, tabs, or a line break */

    "tDQSTRING",   /* Double quoted string */
    "tSQSTRING",   /* Single quoted string */
    "tINTEGER",    /* Integer */
    "tSYMBOL",     /* Symbol */
    "tDQSYMBOL",   /* `:` followed by a double quoted string */
    "tSQSYMBOL",   /* `:` followed by a single quoted string */
    "tANNOTATION", /* Annotation */
};
92
+
93
+ const rbs_position_t NullPosition = { -1, -1, -1, -1 };
94
+ const rbs_range_t NULL_RANGE = { { -1, -1, -1, -1 }, { -1, -1, -1, -1 } };
95
+ const rbs_token_t NullToken = { .type = NullType, .range = { { 0 }, { 0 } } };
96
+
97
+ const char *rbs_token_type_str(enum RBSTokenType type) {
98
+ return RBS_TOKENTYPE_NAMES[type];
99
+ }
100
+
101
+ int rbs_token_chars(rbs_token_t tok) {
102
+ return tok.range.end.char_pos - tok.range.start.char_pos;
103
+ }
104
+
105
+ int rbs_token_bytes(rbs_token_t tok) {
106
+ return RBS_RANGE_BYTES(tok.range);
107
+ }
108
+
109
+ unsigned int rbs_peek(rbs_lexer_t *lexer) {
110
+ return lexer->current_code_point;
111
+ }
112
+
113
+ bool rbs_next_char(rbs_lexer_t *lexer, unsigned int *codepoint, size_t *byte_len) {
114
+ if (RBS_UNLIKELY(lexer->current.char_pos == lexer->end_pos)) {
115
+ return false;
116
+ }
117
+
118
+ const char *start = lexer->string.start + lexer->current.byte_pos;
119
+
120
+ // Fast path for ASCII (single-byte) characters
121
+ if ((unsigned int) *start < 128) {
122
+ *codepoint = (unsigned int) *start;
123
+ *byte_len = 1;
124
+ return true;
125
+ }
126
+
127
+ *byte_len = lexer->encoding->char_width((const uint8_t *) start, (ptrdiff_t) (lexer->string.end - start));
128
+
129
+ if (*byte_len == 1) {
130
+ *codepoint = (unsigned int) *start;
131
+ } else {
132
+ *codepoint = 12523; // Dummy data for "ル" from "ルビー" (Ruby) in Unicode
133
+ }
134
+
135
+ return true;
136
+ }
137
+
138
+ void rbs_skip(rbs_lexer_t *lexer) {
139
+ RBS_ASSERT(lexer->current_character_bytes > 0, "rbs_skip called with current_character_bytes == 0");
140
+
141
+ if (RBS_UNLIKELY(lexer->current_code_point == '\0')) {
142
+ return;
143
+ }
144
+
145
+ unsigned int codepoint;
146
+ size_t byte_len;
147
+
148
+ lexer->current.byte_pos += lexer->current_character_bytes;
149
+ lexer->current.char_pos += 1;
150
+ if (lexer->current_code_point == '\n') {
151
+ lexer->current.line += 1;
152
+ lexer->current.column = 0;
153
+ lexer->first_token_of_line = true;
154
+ } else {
155
+ lexer->current.column += 1;
156
+ }
157
+
158
+ if (rbs_next_char(lexer, &codepoint, &byte_len)) {
159
+ lexer->current_code_point = codepoint;
160
+ lexer->current_character_bytes = byte_len;
161
+ } else {
162
+ lexer->current_character_bytes = 1;
163
+ lexer->current_code_point = '\0';
164
+ }
165
+ }
166
+
167
+ rbs_token_t rbs_next_token(rbs_lexer_t *lexer, enum RBSTokenType type) {
168
+ rbs_token_t t;
169
+
170
+ t.type = type;
171
+ t.range.start = lexer->start;
172
+ t.range.end = lexer->current;
173
+ lexer->start = lexer->current;
174
+ if (type != tTRIVIA) {
175
+ lexer->first_token_of_line = false;
176
+ }
177
+
178
+ return t;
179
+ }
180
+
181
+ rbs_token_t rbs_next_eof_token(rbs_lexer_t *lexer) {
182
+ if ((size_t) lexer->current.byte_pos == rbs_string_len(lexer->string) + 1) {
183
+ // End of String
184
+ rbs_token_t t;
185
+ t.type = pEOF;
186
+ t.range.start = lexer->start;
187
+ t.range.end = lexer->start;
188
+ lexer->start = lexer->current;
189
+
190
+ return t;
191
+ } else {
192
+ // NULL byte in the middle of the string
193
+ return rbs_next_token(lexer, pEOF);
194
+ }
195
+ }
196
+
197
+ void rbs_skipn(rbs_lexer_t *lexer, size_t size) {
198
+ for (size_t i = 0; i < size; i++) {
199
+ rbs_skip(lexer);
200
+ }
201
+ }
202
+
203
+ char *rbs_peek_token(rbs_lexer_t *lexer, rbs_token_t tok) {
204
+ return (char *) lexer->string.start + tok.range.start.byte_pos;
205
+ }
data/src/location.c ADDED
@@ -0,0 +1,71 @@
1
+ #include "rbs/location.h"
2
+ #include "rbs/util/rbs_assert.h"
3
+
4
+ #include <stdio.h>
5
+
6
+ #define RBS_LOC_CHILDREN_SIZE(cap) (sizeof(rbs_loc_children) + sizeof(rbs_loc_entry) * ((cap) - 1))
7
+
8
+ void rbs_loc_alloc_children(rbs_allocator_t *allocator, rbs_location_t *loc, size_t capacity) {
9
+ RBS_ASSERT(capacity <= sizeof(rbs_loc_entry_bitmap) * 8, "Capacity %zu is too large. Max is %zu", capacity, sizeof(rbs_loc_entry_bitmap) * 8);
10
+
11
+ loc->children = rbs_allocator_malloc_impl(allocator, RBS_LOC_CHILDREN_SIZE(capacity), rbs_alignof(rbs_loc_children));
12
+
13
+ loc->children->len = 0;
14
+ loc->children->required_p = 0;
15
+ loc->children->cap = capacity;
16
+ }
17
+
18
+ void rbs_loc_add_optional_child(rbs_location_t *loc, rbs_constant_id_t name, rbs_range_t r) {
19
+ RBS_ASSERT(loc->children != NULL, "All children should have been pre-allocated with rbs_loc_alloc_children()");
20
+ RBS_ASSERT((loc->children->len + 1 <= loc->children->cap), "Not enough space was pre-allocated for the children. Children: %hu, Capacity: %hu", loc->children->len, loc->children->cap);
21
+
22
+ unsigned short i = loc->children->len++;
23
+ loc->children->entries[i].name = name;
24
+ loc->children->entries[i].rg = (rbs_loc_range) { r.start.char_pos, r.end.char_pos };
25
+ }
26
+
27
+ void rbs_loc_add_required_child(rbs_location_t *loc, rbs_constant_id_t name, rbs_range_t r) {
28
+ rbs_loc_add_optional_child(loc, name, r);
29
+ unsigned short last_index = loc->children->len - 1;
30
+ loc->children->required_p |= 1 << last_index;
31
+ }
32
+
33
+ rbs_location_t *rbs_location_new(rbs_allocator_t *allocator, rbs_range_t rg) {
34
+ rbs_location_t *location = rbs_allocator_alloc(allocator, rbs_location_t);
35
+ *location = (rbs_location_t) {
36
+ .rg = rg,
37
+ .children = NULL,
38
+ };
39
+
40
+ return location;
41
+ }
42
+
43
+ rbs_location_list_t *rbs_location_list_new(rbs_allocator_t *allocator) {
44
+ rbs_location_list_t *list = rbs_allocator_alloc(allocator, rbs_location_list_t);
45
+ *list = (rbs_location_list_t) {
46
+ .allocator = allocator,
47
+ .head = NULL,
48
+ .tail = NULL,
49
+ .length = 0,
50
+ };
51
+
52
+ return list;
53
+ }
54
+
55
+ void rbs_location_list_append(rbs_location_list_t *list, rbs_location_t *loc) {
56
+ rbs_location_list_node_t *node = rbs_allocator_alloc(list->allocator, rbs_location_list_node_t);
57
+ *node = (rbs_location_list_node_t) {
58
+ .loc = loc,
59
+ .next = NULL,
60
+ };
61
+
62
+ if (list->head == NULL) {
63
+ list->head = node;
64
+ list->tail = node;
65
+ } else {
66
+ list->tail->next = node;
67
+ list->tail = node;
68
+ }
69
+
70
+ list->length++;
71
+ }