rbs 4.0.0.dev.4 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +14 -14
  3. data/.github/workflows/bundle-update.yml +60 -0
  4. data/.github/workflows/c-check.yml +18 -11
  5. data/.github/workflows/comments.yml +5 -3
  6. data/.github/workflows/dependabot.yml +2 -2
  7. data/.github/workflows/ruby.yml +27 -34
  8. data/.github/workflows/rust.yml +95 -0
  9. data/.github/workflows/typecheck.yml +2 -2
  10. data/.github/workflows/windows.yml +2 -2
  11. data/.rubocop.yml +1 -1
  12. data/CHANGELOG.md +323 -0
  13. data/README.md +1 -1
  14. data/Rakefile +43 -33
  15. data/Steepfile +1 -0
  16. data/config.yml +426 -24
  17. data/core/array.rbs +307 -227
  18. data/core/basic_object.rbs +9 -8
  19. data/core/binding.rbs +0 -2
  20. data/core/builtin.rbs +2 -2
  21. data/core/class.rbs +6 -5
  22. data/core/comparable.rbs +55 -34
  23. data/core/complex.rbs +104 -78
  24. data/core/dir.rbs +61 -49
  25. data/core/encoding.rbs +12 -15
  26. data/core/enumerable.rbs +179 -87
  27. data/core/enumerator/arithmetic_sequence.rbs +70 -0
  28. data/core/enumerator.rbs +65 -2
  29. data/core/errno.rbs +11 -2
  30. data/core/errors.rbs +58 -29
  31. data/core/exception.rbs +13 -13
  32. data/core/fiber.rbs +74 -54
  33. data/core/file.rbs +280 -177
  34. data/core/file_test.rbs +3 -3
  35. data/core/float.rbs +257 -92
  36. data/core/gc.rbs +425 -281
  37. data/core/hash.rbs +1045 -739
  38. data/core/integer.rbs +135 -137
  39. data/core/io/buffer.rbs +53 -42
  40. data/core/io/wait.rbs +13 -35
  41. data/core/io.rbs +192 -144
  42. data/core/kernel.rbs +216 -155
  43. data/core/marshal.rbs +4 -4
  44. data/core/match_data.rbs +15 -13
  45. data/core/math.rbs +107 -66
  46. data/core/method.rbs +69 -33
  47. data/core/module.rbs +244 -106
  48. data/core/nil_class.rbs +7 -6
  49. data/core/numeric.rbs +74 -63
  50. data/core/object.rbs +9 -11
  51. data/core/object_space.rbs +30 -23
  52. data/core/pathname.rbs +1322 -0
  53. data/core/proc.rbs +95 -58
  54. data/core/process.rbs +222 -202
  55. data/core/ractor.rbs +371 -515
  56. data/core/random.rbs +21 -3
  57. data/core/range.rbs +159 -57
  58. data/core/rational.rbs +60 -89
  59. data/core/rbs/unnamed/argf.rbs +60 -53
  60. data/core/rbs/unnamed/env_class.rbs +19 -14
  61. data/core/rbs/unnamed/main_class.rbs +123 -0
  62. data/core/rbs/unnamed/random.rbs +11 -118
  63. data/core/regexp.rbs +258 -214
  64. data/core/ruby.rbs +53 -0
  65. data/core/ruby_vm.rbs +38 -34
  66. data/core/rubygems/config_file.rbs +5 -5
  67. data/core/rubygems/errors.rbs +4 -71
  68. data/core/rubygems/requirement.rbs +5 -5
  69. data/core/rubygems/rubygems.rbs +16 -82
  70. data/core/rubygems/version.rbs +2 -3
  71. data/core/set.rbs +490 -360
  72. data/core/signal.rbs +26 -16
  73. data/core/string.rbs +3234 -1285
  74. data/core/struct.rbs +27 -26
  75. data/core/symbol.rbs +41 -34
  76. data/core/thread.rbs +135 -67
  77. data/core/time.rbs +81 -50
  78. data/core/trace_point.rbs +41 -35
  79. data/core/true_class.rbs +2 -2
  80. data/core/unbound_method.rbs +24 -16
  81. data/core/warning.rbs +7 -7
  82. data/docs/aliases.md +79 -0
  83. data/docs/collection.md +3 -3
  84. data/docs/config.md +171 -0
  85. data/docs/encoding.md +56 -0
  86. data/docs/gem.md +0 -1
  87. data/docs/inline.md +576 -0
  88. data/docs/sigs.md +3 -3
  89. data/docs/syntax.md +46 -16
  90. data/docs/type_fingerprint.md +21 -0
  91. data/exe/rbs +1 -1
  92. data/ext/rbs_extension/ast_translation.c +544 -116
  93. data/ext/rbs_extension/ast_translation.h +3 -0
  94. data/ext/rbs_extension/class_constants.c +16 -2
  95. data/ext/rbs_extension/class_constants.h +8 -0
  96. data/ext/rbs_extension/extconf.rb +5 -1
  97. data/ext/rbs_extension/legacy_location.c +33 -56
  98. data/ext/rbs_extension/legacy_location.h +37 -0
  99. data/ext/rbs_extension/main.c +44 -35
  100. data/include/rbs/ast.h +448 -173
  101. data/include/rbs/defines.h +27 -0
  102. data/include/rbs/lexer.h +30 -11
  103. data/include/rbs/location.h +25 -44
  104. data/include/rbs/parser.h +6 -6
  105. data/include/rbs/string.h +0 -2
  106. data/include/rbs/util/rbs_allocator.h +34 -13
  107. data/include/rbs/util/rbs_assert.h +12 -1
  108. data/include/rbs/util/rbs_constant_pool.h +0 -3
  109. data/include/rbs/util/rbs_encoding.h +2 -0
  110. data/include/rbs/util/rbs_unescape.h +2 -1
  111. data/include/rbs.h +8 -0
  112. data/lib/rbs/ast/annotation.rb +1 -1
  113. data/lib/rbs/ast/comment.rb +1 -1
  114. data/lib/rbs/ast/declarations.rb +10 -10
  115. data/lib/rbs/ast/members.rb +14 -14
  116. data/lib/rbs/ast/ruby/annotations.rb +293 -3
  117. data/lib/rbs/ast/ruby/comment_block.rb +24 -0
  118. data/lib/rbs/ast/ruby/declarations.rb +198 -3
  119. data/lib/rbs/ast/ruby/helpers/constant_helper.rb +4 -0
  120. data/lib/rbs/ast/ruby/members.rb +532 -22
  121. data/lib/rbs/ast/type_param.rb +24 -4
  122. data/lib/rbs/buffer.rb +20 -15
  123. data/lib/rbs/cli/diff.rb +16 -15
  124. data/lib/rbs/cli/validate.rb +38 -106
  125. data/lib/rbs/cli.rb +52 -19
  126. data/lib/rbs/collection/config/lockfile_generator.rb +14 -2
  127. data/lib/rbs/collection/sources/git.rb +1 -0
  128. data/lib/rbs/definition.rb +1 -1
  129. data/lib/rbs/definition_builder/ancestor_builder.rb +62 -9
  130. data/lib/rbs/definition_builder/method_builder.rb +20 -0
  131. data/lib/rbs/definition_builder.rb +147 -25
  132. data/lib/rbs/diff.rb +7 -1
  133. data/lib/rbs/environment.rb +227 -74
  134. data/lib/rbs/environment_loader.rb +0 -6
  135. data/lib/rbs/errors.rb +27 -18
  136. data/lib/rbs/inline_parser.rb +342 -6
  137. data/lib/rbs/location_aux.rb +1 -1
  138. data/lib/rbs/locator.rb +5 -1
  139. data/lib/rbs/method_type.rb +5 -3
  140. data/lib/rbs/parser_aux.rb +20 -7
  141. data/lib/rbs/prototype/helpers.rb +57 -0
  142. data/lib/rbs/prototype/rb.rb +3 -28
  143. data/lib/rbs/prototype/rbi.rb +3 -20
  144. data/lib/rbs/prototype/runtime.rb +8 -0
  145. data/lib/rbs/resolver/constant_resolver.rb +2 -2
  146. data/lib/rbs/resolver/type_name_resolver.rb +116 -38
  147. data/lib/rbs/subtractor.rb +3 -1
  148. data/lib/rbs/test/type_check.rb +19 -2
  149. data/lib/rbs/type_name.rb +1 -1
  150. data/lib/rbs/types.rb +88 -78
  151. data/lib/rbs/unit_test/type_assertions.rb +35 -8
  152. data/lib/rbs/validator.rb +2 -2
  153. data/lib/rbs/version.rb +1 -1
  154. data/lib/rbs.rb +1 -2
  155. data/lib/rdoc/discover.rb +1 -1
  156. data/lib/rdoc_plugin/parser.rb +1 -1
  157. data/rbs.gemspec +4 -3
  158. data/rust/.gitignore +1 -0
  159. data/rust/Cargo.lock +378 -0
  160. data/rust/Cargo.toml +7 -0
  161. data/rust/ruby-rbs/Cargo.toml +22 -0
  162. data/rust/ruby-rbs/build.rs +764 -0
  163. data/rust/ruby-rbs/examples/locations.rs +60 -0
  164. data/rust/ruby-rbs/src/lib.rs +1 -0
  165. data/rust/ruby-rbs/src/node/mod.rs +742 -0
  166. data/rust/ruby-rbs/tests/sanity.rs +47 -0
  167. data/rust/ruby-rbs/vendor/rbs/config.yml +1 -0
  168. data/rust/ruby-rbs-sys/Cargo.toml +23 -0
  169. data/rust/ruby-rbs-sys/build.rs +204 -0
  170. data/rust/ruby-rbs-sys/src/lib.rs +50 -0
  171. data/rust/ruby-rbs-sys/vendor/rbs/include +1 -0
  172. data/rust/ruby-rbs-sys/vendor/rbs/src +1 -0
  173. data/rust/ruby-rbs-sys/wrapper.h +1 -0
  174. data/schema/typeParam.json +17 -1
  175. data/sig/ast/ruby/annotations.rbs +315 -4
  176. data/sig/ast/ruby/comment_block.rbs +8 -0
  177. data/sig/ast/ruby/declarations.rbs +102 -4
  178. data/sig/ast/ruby/members.rbs +108 -2
  179. data/sig/cli/diff.rbs +5 -11
  180. data/sig/cli/validate.rbs +12 -8
  181. data/sig/cli.rbs +18 -18
  182. data/sig/definition.rbs +6 -1
  183. data/sig/definition_builder.rbs +2 -0
  184. data/sig/environment.rbs +70 -12
  185. data/sig/errors.rbs +13 -14
  186. data/sig/inline_parser.rbs +39 -2
  187. data/sig/locator.rbs +0 -2
  188. data/sig/manifest.yaml +0 -1
  189. data/sig/method_builder.rbs +3 -1
  190. data/sig/parser.rbs +31 -13
  191. data/sig/prototype/helpers.rbs +2 -0
  192. data/sig/resolver/type_name_resolver.rbs +35 -7
  193. data/sig/source.rbs +3 -3
  194. data/sig/type_param.rbs +13 -8
  195. data/sig/types.rbs +6 -7
  196. data/sig/unit_test/spy.rbs +0 -8
  197. data/sig/unit_test/type_assertions.rbs +11 -0
  198. data/src/ast.c +410 -153
  199. data/src/lexer.c +1392 -1313
  200. data/src/lexer.re +3 -0
  201. data/src/lexstate.c +58 -37
  202. data/src/location.c +8 -48
  203. data/src/parser.c +977 -516
  204. data/src/string.c +0 -48
  205. data/src/util/rbs_allocator.c +89 -71
  206. data/src/util/rbs_assert.c +1 -1
  207. data/src/util/rbs_buffer.c +2 -2
  208. data/src/util/rbs_constant_pool.c +10 -14
  209. data/src/util/rbs_encoding.c +4 -8
  210. data/src/util/rbs_unescape.c +56 -20
  211. data/stdlib/bigdecimal/0/big_decimal.rbs +116 -98
  212. data/stdlib/bigdecimal-math/0/big_math.rbs +169 -8
  213. data/stdlib/cgi/0/core.rbs +9 -393
  214. data/stdlib/cgi/0/manifest.yaml +1 -0
  215. data/stdlib/cgi-escape/0/escape.rbs +171 -0
  216. data/stdlib/coverage/0/coverage.rbs +7 -4
  217. data/stdlib/date/0/date.rbs +92 -79
  218. data/stdlib/date/0/date_time.rbs +25 -24
  219. data/stdlib/delegate/0/delegator.rbs +10 -7
  220. data/stdlib/did_you_mean/0/did_you_mean.rbs +17 -16
  221. data/stdlib/digest/0/digest.rbs +110 -0
  222. data/stdlib/erb/0/erb.rbs +748 -347
  223. data/stdlib/etc/0/etc.rbs +55 -50
  224. data/stdlib/fileutils/0/fileutils.rbs +158 -139
  225. data/stdlib/forwardable/0/forwardable.rbs +13 -10
  226. data/stdlib/io-console/0/io-console.rbs +2 -2
  227. data/stdlib/json/0/json.rbs +217 -136
  228. data/stdlib/monitor/0/monitor.rbs +3 -3
  229. data/stdlib/net-http/0/net-http.rbs +162 -134
  230. data/stdlib/objspace/0/objspace.rbs +17 -34
  231. data/stdlib/open-uri/0/open-uri.rbs +48 -8
  232. data/stdlib/open3/0/open3.rbs +469 -10
  233. data/stdlib/openssl/0/openssl.rbs +475 -357
  234. data/stdlib/optparse/0/optparse.rbs +26 -17
  235. data/stdlib/pathname/0/pathname.rbs +11 -1381
  236. data/stdlib/pp/0/pp.rbs +9 -8
  237. data/stdlib/prettyprint/0/prettyprint.rbs +7 -7
  238. data/stdlib/pstore/0/pstore.rbs +35 -30
  239. data/stdlib/psych/0/psych.rbs +65 -12
  240. data/stdlib/psych/0/store.rbs +2 -4
  241. data/stdlib/pty/0/pty.rbs +9 -6
  242. data/stdlib/random-formatter/0/random-formatter.rbs +277 -0
  243. data/stdlib/rdoc/0/code_object.rbs +2 -1
  244. data/stdlib/rdoc/0/parser.rbs +1 -1
  245. data/stdlib/rdoc/0/rdoc.rbs +1 -1
  246. data/stdlib/rdoc/0/store.rbs +1 -1
  247. data/stdlib/resolv/0/resolv.rbs +25 -68
  248. data/stdlib/ripper/0/ripper.rbs +22 -19
  249. data/stdlib/securerandom/0/manifest.yaml +2 -0
  250. data/stdlib/securerandom/0/securerandom.rbs +7 -20
  251. data/stdlib/shellwords/0/shellwords.rbs +2 -2
  252. data/stdlib/singleton/0/singleton.rbs +3 -0
  253. data/stdlib/socket/0/addrinfo.rbs +7 -7
  254. data/stdlib/socket/0/basic_socket.rbs +3 -3
  255. data/stdlib/socket/0/ip_socket.rbs +10 -8
  256. data/stdlib/socket/0/socket.rbs +23 -10
  257. data/stdlib/socket/0/tcp_server.rbs +1 -1
  258. data/stdlib/socket/0/tcp_socket.rbs +11 -3
  259. data/stdlib/socket/0/udp_socket.rbs +1 -1
  260. data/stdlib/socket/0/unix_server.rbs +1 -1
  261. data/stdlib/stringio/0/stringio.rbs +1177 -85
  262. data/stdlib/strscan/0/string_scanner.rbs +27 -25
  263. data/stdlib/tempfile/0/tempfile.rbs +25 -21
  264. data/stdlib/time/0/time.rbs +8 -6
  265. data/stdlib/timeout/0/timeout.rbs +63 -7
  266. data/stdlib/tsort/0/cyclic.rbs +3 -0
  267. data/stdlib/tsort/0/tsort.rbs +7 -6
  268. data/stdlib/uri/0/common.rbs +42 -20
  269. data/stdlib/uri/0/file.rbs +3 -3
  270. data/stdlib/uri/0/generic.rbs +26 -18
  271. data/stdlib/uri/0/http.rbs +2 -2
  272. data/stdlib/uri/0/ldap.rbs +2 -2
  273. data/stdlib/uri/0/mailto.rbs +3 -3
  274. data/stdlib/uri/0/rfc2396_parser.rbs +12 -12
  275. data/stdlib/zlib/0/deflate.rbs +4 -3
  276. data/stdlib/zlib/0/gzip_reader.rbs +6 -6
  277. data/stdlib/zlib/0/gzip_writer.rbs +14 -12
  278. data/stdlib/zlib/0/inflate.rbs +1 -1
  279. data/stdlib/zlib/0/need_dict.rbs +1 -1
  280. data/stdlib/zlib/0/zstream.rbs +1 -0
  281. metadata +50 -6
data/src/string.c CHANGED
@@ -5,54 +5,6 @@
5
5
  #include <stdio.h>
6
6
  #include <ctype.h>
7
7
 
8
- unsigned int rbs_utf8_string_to_codepoint(const rbs_string_t string) {
9
- unsigned int codepoint = 0;
10
- int remaining_bytes = 0;
11
-
12
- const char *s = string.start;
13
- const char *end = string.end;
14
-
15
- if (s >= end) return 0; // End of string
16
-
17
- if ((*s & 0x80) == 0) {
18
- // Single byte character (0xxxxxxx)
19
- return *s;
20
- } else if ((*s & 0xE0) == 0xC0) {
21
- // Two byte character (110xxxxx 10xxxxxx)
22
- codepoint = *s & 0x1F;
23
- remaining_bytes = 1;
24
- } else if ((*s & 0xF0) == 0xE0) {
25
- // Three byte character (1110xxxx 10xxxxxx 10xxxxxx)
26
- codepoint = *s & 0x0F;
27
- remaining_bytes = 2;
28
- } else if ((*s & 0xF8) == 0xF0) {
29
- // Four byte character (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
30
- codepoint = *s & 0x07;
31
- remaining_bytes = 3;
32
- } else {
33
- // Invalid UTF-8 sequence
34
- return 0xFFFD; // Unicode replacement character
35
- }
36
-
37
- s++;
38
- while (remaining_bytes > 0 && s < end) {
39
- if ((*s & 0xC0) != 0x80) {
40
- // Invalid continuation byte
41
- return 0xFFFD;
42
- }
43
- codepoint = (codepoint << 6) | (*s & 0x3F);
44
- s++;
45
- remaining_bytes--;
46
- }
47
-
48
- if (remaining_bytes > 0) {
49
- // Incomplete sequence
50
- return 0xFFFD;
51
- }
52
-
53
- return codepoint;
54
- }
55
-
56
8
  rbs_string_t rbs_string_new(const char *start, const char *end) {
57
9
  return (rbs_string_t) {
58
10
  .start = start,
@@ -3,6 +3,14 @@
3
3
  *
4
4
  * A simple arena allocator that can be freed all at once.
5
5
  *
6
+ * This allocator maintains a linked list of pages, which come in two flavours:
7
+ * 1. Small allocation pages, which are the same size as the system page size.
8
+ * 2. Large allocation pages, which are the exact size requested, for sizes greater than the small page size.
9
+ *
10
+ * Small allocations always fit into the unused space at the end of the "head" page. If there isn't enough room, a new
11
+ * page is allocated, and the small allocation is placed at its start. This approach wastes that unused slack at the
12
+ * end of the previous page, but it means that allocations are instant and never scan the linked list to find a gap.
13
+ *
6
14
  * This allocator doesn't support freeing individual allocations. Only the whole arena can be freed at once at the end.
7
15
  */
8
16
 
@@ -20,16 +28,19 @@
20
28
  #include <unistd.h>
21
29
  #include <sys/types.h>
22
30
  #include <sys/mman.h>
31
+ #include <fcntl.h>
23
32
  #endif
24
33
 
25
- #if defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__sun)
26
- #define MAP_ANONYMOUS MAP_ANON
27
- #endif
34
+ typedef struct rbs_allocator_page {
35
+ // The previously allocated page, or NULL if this is the first page.
36
+ struct rbs_allocator_page *next;
28
37
 
29
- struct rbs_allocator {
30
- uintptr_t heap_ptr;
31
- uintptr_t size;
32
- };
38
+ // The size of the payload in bytes.
39
+ size_t size;
40
+
41
+ // The offset of the next available byte.
42
+ size_t used;
43
+ } rbs_allocator_page_t;
33
44
 
34
45
  static size_t get_system_page_size(void) {
35
46
  #ifdef _WIN32
@@ -43,73 +54,43 @@ static size_t get_system_page_size(void) {
43
54
  #endif
44
55
  }
45
56
 
46
- static void *map_memory(size_t size) {
47
- #ifdef _WIN32
48
- LPVOID result = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
49
- rbs_assert(result != NULL, "VirtualAlloc failed");
50
- #else
51
- void *result = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
52
- rbs_assert(result != MAP_FAILED, "mmap failed");
53
- #endif
54
- return result;
57
+ static inline uintptr_t rbs_align_up_uintptr(uintptr_t value, size_t alignment) {
58
+ // alignment must be a non-zero power of two
59
+ RBS_ASSERT(alignment != 0 && (alignment & (alignment - 1)) == 0, "alignment must be a non-zero power of two. alignment: %zu", alignment);
60
+ return (value + (alignment - 1)) & ~(uintptr_t) (alignment - 1);
55
61
  }
56
62
 
57
- static void destroy_memory(void *memory, size_t size) {
58
- #ifdef _WIN32
59
- VirtualFree(memory, 0, MEM_RELEASE);
60
- #else
61
- munmap(memory, size);
62
- #endif
63
- }
63
+ static rbs_allocator_page_t *rbs_allocator_page_new(size_t payload_size) {
64
+ const size_t page_header_size = sizeof(rbs_allocator_page_t);
64
65
 
65
- static void guard_page(void *memory, size_t page_size) {
66
- #ifdef _WIN32
67
- DWORD old_protect_;
68
- BOOL result = VirtualProtect(memory, page_size, PAGE_NOACCESS, &old_protect_);
69
- rbs_assert(result != 0, "VirtualProtect failed");
70
- #else
71
- int result = mprotect(memory, page_size, PROT_NONE);
72
- rbs_assert(result == 0, "mprotect failed");
73
- #endif
74
- }
66
+ rbs_allocator_page_t *page = (rbs_allocator_page_t *) malloc(page_header_size + payload_size);
67
+ page->size = payload_size;
68
+ page->used = 0;
75
69
 
76
- static size_t rbs_allocator_default_mem(void) {
77
- size_t kib = 1024;
78
- size_t mib = kib * 1024;
79
- size_t gib = mib * 1024;
80
- return 4 * gib;
70
+ return page;
81
71
  }
82
72
 
83
- static inline bool is_power_of_two(uintptr_t value) {
84
- return value > 0 && (value & (value - 1)) == 0;
85
- }
73
+ rbs_allocator_t *rbs_allocator_init(void) {
74
+ rbs_allocator_t *allocator = (rbs_allocator_t *) malloc(sizeof(rbs_allocator_t));
86
75
 
87
- // Align `val' to nearest multiple of `alignment'.
88
- static uintptr_t align(uintptr_t size, uintptr_t alignment) {
89
- rbs_assert(is_power_of_two(alignment), "alignment is not a power of two");
90
- return (size + alignment - 1) & ~(alignment - 1);
91
- }
76
+ const size_t system_page_size = get_system_page_size();
92
77
 
93
- rbs_allocator_t *rbs_allocator_init(void) {
94
- size_t size = rbs_allocator_default_mem();
95
- size_t page_size = get_system_page_size();
96
- size = align(size, page_size);
97
- void *mem = map_memory(size + page_size);
98
- // Guard page; remove range checks in alloc fast path and hard fail if we
99
- // consume all memory
100
- void *last_page = (char *) mem + size;
101
- guard_page(last_page, page_size);
102
- uintptr_t start = (uintptr_t) mem;
103
- rbs_allocator_t header = (rbs_allocator_t) {
104
- .heap_ptr = start + sizeof header,
105
- .size = size + page_size,
106
- };
107
- memcpy(mem, &header, sizeof header);
108
- return (rbs_allocator_t *) mem;
78
+ allocator->default_page_payload_size = system_page_size - sizeof(rbs_allocator_page_t);
79
+
80
+ allocator->page = rbs_allocator_page_new(allocator->default_page_payload_size);
81
+ allocator->page->next = NULL;
82
+
83
+ return allocator;
109
84
  }
110
85
 
111
86
  void rbs_allocator_free(rbs_allocator_t *allocator) {
112
- destroy_memory((void *) allocator, allocator->size);
87
+ rbs_allocator_page_t *page = allocator->page;
88
+ while (page) {
89
+ rbs_allocator_page_t *next = page->next;
90
+ free(page);
91
+ page = next;
92
+ }
93
+ free(allocator);
113
94
  }
114
95
 
115
96
  // Allocates `new_size` bytes from `allocator`, aligned to an `alignment`-byte boundary.
@@ -123,21 +104,58 @@ void *rbs_allocator_realloc_impl(rbs_allocator_t *allocator, void *ptr, size_t o
123
104
 
124
105
  // Allocates `size` bytes from `allocator`, aligned to an `alignment`-byte boundary.
125
106
  void *rbs_allocator_malloc_impl(rbs_allocator_t *allocator, size_t size, size_t alignment) {
126
- rbs_assert(size % alignment == 0, "size must be a multiple of the alignment. size: %zu, alignment: %zu", size, alignment);
127
- uintptr_t aligned = align(allocator->heap_ptr, alignment);
128
- allocator->heap_ptr = aligned + size;
129
- return (void *) aligned;
107
+ if (allocator->default_page_payload_size < size) { // Big allocation, give it its own page.
108
+ // Add padding to ensure we can align the start pointer within this page
109
+ rbs_allocator_page_t *new_page = rbs_allocator_page_new(size + (alignment - 1));
110
+
111
+ // This simple allocator can only put small allocations into the head page.
112
+ // Naively prepending this large allocation page to the head of the allocator before the previous head page
113
+ // would waste the remaining space in the head page.
114
+ // So instead, we'll splice in the large page *after* the head page.
115
+ //
116
+ // +-------+ +-----------+ +-----------+
117
+ // | arena | | head page | | new_page |
118
+ // |-------| |-----------+ |-----------+
119
+ // | *page |--->| size | +--->| size | +---> ... previous tail
120
+ // +-------+ | offset | | | offset | |
121
+ // | *next ----+---+ | *next ----+---+
122
+ // | ... | | ... |
123
+ // +-----------+ +-----------+
124
+ //
125
+ new_page->next = allocator->page->next;
126
+ allocator->page->next = new_page;
127
+
128
+ uintptr_t base = (uintptr_t) new_page + sizeof(rbs_allocator_page_t);
129
+ uintptr_t aligned_ptr = rbs_align_up_uintptr(base, alignment);
130
+ return (void *) aligned_ptr;
131
+ }
132
+
133
+ rbs_allocator_page_t *page = allocator->page;
134
+ uintptr_t base = (uintptr_t) page + sizeof(rbs_allocator_page_t);
135
+
136
+ // Compute aligned offset within the payload
137
+ size_t used_aligned = (size_t) (rbs_align_up_uintptr(base + page->used, alignment) - base);
138
+
139
+ if (used_aligned + size > page->size) {
140
+ // Not enough space. Allocate a new small page and prepend it to the allocator's linked list.
141
+ rbs_allocator_page_t *new_page = rbs_allocator_page_new(allocator->default_page_payload_size);
142
+ new_page->next = allocator->page;
143
+ allocator->page = new_page;
144
+ page = new_page;
145
+ base = (uintptr_t) page + sizeof(rbs_allocator_page_t);
146
+ used_aligned = (size_t) (rbs_align_up_uintptr(base, alignment) - base); // start of fresh page (usually 0 if header is aligned)
147
+ }
148
+
149
+ uintptr_t pointer = base + used_aligned;
150
+ page->used = used_aligned + size;
151
+ return (void *) pointer;
130
152
  }
131
153
 
132
154
  // Note: This will eagerly fill with zeroes, unlike `calloc()` which can map a page in a page to be zeroed lazily.
133
155
  // It's assumed that callers to this function will immediately write to the allocated memory, anyway.
134
156
  void *rbs_allocator_calloc_impl(rbs_allocator_t *allocator, size_t count, size_t size, size_t alignment) {
135
157
  void *p = rbs_allocator_malloc_many_impl(allocator, count, size, alignment);
136
- #if defined(__linux__)
137
- // mmap with MAP_ANONYMOUS gives zero-filled pages.
138
- #else
139
158
  memset(p, 0, count * size);
140
- #endif
141
159
  return p;
142
160
  }
143
161
 
@@ -5,7 +5,7 @@
5
5
  #include <stdlib.h>
6
6
  #include <stdbool.h>
7
7
 
8
- void rbs_assert(bool condition, const char *fmt, ...) {
8
+ void rbs_assert_impl(bool condition, const char *fmt, ...) {
9
9
  if (condition) {
10
10
  return;
11
11
  }
@@ -25,7 +25,7 @@ void rbs_buffer_append_string(rbs_allocator_t *allocator, rbs_buffer_t *buffer,
25
25
  if (next_length > buffer->capacity) {
26
26
  size_t old_capacity = buffer->capacity;
27
27
 
28
- rbs_assert(old_capacity != 0, "Precondition: capacity must be at least 1. Got %zu", old_capacity);
28
+ RBS_ASSERT(old_capacity != 0, "Precondition: capacity must be at least 1. Got %zu", old_capacity);
29
29
 
30
30
  size_t new_capacity = buffer->capacity * 2;
31
31
 
@@ -34,7 +34,7 @@ void rbs_buffer_append_string(rbs_allocator_t *allocator, rbs_buffer_t *buffer,
34
34
  }
35
35
 
36
36
  char *new_value = rbs_allocator_realloc(allocator, buffer->value, old_capacity, new_capacity, char);
37
- rbs_assert(new_value != NULL, "Failed to append to buffer. Old capacity: %zu, new capacity: %zu", old_capacity, new_capacity);
37
+ RBS_ASSERT(new_value != NULL, "Failed to append to buffer. Old capacity: %zu, new capacity: %zu", old_capacity, new_capacity);
38
38
 
39
39
  buffer->value = new_value;
40
40
  buffer->capacity = new_capacity;
@@ -37,7 +37,7 @@ next_power_of_two(uint32_t v) {
37
37
  return v;
38
38
  }
39
39
 
40
- static bool is_power_of_two(uint32_t size) {
40
+ RBS_ATTRIBUTE_UNUSED static bool is_power_of_two(uint32_t size) {
41
41
  return (size & (size - 1)) == 0;
42
42
  }
43
43
 
@@ -46,7 +46,7 @@ static bool is_power_of_two(uint32_t size) {
46
46
  */
47
47
  static inline bool
48
48
  rbs_constant_pool_resize(rbs_constant_pool_t *pool) {
49
- rbs_assert(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
49
+ RBS_ASSERT(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
50
50
 
51
51
  uint32_t next_capacity = pool->capacity * 2;
52
52
  if (next_capacity < pool->capacity) return false;
@@ -57,8 +57,8 @@ rbs_constant_pool_resize(rbs_constant_pool_t *pool) {
57
57
  void *next = calloc(next_capacity, element_size);
58
58
  if (next == NULL) return false;
59
59
 
60
- rbs_constant_pool_bucket_t *next_buckets = next;
61
- rbs_constant_t *next_constants = (void *) (((char *) next) + next_capacity * sizeof(rbs_constant_pool_bucket_t));
60
+ rbs_constant_pool_bucket_t *next_buckets = (rbs_constant_pool_bucket_t *) next;
61
+ rbs_constant_t *next_constants = (rbs_constant_t *) (((char *) next) + next_capacity * sizeof(rbs_constant_pool_bucket_t));
62
62
 
63
63
  // For each bucket in the current constant pool, find the index in the
64
64
  // next constant pool, and insert it.
@@ -95,10 +95,6 @@ rbs_constant_pool_resize(rbs_constant_pool_t *pool) {
95
95
  return true;
96
96
  }
97
97
 
98
- // This storage is initialized by `Init_rbs_extension()` in `main.c`.
99
- static rbs_constant_pool_t RBS_GLOBAL_CONSTANT_POOL_STORAGE = { 0 };
100
- rbs_constant_pool_t *RBS_GLOBAL_CONSTANT_POOL = &RBS_GLOBAL_CONSTANT_POOL_STORAGE;
101
-
102
98
  /**
103
99
  * Initialize a new constant pool with a given capacity.
104
100
  */
@@ -111,8 +107,8 @@ bool rbs_constant_pool_init(rbs_constant_pool_t *pool, uint32_t capacity) {
111
107
  void *memory = calloc(capacity, element_size);
112
108
  if (memory == NULL) return false;
113
109
 
114
- pool->buckets = memory;
115
- pool->constants = (void *) (((char *) memory) + capacity * sizeof(rbs_constant_pool_bucket_t));
110
+ pool->buckets = (rbs_constant_pool_bucket_t *) memory;
111
+ pool->constants = (rbs_constant_t *) (((char *) memory) + capacity * sizeof(rbs_constant_pool_bucket_t));
116
112
  pool->size = 0;
117
113
  pool->capacity = capacity;
118
114
  return true;
@@ -123,7 +119,7 @@ bool rbs_constant_pool_init(rbs_constant_pool_t *pool, uint32_t capacity) {
123
119
  */
124
120
  rbs_constant_t *
125
121
  rbs_constant_pool_id_to_constant(const rbs_constant_pool_t *pool, rbs_constant_id_t constant_id) {
126
- rbs_assert(constant_id != RBS_CONSTANT_ID_UNSET && constant_id <= pool->size, "constant_id is not valid. Got %i, pool->size: %i", constant_id, pool->size);
122
+ RBS_ASSERT(constant_id != RBS_CONSTANT_ID_UNSET && constant_id <= pool->size, "constant_id is not valid. Got %i, pool->size: %i", constant_id, pool->size);
127
123
  return &pool->constants[constant_id - 1];
128
124
  }
129
125
 
@@ -133,7 +129,7 @@ rbs_constant_pool_id_to_constant(const rbs_constant_pool_t *pool, rbs_constant_i
133
129
  */
134
130
  rbs_constant_id_t
135
131
  rbs_constant_pool_find(const rbs_constant_pool_t *pool, const uint8_t *start, size_t length) {
136
- rbs_assert(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
132
+ RBS_ASSERT(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
137
133
  const uint32_t mask = pool->capacity - 1;
138
134
 
139
135
  uint32_t hash = rbs_constant_pool_hash(start, length);
@@ -161,7 +157,7 @@ rbs_constant_pool_insert(rbs_constant_pool_t *pool, const uint8_t *start, size_t
161
157
  if (!rbs_constant_pool_resize(pool)) return RBS_CONSTANT_ID_UNSET;
162
158
  }
163
159
 
164
- rbs_assert(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
160
+ RBS_ASSERT(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
165
161
  const uint32_t mask = pool->capacity - 1;
166
162
 
167
163
  uint32_t hash = rbs_constant_pool_hash(start, length);
@@ -202,7 +198,7 @@ rbs_constant_pool_insert(rbs_constant_pool_t *pool, const uint8_t *start, size_t
202
198
  // IDs are allocated starting at 1, since the value 0 denotes a non-existent
203
199
  // constant.
204
200
  uint32_t id = ++pool->size;
205
- rbs_assert(pool->size < ((uint32_t) (1 << 30)), "pool->size is too large. Got %i", pool->size);
201
+ RBS_ASSERT(pool->size < ((uint32_t) (1 << 30)), "pool->size is too large. Got %i", pool->size);
206
202
 
207
203
  *bucket = (rbs_constant_pool_bucket_t) {
208
204
  .id = (unsigned int) (id & 0x3fffffff),
@@ -3,12 +3,6 @@
3
3
 
4
4
  #include <ctype.h>
5
5
 
6
- #if defined(__GNUC__)
7
- #define RBS_ATTRIBUTE_UNUSED __attribute__((unused))
8
- #else
9
- #define RBS_ATTRIBUTE_UNUSED
10
- #endif
11
-
12
6
  typedef uint32_t rbs_unicode_codepoint_t;
13
7
 
14
8
  #define UNICODE_ALPHA_CODEPOINTS_LENGTH 1450
@@ -4620,6 +4614,7 @@ rbs_unicode_codepoint_match(rbs_unicode_codepoint_t codepoint, const rbs_unicode
4620
4614
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
4621
4615
  * SOFTWARE.
4622
4616
  */
4617
+ // clang-format off
4623
4618
  static const uint8_t rbs_utf_8_dfa[] = {
4624
4619
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..1f
4625
4620
  0,
@@ -4991,6 +4986,7 @@ static const uint8_t rbs_utf_8_dfa[] = {
4991
4986
  1,
4992
4987
  1, // s7..s8
4993
4988
  };
4989
+ // clang-format on
4994
4990
 
4995
4991
  /**
4996
4992
  * Given a pointer to a string and the number of bytes remaining in the string,
@@ -4999,7 +4995,7 @@ static const uint8_t rbs_utf_8_dfa[] = {
4999
4995
  */
5000
4996
  static rbs_unicode_codepoint_t
5001
4997
  rbs_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
5002
- rbs_assert(n >= 0, "n must be greater than or equal to 0. Got %ti", n);
4998
+ RBS_ASSERT(n >= 0, "[rbs_unicode_codepoint_t] n must be greater than or equal to 0. Got %ti", n);
5003
4999
 
5004
5000
  size_t maximum = (n > 4) ? 4 : ((size_t) n);
5005
5001
  uint32_t codepoint;
@@ -5029,7 +5025,7 @@ rbs_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
5029
5025
  */
5030
5026
  size_t
5031
5027
  rbs_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
5032
- rbs_assert(n >= 0, "n must be greater than or equal to 0. Got %ti", n);
5028
+ RBS_ASSERT(n >= 0, "[rbs_encoding_utf_8_char_width] n must be greater than or equal to 0. Got %ti", n);
5033
5029
 
5034
5030
  size_t maximum = (n > 4) ? 4 : ((size_t) n);
5035
5031
  uint32_t state = 0;
@@ -1,4 +1,5 @@
1
1
  #include "rbs/util/rbs_unescape.h"
2
+ #include "rbs/util/rbs_encoding.h"
2
3
  #include <string.h>
3
4
  #include <stdlib.h>
4
5
  #include <ctype.h>
@@ -42,20 +43,44 @@ static int octal_to_int(const char *octal, int length) {
42
43
  return result;
43
44
  }
44
45
 
45
- int rbs_utf8_codelen(unsigned int c) {
46
- if (c <= 0x7F) return 1;
47
- if (c <= 0x7FF) return 2;
48
- if (c <= 0xFFFF) return 3;
49
- if (c <= 0x10FFFF) return 4;
50
- return 1; // Invalid Unicode codepoint, treat as 1 byte
46
+ // Fills buf starting at index 'start' with the UTF-8 encoding of 'codepoint'.
47
+ // Returns the number of bytes written, or 0 when the output is not changed.
48
+ //
49
+ size_t rbs_utf8_fill_codepoint(char *buf, size_t start, size_t end, unsigned int codepoint) {
50
+ if (start + 4 > end) {
51
+ return 0;
52
+ }
53
+
54
+ if (codepoint <= 0x7F) {
55
+ buf[start] = codepoint & 0x7F;
56
+ return 1;
57
+ } else if (codepoint <= 0x7FF) {
58
+ buf[start + 0] = 0xC0 | ((codepoint >> 6) & 0x1F);
59
+ buf[start + 1] = 0x80 | (codepoint & 0x3F);
60
+ return 2;
61
+ } else if (codepoint <= 0xFFFF) {
62
+ buf[start + 0] = 0xE0 | ((codepoint >> 12) & 0x0F);
63
+ buf[start + 1] = 0x80 | ((codepoint >> 6) & 0x3F);
64
+ buf[start + 2] = 0x80 | (codepoint & 0x3F);
65
+ return 3;
66
+ } else if (codepoint <= 0x10FFFF) {
67
+ buf[start + 0] = 0xF0 | ((codepoint >> 18) & 0x07);
68
+ buf[start + 1] = 0x80 | ((codepoint >> 12) & 0x3F);
69
+ buf[start + 2] = 0x80 | ((codepoint >> 6) & 0x3F);
70
+ buf[start + 3] = 0x80 | (codepoint & 0x3F);
71
+ return 4;
72
+ } else {
73
+ return 0;
74
+ }
51
75
  }
52
76
 
53
- rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t string, bool is_double_quote) {
77
+ rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t string, bool is_double_quote, bool is_unicode) {
54
78
  if (!string.start) return RBS_STRING_NULL;
55
79
 
56
80
  size_t len = string.end - string.start;
57
81
  const char *input = string.start;
58
82
 
83
+ // The output cannot be longer than the input even after unescaping.
59
84
  char *output = rbs_allocator_alloc_many(allocator, len + 1, char);
60
85
  if (!output) return RBS_STRING_NULL;
61
86
 
@@ -79,9 +104,21 @@ rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t stri
79
104
  i += hex_len + 2;
80
105
  } else if (input[i + 1] == 'u' && i + 5 < len) {
81
106
  // Unicode escape
82
- int value = hex_to_int(input + i + 2, 4);
83
- output[j++] = (char) value;
84
- i += 6;
107
+
108
+ if (is_unicode) {
109
+ // The UTF-8 representation is at most 4 bytes, shorter than the input length.
110
+ int value = hex_to_int(input + i + 2, 4);
111
+ j += rbs_utf8_fill_codepoint(output, j, len + 1, value);
112
+ i += 6;
113
+ } else {
114
+ // Copy the escape sequence as-is
115
+ output[j++] = input[i++];
116
+ output[j++] = input[i++];
117
+ output[j++] = input[i++];
118
+ output[j++] = input[i++];
119
+ output[j++] = input[i++];
120
+ output[j++] = input[i++];
121
+ }
85
122
  } else {
86
123
  // Other escapes
87
124
  int found = 0;
@@ -114,18 +151,17 @@ rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t stri
114
151
  return rbs_string_new(output, output + j);
115
152
  }
116
153
 
117
- rbs_string_t rbs_unquote_string(rbs_allocator_t *allocator, rbs_string_t input) {
118
- unsigned int first_char = rbs_utf8_string_to_codepoint(input);
119
- size_t byte_length = rbs_string_len(input);
154
+ rbs_string_t rbs_unquote_string(rbs_allocator_t *allocator, rbs_string_t input, const rbs_encoding_t *encoding) {
155
+ unsigned int first_char = input.start[0];
156
+
157
+ const char *new_start = input.start;
158
+ const char *new_end = input.end;
120
159
 
121
- ptrdiff_t start_offset = 0;
122
160
  if (first_char == '"' || first_char == '\'' || first_char == '`') {
123
- int bs = rbs_utf8_codelen(first_char);
124
- start_offset += bs;
125
- byte_length -= 2 * bs;
161
+ new_start += 1;
162
+ new_end -= 1;
126
163
  }
127
164
 
128
- const char *new_start = input.start + start_offset;
129
- rbs_string_t string = rbs_string_new(new_start, new_start + byte_length);
130
- return unescape_string(allocator, string, first_char == '"');
165
+ rbs_string_t string = rbs_string_new(new_start, new_end);
166
+ return unescape_string(allocator, string, first_char == '"', encoding == RBS_ENCODING_UTF_8_ENTRY);
131
167
  }