rbs 4.0.0.dev.4 → 4.0.0.dev.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +14 -14
  3. data/.github/workflows/bundle-update.yml +60 -0
  4. data/.github/workflows/c-check.yml +11 -8
  5. data/.github/workflows/comments.yml +3 -3
  6. data/.github/workflows/dependabot.yml +1 -1
  7. data/.github/workflows/ruby.yml +17 -34
  8. data/.github/workflows/typecheck.yml +2 -2
  9. data/.github/workflows/valgrind.yml +42 -0
  10. data/.github/workflows/windows.yml +2 -2
  11. data/.rubocop.yml +1 -1
  12. data/README.md +1 -1
  13. data/Rakefile +32 -5
  14. data/config.yml +46 -0
  15. data/core/array.rbs +96 -46
  16. data/core/binding.rbs +0 -2
  17. data/core/builtin.rbs +2 -2
  18. data/core/comparable.rbs +13 -6
  19. data/core/complex.rbs +55 -41
  20. data/core/dir.rbs +4 -4
  21. data/core/encoding.rbs +7 -10
  22. data/core/enumerable.rbs +90 -3
  23. data/core/enumerator/arithmetic_sequence.rbs +70 -0
  24. data/core/enumerator.rbs +63 -1
  25. data/core/errno.rbs +8 -0
  26. data/core/errors.rbs +28 -1
  27. data/core/exception.rbs +2 -2
  28. data/core/fiber.rbs +40 -20
  29. data/core/file.rbs +108 -78
  30. data/core/file_test.rbs +1 -1
  31. data/core/float.rbs +225 -69
  32. data/core/gc.rbs +417 -281
  33. data/core/hash.rbs +1023 -727
  34. data/core/integer.rbs +104 -110
  35. data/core/io/buffer.rbs +21 -10
  36. data/core/io/wait.rbs +11 -33
  37. data/core/io.rbs +82 -19
  38. data/core/kernel.rbs +70 -59
  39. data/core/marshal.rbs +1 -1
  40. data/core/match_data.rbs +1 -1
  41. data/core/math.rbs +42 -3
  42. data/core/method.rbs +63 -27
  43. data/core/module.rbs +103 -26
  44. data/core/nil_class.rbs +3 -3
  45. data/core/numeric.rbs +43 -35
  46. data/core/object.rbs +3 -3
  47. data/core/object_space.rbs +21 -15
  48. data/core/pathname.rbs +1272 -0
  49. data/core/proc.rbs +30 -25
  50. data/core/process.rbs +4 -2
  51. data/core/ractor.rbs +361 -509
  52. data/core/random.rbs +17 -0
  53. data/core/range.rbs +113 -16
  54. data/core/rational.rbs +56 -85
  55. data/core/rbs/unnamed/argf.rbs +2 -2
  56. data/core/rbs/unnamed/env_class.rbs +1 -1
  57. data/core/rbs/unnamed/random.rbs +4 -113
  58. data/core/regexp.rbs +25 -20
  59. data/core/ruby.rbs +53 -0
  60. data/core/ruby_vm.rbs +6 -4
  61. data/core/rubygems/errors.rbs +3 -70
  62. data/core/rubygems/rubygems.rbs +11 -79
  63. data/core/rubygems/version.rbs +2 -3
  64. data/core/set.rbs +488 -359
  65. data/core/signal.rbs +24 -14
  66. data/core/string.rbs +3171 -1241
  67. data/core/struct.rbs +1 -1
  68. data/core/symbol.rbs +17 -11
  69. data/core/thread.rbs +95 -33
  70. data/core/time.rbs +35 -9
  71. data/core/trace_point.rbs +7 -4
  72. data/core/unbound_method.rbs +14 -6
  73. data/docs/aliases.md +79 -0
  74. data/docs/collection.md +2 -2
  75. data/docs/encoding.md +56 -0
  76. data/docs/gem.md +0 -1
  77. data/docs/inline.md +470 -0
  78. data/docs/sigs.md +3 -3
  79. data/docs/syntax.md +33 -4
  80. data/docs/type_fingerprint.md +21 -0
  81. data/exe/rbs +1 -1
  82. data/ext/rbs_extension/ast_translation.c +77 -3
  83. data/ext/rbs_extension/ast_translation.h +3 -0
  84. data/ext/rbs_extension/class_constants.c +8 -2
  85. data/ext/rbs_extension/class_constants.h +4 -0
  86. data/ext/rbs_extension/extconf.rb +5 -1
  87. data/ext/rbs_extension/legacy_location.c +5 -5
  88. data/ext/rbs_extension/main.c +37 -20
  89. data/include/rbs/ast.h +85 -38
  90. data/include/rbs/defines.h +27 -0
  91. data/include/rbs/lexer.h +30 -11
  92. data/include/rbs/parser.h +6 -6
  93. data/include/rbs/string.h +0 -2
  94. data/include/rbs/util/rbs_allocator.h +34 -13
  95. data/include/rbs/util/rbs_assert.h +12 -1
  96. data/include/rbs/util/rbs_encoding.h +2 -0
  97. data/include/rbs/util/rbs_unescape.h +2 -1
  98. data/lib/rbs/ast/annotation.rb +1 -1
  99. data/lib/rbs/ast/comment.rb +1 -1
  100. data/lib/rbs/ast/declarations.rb +10 -10
  101. data/lib/rbs/ast/members.rb +14 -14
  102. data/lib/rbs/ast/ruby/annotations.rb +137 -0
  103. data/lib/rbs/ast/ruby/comment_block.rb +24 -0
  104. data/lib/rbs/ast/ruby/declarations.rb +198 -3
  105. data/lib/rbs/ast/ruby/helpers/constant_helper.rb +4 -0
  106. data/lib/rbs/ast/ruby/members.rb +159 -1
  107. data/lib/rbs/ast/type_param.rb +24 -4
  108. data/lib/rbs/buffer.rb +20 -15
  109. data/lib/rbs/cli/diff.rb +16 -15
  110. data/lib/rbs/cli/validate.rb +38 -51
  111. data/lib/rbs/cli.rb +52 -19
  112. data/lib/rbs/collection/config/lockfile_generator.rb +8 -0
  113. data/lib/rbs/collection/sources/git.rb +1 -0
  114. data/lib/rbs/definition.rb +1 -1
  115. data/lib/rbs/definition_builder/ancestor_builder.rb +62 -9
  116. data/lib/rbs/definition_builder/method_builder.rb +20 -0
  117. data/lib/rbs/definition_builder.rb +91 -2
  118. data/lib/rbs/diff.rb +7 -1
  119. data/lib/rbs/environment.rb +227 -74
  120. data/lib/rbs/environment_loader.rb +0 -6
  121. data/lib/rbs/errors.rb +27 -7
  122. data/lib/rbs/inline_parser.rb +341 -5
  123. data/lib/rbs/location_aux.rb +1 -1
  124. data/lib/rbs/locator.rb +5 -1
  125. data/lib/rbs/method_type.rb +5 -3
  126. data/lib/rbs/parser_aux.rb +2 -2
  127. data/lib/rbs/prototype/rb.rb +2 -2
  128. data/lib/rbs/prototype/rbi.rb +2 -0
  129. data/lib/rbs/prototype/runtime.rb +8 -0
  130. data/lib/rbs/resolver/constant_resolver.rb +2 -2
  131. data/lib/rbs/resolver/type_name_resolver.rb +116 -38
  132. data/lib/rbs/subtractor.rb +3 -1
  133. data/lib/rbs/test/type_check.rb +16 -2
  134. data/lib/rbs/type_name.rb +1 -1
  135. data/lib/rbs/types.rb +27 -27
  136. data/lib/rbs/validator.rb +2 -2
  137. data/lib/rbs/version.rb +1 -1
  138. data/lib/rbs.rb +1 -1
  139. data/lib/rdoc/discover.rb +1 -1
  140. data/lib/rdoc_plugin/parser.rb +1 -1
  141. data/rbs.gemspec +3 -2
  142. data/schema/typeParam.json +17 -1
  143. data/sig/ast/ruby/annotations.rbs +124 -0
  144. data/sig/ast/ruby/comment_block.rbs +8 -0
  145. data/sig/ast/ruby/declarations.rbs +102 -4
  146. data/sig/ast/ruby/members.rbs +87 -1
  147. data/sig/cli/diff.rbs +5 -11
  148. data/sig/cli/validate.rbs +13 -4
  149. data/sig/cli.rbs +18 -18
  150. data/sig/definition.rbs +6 -1
  151. data/sig/environment.rbs +70 -12
  152. data/sig/errors.rbs +13 -6
  153. data/sig/inline_parser.rbs +39 -2
  154. data/sig/locator.rbs +0 -2
  155. data/sig/manifest.yaml +0 -1
  156. data/sig/method_builder.rbs +3 -1
  157. data/sig/method_types.rbs +1 -1
  158. data/sig/parser.rbs +16 -2
  159. data/sig/resolver/type_name_resolver.rbs +35 -7
  160. data/sig/source.rbs +3 -3
  161. data/sig/type_param.rbs +13 -8
  162. data/sig/types.rbs +4 -4
  163. data/src/ast.c +80 -1
  164. data/src/lexer.c +1392 -1313
  165. data/src/lexer.re +3 -0
  166. data/src/lexstate.c +58 -37
  167. data/src/location.c +4 -4
  168. data/src/parser.c +412 -145
  169. data/src/string.c +0 -48
  170. data/src/util/rbs_allocator.c +89 -71
  171. data/src/util/rbs_assert.c +1 -1
  172. data/src/util/rbs_buffer.c +2 -2
  173. data/src/util/rbs_constant_pool.c +10 -10
  174. data/src/util/rbs_encoding.c +4 -8
  175. data/src/util/rbs_unescape.c +56 -20
  176. data/stdlib/bigdecimal/0/big_decimal.rbs +100 -82
  177. data/stdlib/bigdecimal-math/0/big_math.rbs +169 -8
  178. data/stdlib/cgi/0/core.rbs +9 -393
  179. data/stdlib/cgi/0/manifest.yaml +1 -0
  180. data/stdlib/cgi-escape/0/escape.rbs +171 -0
  181. data/stdlib/coverage/0/coverage.rbs +3 -1
  182. data/stdlib/date/0/date.rbs +67 -59
  183. data/stdlib/date/0/date_time.rbs +1 -1
  184. data/stdlib/delegate/0/delegator.rbs +10 -7
  185. data/stdlib/digest/0/digest.rbs +110 -0
  186. data/stdlib/erb/0/erb.rbs +737 -347
  187. data/stdlib/fileutils/0/fileutils.rbs +20 -14
  188. data/stdlib/forwardable/0/forwardable.rbs +3 -0
  189. data/stdlib/json/0/json.rbs +82 -28
  190. data/stdlib/net-http/0/net-http.rbs +3 -0
  191. data/stdlib/objspace/0/objspace.rbs +9 -27
  192. data/stdlib/open-uri/0/open-uri.rbs +40 -0
  193. data/stdlib/open3/0/open3.rbs +459 -1
  194. data/stdlib/openssl/0/openssl.rbs +331 -228
  195. data/stdlib/optparse/0/optparse.rbs +8 -3
  196. data/stdlib/pathname/0/pathname.rbs +9 -1379
  197. data/stdlib/psych/0/psych.rbs +4 -4
  198. data/stdlib/random-formatter/0/random-formatter.rbs +277 -0
  199. data/stdlib/rdoc/0/code_object.rbs +2 -1
  200. data/stdlib/rdoc/0/parser.rbs +1 -1
  201. data/stdlib/rdoc/0/rdoc.rbs +1 -1
  202. data/stdlib/rdoc/0/store.rbs +1 -1
  203. data/stdlib/resolv/0/resolv.rbs +25 -68
  204. data/stdlib/ripper/0/ripper.rbs +2 -2
  205. data/stdlib/securerandom/0/manifest.yaml +2 -0
  206. data/stdlib/securerandom/0/securerandom.rbs +6 -19
  207. data/stdlib/singleton/0/singleton.rbs +3 -0
  208. data/stdlib/socket/0/socket.rbs +13 -1
  209. data/stdlib/socket/0/tcp_socket.rbs +10 -2
  210. data/stdlib/stringio/0/stringio.rbs +1176 -85
  211. data/stdlib/strscan/0/string_scanner.rbs +31 -31
  212. data/stdlib/tempfile/0/tempfile.rbs +3 -3
  213. data/stdlib/time/0/time.rbs +1 -1
  214. data/stdlib/timeout/0/timeout.rbs +63 -7
  215. data/stdlib/tsort/0/cyclic.rbs +3 -0
  216. data/stdlib/uri/0/common.rbs +16 -2
  217. data/stdlib/uri/0/file.rbs +1 -1
  218. data/stdlib/uri/0/generic.rbs +24 -16
  219. data/stdlib/uri/0/rfc2396_parser.rbs +6 -7
  220. data/stdlib/zlib/0/gzip_reader.rbs +2 -2
  221. data/stdlib/zlib/0/gzip_writer.rbs +1 -1
  222. data/stdlib/zlib/0/zstream.rbs +1 -0
  223. metadata +30 -4
data/src/string.c CHANGED
@@ -5,54 +5,6 @@
5
5
  #include <stdio.h>
6
6
  #include <ctype.h>
7
7
 
8
- unsigned int rbs_utf8_string_to_codepoint(const rbs_string_t string) {
9
- unsigned int codepoint = 0;
10
- int remaining_bytes = 0;
11
-
12
- const char *s = string.start;
13
- const char *end = string.end;
14
-
15
- if (s >= end) return 0; // End of string
16
-
17
- if ((*s & 0x80) == 0) {
18
- // Single byte character (0xxxxxxx)
19
- return *s;
20
- } else if ((*s & 0xE0) == 0xC0) {
21
- // Two byte character (110xxxxx 10xxxxxx)
22
- codepoint = *s & 0x1F;
23
- remaining_bytes = 1;
24
- } else if ((*s & 0xF0) == 0xE0) {
25
- // Three byte character (1110xxxx 10xxxxxx 10xxxxxx)
26
- codepoint = *s & 0x0F;
27
- remaining_bytes = 2;
28
- } else if ((*s & 0xF8) == 0xF0) {
29
- // Four byte character (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
30
- codepoint = *s & 0x07;
31
- remaining_bytes = 3;
32
- } else {
33
- // Invalid UTF-8 sequence
34
- return 0xFFFD; // Unicode replacement character
35
- }
36
-
37
- s++;
38
- while (remaining_bytes > 0 && s < end) {
39
- if ((*s & 0xC0) != 0x80) {
40
- // Invalid continuation byte
41
- return 0xFFFD;
42
- }
43
- codepoint = (codepoint << 6) | (*s & 0x3F);
44
- s++;
45
- remaining_bytes--;
46
- }
47
-
48
- if (remaining_bytes > 0) {
49
- // Incomplete sequence
50
- return 0xFFFD;
51
- }
52
-
53
- return codepoint;
54
- }
55
-
56
8
  rbs_string_t rbs_string_new(const char *start, const char *end) {
57
9
  return (rbs_string_t) {
58
10
  .start = start,
data/src/util/rbs_allocator.c CHANGED
@@ -3,6 +3,14 @@
3
3
  *
4
4
  * A simple arena allocator that can be freed all at once.
5
5
  *
6
+ * This allocator maintains a linked list of pages, which come in two flavours:
7
+ * 1. Small allocation pages, which are the same size as the system page size.
8
+ * 2. Large allocation pages, which are the exact size requested, for sizes greater than the small page size.
9
+ *
10
+ * Small allocations always fit into the unused space at the end of the "head" page. If there isn't enough room, a new
11
+ * page is allocated, and the small allocation is placed at its start. This approach wastes that unused slack at the
12
+ * end of the previous page, but it means that allocations are instant and never scan the linked list to find a gap.
13
+ *
6
14
  * This allocator doesn't support freeing individual allocations. Only the whole arena can be freed at once at the end.
7
15
  */
8
16
 
@@ -20,16 +28,19 @@
20
28
  #include <unistd.h>
21
29
  #include <sys/types.h>
22
30
  #include <sys/mman.h>
31
+ #include <fcntl.h>
23
32
  #endif
24
33
 
25
- #if defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__sun)
26
- #define MAP_ANONYMOUS MAP_ANON
27
- #endif
34
+ typedef struct rbs_allocator_page {
35
+ // The previously allocated page, or NULL if this is the first page.
36
+ struct rbs_allocator_page *next;
28
37
 
29
- struct rbs_allocator {
30
- uintptr_t heap_ptr;
31
- uintptr_t size;
32
- };
38
+ // The size of the payload in bytes.
39
+ size_t size;
40
+
41
+ // The offset of the next available byte.
42
+ size_t used;
43
+ } rbs_allocator_page_t;
33
44
 
34
45
  static size_t get_system_page_size(void) {
35
46
  #ifdef _WIN32
@@ -43,73 +54,43 @@ static size_t get_system_page_size(void) {
43
54
  #endif
44
55
  }
45
56
 
46
- static void *map_memory(size_t size) {
47
- #ifdef _WIN32
48
- LPVOID result = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
49
- rbs_assert(result != NULL, "VirtualAlloc failed");
50
- #else
51
- void *result = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
52
- rbs_assert(result != MAP_FAILED, "mmap failed");
53
- #endif
54
- return result;
57
+ static inline uintptr_t rbs_align_up_uintptr(uintptr_t value, size_t alignment) {
58
+ // alignment must be a non-zero power of two
59
+ RBS_ASSERT(alignment != 0 && (alignment & (alignment - 1)) == 0, "alignment must be a non-zero power of two. alignment: %zu", alignment);
60
+ return (value + (alignment - 1)) & ~(uintptr_t) (alignment - 1);
55
61
  }
56
62
 
57
- static void destroy_memory(void *memory, size_t size) {
58
- #ifdef _WIN32
59
- VirtualFree(memory, 0, MEM_RELEASE);
60
- #else
61
- munmap(memory, size);
62
- #endif
63
- }
63
+ static rbs_allocator_page_t *rbs_allocator_page_new(size_t payload_size) {
64
+ const size_t page_header_size = sizeof(rbs_allocator_page_t);
64
65
 
65
- static void guard_page(void *memory, size_t page_size) {
66
- #ifdef _WIN32
67
- DWORD old_protect_;
68
- BOOL result = VirtualProtect(memory, page_size, PAGE_NOACCESS, &old_protect_);
69
- rbs_assert(result != 0, "VirtualProtect failed");
70
- #else
71
- int result = mprotect(memory, page_size, PROT_NONE);
72
- rbs_assert(result == 0, "mprotect failed");
73
- #endif
74
- }
66
+ rbs_allocator_page_t *page = (rbs_allocator_page_t *) malloc(page_header_size + payload_size);
67
+ page->size = payload_size;
68
+ page->used = 0;
75
69
 
76
- static size_t rbs_allocator_default_mem(void) {
77
- size_t kib = 1024;
78
- size_t mib = kib * 1024;
79
- size_t gib = mib * 1024;
80
- return 4 * gib;
70
+ return page;
81
71
  }
82
72
 
83
- static inline bool is_power_of_two(uintptr_t value) {
84
- return value > 0 && (value & (value - 1)) == 0;
85
- }
73
+ rbs_allocator_t *rbs_allocator_init(void) {
74
+ rbs_allocator_t *allocator = (rbs_allocator_t *) malloc(sizeof(rbs_allocator_t));
86
75
 
87
- // Align `val' to nearest multiple of `alignment'.
88
- static uintptr_t align(uintptr_t size, uintptr_t alignment) {
89
- rbs_assert(is_power_of_two(alignment), "alignment is not a power of two");
90
- return (size + alignment - 1) & ~(alignment - 1);
91
- }
76
+ const size_t system_page_size = get_system_page_size();
92
77
 
93
- rbs_allocator_t *rbs_allocator_init(void) {
94
- size_t size = rbs_allocator_default_mem();
95
- size_t page_size = get_system_page_size();
96
- size = align(size, page_size);
97
- void *mem = map_memory(size + page_size);
98
- // Guard page; remove range checks in alloc fast path and hard fail if we
99
- // consume all memory
100
- void *last_page = (char *) mem + size;
101
- guard_page(last_page, page_size);
102
- uintptr_t start = (uintptr_t) mem;
103
- rbs_allocator_t header = (rbs_allocator_t) {
104
- .heap_ptr = start + sizeof header,
105
- .size = size + page_size,
106
- };
107
- memcpy(mem, &header, sizeof header);
108
- return (rbs_allocator_t *) mem;
78
+ allocator->default_page_payload_size = system_page_size - sizeof(rbs_allocator_page_t);
79
+
80
+ allocator->page = rbs_allocator_page_new(allocator->default_page_payload_size);
81
+ allocator->page->next = NULL;
82
+
83
+ return allocator;
109
84
  }
110
85
 
111
86
  void rbs_allocator_free(rbs_allocator_t *allocator) {
112
- destroy_memory((void *) allocator, allocator->size);
87
+ rbs_allocator_page_t *page = allocator->page;
88
+ while (page) {
89
+ rbs_allocator_page_t *next = page->next;
90
+ free(page);
91
+ page = next;
92
+ }
93
+ free(allocator);
113
94
  }
114
95
 
115
96
  // Allocates `new_size` bytes from `allocator`, aligned to an `alignment`-byte boundary.
@@ -123,21 +104,58 @@ void *rbs_allocator_realloc_impl(rbs_allocator_t *allocator, void *ptr, size_t o
123
104
 
124
105
  // Allocates `size` bytes from `allocator`, aligned to an `alignment`-byte boundary.
125
106
  void *rbs_allocator_malloc_impl(rbs_allocator_t *allocator, size_t size, size_t alignment) {
126
- rbs_assert(size % alignment == 0, "size must be a multiple of the alignment. size: %zu, alignment: %zu", size, alignment);
127
- uintptr_t aligned = align(allocator->heap_ptr, alignment);
128
- allocator->heap_ptr = aligned + size;
129
- return (void *) aligned;
107
+ if (allocator->default_page_payload_size < size) { // Big allocation, give it its own page.
108
+ // Add padding to ensure we can align the start pointer within this page
109
+ rbs_allocator_page_t *new_page = rbs_allocator_page_new(size + (alignment - 1));
110
+
111
+ // This simple allocator can only put small allocations into the head page.
112
+ // Naively prepending this large allocation page to the head of the allocator before the previous head page
113
+ // would waste the remaining space in the head page.
114
+ // So instead, we'll splice in the large page *after* the head page.
115
+ //
116
+ // +-------+ +-----------+ +-----------+
117
+ // | arena | | head page | | new_page |
118
+ // |-------| |-----------+ |-----------+
119
+ // | *page |--->| size | +--->| size | +---> ... previous tail
120
+ // +-------+ | offset | | | offset | |
121
+ // | *next ----+---+ | *next ----+---+
122
+ // | ... | | ... |
123
+ // +-----------+ +-----------+
124
+ //
125
+ new_page->next = allocator->page->next;
126
+ allocator->page->next = new_page;
127
+
128
+ uintptr_t base = (uintptr_t) new_page + sizeof(rbs_allocator_page_t);
129
+ uintptr_t aligned_ptr = rbs_align_up_uintptr(base, alignment);
130
+ return (void *) aligned_ptr;
131
+ }
132
+
133
+ rbs_allocator_page_t *page = allocator->page;
134
+ uintptr_t base = (uintptr_t) page + sizeof(rbs_allocator_page_t);
135
+
136
+ // Compute aligned offset within the payload
137
+ size_t used_aligned = (size_t) (rbs_align_up_uintptr(base + page->used, alignment) - base);
138
+
139
+ if (used_aligned + size > page->size) {
140
+ // Not enough space. Allocate a new small page and prepend it to the allocator's linked list.
141
+ rbs_allocator_page_t *new_page = rbs_allocator_page_new(allocator->default_page_payload_size);
142
+ new_page->next = allocator->page;
143
+ allocator->page = new_page;
144
+ page = new_page;
145
+ base = (uintptr_t) page + sizeof(rbs_allocator_page_t);
146
+ used_aligned = (size_t) (rbs_align_up_uintptr(base, alignment) - base); // start of fresh page (usually 0 if header is aligned)
147
+ }
148
+
149
+ uintptr_t pointer = base + used_aligned;
150
+ page->used = used_aligned + size;
151
+ return (void *) pointer;
130
152
  }
131
153
 
132
154
  // Note: This will eagerly fill with zeroes, unlike `calloc()` which can map a page in a page to be zeroed lazily.
133
155
  // It's assumed that callers to this function will immediately write to the allocated memory, anyway.
134
156
  void *rbs_allocator_calloc_impl(rbs_allocator_t *allocator, size_t count, size_t size, size_t alignment) {
135
157
  void *p = rbs_allocator_malloc_many_impl(allocator, count, size, alignment);
136
- #if defined(__linux__)
137
- // mmap with MAP_ANONYMOUS gives zero-filled pages.
138
- #else
139
158
  memset(p, 0, count * size);
140
- #endif
141
159
  return p;
142
160
  }
143
161
 
data/src/util/rbs_assert.c CHANGED
@@ -5,7 +5,7 @@
5
5
  #include <stdlib.h>
6
6
  #include <stdbool.h>
7
7
 
8
- void rbs_assert(bool condition, const char *fmt, ...) {
8
+ void rbs_assert_impl(bool condition, const char *fmt, ...) {
9
9
  if (condition) {
10
10
  return;
11
11
  }
data/src/util/rbs_buffer.c CHANGED
@@ -25,7 +25,7 @@ void rbs_buffer_append_string(rbs_allocator_t *allocator, rbs_buffer_t *buffer,
25
25
  if (next_length > buffer->capacity) {
26
26
  size_t old_capacity = buffer->capacity;
27
27
 
28
- rbs_assert(old_capacity != 0, "Precondition: capacity must be at least 1. Got %zu", old_capacity);
28
+ RBS_ASSERT(old_capacity != 0, "Precondition: capacity must be at least 1. Got %zu", old_capacity);
29
29
 
30
30
  size_t new_capacity = buffer->capacity * 2;
31
31
 
@@ -34,7 +34,7 @@ void rbs_buffer_append_string(rbs_allocator_t *allocator, rbs_buffer_t *buffer,
34
34
  }
35
35
 
36
36
  char *new_value = rbs_allocator_realloc(allocator, buffer->value, old_capacity, new_capacity, char);
37
- rbs_assert(new_value != NULL, "Failed to append to buffer. Old capacity: %zu, new capacity: %zu", old_capacity, new_capacity);
37
+ RBS_ASSERT(new_value != NULL, "Failed to append to buffer. Old capacity: %zu, new capacity: %zu", old_capacity, new_capacity);
38
38
 
39
39
  buffer->value = new_value;
40
40
  buffer->capacity = new_capacity;
data/src/util/rbs_constant_pool.c CHANGED
@@ -37,7 +37,7 @@ next_power_of_two(uint32_t v) {
37
37
  return v;
38
38
  }
39
39
 
40
- static bool is_power_of_two(uint32_t size) {
40
+ RBS_ATTRIBUTE_UNUSED static bool is_power_of_two(uint32_t size) {
41
41
  return (size & (size - 1)) == 0;
42
42
  }
43
43
 
@@ -46,7 +46,7 @@ static bool is_power_of_two(uint32_t size) {
46
46
  */
47
47
  static inline bool
48
48
  rbs_constant_pool_resize(rbs_constant_pool_t *pool) {
49
- rbs_assert(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
49
+ RBS_ASSERT(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
50
50
 
51
51
  uint32_t next_capacity = pool->capacity * 2;
52
52
  if (next_capacity < pool->capacity) return false;
@@ -57,8 +57,8 @@ rbs_constant_pool_resize(rbs_constant_pool_t *pool) {
57
57
  void *next = calloc(next_capacity, element_size);
58
58
  if (next == NULL) return false;
59
59
 
60
- rbs_constant_pool_bucket_t *next_buckets = next;
61
- rbs_constant_t *next_constants = (void *) (((char *) next) + next_capacity * sizeof(rbs_constant_pool_bucket_t));
60
+ rbs_constant_pool_bucket_t *next_buckets = (rbs_constant_pool_bucket_t *) next;
61
+ rbs_constant_t *next_constants = (rbs_constant_t *) (((char *) next) + next_capacity * sizeof(rbs_constant_pool_bucket_t));
62
62
 
63
63
  // For each bucket in the current constant pool, find the index in the
64
64
  // next constant pool, and insert it.
@@ -111,8 +111,8 @@ bool rbs_constant_pool_init(rbs_constant_pool_t *pool, uint32_t capacity) {
111
111
  void *memory = calloc(capacity, element_size);
112
112
  if (memory == NULL) return false;
113
113
 
114
- pool->buckets = memory;
115
- pool->constants = (void *) (((char *) memory) + capacity * sizeof(rbs_constant_pool_bucket_t));
114
+ pool->buckets = (rbs_constant_pool_bucket_t *) memory;
115
+ pool->constants = (rbs_constant_t *) (((char *) memory) + capacity * sizeof(rbs_constant_pool_bucket_t));
116
116
  pool->size = 0;
117
117
  pool->capacity = capacity;
118
118
  return true;
@@ -123,7 +123,7 @@ bool rbs_constant_pool_init(rbs_constant_pool_t *pool, uint32_t capacity) {
123
123
  */
124
124
  rbs_constant_t *
125
125
  rbs_constant_pool_id_to_constant(const rbs_constant_pool_t *pool, rbs_constant_id_t constant_id) {
126
- rbs_assert(constant_id != RBS_CONSTANT_ID_UNSET && constant_id <= pool->size, "constant_id is not valid. Got %i, pool->size: %i", constant_id, pool->size);
126
+ RBS_ASSERT(constant_id != RBS_CONSTANT_ID_UNSET && constant_id <= pool->size, "constant_id is not valid. Got %i, pool->size: %i", constant_id, pool->size);
127
127
  return &pool->constants[constant_id - 1];
128
128
  }
129
129
 
@@ -133,7 +133,7 @@ rbs_constant_pool_id_to_constant(const rbs_constant_pool_t *pool, rbs_constant_i
133
133
  */
134
134
  rbs_constant_id_t
135
135
  rbs_constant_pool_find(const rbs_constant_pool_t *pool, const uint8_t *start, size_t length) {
136
- rbs_assert(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
136
+ RBS_ASSERT(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
137
137
  const uint32_t mask = pool->capacity - 1;
138
138
 
139
139
  uint32_t hash = rbs_constant_pool_hash(start, length);
@@ -161,7 +161,7 @@ rbs_constant_pool_insert(rbs_constant_pool_t *pool, const uint8_t *start, size_t
161
161
  if (!rbs_constant_pool_resize(pool)) return RBS_CONSTANT_ID_UNSET;
162
162
  }
163
163
 
164
- rbs_assert(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
164
+ RBS_ASSERT(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
165
165
  const uint32_t mask = pool->capacity - 1;
166
166
 
167
167
  uint32_t hash = rbs_constant_pool_hash(start, length);
@@ -202,7 +202,7 @@ rbs_constant_pool_insert(rbs_constant_pool_t *pool, const uint8_t *start, size_t
202
202
  // IDs are allocated starting at 1, since the value 0 denotes a non-existent
203
203
  // constant.
204
204
  uint32_t id = ++pool->size;
205
- rbs_assert(pool->size < ((uint32_t) (1 << 30)), "pool->size is too large. Got %i", pool->size);
205
+ RBS_ASSERT(pool->size < ((uint32_t) (1 << 30)), "pool->size is too large. Got %i", pool->size);
206
206
 
207
207
  *bucket = (rbs_constant_pool_bucket_t) {
208
208
  .id = (unsigned int) (id & 0x3fffffff),
data/src/util/rbs_encoding.c CHANGED
@@ -3,12 +3,6 @@
3
3
 
4
4
  #include <ctype.h>
5
5
 
6
- #if defined(__GNUC__)
7
- #define RBS_ATTRIBUTE_UNUSED __attribute__((unused))
8
- #else
9
- #define RBS_ATTRIBUTE_UNUSED
10
- #endif
11
-
12
6
  typedef uint32_t rbs_unicode_codepoint_t;
13
7
 
14
8
  #define UNICODE_ALPHA_CODEPOINTS_LENGTH 1450
@@ -4620,6 +4614,7 @@ rbs_unicode_codepoint_match(rbs_unicode_codepoint_t codepoint, const rbs_unicode
4620
4614
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
4621
4615
  * SOFTWARE.
4622
4616
  */
4617
+ // clang-format off
4623
4618
  static const uint8_t rbs_utf_8_dfa[] = {
4624
4619
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..1f
4625
4620
  0,
@@ -4991,6 +4986,7 @@ static const uint8_t rbs_utf_8_dfa[] = {
4991
4986
  1,
4992
4987
  1, // s7..s8
4993
4988
  };
4989
+ // clang-format on
4994
4990
 
4995
4991
  /**
4996
4992
  * Given a pointer to a string and the number of bytes remaining in the string,
@@ -4999,7 +4995,7 @@ static const uint8_t rbs_utf_8_dfa[] = {
4999
4995
  */
5000
4996
  static rbs_unicode_codepoint_t
5001
4997
  rbs_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
5002
- rbs_assert(n >= 0, "n must be greater than or equal to 0. Got %ti", n);
4998
+ RBS_ASSERT(n >= 0, "[rbs_unicode_codepoint_t] n must be greater than or equal to 0. Got %ti", n);
5003
4999
 
5004
5000
  size_t maximum = (n > 4) ? 4 : ((size_t) n);
5005
5001
  uint32_t codepoint;
@@ -5029,7 +5025,7 @@ rbs_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
5029
5025
  */
5030
5026
  size_t
5031
5027
  rbs_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
5032
- rbs_assert(n >= 0, "n must be greater than or equal to 0. Got %ti", n);
5028
+ RBS_ASSERT(n >= 0, "[rbs_encoding_utf_8_char_width] n must be greater than or equal to 0. Got %ti", n);
5033
5029
 
5034
5030
  size_t maximum = (n > 4) ? 4 : ((size_t) n);
5035
5031
  uint32_t state = 0;
data/src/util/rbs_unescape.c CHANGED
@@ -1,4 +1,5 @@
1
1
  #include "rbs/util/rbs_unescape.h"
2
+ #include "rbs/util/rbs_encoding.h"
2
3
  #include <string.h>
3
4
  #include <stdlib.h>
4
5
  #include <ctype.h>
@@ -42,20 +43,44 @@ static int octal_to_int(const char *octal, int length) {
42
43
  return result;
43
44
  }
44
45
 
45
- int rbs_utf8_codelen(unsigned int c) {
46
- if (c <= 0x7F) return 1;
47
- if (c <= 0x7FF) return 2;
48
- if (c <= 0xFFFF) return 3;
49
- if (c <= 0x10FFFF) return 4;
50
- return 1; // Invalid Unicode codepoint, treat as 1 byte
46
+ // Fills buf starting at index 'start' with the UTF-8 encoding of 'codepoint'.
47
+ // Returns the number of bytes written, or 0 when the output is not changed.
48
+ //
49
+ size_t rbs_utf8_fill_codepoint(char *buf, size_t start, size_t end, unsigned int codepoint) {
50
+ if (start + 4 > end) {
51
+ return 0;
52
+ }
53
+
54
+ if (codepoint <= 0x7F) {
55
+ buf[start] = codepoint & 0x7F;
56
+ return 1;
57
+ } else if (codepoint <= 0x7FF) {
58
+ buf[start + 0] = 0xC0 | ((codepoint >> 6) & 0x1F);
59
+ buf[start + 1] = 0x80 | (codepoint & 0x3F);
60
+ return 2;
61
+ } else if (codepoint <= 0xFFFF) {
62
+ buf[start + 0] = 0xE0 | ((codepoint >> 12) & 0x0F);
63
+ buf[start + 1] = 0x80 | ((codepoint >> 6) & 0x3F);
64
+ buf[start + 2] = 0x80 | (codepoint & 0x3F);
65
+ return 3;
66
+ } else if (codepoint <= 0x10FFFF) {
67
+ buf[start + 0] = 0xF0 | ((codepoint >> 18) & 0x07);
68
+ buf[start + 1] = 0x80 | ((codepoint >> 12) & 0x3F);
69
+ buf[start + 2] = 0x80 | ((codepoint >> 6) & 0x3F);
70
+ buf[start + 3] = 0x80 | (codepoint & 0x3F);
71
+ return 4;
72
+ } else {
73
+ return 0;
74
+ }
51
75
  }
52
76
 
53
- rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t string, bool is_double_quote) {
77
+ rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t string, bool is_double_quote, bool is_unicode) {
54
78
  if (!string.start) return RBS_STRING_NULL;
55
79
 
56
80
  size_t len = string.end - string.start;
57
81
  const char *input = string.start;
58
82
 
83
+ // The output cannot be longer than the input even after unescaping.
59
84
  char *output = rbs_allocator_alloc_many(allocator, len + 1, char);
60
85
  if (!output) return RBS_STRING_NULL;
61
86
 
@@ -79,9 +104,21 @@ rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t stri
79
104
  i += hex_len + 2;
80
105
  } else if (input[i + 1] == 'u' && i + 5 < len) {
81
106
  // Unicode escape
82
- int value = hex_to_int(input + i + 2, 4);
83
- output[j++] = (char) value;
84
- i += 6;
107
+
108
+ if (is_unicode) {
109
+ // The UTF-8 representation is at most 4 bytes, shorter than the input length.
110
+ int value = hex_to_int(input + i + 2, 4);
111
+ j += rbs_utf8_fill_codepoint(output, j, len + 1, value);
112
+ i += 6;
113
+ } else {
114
+ // Copy the escape sequence as-is
115
+ output[j++] = input[i++];
116
+ output[j++] = input[i++];
117
+ output[j++] = input[i++];
118
+ output[j++] = input[i++];
119
+ output[j++] = input[i++];
120
+ output[j++] = input[i++];
121
+ }
85
122
  } else {
86
123
  // Other escapes
87
124
  int found = 0;
@@ -114,18 +151,17 @@ rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t stri
114
151
  return rbs_string_new(output, output + j);
115
152
  }
116
153
 
117
- rbs_string_t rbs_unquote_string(rbs_allocator_t *allocator, rbs_string_t input) {
118
- unsigned int first_char = rbs_utf8_string_to_codepoint(input);
119
- size_t byte_length = rbs_string_len(input);
154
+ rbs_string_t rbs_unquote_string(rbs_allocator_t *allocator, rbs_string_t input, const rbs_encoding_t *encoding) {
155
+ unsigned int first_char = input.start[0];
156
+
157
+ const char *new_start = input.start;
158
+ const char *new_end = input.end;
120
159
 
121
- ptrdiff_t start_offset = 0;
122
160
  if (first_char == '"' || first_char == '\'' || first_char == '`') {
123
- int bs = rbs_utf8_codelen(first_char);
124
- start_offset += bs;
125
- byte_length -= 2 * bs;
161
+ new_start += 1;
162
+ new_end -= 1;
126
163
  }
127
164
 
128
- const char *new_start = input.start + start_offset;
129
- rbs_string_t string = rbs_string_new(new_start, new_start + byte_length);
130
- return unescape_string(allocator, string, first_char == '"');
165
+ rbs_string_t string = rbs_string_new(new_start, new_end);
166
+ return unescape_string(allocator, string, first_char == '"', encoding == RBS_ENCODING_UTF_8_ENTRY);
131
167
  }