rbs 4.0.0.dev.4 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +14 -14
- data/.github/workflows/bundle-update.yml +60 -0
- data/.github/workflows/c-check.yml +18 -11
- data/.github/workflows/comments.yml +5 -3
- data/.github/workflows/dependabot.yml +2 -2
- data/.github/workflows/ruby.yml +27 -34
- data/.github/workflows/rust.yml +95 -0
- data/.github/workflows/typecheck.yml +2 -2
- data/.github/workflows/windows.yml +2 -2
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +323 -0
- data/README.md +1 -1
- data/Rakefile +43 -33
- data/Steepfile +1 -0
- data/config.yml +426 -24
- data/core/array.rbs +307 -227
- data/core/basic_object.rbs +9 -8
- data/core/binding.rbs +0 -2
- data/core/builtin.rbs +2 -2
- data/core/class.rbs +6 -5
- data/core/comparable.rbs +55 -34
- data/core/complex.rbs +104 -78
- data/core/dir.rbs +61 -49
- data/core/encoding.rbs +12 -15
- data/core/enumerable.rbs +179 -87
- data/core/enumerator/arithmetic_sequence.rbs +70 -0
- data/core/enumerator.rbs +65 -2
- data/core/errno.rbs +11 -2
- data/core/errors.rbs +58 -29
- data/core/exception.rbs +13 -13
- data/core/fiber.rbs +74 -54
- data/core/file.rbs +280 -177
- data/core/file_test.rbs +3 -3
- data/core/float.rbs +257 -92
- data/core/gc.rbs +425 -281
- data/core/hash.rbs +1045 -739
- data/core/integer.rbs +135 -137
- data/core/io/buffer.rbs +53 -42
- data/core/io/wait.rbs +13 -35
- data/core/io.rbs +192 -144
- data/core/kernel.rbs +216 -155
- data/core/marshal.rbs +4 -4
- data/core/match_data.rbs +15 -13
- data/core/math.rbs +107 -66
- data/core/method.rbs +69 -33
- data/core/module.rbs +244 -106
- data/core/nil_class.rbs +7 -6
- data/core/numeric.rbs +74 -63
- data/core/object.rbs +9 -11
- data/core/object_space.rbs +30 -23
- data/core/pathname.rbs +1322 -0
- data/core/proc.rbs +95 -58
- data/core/process.rbs +222 -202
- data/core/ractor.rbs +371 -515
- data/core/random.rbs +21 -3
- data/core/range.rbs +159 -57
- data/core/rational.rbs +60 -89
- data/core/rbs/unnamed/argf.rbs +60 -53
- data/core/rbs/unnamed/env_class.rbs +19 -14
- data/core/rbs/unnamed/main_class.rbs +123 -0
- data/core/rbs/unnamed/random.rbs +11 -118
- data/core/regexp.rbs +258 -214
- data/core/ruby.rbs +53 -0
- data/core/ruby_vm.rbs +38 -34
- data/core/rubygems/config_file.rbs +5 -5
- data/core/rubygems/errors.rbs +4 -71
- data/core/rubygems/requirement.rbs +5 -5
- data/core/rubygems/rubygems.rbs +16 -82
- data/core/rubygems/version.rbs +2 -3
- data/core/set.rbs +490 -360
- data/core/signal.rbs +26 -16
- data/core/string.rbs +3234 -1285
- data/core/struct.rbs +27 -26
- data/core/symbol.rbs +41 -34
- data/core/thread.rbs +135 -67
- data/core/time.rbs +81 -50
- data/core/trace_point.rbs +41 -35
- data/core/true_class.rbs +2 -2
- data/core/unbound_method.rbs +24 -16
- data/core/warning.rbs +7 -7
- data/docs/aliases.md +79 -0
- data/docs/collection.md +3 -3
- data/docs/config.md +171 -0
- data/docs/encoding.md +56 -0
- data/docs/gem.md +0 -1
- data/docs/inline.md +576 -0
- data/docs/sigs.md +3 -3
- data/docs/syntax.md +46 -16
- data/docs/type_fingerprint.md +21 -0
- data/exe/rbs +1 -1
- data/ext/rbs_extension/ast_translation.c +544 -116
- data/ext/rbs_extension/ast_translation.h +3 -0
- data/ext/rbs_extension/class_constants.c +16 -2
- data/ext/rbs_extension/class_constants.h +8 -0
- data/ext/rbs_extension/extconf.rb +5 -1
- data/ext/rbs_extension/legacy_location.c +33 -56
- data/ext/rbs_extension/legacy_location.h +37 -0
- data/ext/rbs_extension/main.c +44 -35
- data/include/rbs/ast.h +448 -173
- data/include/rbs/defines.h +27 -0
- data/include/rbs/lexer.h +30 -11
- data/include/rbs/location.h +25 -44
- data/include/rbs/parser.h +6 -6
- data/include/rbs/string.h +0 -2
- data/include/rbs/util/rbs_allocator.h +34 -13
- data/include/rbs/util/rbs_assert.h +12 -1
- data/include/rbs/util/rbs_constant_pool.h +0 -3
- data/include/rbs/util/rbs_encoding.h +2 -0
- data/include/rbs/util/rbs_unescape.h +2 -1
- data/include/rbs.h +8 -0
- data/lib/rbs/ast/annotation.rb +1 -1
- data/lib/rbs/ast/comment.rb +1 -1
- data/lib/rbs/ast/declarations.rb +10 -10
- data/lib/rbs/ast/members.rb +14 -14
- data/lib/rbs/ast/ruby/annotations.rb +293 -3
- data/lib/rbs/ast/ruby/comment_block.rb +24 -0
- data/lib/rbs/ast/ruby/declarations.rb +198 -3
- data/lib/rbs/ast/ruby/helpers/constant_helper.rb +4 -0
- data/lib/rbs/ast/ruby/members.rb +532 -22
- data/lib/rbs/ast/type_param.rb +24 -4
- data/lib/rbs/buffer.rb +20 -15
- data/lib/rbs/cli/diff.rb +16 -15
- data/lib/rbs/cli/validate.rb +38 -106
- data/lib/rbs/cli.rb +52 -19
- data/lib/rbs/collection/config/lockfile_generator.rb +14 -2
- data/lib/rbs/collection/sources/git.rb +1 -0
- data/lib/rbs/definition.rb +1 -1
- data/lib/rbs/definition_builder/ancestor_builder.rb +62 -9
- data/lib/rbs/definition_builder/method_builder.rb +20 -0
- data/lib/rbs/definition_builder.rb +147 -25
- data/lib/rbs/diff.rb +7 -1
- data/lib/rbs/environment.rb +227 -74
- data/lib/rbs/environment_loader.rb +0 -6
- data/lib/rbs/errors.rb +27 -18
- data/lib/rbs/inline_parser.rb +342 -6
- data/lib/rbs/location_aux.rb +1 -1
- data/lib/rbs/locator.rb +5 -1
- data/lib/rbs/method_type.rb +5 -3
- data/lib/rbs/parser_aux.rb +20 -7
- data/lib/rbs/prototype/helpers.rb +57 -0
- data/lib/rbs/prototype/rb.rb +3 -28
- data/lib/rbs/prototype/rbi.rb +3 -20
- data/lib/rbs/prototype/runtime.rb +8 -0
- data/lib/rbs/resolver/constant_resolver.rb +2 -2
- data/lib/rbs/resolver/type_name_resolver.rb +116 -38
- data/lib/rbs/subtractor.rb +3 -1
- data/lib/rbs/test/type_check.rb +19 -2
- data/lib/rbs/type_name.rb +1 -1
- data/lib/rbs/types.rb +88 -78
- data/lib/rbs/unit_test/type_assertions.rb +35 -8
- data/lib/rbs/validator.rb +2 -2
- data/lib/rbs/version.rb +1 -1
- data/lib/rbs.rb +1 -2
- data/lib/rdoc/discover.rb +1 -1
- data/lib/rdoc_plugin/parser.rb +1 -1
- data/rbs.gemspec +4 -3
- data/rust/.gitignore +1 -0
- data/rust/Cargo.lock +378 -0
- data/rust/Cargo.toml +7 -0
- data/rust/ruby-rbs/Cargo.toml +22 -0
- data/rust/ruby-rbs/build.rs +764 -0
- data/rust/ruby-rbs/examples/locations.rs +60 -0
- data/rust/ruby-rbs/src/lib.rs +1 -0
- data/rust/ruby-rbs/src/node/mod.rs +742 -0
- data/rust/ruby-rbs/tests/sanity.rs +47 -0
- data/rust/ruby-rbs/vendor/rbs/config.yml +1 -0
- data/rust/ruby-rbs-sys/Cargo.toml +23 -0
- data/rust/ruby-rbs-sys/build.rs +204 -0
- data/rust/ruby-rbs-sys/src/lib.rs +50 -0
- data/rust/ruby-rbs-sys/vendor/rbs/include +1 -0
- data/rust/ruby-rbs-sys/vendor/rbs/src +1 -0
- data/rust/ruby-rbs-sys/wrapper.h +1 -0
- data/schema/typeParam.json +17 -1
- data/sig/ast/ruby/annotations.rbs +315 -4
- data/sig/ast/ruby/comment_block.rbs +8 -0
- data/sig/ast/ruby/declarations.rbs +102 -4
- data/sig/ast/ruby/members.rbs +108 -2
- data/sig/cli/diff.rbs +5 -11
- data/sig/cli/validate.rbs +12 -8
- data/sig/cli.rbs +18 -18
- data/sig/definition.rbs +6 -1
- data/sig/definition_builder.rbs +2 -0
- data/sig/environment.rbs +70 -12
- data/sig/errors.rbs +13 -14
- data/sig/inline_parser.rbs +39 -2
- data/sig/locator.rbs +0 -2
- data/sig/manifest.yaml +0 -1
- data/sig/method_builder.rbs +3 -1
- data/sig/parser.rbs +31 -13
- data/sig/prototype/helpers.rbs +2 -0
- data/sig/resolver/type_name_resolver.rbs +35 -7
- data/sig/source.rbs +3 -3
- data/sig/type_param.rbs +13 -8
- data/sig/types.rbs +6 -7
- data/sig/unit_test/spy.rbs +0 -8
- data/sig/unit_test/type_assertions.rbs +11 -0
- data/src/ast.c +410 -153
- data/src/lexer.c +1392 -1313
- data/src/lexer.re +3 -0
- data/src/lexstate.c +58 -37
- data/src/location.c +8 -48
- data/src/parser.c +977 -516
- data/src/string.c +0 -48
- data/src/util/rbs_allocator.c +89 -71
- data/src/util/rbs_assert.c +1 -1
- data/src/util/rbs_buffer.c +2 -2
- data/src/util/rbs_constant_pool.c +10 -14
- data/src/util/rbs_encoding.c +4 -8
- data/src/util/rbs_unescape.c +56 -20
- data/stdlib/bigdecimal/0/big_decimal.rbs +116 -98
- data/stdlib/bigdecimal-math/0/big_math.rbs +169 -8
- data/stdlib/cgi/0/core.rbs +9 -393
- data/stdlib/cgi/0/manifest.yaml +1 -0
- data/stdlib/cgi-escape/0/escape.rbs +171 -0
- data/stdlib/coverage/0/coverage.rbs +7 -4
- data/stdlib/date/0/date.rbs +92 -79
- data/stdlib/date/0/date_time.rbs +25 -24
- data/stdlib/delegate/0/delegator.rbs +10 -7
- data/stdlib/did_you_mean/0/did_you_mean.rbs +17 -16
- data/stdlib/digest/0/digest.rbs +110 -0
- data/stdlib/erb/0/erb.rbs +748 -347
- data/stdlib/etc/0/etc.rbs +55 -50
- data/stdlib/fileutils/0/fileutils.rbs +158 -139
- data/stdlib/forwardable/0/forwardable.rbs +13 -10
- data/stdlib/io-console/0/io-console.rbs +2 -2
- data/stdlib/json/0/json.rbs +217 -136
- data/stdlib/monitor/0/monitor.rbs +3 -3
- data/stdlib/net-http/0/net-http.rbs +162 -134
- data/stdlib/objspace/0/objspace.rbs +17 -34
- data/stdlib/open-uri/0/open-uri.rbs +48 -8
- data/stdlib/open3/0/open3.rbs +469 -10
- data/stdlib/openssl/0/openssl.rbs +475 -357
- data/stdlib/optparse/0/optparse.rbs +26 -17
- data/stdlib/pathname/0/pathname.rbs +11 -1381
- data/stdlib/pp/0/pp.rbs +9 -8
- data/stdlib/prettyprint/0/prettyprint.rbs +7 -7
- data/stdlib/pstore/0/pstore.rbs +35 -30
- data/stdlib/psych/0/psych.rbs +65 -12
- data/stdlib/psych/0/store.rbs +2 -4
- data/stdlib/pty/0/pty.rbs +9 -6
- data/stdlib/random-formatter/0/random-formatter.rbs +277 -0
- data/stdlib/rdoc/0/code_object.rbs +2 -1
- data/stdlib/rdoc/0/parser.rbs +1 -1
- data/stdlib/rdoc/0/rdoc.rbs +1 -1
- data/stdlib/rdoc/0/store.rbs +1 -1
- data/stdlib/resolv/0/resolv.rbs +25 -68
- data/stdlib/ripper/0/ripper.rbs +22 -19
- data/stdlib/securerandom/0/manifest.yaml +2 -0
- data/stdlib/securerandom/0/securerandom.rbs +7 -20
- data/stdlib/shellwords/0/shellwords.rbs +2 -2
- data/stdlib/singleton/0/singleton.rbs +3 -0
- data/stdlib/socket/0/addrinfo.rbs +7 -7
- data/stdlib/socket/0/basic_socket.rbs +3 -3
- data/stdlib/socket/0/ip_socket.rbs +10 -8
- data/stdlib/socket/0/socket.rbs +23 -10
- data/stdlib/socket/0/tcp_server.rbs +1 -1
- data/stdlib/socket/0/tcp_socket.rbs +11 -3
- data/stdlib/socket/0/udp_socket.rbs +1 -1
- data/stdlib/socket/0/unix_server.rbs +1 -1
- data/stdlib/stringio/0/stringio.rbs +1177 -85
- data/stdlib/strscan/0/string_scanner.rbs +27 -25
- data/stdlib/tempfile/0/tempfile.rbs +25 -21
- data/stdlib/time/0/time.rbs +8 -6
- data/stdlib/timeout/0/timeout.rbs +63 -7
- data/stdlib/tsort/0/cyclic.rbs +3 -0
- data/stdlib/tsort/0/tsort.rbs +7 -6
- data/stdlib/uri/0/common.rbs +42 -20
- data/stdlib/uri/0/file.rbs +3 -3
- data/stdlib/uri/0/generic.rbs +26 -18
- data/stdlib/uri/0/http.rbs +2 -2
- data/stdlib/uri/0/ldap.rbs +2 -2
- data/stdlib/uri/0/mailto.rbs +3 -3
- data/stdlib/uri/0/rfc2396_parser.rbs +12 -12
- data/stdlib/zlib/0/deflate.rbs +4 -3
- data/stdlib/zlib/0/gzip_reader.rbs +6 -6
- data/stdlib/zlib/0/gzip_writer.rbs +14 -12
- data/stdlib/zlib/0/inflate.rbs +1 -1
- data/stdlib/zlib/0/need_dict.rbs +1 -1
- data/stdlib/zlib/0/zstream.rbs +1 -0
- metadata +50 -6
data/src/string.c
CHANGED
|
@@ -5,54 +5,6 @@
|
|
|
5
5
|
#include <stdio.h>
|
|
6
6
|
#include <ctype.h>
|
|
7
7
|
|
|
8
|
-
unsigned int rbs_utf8_string_to_codepoint(const rbs_string_t string) {
|
|
9
|
-
unsigned int codepoint = 0;
|
|
10
|
-
int remaining_bytes = 0;
|
|
11
|
-
|
|
12
|
-
const char *s = string.start;
|
|
13
|
-
const char *end = string.end;
|
|
14
|
-
|
|
15
|
-
if (s >= end) return 0; // End of string
|
|
16
|
-
|
|
17
|
-
if ((*s & 0x80) == 0) {
|
|
18
|
-
// Single byte character (0xxxxxxx)
|
|
19
|
-
return *s;
|
|
20
|
-
} else if ((*s & 0xE0) == 0xC0) {
|
|
21
|
-
// Two byte character (110xxxxx 10xxxxxx)
|
|
22
|
-
codepoint = *s & 0x1F;
|
|
23
|
-
remaining_bytes = 1;
|
|
24
|
-
} else if ((*s & 0xF0) == 0xE0) {
|
|
25
|
-
// Three byte character (1110xxxx 10xxxxxx 10xxxxxx)
|
|
26
|
-
codepoint = *s & 0x0F;
|
|
27
|
-
remaining_bytes = 2;
|
|
28
|
-
} else if ((*s & 0xF8) == 0xF0) {
|
|
29
|
-
// Four byte character (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
|
|
30
|
-
codepoint = *s & 0x07;
|
|
31
|
-
remaining_bytes = 3;
|
|
32
|
-
} else {
|
|
33
|
-
// Invalid UTF-8 sequence
|
|
34
|
-
return 0xFFFD; // Unicode replacement character
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
s++;
|
|
38
|
-
while (remaining_bytes > 0 && s < end) {
|
|
39
|
-
if ((*s & 0xC0) != 0x80) {
|
|
40
|
-
// Invalid continuation byte
|
|
41
|
-
return 0xFFFD;
|
|
42
|
-
}
|
|
43
|
-
codepoint = (codepoint << 6) | (*s & 0x3F);
|
|
44
|
-
s++;
|
|
45
|
-
remaining_bytes--;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
if (remaining_bytes > 0) {
|
|
49
|
-
// Incomplete sequence
|
|
50
|
-
return 0xFFFD;
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
return codepoint;
|
|
54
|
-
}
|
|
55
|
-
|
|
56
8
|
rbs_string_t rbs_string_new(const char *start, const char *end) {
|
|
57
9
|
return (rbs_string_t) {
|
|
58
10
|
.start = start,
|
data/src/util/rbs_allocator.c
CHANGED
|
@@ -3,6 +3,14 @@
|
|
|
3
3
|
*
|
|
4
4
|
* A simple arena allocator that can be freed all at once.
|
|
5
5
|
*
|
|
6
|
+
* This allocator maintains a linked list of pages, which come in two flavours:
|
|
7
|
+
* 1. Small allocation pages, which are the same size as the system page size.
|
|
8
|
+
* 2. Large allocation pages, which are the exact size requested, for sizes greater than the small page size.
|
|
9
|
+
*
|
|
10
|
+
* Small allocations always go into the unused space at the end of the "head" page. If there isn't enough room, a new
|
|
11
|
+
* page is allocated, and the small allocation is placed at its start. This approach wastes that unused slack at the
|
|
12
|
+
* end of the previous page, but it means that allocations are instant and never scan the linked list to find a gap.
|
|
13
|
+
*
|
|
6
14
|
* This allocator doesn't support freeing individual allocations. Only the whole arena can be freed at once at the end.
|
|
7
15
|
*/
|
|
8
16
|
|
|
@@ -20,16 +28,19 @@
|
|
|
20
28
|
#include <unistd.h>
|
|
21
29
|
#include <sys/types.h>
|
|
22
30
|
#include <sys/mman.h>
|
|
31
|
+
#include <fcntl.h>
|
|
23
32
|
#endif
|
|
24
33
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
34
|
+
typedef struct rbs_allocator_page {
|
|
35
|
+
// The next page in the arena's linked list (usually the page allocated before this one), or NULL if this is the last page in the list.
|
|
36
|
+
struct rbs_allocator_page *next;
|
|
28
37
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
38
|
+
// The size of the payload in bytes.
|
|
39
|
+
size_t size;
|
|
40
|
+
|
|
41
|
+
// The offset of the next available byte.
|
|
42
|
+
size_t used;
|
|
43
|
+
} rbs_allocator_page_t;
|
|
33
44
|
|
|
34
45
|
static size_t get_system_page_size(void) {
|
|
35
46
|
#ifdef _WIN32
|
|
@@ -43,73 +54,43 @@ static size_t get_system_page_size(void) {
|
|
|
43
54
|
#endif
|
|
44
55
|
}
|
|
45
56
|
|
|
46
|
-
static
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
#else
|
|
51
|
-
void *result = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
|
52
|
-
rbs_assert(result != MAP_FAILED, "mmap failed");
|
|
53
|
-
#endif
|
|
54
|
-
return result;
|
|
57
|
+
// Rounds `value` up to the nearest multiple of `alignment`.
// `alignment` must be a non-zero power of two; this is checked with RBS_ASSERT.
static inline uintptr_t rbs_align_up_uintptr(uintptr_t value, size_t alignment) {
    RBS_ASSERT(alignment != 0 && (alignment & (alignment - 1)) == 0, "alignment must be a non-zero power of two. alignment: %zu", alignment);

    // For a power of two, `alignment - 1` is a mask of the low bits that must end up clear.
    const size_t low_bits = alignment - 1;
    const uintptr_t bumped = value + low_bits;
    return bumped & ~(uintptr_t) low_bits;
}
|
|
56
62
|
|
|
57
|
-
static
|
|
58
|
-
|
|
59
|
-
VirtualFree(memory, 0, MEM_RELEASE);
|
|
60
|
-
#else
|
|
61
|
-
munmap(memory, size);
|
|
62
|
-
#endif
|
|
63
|
-
}
|
|
63
|
+
static rbs_allocator_page_t *rbs_allocator_page_new(size_t payload_size) {
|
|
64
|
+
const size_t page_header_size = sizeof(rbs_allocator_page_t);
|
|
64
65
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
BOOL result = VirtualProtect(memory, page_size, PAGE_NOACCESS, &old_protect_);
|
|
69
|
-
rbs_assert(result != 0, "VirtualProtect failed");
|
|
70
|
-
#else
|
|
71
|
-
int result = mprotect(memory, page_size, PROT_NONE);
|
|
72
|
-
rbs_assert(result == 0, "mprotect failed");
|
|
73
|
-
#endif
|
|
74
|
-
}
|
|
66
|
+
rbs_allocator_page_t *page = (rbs_allocator_page_t *) malloc(page_header_size + payload_size);
|
|
67
|
+
page->size = payload_size;
|
|
68
|
+
page->used = 0;
|
|
75
69
|
|
|
76
|
-
|
|
77
|
-
size_t kib = 1024;
|
|
78
|
-
size_t mib = kib * 1024;
|
|
79
|
-
size_t gib = mib * 1024;
|
|
80
|
-
return 4 * gib;
|
|
70
|
+
return page;
|
|
81
71
|
}
|
|
82
72
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
}
|
|
73
|
+
rbs_allocator_t *rbs_allocator_init(void) {
|
|
74
|
+
rbs_allocator_t *allocator = (rbs_allocator_t *) malloc(sizeof(rbs_allocator_t));
|
|
86
75
|
|
|
87
|
-
|
|
88
|
-
static uintptr_t align(uintptr_t size, uintptr_t alignment) {
|
|
89
|
-
rbs_assert(is_power_of_two(alignment), "alignment is not a power of two");
|
|
90
|
-
return (size + alignment - 1) & ~(alignment - 1);
|
|
91
|
-
}
|
|
76
|
+
const size_t system_page_size = get_system_page_size();
|
|
92
77
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
// consume all memory
|
|
100
|
-
void *last_page = (char *) mem + size;
|
|
101
|
-
guard_page(last_page, page_size);
|
|
102
|
-
uintptr_t start = (uintptr_t) mem;
|
|
103
|
-
rbs_allocator_t header = (rbs_allocator_t) {
|
|
104
|
-
.heap_ptr = start + sizeof header,
|
|
105
|
-
.size = size + page_size,
|
|
106
|
-
};
|
|
107
|
-
memcpy(mem, &header, sizeof header);
|
|
108
|
-
return (rbs_allocator_t *) mem;
|
|
78
|
+
allocator->default_page_payload_size = system_page_size - sizeof(rbs_allocator_page_t);
|
|
79
|
+
|
|
80
|
+
allocator->page = rbs_allocator_page_new(allocator->default_page_payload_size);
|
|
81
|
+
allocator->page->next = NULL;
|
|
82
|
+
|
|
83
|
+
return allocator;
|
|
109
84
|
}
|
|
110
85
|
|
|
111
86
|
void rbs_allocator_free(rbs_allocator_t *allocator) {
    // Walk the page list from the head, releasing each page.
    // The link is read before the page is freed, since it lives inside the page itself.
    for (rbs_allocator_page_t *cursor = allocator->page; cursor;) {
        rbs_allocator_page_t *following = cursor->next;
        free(cursor);
        cursor = following;
    }

    // Finally release the allocator record itself.
    free(allocator);
}
|
|
114
95
|
|
|
115
96
|
// Allocates `new_size` bytes from `allocator`, aligned to an `alignment`-byte boundary.
|
|
@@ -123,21 +104,58 @@ void *rbs_allocator_realloc_impl(rbs_allocator_t *allocator, void *ptr, size_t o
|
|
|
123
104
|
|
|
124
105
|
// Allocates `size` bytes from `allocator`, aligned to an `alignment`-byte boundary.
|
|
125
106
|
void *rbs_allocator_malloc_impl(rbs_allocator_t *allocator, size_t size, size_t alignment) {
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
107
|
+
if (allocator->default_page_payload_size < size) { // Big allocation, give it its own page.
|
|
108
|
+
// Add padding to ensure we can align the start pointer within this page
|
|
109
|
+
rbs_allocator_page_t *new_page = rbs_allocator_page_new(size + (alignment - 1));
|
|
110
|
+
|
|
111
|
+
// This simple allocator can only put small allocations into the head page.
|
|
112
|
+
// Naively prepending this large allocation page to the head of the allocator before the previous head page
|
|
113
|
+
// would waste the remaining space in the head page.
|
|
114
|
+
// So instead, we'll splice in the large page *after* the head page.
|
|
115
|
+
//
|
|
116
|
+
// +-------+ +-----------+ +-----------+
|
|
117
|
+
// | arena | | head page | | new_page |
|
|
118
|
+
// |-------| |-----------+ |-----------+
|
|
119
|
+
// | *page |--->| size | +--->| size | +---> ... previous tail
|
|
120
|
+
// +-------+ | offset | | | offset | |
|
|
121
|
+
// | *next ----+---+ | *next ----+---+
|
|
122
|
+
// | ... | | ... |
|
|
123
|
+
// +-----------+ +-----------+
|
|
124
|
+
//
|
|
125
|
+
new_page->next = allocator->page->next;
|
|
126
|
+
allocator->page->next = new_page;
|
|
127
|
+
|
|
128
|
+
uintptr_t base = (uintptr_t) new_page + sizeof(rbs_allocator_page_t);
|
|
129
|
+
uintptr_t aligned_ptr = rbs_align_up_uintptr(base, alignment);
|
|
130
|
+
return (void *) aligned_ptr;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
rbs_allocator_page_t *page = allocator->page;
|
|
134
|
+
uintptr_t base = (uintptr_t) page + sizeof(rbs_allocator_page_t);
|
|
135
|
+
|
|
136
|
+
// Compute aligned offset within the payload
|
|
137
|
+
size_t used_aligned = (size_t) (rbs_align_up_uintptr(base + page->used, alignment) - base);
|
|
138
|
+
|
|
139
|
+
if (used_aligned + size > page->size) {
|
|
140
|
+
// Not enough space. Allocate a new small page and prepend it to the allocator's linked list.
|
|
141
|
+
rbs_allocator_page_t *new_page = rbs_allocator_page_new(allocator->default_page_payload_size);
|
|
142
|
+
new_page->next = allocator->page;
|
|
143
|
+
allocator->page = new_page;
|
|
144
|
+
page = new_page;
|
|
145
|
+
base = (uintptr_t) page + sizeof(rbs_allocator_page_t);
|
|
146
|
+
used_aligned = (size_t) (rbs_align_up_uintptr(base, alignment) - base); // start of fresh page (usually 0 if header is aligned)
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
uintptr_t pointer = base + used_aligned;
|
|
150
|
+
page->used = used_aligned + size;
|
|
151
|
+
return (void *) pointer;
|
|
130
152
|
}
|
|
131
153
|
|
|
132
154
|
// Note: This will eagerly fill with zeroes, unlike `calloc()`, which can map in pages that are zeroed lazily.
|
|
133
155
|
// It's assumed that callers to this function will immediately write to the allocated memory, anyway.
|
|
134
156
|
void *rbs_allocator_calloc_impl(rbs_allocator_t *allocator, size_t count, size_t size, size_t alignment) {
|
|
135
157
|
void *p = rbs_allocator_malloc_many_impl(allocator, count, size, alignment);
|
|
136
|
-
#if defined(__linux__)
|
|
137
|
-
// mmap with MAP_ANONYMOUS gives zero-filled pages.
|
|
138
|
-
#else
|
|
139
158
|
memset(p, 0, count * size);
|
|
140
|
-
#endif
|
|
141
159
|
return p;
|
|
142
160
|
}
|
|
143
161
|
|
data/src/util/rbs_assert.c
CHANGED
data/src/util/rbs_buffer.c
CHANGED
|
@@ -25,7 +25,7 @@ void rbs_buffer_append_string(rbs_allocator_t *allocator, rbs_buffer_t *buffer,
|
|
|
25
25
|
if (next_length > buffer->capacity) {
|
|
26
26
|
size_t old_capacity = buffer->capacity;
|
|
27
27
|
|
|
28
|
-
|
|
28
|
+
RBS_ASSERT(old_capacity != 0, "Precondition: capacity must be at least 1. Got %zu", old_capacity);
|
|
29
29
|
|
|
30
30
|
size_t new_capacity = buffer->capacity * 2;
|
|
31
31
|
|
|
@@ -34,7 +34,7 @@ void rbs_buffer_append_string(rbs_allocator_t *allocator, rbs_buffer_t *buffer,
|
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
char *new_value = rbs_allocator_realloc(allocator, buffer->value, old_capacity, new_capacity, char);
|
|
37
|
-
|
|
37
|
+
RBS_ASSERT(new_value != NULL, "Failed to append to buffer. Old capacity: %zu, new capacity: %zu", old_capacity, new_capacity);
|
|
38
38
|
|
|
39
39
|
buffer->value = new_value;
|
|
40
40
|
buffer->capacity = new_capacity;
|
|
@@ -37,7 +37,7 @@ next_power_of_two(uint32_t v) {
|
|
|
37
37
|
return v;
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
-
static bool is_power_of_two(uint32_t size) {
|
|
40
|
+
RBS_ATTRIBUTE_UNUSED static bool is_power_of_two(uint32_t size) {
|
|
41
41
|
return (size & (size - 1)) == 0;
|
|
42
42
|
}
|
|
43
43
|
|
|
@@ -46,7 +46,7 @@ static bool is_power_of_two(uint32_t size) {
|
|
|
46
46
|
*/
|
|
47
47
|
static inline bool
|
|
48
48
|
rbs_constant_pool_resize(rbs_constant_pool_t *pool) {
|
|
49
|
-
|
|
49
|
+
RBS_ASSERT(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
|
|
50
50
|
|
|
51
51
|
uint32_t next_capacity = pool->capacity * 2;
|
|
52
52
|
if (next_capacity < pool->capacity) return false;
|
|
@@ -57,8 +57,8 @@ rbs_constant_pool_resize(rbs_constant_pool_t *pool) {
|
|
|
57
57
|
void *next = calloc(next_capacity, element_size);
|
|
58
58
|
if (next == NULL) return false;
|
|
59
59
|
|
|
60
|
-
rbs_constant_pool_bucket_t *next_buckets = next;
|
|
61
|
-
rbs_constant_t *next_constants = (
|
|
60
|
+
rbs_constant_pool_bucket_t *next_buckets = (rbs_constant_pool_bucket_t *) next;
|
|
61
|
+
rbs_constant_t *next_constants = (rbs_constant_t *) (((char *) next) + next_capacity * sizeof(rbs_constant_pool_bucket_t));
|
|
62
62
|
|
|
63
63
|
// For each bucket in the current constant pool, find the index in the
|
|
64
64
|
// next constant pool, and insert it.
|
|
@@ -95,10 +95,6 @@ rbs_constant_pool_resize(rbs_constant_pool_t *pool) {
|
|
|
95
95
|
return true;
|
|
96
96
|
}
|
|
97
97
|
|
|
98
|
-
// This storage is initialized by `Init_rbs_extension()` in `main.c`.
|
|
99
|
-
static rbs_constant_pool_t RBS_GLOBAL_CONSTANT_POOL_STORAGE = { 0 };
|
|
100
|
-
rbs_constant_pool_t *RBS_GLOBAL_CONSTANT_POOL = &RBS_GLOBAL_CONSTANT_POOL_STORAGE;
|
|
101
|
-
|
|
102
98
|
/**
|
|
103
99
|
* Initialize a new constant pool with a given capacity.
|
|
104
100
|
*/
|
|
@@ -111,8 +107,8 @@ bool rbs_constant_pool_init(rbs_constant_pool_t *pool, uint32_t capacity) {
|
|
|
111
107
|
void *memory = calloc(capacity, element_size);
|
|
112
108
|
if (memory == NULL) return false;
|
|
113
109
|
|
|
114
|
-
pool->buckets = memory;
|
|
115
|
-
pool->constants = (
|
|
110
|
+
pool->buckets = (rbs_constant_pool_bucket_t *) memory;
|
|
111
|
+
pool->constants = (rbs_constant_t *) (((char *) memory) + capacity * sizeof(rbs_constant_pool_bucket_t));
|
|
116
112
|
pool->size = 0;
|
|
117
113
|
pool->capacity = capacity;
|
|
118
114
|
return true;
|
|
@@ -123,7 +119,7 @@ bool rbs_constant_pool_init(rbs_constant_pool_t *pool, uint32_t capacity) {
|
|
|
123
119
|
*/
|
|
124
120
|
rbs_constant_t *
|
|
125
121
|
rbs_constant_pool_id_to_constant(const rbs_constant_pool_t *pool, rbs_constant_id_t constant_id) {
|
|
126
|
-
|
|
122
|
+
RBS_ASSERT(constant_id != RBS_CONSTANT_ID_UNSET && constant_id <= pool->size, "constant_id is not valid. Got %i, pool->size: %i", constant_id, pool->size);
|
|
127
123
|
return &pool->constants[constant_id - 1];
|
|
128
124
|
}
|
|
129
125
|
|
|
@@ -133,7 +129,7 @@ rbs_constant_pool_id_to_constant(const rbs_constant_pool_t *pool, rbs_constant_i
|
|
|
133
129
|
*/
|
|
134
130
|
rbs_constant_id_t
|
|
135
131
|
rbs_constant_pool_find(const rbs_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
|
136
|
-
|
|
132
|
+
RBS_ASSERT(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
|
|
137
133
|
const uint32_t mask = pool->capacity - 1;
|
|
138
134
|
|
|
139
135
|
uint32_t hash = rbs_constant_pool_hash(start, length);
|
|
@@ -161,7 +157,7 @@ rbs_constant_pool_insert(rbs_constant_pool_t *pool, const uint8_t *start, size_t
|
|
|
161
157
|
if (!rbs_constant_pool_resize(pool)) return RBS_CONSTANT_ID_UNSET;
|
|
162
158
|
}
|
|
163
159
|
|
|
164
|
-
|
|
160
|
+
RBS_ASSERT(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
|
|
165
161
|
const uint32_t mask = pool->capacity - 1;
|
|
166
162
|
|
|
167
163
|
uint32_t hash = rbs_constant_pool_hash(start, length);
|
|
@@ -202,7 +198,7 @@ rbs_constant_pool_insert(rbs_constant_pool_t *pool, const uint8_t *start, size_t
|
|
|
202
198
|
// IDs are allocated starting at 1, since the value 0 denotes a non-existent
|
|
203
199
|
// constant.
|
|
204
200
|
uint32_t id = ++pool->size;
|
|
205
|
-
|
|
201
|
+
RBS_ASSERT(pool->size < ((uint32_t) (1 << 30)), "pool->size is too large. Got %i", pool->size);
|
|
206
202
|
|
|
207
203
|
*bucket = (rbs_constant_pool_bucket_t) {
|
|
208
204
|
.id = (unsigned int) (id & 0x3fffffff),
|
data/src/util/rbs_encoding.c
CHANGED
|
@@ -3,12 +3,6 @@
|
|
|
3
3
|
|
|
4
4
|
#include <ctype.h>
|
|
5
5
|
|
|
6
|
-
#if defined(__GNUC__)
|
|
7
|
-
#define RBS_ATTRIBUTE_UNUSED __attribute__((unused))
|
|
8
|
-
#else
|
|
9
|
-
#define RBS_ATTRIBUTE_UNUSED
|
|
10
|
-
#endif
|
|
11
|
-
|
|
12
6
|
typedef uint32_t rbs_unicode_codepoint_t;
|
|
13
7
|
|
|
14
8
|
#define UNICODE_ALPHA_CODEPOINTS_LENGTH 1450
|
|
@@ -4620,6 +4614,7 @@ rbs_unicode_codepoint_match(rbs_unicode_codepoint_t codepoint, const rbs_unicode
|
|
|
4620
4614
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
4621
4615
|
* SOFTWARE.
|
|
4622
4616
|
*/
|
|
4617
|
+
// clang-format off
|
|
4623
4618
|
static const uint8_t rbs_utf_8_dfa[] = {
|
|
4624
4619
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..1f
|
|
4625
4620
|
0,
|
|
@@ -4991,6 +4986,7 @@ static const uint8_t rbs_utf_8_dfa[] = {
|
|
|
4991
4986
|
1,
|
|
4992
4987
|
1, // s7..s8
|
|
4993
4988
|
};
|
|
4989
|
+
// clang-format on
|
|
4994
4990
|
|
|
4995
4991
|
/**
|
|
4996
4992
|
* Given a pointer to a string and the number of bytes remaining in the string,
|
|
@@ -4999,7 +4995,7 @@ static const uint8_t rbs_utf_8_dfa[] = {
|
|
|
4999
4995
|
*/
|
|
5000
4996
|
static rbs_unicode_codepoint_t
|
|
5001
4997
|
rbs_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
|
|
5002
|
-
|
|
4998
|
+
RBS_ASSERT(n >= 0, "[rbs_unicode_codepoint_t] n must be greater than or equal to 0. Got %ti", n);
|
|
5003
4999
|
|
|
5004
5000
|
size_t maximum = (n > 4) ? 4 : ((size_t) n);
|
|
5005
5001
|
uint32_t codepoint;
|
|
@@ -5029,7 +5025,7 @@ rbs_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
|
|
|
5029
5025
|
*/
|
|
5030
5026
|
size_t
|
|
5031
5027
|
rbs_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
|
|
5032
|
-
|
|
5028
|
+
RBS_ASSERT(n >= 0, "[rbs_encoding_utf_8_char_width] n must be greater than or equal to 0. Got %ti", n);
|
|
5033
5029
|
|
|
5034
5030
|
size_t maximum = (n > 4) ? 4 : ((size_t) n);
|
|
5035
5031
|
uint32_t state = 0;
|
data/src/util/rbs_unescape.c
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#include "rbs/util/rbs_unescape.h"
|
|
2
|
+
#include "rbs/util/rbs_encoding.h"
|
|
2
3
|
#include <string.h>
|
|
3
4
|
#include <stdlib.h>
|
|
4
5
|
#include <ctype.h>
|
|
@@ -42,20 +43,44 @@ static int octal_to_int(const char *octal, int length) {
|
|
|
42
43
|
return result;
|
|
43
44
|
}
|
|
44
45
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
if (
|
|
50
|
-
|
|
46
|
+
// Fills buf starting at index 'start' with the UTF-8 encoding of 'codepoint'.
//
// 'end' is the exclusive upper bound of the writable indices of 'buf'.
//
// Returns the number of bytes written (1 to 4), or 0 when the output is not changed:
// either 'codepoint' is above U+10FFFF, or its encoding does not fit in [start, end).
//
// Surrogate code points (U+D800..U+DFFF) are not rejected; they are encoded with the 3-byte form.
size_t rbs_utf8_fill_codepoint(char *buf, size_t start, size_t end, unsigned int codepoint) {
    // Work out how many bytes this code point needs before looking at the available space,
    // so that short encodings are not refused just because fewer than 4 bytes remain.
    size_t width;
    if (codepoint <= 0x7F) {
        width = 1;
    } else if (codepoint <= 0x7FF) {
        width = 2;
    } else if (codepoint <= 0xFFFF) {
        width = 3;
    } else if (codepoint <= 0x10FFFF) {
        width = 4;
    } else {
        return 0;
    }

    // Written as a subtraction so the check cannot wrap around for large 'start' values.
    if (start > end || end - start < width) {
        return 0;
    }

    // Store through unsigned char so lead/continuation bytes (>= 0x80) are written without
    // an implementation-defined conversion to a possibly signed char.
    unsigned char *out = (unsigned char *) buf + start;

    switch (width) {
    case 1:
        out[0] = (unsigned char) (codepoint & 0x7F);
        break;
    case 2:
        out[0] = (unsigned char) (0xC0 | ((codepoint >> 6) & 0x1F));
        out[1] = (unsigned char) (0x80 | (codepoint & 0x3F));
        break;
    case 3:
        out[0] = (unsigned char) (0xE0 | ((codepoint >> 12) & 0x0F));
        out[1] = (unsigned char) (0x80 | ((codepoint >> 6) & 0x3F));
        out[2] = (unsigned char) (0x80 | (codepoint & 0x3F));
        break;
    default:
        out[0] = (unsigned char) (0xF0 | ((codepoint >> 18) & 0x07));
        out[1] = (unsigned char) (0x80 | ((codepoint >> 12) & 0x3F));
        out[2] = (unsigned char) (0x80 | ((codepoint >> 6) & 0x3F));
        out[3] = (unsigned char) (0x80 | (codepoint & 0x3F));
        break;
    }

    return width;
}
|
|
52
76
|
|
|
53
|
-
rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t string, bool is_double_quote) {
|
|
77
|
+
rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t string, bool is_double_quote, bool is_unicode) {
|
|
54
78
|
if (!string.start) return RBS_STRING_NULL;
|
|
55
79
|
|
|
56
80
|
size_t len = string.end - string.start;
|
|
57
81
|
const char *input = string.start;
|
|
58
82
|
|
|
83
|
+
// The output cannot be longer than the input even after unescaping.
|
|
59
84
|
char *output = rbs_allocator_alloc_many(allocator, len + 1, char);
|
|
60
85
|
if (!output) return RBS_STRING_NULL;
|
|
61
86
|
|
|
@@ -79,9 +104,21 @@ rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t stri
|
|
|
79
104
|
i += hex_len + 2;
|
|
80
105
|
} else if (input[i + 1] == 'u' && i + 5 < len) {
|
|
81
106
|
// Unicode escape
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
107
|
+
|
|
108
|
+
if (is_unicode) {
|
|
109
|
+
// The UTF-8 representation is at most 4 bytes, shorter than the input length.
|
|
110
|
+
int value = hex_to_int(input + i + 2, 4);
|
|
111
|
+
j += rbs_utf8_fill_codepoint(output, j, len + 1, value);
|
|
112
|
+
i += 6;
|
|
113
|
+
} else {
|
|
114
|
+
// Copy the escape sequence as-is
|
|
115
|
+
output[j++] = input[i++];
|
|
116
|
+
output[j++] = input[i++];
|
|
117
|
+
output[j++] = input[i++];
|
|
118
|
+
output[j++] = input[i++];
|
|
119
|
+
output[j++] = input[i++];
|
|
120
|
+
output[j++] = input[i++];
|
|
121
|
+
}
|
|
85
122
|
} else {
|
|
86
123
|
// Other escapes
|
|
87
124
|
int found = 0;
|
|
@@ -114,18 +151,17 @@ rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t stri
|
|
|
114
151
|
return rbs_string_new(output, output + j);
|
|
115
152
|
}
|
|
116
153
|
|
|
117
|
-
rbs_string_t rbs_unquote_string(rbs_allocator_t *allocator, rbs_string_t input) {
|
|
118
|
-
unsigned int first_char =
|
|
119
|
-
|
|
154
|
+
rbs_string_t rbs_unquote_string(rbs_allocator_t *allocator, rbs_string_t input, const rbs_encoding_t *encoding) {
|
|
155
|
+
unsigned int first_char = input.start[0];
|
|
156
|
+
|
|
157
|
+
const char *new_start = input.start;
|
|
158
|
+
const char *new_end = input.end;
|
|
120
159
|
|
|
121
|
-
ptrdiff_t start_offset = 0;
|
|
122
160
|
if (first_char == '"' || first_char == '\'' || first_char == '`') {
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
byte_length -= 2 * bs;
|
|
161
|
+
new_start += 1;
|
|
162
|
+
new_end -= 1;
|
|
126
163
|
}
|
|
127
164
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
return unescape_string(allocator, string, first_char == '"');
|
|
165
|
+
rbs_string_t string = rbs_string_new(new_start, new_end);
|
|
166
|
+
return unescape_string(allocator, string, first_char == '"', encoding == RBS_ENCODING_UTF_8_ENTRY);
|
|
131
167
|
}
|