rbs 4.0.0.dev.4 → 4.0.0.dev.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +14 -14
- data/.github/workflows/bundle-update.yml +60 -0
- data/.github/workflows/c-check.yml +11 -8
- data/.github/workflows/comments.yml +3 -3
- data/.github/workflows/dependabot.yml +1 -1
- data/.github/workflows/ruby.yml +17 -34
- data/.github/workflows/typecheck.yml +2 -2
- data/.github/workflows/valgrind.yml +42 -0
- data/.github/workflows/windows.yml +2 -2
- data/.rubocop.yml +1 -1
- data/README.md +1 -1
- data/Rakefile +32 -5
- data/config.yml +46 -0
- data/core/array.rbs +96 -46
- data/core/binding.rbs +0 -2
- data/core/builtin.rbs +2 -2
- data/core/comparable.rbs +13 -6
- data/core/complex.rbs +55 -41
- data/core/dir.rbs +4 -4
- data/core/encoding.rbs +7 -10
- data/core/enumerable.rbs +90 -3
- data/core/enumerator/arithmetic_sequence.rbs +70 -0
- data/core/enumerator.rbs +63 -1
- data/core/errno.rbs +8 -0
- data/core/errors.rbs +28 -1
- data/core/exception.rbs +2 -2
- data/core/fiber.rbs +40 -20
- data/core/file.rbs +108 -78
- data/core/file_test.rbs +1 -1
- data/core/float.rbs +225 -69
- data/core/gc.rbs +417 -281
- data/core/hash.rbs +1023 -727
- data/core/integer.rbs +104 -110
- data/core/io/buffer.rbs +21 -10
- data/core/io/wait.rbs +11 -33
- data/core/io.rbs +82 -19
- data/core/kernel.rbs +70 -59
- data/core/marshal.rbs +1 -1
- data/core/match_data.rbs +1 -1
- data/core/math.rbs +42 -3
- data/core/method.rbs +63 -27
- data/core/module.rbs +103 -26
- data/core/nil_class.rbs +3 -3
- data/core/numeric.rbs +43 -35
- data/core/object.rbs +3 -3
- data/core/object_space.rbs +21 -15
- data/core/pathname.rbs +1272 -0
- data/core/proc.rbs +30 -25
- data/core/process.rbs +4 -2
- data/core/ractor.rbs +361 -509
- data/core/random.rbs +17 -0
- data/core/range.rbs +113 -16
- data/core/rational.rbs +56 -85
- data/core/rbs/unnamed/argf.rbs +2 -2
- data/core/rbs/unnamed/env_class.rbs +1 -1
- data/core/rbs/unnamed/random.rbs +4 -113
- data/core/regexp.rbs +25 -20
- data/core/ruby.rbs +53 -0
- data/core/ruby_vm.rbs +6 -4
- data/core/rubygems/errors.rbs +3 -70
- data/core/rubygems/rubygems.rbs +11 -79
- data/core/rubygems/version.rbs +2 -3
- data/core/set.rbs +488 -359
- data/core/signal.rbs +24 -14
- data/core/string.rbs +3171 -1241
- data/core/struct.rbs +1 -1
- data/core/symbol.rbs +17 -11
- data/core/thread.rbs +95 -33
- data/core/time.rbs +35 -9
- data/core/trace_point.rbs +7 -4
- data/core/unbound_method.rbs +14 -6
- data/docs/aliases.md +79 -0
- data/docs/collection.md +2 -2
- data/docs/encoding.md +56 -0
- data/docs/gem.md +0 -1
- data/docs/inline.md +470 -0
- data/docs/sigs.md +3 -3
- data/docs/syntax.md +33 -4
- data/docs/type_fingerprint.md +21 -0
- data/exe/rbs +1 -1
- data/ext/rbs_extension/ast_translation.c +77 -3
- data/ext/rbs_extension/ast_translation.h +3 -0
- data/ext/rbs_extension/class_constants.c +8 -2
- data/ext/rbs_extension/class_constants.h +4 -0
- data/ext/rbs_extension/extconf.rb +5 -1
- data/ext/rbs_extension/legacy_location.c +5 -5
- data/ext/rbs_extension/main.c +37 -20
- data/include/rbs/ast.h +85 -38
- data/include/rbs/defines.h +27 -0
- data/include/rbs/lexer.h +30 -11
- data/include/rbs/parser.h +6 -6
- data/include/rbs/string.h +0 -2
- data/include/rbs/util/rbs_allocator.h +34 -13
- data/include/rbs/util/rbs_assert.h +12 -1
- data/include/rbs/util/rbs_encoding.h +2 -0
- data/include/rbs/util/rbs_unescape.h +2 -1
- data/lib/rbs/ast/annotation.rb +1 -1
- data/lib/rbs/ast/comment.rb +1 -1
- data/lib/rbs/ast/declarations.rb +10 -10
- data/lib/rbs/ast/members.rb +14 -14
- data/lib/rbs/ast/ruby/annotations.rb +137 -0
- data/lib/rbs/ast/ruby/comment_block.rb +24 -0
- data/lib/rbs/ast/ruby/declarations.rb +198 -3
- data/lib/rbs/ast/ruby/helpers/constant_helper.rb +4 -0
- data/lib/rbs/ast/ruby/members.rb +159 -1
- data/lib/rbs/ast/type_param.rb +24 -4
- data/lib/rbs/buffer.rb +20 -15
- data/lib/rbs/cli/diff.rb +16 -15
- data/lib/rbs/cli/validate.rb +38 -51
- data/lib/rbs/cli.rb +52 -19
- data/lib/rbs/collection/config/lockfile_generator.rb +8 -0
- data/lib/rbs/collection/sources/git.rb +1 -0
- data/lib/rbs/definition.rb +1 -1
- data/lib/rbs/definition_builder/ancestor_builder.rb +62 -9
- data/lib/rbs/definition_builder/method_builder.rb +20 -0
- data/lib/rbs/definition_builder.rb +91 -2
- data/lib/rbs/diff.rb +7 -1
- data/lib/rbs/environment.rb +227 -74
- data/lib/rbs/environment_loader.rb +0 -6
- data/lib/rbs/errors.rb +27 -7
- data/lib/rbs/inline_parser.rb +341 -5
- data/lib/rbs/location_aux.rb +1 -1
- data/lib/rbs/locator.rb +5 -1
- data/lib/rbs/method_type.rb +5 -3
- data/lib/rbs/parser_aux.rb +2 -2
- data/lib/rbs/prototype/rb.rb +2 -2
- data/lib/rbs/prototype/rbi.rb +2 -0
- data/lib/rbs/prototype/runtime.rb +8 -0
- data/lib/rbs/resolver/constant_resolver.rb +2 -2
- data/lib/rbs/resolver/type_name_resolver.rb +116 -38
- data/lib/rbs/subtractor.rb +3 -1
- data/lib/rbs/test/type_check.rb +16 -2
- data/lib/rbs/type_name.rb +1 -1
- data/lib/rbs/types.rb +27 -27
- data/lib/rbs/validator.rb +2 -2
- data/lib/rbs/version.rb +1 -1
- data/lib/rbs.rb +1 -1
- data/lib/rdoc/discover.rb +1 -1
- data/lib/rdoc_plugin/parser.rb +1 -1
- data/rbs.gemspec +3 -2
- data/schema/typeParam.json +17 -1
- data/sig/ast/ruby/annotations.rbs +124 -0
- data/sig/ast/ruby/comment_block.rbs +8 -0
- data/sig/ast/ruby/declarations.rbs +102 -4
- data/sig/ast/ruby/members.rbs +87 -1
- data/sig/cli/diff.rbs +5 -11
- data/sig/cli/validate.rbs +13 -4
- data/sig/cli.rbs +18 -18
- data/sig/definition.rbs +6 -1
- data/sig/environment.rbs +70 -12
- data/sig/errors.rbs +13 -6
- data/sig/inline_parser.rbs +39 -2
- data/sig/locator.rbs +0 -2
- data/sig/manifest.yaml +0 -1
- data/sig/method_builder.rbs +3 -1
- data/sig/method_types.rbs +1 -1
- data/sig/parser.rbs +16 -2
- data/sig/resolver/type_name_resolver.rbs +35 -7
- data/sig/source.rbs +3 -3
- data/sig/type_param.rbs +13 -8
- data/sig/types.rbs +4 -4
- data/src/ast.c +80 -1
- data/src/lexer.c +1392 -1313
- data/src/lexer.re +3 -0
- data/src/lexstate.c +58 -37
- data/src/location.c +4 -4
- data/src/parser.c +412 -145
- data/src/string.c +0 -48
- data/src/util/rbs_allocator.c +89 -71
- data/src/util/rbs_assert.c +1 -1
- data/src/util/rbs_buffer.c +2 -2
- data/src/util/rbs_constant_pool.c +10 -10
- data/src/util/rbs_encoding.c +4 -8
- data/src/util/rbs_unescape.c +56 -20
- data/stdlib/bigdecimal/0/big_decimal.rbs +100 -82
- data/stdlib/bigdecimal-math/0/big_math.rbs +169 -8
- data/stdlib/cgi/0/core.rbs +9 -393
- data/stdlib/cgi/0/manifest.yaml +1 -0
- data/stdlib/cgi-escape/0/escape.rbs +171 -0
- data/stdlib/coverage/0/coverage.rbs +3 -1
- data/stdlib/date/0/date.rbs +67 -59
- data/stdlib/date/0/date_time.rbs +1 -1
- data/stdlib/delegate/0/delegator.rbs +10 -7
- data/stdlib/digest/0/digest.rbs +110 -0
- data/stdlib/erb/0/erb.rbs +737 -347
- data/stdlib/fileutils/0/fileutils.rbs +20 -14
- data/stdlib/forwardable/0/forwardable.rbs +3 -0
- data/stdlib/json/0/json.rbs +82 -28
- data/stdlib/net-http/0/net-http.rbs +3 -0
- data/stdlib/objspace/0/objspace.rbs +9 -27
- data/stdlib/open-uri/0/open-uri.rbs +40 -0
- data/stdlib/open3/0/open3.rbs +459 -1
- data/stdlib/openssl/0/openssl.rbs +331 -228
- data/stdlib/optparse/0/optparse.rbs +8 -3
- data/stdlib/pathname/0/pathname.rbs +9 -1379
- data/stdlib/psych/0/psych.rbs +4 -4
- data/stdlib/random-formatter/0/random-formatter.rbs +277 -0
- data/stdlib/rdoc/0/code_object.rbs +2 -1
- data/stdlib/rdoc/0/parser.rbs +1 -1
- data/stdlib/rdoc/0/rdoc.rbs +1 -1
- data/stdlib/rdoc/0/store.rbs +1 -1
- data/stdlib/resolv/0/resolv.rbs +25 -68
- data/stdlib/ripper/0/ripper.rbs +2 -2
- data/stdlib/securerandom/0/manifest.yaml +2 -0
- data/stdlib/securerandom/0/securerandom.rbs +6 -19
- data/stdlib/singleton/0/singleton.rbs +3 -0
- data/stdlib/socket/0/socket.rbs +13 -1
- data/stdlib/socket/0/tcp_socket.rbs +10 -2
- data/stdlib/stringio/0/stringio.rbs +1176 -85
- data/stdlib/strscan/0/string_scanner.rbs +31 -31
- data/stdlib/tempfile/0/tempfile.rbs +3 -3
- data/stdlib/time/0/time.rbs +1 -1
- data/stdlib/timeout/0/timeout.rbs +63 -7
- data/stdlib/tsort/0/cyclic.rbs +3 -0
- data/stdlib/uri/0/common.rbs +16 -2
- data/stdlib/uri/0/file.rbs +1 -1
- data/stdlib/uri/0/generic.rbs +24 -16
- data/stdlib/uri/0/rfc2396_parser.rbs +6 -7
- data/stdlib/zlib/0/gzip_reader.rbs +2 -2
- data/stdlib/zlib/0/gzip_writer.rbs +1 -1
- data/stdlib/zlib/0/zstream.rbs +1 -0
- metadata +30 -4
data/src/string.c
CHANGED
|
@@ -5,54 +5,6 @@
|
|
|
5
5
|
#include <stdio.h>
|
|
6
6
|
#include <ctype.h>
|
|
7
7
|
|
|
8
|
-
unsigned int rbs_utf8_string_to_codepoint(const rbs_string_t string) {
|
|
9
|
-
unsigned int codepoint = 0;
|
|
10
|
-
int remaining_bytes = 0;
|
|
11
|
-
|
|
12
|
-
const char *s = string.start;
|
|
13
|
-
const char *end = string.end;
|
|
14
|
-
|
|
15
|
-
if (s >= end) return 0; // End of string
|
|
16
|
-
|
|
17
|
-
if ((*s & 0x80) == 0) {
|
|
18
|
-
// Single byte character (0xxxxxxx)
|
|
19
|
-
return *s;
|
|
20
|
-
} else if ((*s & 0xE0) == 0xC0) {
|
|
21
|
-
// Two byte character (110xxxxx 10xxxxxx)
|
|
22
|
-
codepoint = *s & 0x1F;
|
|
23
|
-
remaining_bytes = 1;
|
|
24
|
-
} else if ((*s & 0xF0) == 0xE0) {
|
|
25
|
-
// Three byte character (1110xxxx 10xxxxxx 10xxxxxx)
|
|
26
|
-
codepoint = *s & 0x0F;
|
|
27
|
-
remaining_bytes = 2;
|
|
28
|
-
} else if ((*s & 0xF8) == 0xF0) {
|
|
29
|
-
// Four byte character (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
|
|
30
|
-
codepoint = *s & 0x07;
|
|
31
|
-
remaining_bytes = 3;
|
|
32
|
-
} else {
|
|
33
|
-
// Invalid UTF-8 sequence
|
|
34
|
-
return 0xFFFD; // Unicode replacement character
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
s++;
|
|
38
|
-
while (remaining_bytes > 0 && s < end) {
|
|
39
|
-
if ((*s & 0xC0) != 0x80) {
|
|
40
|
-
// Invalid continuation byte
|
|
41
|
-
return 0xFFFD;
|
|
42
|
-
}
|
|
43
|
-
codepoint = (codepoint << 6) | (*s & 0x3F);
|
|
44
|
-
s++;
|
|
45
|
-
remaining_bytes--;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
if (remaining_bytes > 0) {
|
|
49
|
-
// Incomplete sequence
|
|
50
|
-
return 0xFFFD;
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
return codepoint;
|
|
54
|
-
}
|
|
55
|
-
|
|
56
8
|
rbs_string_t rbs_string_new(const char *start, const char *end) {
|
|
57
9
|
return (rbs_string_t) {
|
|
58
10
|
.start = start,
|
data/src/util/rbs_allocator.c
CHANGED
|
@@ -3,6 +3,14 @@
|
|
|
3
3
|
*
|
|
4
4
|
* A simple arena allocator that can be freed all at once.
|
|
5
5
|
*
|
|
6
|
+
* This allocator maintains a linked list of pages, which come in two flavours:
|
|
7
|
+
* 1. Small allocation pages, which are the same size as the system page size.
|
|
8
|
+
* 2. Large allocation pages, which are the exact size requested, for sizes greater than the small page size.
|
|
9
|
+
*
|
|
10
|
+
* Small allocations always fit into the unused space at the end of the "head" page. If there isn't enough room, a new
|
|
11
|
+
* page is allocated, and the small allocation is placed at its start. This approach wastes that unused slack at the
|
|
12
|
+
* end of the previous page, but it means that allocations are instant and never scan the linked list to find a gap.
|
|
13
|
+
*
|
|
6
14
|
* This allocator doesn't support freeing individual allocations. Only the whole arena can be freed at once at the end.
|
|
7
15
|
*/
|
|
8
16
|
|
|
@@ -20,16 +28,19 @@
|
|
|
20
28
|
#include <unistd.h>
|
|
21
29
|
#include <sys/types.h>
|
|
22
30
|
#include <sys/mman.h>
|
|
31
|
+
#include <fcntl.h>
|
|
23
32
|
#endif
|
|
24
33
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
34
|
+
typedef struct rbs_allocator_page {
|
|
35
|
+
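// The next page in the allocator's linked list (usually an older page, but large pages are spliced in right after the head), or NULL if this is the last page in the list.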
|
|
36
|
+
struct rbs_allocator_page *next;
|
|
28
37
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
38
|
+
// The size of the payload in bytes.
|
|
39
|
+
size_t size;
|
|
40
|
+
|
|
41
|
+
// The offset of the next available byte.
|
|
42
|
+
size_t used;
|
|
43
|
+
} rbs_allocator_page_t;
|
|
33
44
|
|
|
34
45
|
static size_t get_system_page_size(void) {
|
|
35
46
|
#ifdef _WIN32
|
|
@@ -43,73 +54,43 @@ static size_t get_system_page_size(void) {
|
|
|
43
54
|
#endif
|
|
44
55
|
}
|
|
45
56
|
|
|
46
|
-
static
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
#else
|
|
51
|
-
void *result = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
|
52
|
-
rbs_assert(result != MAP_FAILED, "mmap failed");
|
|
53
|
-
#endif
|
|
54
|
-
return result;
|
|
57
|
+
// Rounds `value` up to the nearest multiple of `alignment`.
//
// `alignment` has to be a non-zero power of two; this is checked with RBS_ASSERT.
// A `value` that is already a multiple of `alignment` is returned unchanged.
static inline uintptr_t rbs_align_up_uintptr(uintptr_t value, size_t alignment) {
    RBS_ASSERT(alignment != 0 && (alignment & (alignment - 1)) == 0, "alignment must be a non-zero power of two. alignment: %zu", alignment);

    // For a power of two, `alignment - 1` is a mask of the bits below the alignment boundary.
    // Adding the mask bumps any unaligned value past the next boundary; clearing the mask bits snaps it back onto it.
    const uintptr_t low_bits = (uintptr_t) (alignment - 1);
    const uintptr_t bumped = value + low_bits;
    return bumped & ~low_bits;
}
|
|
56
62
|
|
|
57
|
-
static
|
|
58
|
-
|
|
59
|
-
VirtualFree(memory, 0, MEM_RELEASE);
|
|
60
|
-
#else
|
|
61
|
-
munmap(memory, size);
|
|
62
|
-
#endif
|
|
63
|
-
}
|
|
63
|
+
static rbs_allocator_page_t *rbs_allocator_page_new(size_t payload_size) {
|
|
64
|
+
const size_t page_header_size = sizeof(rbs_allocator_page_t);
|
|
64
65
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
BOOL result = VirtualProtect(memory, page_size, PAGE_NOACCESS, &old_protect_);
|
|
69
|
-
rbs_assert(result != 0, "VirtualProtect failed");
|
|
70
|
-
#else
|
|
71
|
-
int result = mprotect(memory, page_size, PROT_NONE);
|
|
72
|
-
rbs_assert(result == 0, "mprotect failed");
|
|
73
|
-
#endif
|
|
74
|
-
}
|
|
66
|
+
rbs_allocator_page_t *page = (rbs_allocator_page_t *) malloc(page_header_size + payload_size);
|
|
67
|
+
page->size = payload_size;
|
|
68
|
+
page->used = 0;
|
|
75
69
|
|
|
76
|
-
|
|
77
|
-
size_t kib = 1024;
|
|
78
|
-
size_t mib = kib * 1024;
|
|
79
|
-
size_t gib = mib * 1024;
|
|
80
|
-
return 4 * gib;
|
|
70
|
+
return page;
|
|
81
71
|
}
|
|
82
72
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
}
|
|
73
|
+
rbs_allocator_t *rbs_allocator_init(void) {
|
|
74
|
+
rbs_allocator_t *allocator = (rbs_allocator_t *) malloc(sizeof(rbs_allocator_t));
|
|
86
75
|
|
|
87
|
-
|
|
88
|
-
static uintptr_t align(uintptr_t size, uintptr_t alignment) {
|
|
89
|
-
rbs_assert(is_power_of_two(alignment), "alignment is not a power of two");
|
|
90
|
-
return (size + alignment - 1) & ~(alignment - 1);
|
|
91
|
-
}
|
|
76
|
+
const size_t system_page_size = get_system_page_size();
|
|
92
77
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
// consume all memory
|
|
100
|
-
void *last_page = (char *) mem + size;
|
|
101
|
-
guard_page(last_page, page_size);
|
|
102
|
-
uintptr_t start = (uintptr_t) mem;
|
|
103
|
-
rbs_allocator_t header = (rbs_allocator_t) {
|
|
104
|
-
.heap_ptr = start + sizeof header,
|
|
105
|
-
.size = size + page_size,
|
|
106
|
-
};
|
|
107
|
-
memcpy(mem, &header, sizeof header);
|
|
108
|
-
return (rbs_allocator_t *) mem;
|
|
78
|
+
allocator->default_page_payload_size = system_page_size - sizeof(rbs_allocator_page_t);
|
|
79
|
+
|
|
80
|
+
allocator->page = rbs_allocator_page_new(allocator->default_page_payload_size);
|
|
81
|
+
allocator->page->next = NULL;
|
|
82
|
+
|
|
83
|
+
return allocator;
|
|
109
84
|
}
|
|
110
85
|
|
|
111
86
|
// Releases every page owned by `allocator`, then the allocator itself.
//
// All pointers previously handed out by this allocator become invalid.
// `allocator` must not be NULL (it is dereferenced unconditionally).
void rbs_allocator_free(rbs_allocator_t *allocator) {
    rbs_allocator_page_t *following;

    for (rbs_allocator_page_t *current = allocator->page; current != NULL; current = following) {
        // Read the link before freeing the page that stores it.
        following = current->next;
        free(current);
    }

    free(allocator);
}
|
|
114
95
|
|
|
115
96
|
// Allocates `new_size` bytes from `allocator`, aligned to an `alignment`-byte boundary.
|
|
@@ -123,21 +104,58 @@ void *rbs_allocator_realloc_impl(rbs_allocator_t *allocator, void *ptr, size_t o
|
|
|
123
104
|
|
|
124
105
|
// Allocates `size` bytes from `allocator`, aligned to an `alignment`-byte boundary.
|
|
125
106
|
void *rbs_allocator_malloc_impl(rbs_allocator_t *allocator, size_t size, size_t alignment) {
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
107
|
+
if (allocator->default_page_payload_size < size) { // Big allocation, give it its own page.
|
|
108
|
+
// Add padding to ensure we can align the start pointer within this page
|
|
109
|
+
rbs_allocator_page_t *new_page = rbs_allocator_page_new(size + (alignment - 1));
|
|
110
|
+
|
|
111
|
+
// This simple allocator can only put small allocations into the head page.
|
|
112
|
+
// Naively prepending this large allocation page to the head of the allocator before the previous head page
|
|
113
|
+
// would waste the remaining space in the head page.
|
|
114
|
+
// So instead, we'll splice in the large page *after* the head page.
|
|
115
|
+
//
|
|
116
|
+
// +-------+ +-----------+ +-----------+
|
|
117
|
+
// | arena | | head page | | new_page |
|
|
118
|
+
// |-------| |-----------+ |-----------+
|
|
119
|
+
// | *page |--->| size | +--->| size | +---> ... previous tail
|
|
120
|
+
// +-------+ | offset | | | offset | |
|
|
121
|
+
// | *next ----+---+ | *next ----+---+
|
|
122
|
+
// | ... | | ... |
|
|
123
|
+
// +-----------+ +-----------+
|
|
124
|
+
//
|
|
125
|
+
new_page->next = allocator->page->next;
|
|
126
|
+
allocator->page->next = new_page;
|
|
127
|
+
|
|
128
|
+
uintptr_t base = (uintptr_t) new_page + sizeof(rbs_allocator_page_t);
|
|
129
|
+
uintptr_t aligned_ptr = rbs_align_up_uintptr(base, alignment);
|
|
130
|
+
return (void *) aligned_ptr;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
rbs_allocator_page_t *page = allocator->page;
|
|
134
|
+
uintptr_t base = (uintptr_t) page + sizeof(rbs_allocator_page_t);
|
|
135
|
+
|
|
136
|
+
// Compute aligned offset within the payload
|
|
137
|
+
size_t used_aligned = (size_t) (rbs_align_up_uintptr(base + page->used, alignment) - base);
|
|
138
|
+
|
|
139
|
+
if (used_aligned + size > page->size) {
|
|
140
|
+
// Not enough space. Allocate a new small page and prepend it to the allocator's linked list.
|
|
141
|
+
rbs_allocator_page_t *new_page = rbs_allocator_page_new(allocator->default_page_payload_size);
|
|
142
|
+
new_page->next = allocator->page;
|
|
143
|
+
allocator->page = new_page;
|
|
144
|
+
page = new_page;
|
|
145
|
+
base = (uintptr_t) page + sizeof(rbs_allocator_page_t);
|
|
146
|
+
used_aligned = (size_t) (rbs_align_up_uintptr(base, alignment) - base); // start of fresh page (usually 0 if header is aligned)
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
uintptr_t pointer = base + used_aligned;
|
|
150
|
+
page->used = used_aligned + size;
|
|
151
|
+
return (void *) pointer;
|
|
130
152
|
}
|
|
131
153
|
|
|
132
154
|
// Note: This will eagerly fill with zeroes, unlike `calloc()` which can map a page in a page to be zeroed lazily.
|
|
133
155
|
// It's assumed that callers to this function will immediately write to the allocated memory, anyway.
|
|
134
156
|
void *rbs_allocator_calloc_impl(rbs_allocator_t *allocator, size_t count, size_t size, size_t alignment) {
|
|
135
157
|
void *p = rbs_allocator_malloc_many_impl(allocator, count, size, alignment);
|
|
136
|
-
#if defined(__linux__)
|
|
137
|
-
// mmap with MAP_ANONYMOUS gives zero-filled pages.
|
|
138
|
-
#else
|
|
139
158
|
memset(p, 0, count * size);
|
|
140
|
-
#endif
|
|
141
159
|
return p;
|
|
142
160
|
}
|
|
143
161
|
|
data/src/util/rbs_assert.c
CHANGED
data/src/util/rbs_buffer.c
CHANGED
|
@@ -25,7 +25,7 @@ void rbs_buffer_append_string(rbs_allocator_t *allocator, rbs_buffer_t *buffer,
|
|
|
25
25
|
if (next_length > buffer->capacity) {
|
|
26
26
|
size_t old_capacity = buffer->capacity;
|
|
27
27
|
|
|
28
|
-
|
|
28
|
+
RBS_ASSERT(old_capacity != 0, "Precondition: capacity must be at least 1. Got %zu", old_capacity);
|
|
29
29
|
|
|
30
30
|
size_t new_capacity = buffer->capacity * 2;
|
|
31
31
|
|
|
@@ -34,7 +34,7 @@ void rbs_buffer_append_string(rbs_allocator_t *allocator, rbs_buffer_t *buffer,
|
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
char *new_value = rbs_allocator_realloc(allocator, buffer->value, old_capacity, new_capacity, char);
|
|
37
|
-
|
|
37
|
+
RBS_ASSERT(new_value != NULL, "Failed to append to buffer. Old capacity: %zu, new capacity: %zu", old_capacity, new_capacity);
|
|
38
38
|
|
|
39
39
|
buffer->value = new_value;
|
|
40
40
|
buffer->capacity = new_capacity;
|
|
@@ -37,7 +37,7 @@ next_power_of_two(uint32_t v) {
|
|
|
37
37
|
return v;
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
-
static bool is_power_of_two(uint32_t size) {
|
|
40
|
+
// Reports whether `size` has at most one bit set.
//
// Clearing the lowest set bit of a power of two leaves zero. Note that 0 also passes this test,
// so callers that must exclude zero have to check for it separately.
RBS_ATTRIBUTE_UNUSED static bool is_power_of_two(uint32_t size) {
    const uint32_t without_lowest_set_bit = size & (size - 1);
    return without_lowest_set_bit == 0;
}
|
|
43
43
|
|
|
@@ -46,7 +46,7 @@ static bool is_power_of_two(uint32_t size) {
|
|
|
46
46
|
*/
|
|
47
47
|
static inline bool
|
|
48
48
|
rbs_constant_pool_resize(rbs_constant_pool_t *pool) {
|
|
49
|
-
|
|
49
|
+
RBS_ASSERT(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
|
|
50
50
|
|
|
51
51
|
uint32_t next_capacity = pool->capacity * 2;
|
|
52
52
|
if (next_capacity < pool->capacity) return false;
|
|
@@ -57,8 +57,8 @@ rbs_constant_pool_resize(rbs_constant_pool_t *pool) {
|
|
|
57
57
|
void *next = calloc(next_capacity, element_size);
|
|
58
58
|
if (next == NULL) return false;
|
|
59
59
|
|
|
60
|
-
rbs_constant_pool_bucket_t *next_buckets = next;
|
|
61
|
-
rbs_constant_t *next_constants = (
|
|
60
|
+
rbs_constant_pool_bucket_t *next_buckets = (rbs_constant_pool_bucket_t *) next;
|
|
61
|
+
rbs_constant_t *next_constants = (rbs_constant_t *) (((char *) next) + next_capacity * sizeof(rbs_constant_pool_bucket_t));
|
|
62
62
|
|
|
63
63
|
// For each bucket in the current constant pool, find the index in the
|
|
64
64
|
// next constant pool, and insert it.
|
|
@@ -111,8 +111,8 @@ bool rbs_constant_pool_init(rbs_constant_pool_t *pool, uint32_t capacity) {
|
|
|
111
111
|
void *memory = calloc(capacity, element_size);
|
|
112
112
|
if (memory == NULL) return false;
|
|
113
113
|
|
|
114
|
-
pool->buckets = memory;
|
|
115
|
-
pool->constants = (
|
|
114
|
+
pool->buckets = (rbs_constant_pool_bucket_t *) memory;
|
|
115
|
+
pool->constants = (rbs_constant_t *) (((char *) memory) + capacity * sizeof(rbs_constant_pool_bucket_t));
|
|
116
116
|
pool->size = 0;
|
|
117
117
|
pool->capacity = capacity;
|
|
118
118
|
return true;
|
|
@@ -123,7 +123,7 @@ bool rbs_constant_pool_init(rbs_constant_pool_t *pool, uint32_t capacity) {
|
|
|
123
123
|
*/
|
|
124
124
|
rbs_constant_t *
|
|
125
125
|
rbs_constant_pool_id_to_constant(const rbs_constant_pool_t *pool, rbs_constant_id_t constant_id) {
|
|
126
|
-
|
|
126
|
+
RBS_ASSERT(constant_id != RBS_CONSTANT_ID_UNSET && constant_id <= pool->size, "constant_id is not valid. Got %i, pool->size: %i", constant_id, pool->size);
|
|
127
127
|
return &pool->constants[constant_id - 1];
|
|
128
128
|
}
|
|
129
129
|
|
|
@@ -133,7 +133,7 @@ rbs_constant_pool_id_to_constant(const rbs_constant_pool_t *pool, rbs_constant_i
|
|
|
133
133
|
*/
|
|
134
134
|
rbs_constant_id_t
|
|
135
135
|
rbs_constant_pool_find(const rbs_constant_pool_t *pool, const uint8_t *start, size_t length) {
|
|
136
|
-
|
|
136
|
+
RBS_ASSERT(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
|
|
137
137
|
const uint32_t mask = pool->capacity - 1;
|
|
138
138
|
|
|
139
139
|
uint32_t hash = rbs_constant_pool_hash(start, length);
|
|
@@ -161,7 +161,7 @@ rbs_constant_pool_insert(rbs_constant_pool_t *pool, const uint8_t *start, size_t
|
|
|
161
161
|
if (!rbs_constant_pool_resize(pool)) return RBS_CONSTANT_ID_UNSET;
|
|
162
162
|
}
|
|
163
163
|
|
|
164
|
-
|
|
164
|
+
RBS_ASSERT(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity);
|
|
165
165
|
const uint32_t mask = pool->capacity - 1;
|
|
166
166
|
|
|
167
167
|
uint32_t hash = rbs_constant_pool_hash(start, length);
|
|
@@ -202,7 +202,7 @@ rbs_constant_pool_insert(rbs_constant_pool_t *pool, const uint8_t *start, size_t
|
|
|
202
202
|
// IDs are allocated starting at 1, since the value 0 denotes a non-existent
|
|
203
203
|
// constant.
|
|
204
204
|
uint32_t id = ++pool->size;
|
|
205
|
-
|
|
205
|
+
RBS_ASSERT(pool->size < ((uint32_t) (1 << 30)), "pool->size is too large. Got %i", pool->size);
|
|
206
206
|
|
|
207
207
|
*bucket = (rbs_constant_pool_bucket_t) {
|
|
208
208
|
.id = (unsigned int) (id & 0x3fffffff),
|
data/src/util/rbs_encoding.c
CHANGED
|
@@ -3,12 +3,6 @@
|
|
|
3
3
|
|
|
4
4
|
#include <ctype.h>
|
|
5
5
|
|
|
6
|
-
#if defined(__GNUC__)
|
|
7
|
-
#define RBS_ATTRIBUTE_UNUSED __attribute__((unused))
|
|
8
|
-
#else
|
|
9
|
-
#define RBS_ATTRIBUTE_UNUSED
|
|
10
|
-
#endif
|
|
11
|
-
|
|
12
6
|
typedef uint32_t rbs_unicode_codepoint_t;
|
|
13
7
|
|
|
14
8
|
#define UNICODE_ALPHA_CODEPOINTS_LENGTH 1450
|
|
@@ -4620,6 +4614,7 @@ rbs_unicode_codepoint_match(rbs_unicode_codepoint_t codepoint, const rbs_unicode
|
|
|
4620
4614
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
4621
4615
|
* SOFTWARE.
|
|
4622
4616
|
*/
|
|
4617
|
+
// clang-format off
|
|
4623
4618
|
static const uint8_t rbs_utf_8_dfa[] = {
|
|
4624
4619
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..1f
|
|
4625
4620
|
0,
|
|
@@ -4991,6 +4986,7 @@ static const uint8_t rbs_utf_8_dfa[] = {
|
|
|
4991
4986
|
1,
|
|
4992
4987
|
1, // s7..s8
|
|
4993
4988
|
};
|
|
4989
|
+
// clang-format on
|
|
4994
4990
|
|
|
4995
4991
|
/**
|
|
4996
4992
|
* Given a pointer to a string and the number of bytes remaining in the string,
|
|
@@ -4999,7 +4995,7 @@ static const uint8_t rbs_utf_8_dfa[] = {
|
|
|
4999
4995
|
*/
|
|
5000
4996
|
static rbs_unicode_codepoint_t
|
|
5001
4997
|
rbs_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
|
|
5002
|
-
|
|
4998
|
+
RBS_ASSERT(n >= 0, "[rbs_unicode_codepoint_t] n must be greater than or equal to 0. Got %ti", n);
|
|
5003
4999
|
|
|
5004
5000
|
size_t maximum = (n > 4) ? 4 : ((size_t) n);
|
|
5005
5001
|
uint32_t codepoint;
|
|
@@ -5029,7 +5025,7 @@ rbs_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
|
|
|
5029
5025
|
*/
|
|
5030
5026
|
size_t
|
|
5031
5027
|
rbs_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
|
|
5032
|
-
|
|
5028
|
+
RBS_ASSERT(n >= 0, "[rbs_encoding_utf_8_char_width] n must be greater than or equal to 0. Got %ti", n);
|
|
5033
5029
|
|
|
5034
5030
|
size_t maximum = (n > 4) ? 4 : ((size_t) n);
|
|
5035
5031
|
uint32_t state = 0;
|
data/src/util/rbs_unescape.c
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#include "rbs/util/rbs_unescape.h"
|
|
2
|
+
#include "rbs/util/rbs_encoding.h"
|
|
2
3
|
#include <string.h>
|
|
3
4
|
#include <stdlib.h>
|
|
4
5
|
#include <ctype.h>
|
|
@@ -42,20 +43,44 @@ static int octal_to_int(const char *octal, int length) {
|
|
|
42
43
|
return result;
|
|
43
44
|
}
|
|
44
45
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
if (
|
|
50
|
-
|
|
46
|
+
// Fills `buf`, starting at index `start`, with the UTF-8 encoding of `codepoint`.
//
// `end` is the index one past the last writable byte of `buf`; nothing at or beyond `end` is written.
//
// Returns the number of bytes written (1 to 4), or 0 when the output is not changed. That happens when:
//   - `codepoint` is not a Unicode scalar value (a surrogate U+D800..U+DFFF, or above U+10FFFF), or
//   - the encoded sequence does not fit into `buf[start .. end)`.
size_t rbs_utf8_fill_codepoint(char *buf, size_t start, size_t end, unsigned int codepoint) {
    // Work out the encoded width first, so the bounds check only demands the bytes that are actually needed.
    size_t width;
    if (codepoint <= 0x7F) {
        width = 1;
    } else if (codepoint <= 0x7FF) {
        width = 2;
    } else if (codepoint <= 0xFFFF) {
        // Surrogates are reserved for UTF-16 and have no valid UTF-8 encoding.
        if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
            return 0;
        }
        width = 3;
    } else if (codepoint <= 0x10FFFF) {
        width = 4;
    } else {
        return 0;
    }

    // Written as a subtraction so that a huge `start` cannot wrap around and pass the check.
    if (start > end || end - start < width) {
        return 0;
    }

    char *out = buf + start;
    switch (width) {
    case 1:
        out[0] = (char) (codepoint & 0x7F);
        break;
    case 2:
        out[0] = (char) (0xC0 | ((codepoint >> 6) & 0x1F));
        out[1] = (char) (0x80 | (codepoint & 0x3F));
        break;
    case 3:
        out[0] = (char) (0xE0 | ((codepoint >> 12) & 0x0F));
        out[1] = (char) (0x80 | ((codepoint >> 6) & 0x3F));
        out[2] = (char) (0x80 | (codepoint & 0x3F));
        break;
    default: // width == 4
        out[0] = (char) (0xF0 | ((codepoint >> 18) & 0x07));
        out[1] = (char) (0x80 | ((codepoint >> 12) & 0x3F));
        out[2] = (char) (0x80 | ((codepoint >> 6) & 0x3F));
        out[3] = (char) (0x80 | (codepoint & 0x3F));
        break;
    }

    return width;
}
|
|
52
76
|
|
|
53
|
-
rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t string, bool is_double_quote) {
|
|
77
|
+
rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t string, bool is_double_quote, bool is_unicode) {
|
|
54
78
|
if (!string.start) return RBS_STRING_NULL;
|
|
55
79
|
|
|
56
80
|
size_t len = string.end - string.start;
|
|
57
81
|
const char *input = string.start;
|
|
58
82
|
|
|
83
|
+
// The output cannot be longer than the input even after unescaping.
|
|
59
84
|
char *output = rbs_allocator_alloc_many(allocator, len + 1, char);
|
|
60
85
|
if (!output) return RBS_STRING_NULL;
|
|
61
86
|
|
|
@@ -79,9 +104,21 @@ rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t stri
|
|
|
79
104
|
i += hex_len + 2;
|
|
80
105
|
} else if (input[i + 1] == 'u' && i + 5 < len) {
|
|
81
106
|
// Unicode escape
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
107
|
+
|
|
108
|
+
if (is_unicode) {
|
|
109
|
+
// The UTF-8 representation is at most 4 bytes, shorter than the input length.
|
|
110
|
+
int value = hex_to_int(input + i + 2, 4);
|
|
111
|
+
j += rbs_utf8_fill_codepoint(output, j, len + 1, value);
|
|
112
|
+
i += 6;
|
|
113
|
+
} else {
|
|
114
|
+
// Copy the escape sequence as-is
|
|
115
|
+
output[j++] = input[i++];
|
|
116
|
+
output[j++] = input[i++];
|
|
117
|
+
output[j++] = input[i++];
|
|
118
|
+
output[j++] = input[i++];
|
|
119
|
+
output[j++] = input[i++];
|
|
120
|
+
output[j++] = input[i++];
|
|
121
|
+
}
|
|
85
122
|
} else {
|
|
86
123
|
// Other escapes
|
|
87
124
|
int found = 0;
|
|
@@ -114,18 +151,17 @@ rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t stri
|
|
|
114
151
|
return rbs_string_new(output, output + j);
|
|
115
152
|
}
|
|
116
153
|
|
|
117
|
-
rbs_string_t rbs_unquote_string(rbs_allocator_t *allocator, rbs_string_t input) {
|
|
118
|
-
unsigned int first_char =
|
|
119
|
-
|
|
154
|
+
// Removes the surrounding quote characters from `input` (if any) and unescapes the contents.
//
// If `input` starts with `"`, `'` or a backtick and is at least two bytes long, the first and the last
// byte are dropped. The last byte is not checked to be a matching quote. Escape sequences are
// processed with double-quote rules only when the first byte is `"`, and `\uXXXX` escapes are
// converted to UTF-8 only when `encoding` is the UTF-8 encoding entry.
//
// A NULL or empty `input` is passed through to `unescape_string` without being read.
rbs_string_t rbs_unquote_string(rbs_allocator_t *allocator, rbs_string_t input, const rbs_encoding_t *encoding) {
    // Only look at the first byte when there is one: an empty or NULL string has nothing to read.
    const bool has_bytes = input.start != NULL && input.start < input.end;
    const unsigned int first_char = has_bytes ? (unsigned char) input.start[0] : 0;

    const char *new_start = input.start;
    const char *new_end = input.end;

    const bool starts_with_quote = first_char == '"' || first_char == '\'' || first_char == '`';

    // Require room for both an opening and a closing quote. Stripping a lone quote character would
    // move `new_end` in front of `new_start` and produce a string with a negative length.
    if (starts_with_quote && input.end - input.start >= 2) {
        new_start += 1;
        new_end -= 1;
    }

    rbs_string_t string = rbs_string_new(new_start, new_end);
    return unescape_string(allocator, string, first_char == '"', encoding == RBS_ENCODING_UTF_8_ENTRY);
}
|