rbs 3.10.0.pre.1 → 3.10.0.pre.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/docs/encoding.md +56 -0
- data/ext/rbs_extension/class_constants.c +0 -2
- data/ext/rbs_extension/legacy_location.c +5 -5
- data/ext/rbs_extension/main.c +1 -9
- data/include/rbs/parser.h +2 -2
- data/include/rbs/string.h +0 -2
- data/include/rbs/util/rbs_unescape.h +2 -1
- data/lib/rbs/version.rb +1 -1
- data/src/location.c +1 -1
- data/src/parser.c +59 -47
- data/src/string.c +0 -49
- data/src/util/rbs_allocator.c +2 -2
- data/src/util/rbs_assert.c +0 -2
- data/src/util/rbs_constant_pool.c +4 -4
- data/src/util/rbs_unescape.c +56 -20
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2dde36e1704f20a8ad83c8917f449343563d933baf90673a17d2570ec7c44f5a
|
|
4
|
+
data.tar.gz: 051b863ff0f5fac88ff4ab53f3a8434601c3c42803495d354ad0a004c27e7655
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9820f43da6cb10c74015b212a9b37f1f1b4b46e1d8d9ec0c7e1c7304bdc49024691db5f03b776179715161e75eec166c5179eeab63adbd4e1044a811de7f8bc6
|
|
7
|
+
data.tar.gz: 1b75689099bd54f88dd2fd30b73fa98a2d0b76894c817b59e236109eb4924a9141d52d8454a82aef98bbe6dbb05ee5b8da9bf6d66254cba88797c39de020ddd5
|
data/docs/encoding.md
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# RBS File Encoding
|
|
2
|
+
|
|
3
|
+
## Best Practice
|
|
4
|
+
|
|
5
|
+
**Use UTF-8** for both file encoding and your system locale.
|
|
6
|
+
|
|
7
|
+
## Supported Encodings
|
|
8
|
+
|
|
9
|
+
The RBS parser supports ASCII-compatible encodings (similar to Ruby's script encoding support).
|
|
10
|
+
|
|
11
|
+
**Examples**: UTF-8, US-ASCII, Shift JIS, EUC-JP, ...
|
|
12
|
+
|
|
13
|
+
## Unicode Codepoint Symbols
|
|
14
|
+
|
|
15
|
+
String literal types in RBS can contain Unicode codepoint escape sequences (`\uXXXX`).
|
|
16
|
+
|
|
17
|
+
When the file encoding is UTF-8, the parser translates Unicode codepoint escape sequences into the corresponding characters:
|
|
18
|
+
|
|
19
|
+
```rbs
|
|
20
|
+
# In UTF-8 encoded files
|
|
21
|
+
|
|
22
|
+
type t = "\u0123" # Translated to the actual Unicode character ģ
|
|
23
|
+
type s = "\u3042" # Translated to the actual Unicode character あ
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
When the file encoding is not UTF-8, Unicode escape sequences are interpreted literally as the string `\uXXXX`:
|
|
27
|
+
|
|
28
|
+
```rbs
|
|
29
|
+
# In non-UTF-8 encoded files
|
|
30
|
+
|
|
31
|
+
type t = "\u0123" # Remains as the literal string "\u0123"
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Implementation
|
|
35
|
+
|
|
36
|
+
The RBS gem currently does nothing special to handle file encoding. It relies on Ruby's encoding handling, specifically `Encoding.default_external` and `Encoding.default_internal`.
|
|
37
|
+
|
|
38
|
+
`Encoding.default_external` is the encoding Ruby assumes when it reads external resources like files. The Ruby interpreter sets it based on the locale. `Encoding.default_internal` is the encoding Ruby converts the external resources to. Its default is `nil` (no conversion).
|
|
39
|
+
|
|
40
|
+
When your locale is set to use `UTF-8` encoding, `default_external` is `Encoding::UTF_8`. So the RBS file content read from the disk will have UTF-8 encoding.
|
|
41
|
+
|
|
42
|
+
### Parsing non-UTF-8 RBS source text
|
|
43
|
+
|
|
44
|
+
If you want to work with another encoding, ensure the source string has an ASCII-compatible encoding.
|
|
45
|
+
|
|
46
|
+
```ruby
|
|
47
|
+
source = '"日本語"'
|
|
48
|
+
RBS::Parser.parse_type(source.encode(Encoding::EUC_JP)) # => Parses successfully
|
|
49
|
+
RBS::Parser.parse_type(source.encode(Encoding::UTF_32)) # => Returns `nil` since UTF-32 is not ASCII compatible
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Specifying file encoding
|
|
53
|
+
|
|
54
|
+
Currently, RBS doesn't support specifying file encoding directly.
|
|
55
|
+
|
|
56
|
+
You can set `Encoding.default_external` to control the encoding Ruby assumes while the gem loads RBS files from storage.
|
data/ext/rbs_extension/legacy_location.c
CHANGED
|
@@ -33,7 +33,7 @@ void rbs_loc_legacy_alloc_children(rbs_loc *loc, unsigned short cap) {
|
|
|
33
33
|
check_children_max(cap);
|
|
34
34
|
|
|
35
35
|
size_t s = RBS_LOC_CHILDREN_SIZE(cap);
|
|
36
|
-
loc->children = malloc(s);
|
|
36
|
+
loc->children = (rbs_loc_children *) malloc(s);
|
|
37
37
|
|
|
38
38
|
*loc->children = (rbs_loc_children) {
|
|
39
39
|
.len = 0,
|
|
@@ -50,7 +50,7 @@ static void check_children_cap(rbs_loc *loc) {
|
|
|
50
50
|
if (loc->children->len == loc->children->cap) {
|
|
51
51
|
check_children_max(loc->children->cap + 1);
|
|
52
52
|
size_t s = RBS_LOC_CHILDREN_SIZE(++loc->children->cap);
|
|
53
|
-
loc->children = realloc(loc->children, s);
|
|
53
|
+
loc->children = (rbs_loc_children *) realloc(loc->children, s);
|
|
54
54
|
}
|
|
55
55
|
}
|
|
56
56
|
}
|
|
@@ -86,12 +86,12 @@ void rbs_loc_free(rbs_loc *loc) {
|
|
|
86
86
|
}
|
|
87
87
|
|
|
88
88
|
static void rbs_loc_mark(void *ptr) {
|
|
89
|
-
rbs_loc *loc = ptr;
|
|
89
|
+
rbs_loc *loc = (rbs_loc *) ptr;
|
|
90
90
|
rb_gc_mark(loc->buffer);
|
|
91
91
|
}
|
|
92
92
|
|
|
93
93
|
static size_t rbs_loc_memsize(const void *ptr) {
|
|
94
|
-
const rbs_loc *loc = ptr;
|
|
94
|
+
const rbs_loc *loc = (const rbs_loc *) ptr;
|
|
95
95
|
if (loc->children == NULL) {
|
|
96
96
|
return sizeof(rbs_loc);
|
|
97
97
|
} else {
|
|
@@ -117,7 +117,7 @@ static VALUE location_s_allocate(VALUE klass) {
|
|
|
117
117
|
}
|
|
118
118
|
|
|
119
119
|
rbs_loc *rbs_check_location(VALUE obj) {
|
|
120
|
-
return rb_check_typeddata(obj, &location_type);
|
|
120
|
+
return (rbs_loc *) rb_check_typeddata(obj, &location_type);
|
|
121
121
|
}
|
|
122
122
|
|
|
123
123
|
static VALUE location_initialize(VALUE self, VALUE buffer, VALUE start_pos, VALUE end_pos) {
|
data/ext/rbs_extension/main.c
CHANGED
|
@@ -187,18 +187,10 @@ static VALUE parse_method_type_try(VALUE a) {
|
|
|
187
187
|
}
|
|
188
188
|
|
|
189
189
|
rbs_method_type_t *method_type = NULL;
|
|
190
|
-
rbs_parse_method_type(parser, &method_type);
|
|
190
|
+
rbs_parse_method_type(parser, &method_type, RB_TEST(arg->require_eof));
|
|
191
191
|
|
|
192
192
|
raise_error_if_any(parser, arg->buffer);
|
|
193
193
|
|
|
194
|
-
if (RB_TEST(arg->require_eof)) {
|
|
195
|
-
rbs_parser_advance(parser);
|
|
196
|
-
if (parser->current_token.type != pEOF) {
|
|
197
|
-
rbs_parser_set_error(parser, parser->current_token, true, "expected a token `%s`", rbs_token_type_str(pEOF));
|
|
198
|
-
raise_error(parser->error, arg->buffer);
|
|
199
|
-
}
|
|
200
|
-
}
|
|
201
|
-
|
|
202
194
|
rbs_translation_context_t ctx = rbs_translation_context_create(
|
|
203
195
|
&parser->constant_pool,
|
|
204
196
|
arg->buffer,
|
data/include/rbs/parser.h
CHANGED
|
@@ -44,7 +44,7 @@ typedef struct rbs_error_t {
|
|
|
44
44
|
* An RBS parser is a LL(3) parser.
|
|
45
45
|
* */
|
|
46
46
|
typedef struct {
|
|
47
|
-
rbs_lexer_t *
|
|
47
|
+
rbs_lexer_t *lexer;
|
|
48
48
|
|
|
49
49
|
rbs_token_t current_token;
|
|
50
50
|
rbs_token_t next_token; /* The first lookahead token */
|
|
@@ -127,7 +127,7 @@ rbs_ast_comment_t *rbs_parser_get_comment(rbs_parser_t *parser, int subject_line
|
|
|
127
127
|
void rbs_parser_set_error(rbs_parser_t *parser, rbs_token_t tok, bool syntax_error, const char *fmt, ...) RBS_ATTRIBUTE_FORMAT(4, 5);
|
|
128
128
|
|
|
129
129
|
bool rbs_parse_type(rbs_parser_t *parser, rbs_node_t **type);
|
|
130
|
-
bool rbs_parse_method_type(rbs_parser_t *parser, rbs_method_type_t **method_type);
|
|
130
|
+
bool rbs_parse_method_type(rbs_parser_t *parser, rbs_method_type_t **method_type, bool require_eof);
|
|
131
131
|
bool rbs_parse_signature(rbs_parser_t *parser, rbs_signature_t **signature);
|
|
132
132
|
|
|
133
133
|
bool rbs_parse_type_params(rbs_parser_t *parser, bool module_type_params, rbs_node_list_t **params);
|
data/include/rbs/string.h
CHANGED
data/include/rbs/util/rbs_unescape.h
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
#include <stddef.h>
|
|
5
5
|
#include "rbs/util/rbs_allocator.h"
|
|
6
6
|
#include "rbs/string.h"
|
|
7
|
+
#include "rbs/util/rbs_encoding.h"
|
|
7
8
|
|
|
8
9
|
/**
|
|
9
10
|
* Receives `rbs_parser_t` and `range`, which represents a string token or symbol token, and returns a string VALUE.
|
|
@@ -18,6 +19,6 @@
|
|
|
18
19
|
*
|
|
19
20
|
* @returns A new owned string that will be freed when the allocator is freed.
|
|
20
21
|
* */
|
|
21
|
-
rbs_string_t rbs_unquote_string(rbs_allocator_t *, const rbs_string_t input);
|
|
22
|
+
rbs_string_t rbs_unquote_string(rbs_allocator_t *, const rbs_string_t input, const rbs_encoding_t *encoding);
|
|
22
23
|
|
|
23
24
|
#endif // RBS_RBS_UNESCAPE_H
|
data/lib/rbs/version.rb
CHANGED
data/src/location.c
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
void rbs_loc_alloc_children(rbs_allocator_t *allocator, rbs_location_t *loc, size_t capacity) {
|
|
9
9
|
RBS_ASSERT(capacity <= sizeof(rbs_loc_entry_bitmap) * 8, "Capacity %zu is too large. Max is %zu", capacity, sizeof(rbs_loc_entry_bitmap) * 8);
|
|
10
10
|
|
|
11
|
-
loc->children = rbs_allocator_malloc_impl(allocator, RBS_LOC_CHILDREN_SIZE(capacity), rbs_alignof(rbs_loc_children));
|
|
11
|
+
loc->children = (rbs_loc_children *) rbs_allocator_malloc_impl(allocator, RBS_LOC_CHILDREN_SIZE(capacity), rbs_alignof(rbs_loc_children));
|
|
12
12
|
|
|
13
13
|
loc->children->len = 0;
|
|
14
14
|
loc->children->required_p = 0;
|
data/src/parser.c
CHANGED
|
@@ -20,12 +20,12 @@
|
|
|
20
20
|
strlen(str) \
|
|
21
21
|
)
|
|
22
22
|
|
|
23
|
-
#define INTERN_TOKEN(parser, tok)
|
|
24
|
-
rbs_constant_pool_insert_shared_with_encoding(
|
|
25
|
-
&parser->constant_pool,
|
|
26
|
-
(const uint8_t *) rbs_peek_token(parser->
|
|
27
|
-
rbs_token_bytes(tok),
|
|
28
|
-
|
|
23
|
+
#define INTERN_TOKEN(parser, tok) \
|
|
24
|
+
rbs_constant_pool_insert_shared_with_encoding( \
|
|
25
|
+
&parser->constant_pool, \
|
|
26
|
+
(const uint8_t *) rbs_peek_token(parser->lexer, tok), \
|
|
27
|
+
rbs_token_bytes(tok), \
|
|
28
|
+
parser->lexer->encoding \
|
|
29
29
|
)
|
|
30
30
|
|
|
31
31
|
#define KEYWORD_CASES \
|
|
@@ -128,7 +128,7 @@ static bool parse_simple(rbs_parser_t *parser, rbs_node_t **type);
|
|
|
128
128
|
static rbs_string_t rbs_parser_peek_current_token(rbs_parser_t *parser) {
|
|
129
129
|
rbs_range_t rg = parser->current_token.range;
|
|
130
130
|
|
|
131
|
-
const char *start = parser->
|
|
131
|
+
const char *start = parser->lexer->string.start + rg.start.byte_pos;
|
|
132
132
|
size_t length = rg.end.byte_pos - rg.start.byte_pos;
|
|
133
133
|
|
|
134
134
|
return rbs_string_new(start, start + length);
|
|
@@ -189,7 +189,7 @@ static bool parse_type_name(rbs_parser_t *parser, TypeNameKind kind, rbs_range_t
|
|
|
189
189
|
.end = parser->current_token.range.end
|
|
190
190
|
};
|
|
191
191
|
rbs_location_t *loc = rbs_location_new(ALLOCATOR(), namespace_range);
|
|
192
|
-
rbs_namespace_t *
|
|
192
|
+
rbs_namespace_t *ns = rbs_namespace_new(ALLOCATOR(), loc, path, absolute);
|
|
193
193
|
|
|
194
194
|
switch (parser->current_token.type) {
|
|
195
195
|
case tLIDENT:
|
|
@@ -213,7 +213,7 @@ success: {
|
|
|
213
213
|
rbs_location_t *symbolLoc = rbs_location_current_token(parser);
|
|
214
214
|
rbs_constant_id_t name = INTERN_TOKEN(parser, parser->current_token);
|
|
215
215
|
rbs_ast_symbol_t *symbol = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, name);
|
|
216
|
-
*type_name = rbs_type_name_new(ALLOCATOR(), rbs_location_new(ALLOCATOR(), *rg),
|
|
216
|
+
*type_name = rbs_type_name_new(ALLOCATOR(), rbs_location_new(ALLOCATOR(), *rg), ns, symbol);
|
|
217
217
|
return true;
|
|
218
218
|
}
|
|
219
219
|
|
|
@@ -317,7 +317,7 @@ static bool parse_function_param(rbs_parser_t *parser, rbs_types_function_param_
|
|
|
317
317
|
return false;
|
|
318
318
|
}
|
|
319
319
|
|
|
320
|
-
rbs_string_t unquoted_str = rbs_unquote_string(ALLOCATOR(), rbs_parser_peek_current_token(parser));
|
|
320
|
+
rbs_string_t unquoted_str = rbs_unquote_string(ALLOCATOR(), rbs_parser_peek_current_token(parser), parser->lexer->encoding);
|
|
321
321
|
rbs_location_t *symbolLoc = rbs_location_current_token(parser);
|
|
322
322
|
rbs_constant_id_t constant_id = rbs_constant_pool_insert_string(&parser->constant_pool, unquoted_str);
|
|
323
323
|
rbs_ast_symbol_t *name = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, constant_id);
|
|
@@ -334,9 +334,9 @@ static bool parse_function_param(rbs_parser_t *parser, rbs_types_function_param_
|
|
|
334
334
|
static rbs_constant_id_t intern_token_start_end(rbs_parser_t *parser, rbs_token_t start_token, rbs_token_t end_token) {
|
|
335
335
|
return rbs_constant_pool_insert_shared_with_encoding(
|
|
336
336
|
&parser->constant_pool,
|
|
337
|
-
(const uint8_t *) rbs_peek_token(parser->
|
|
337
|
+
(const uint8_t *) rbs_peek_token(parser->lexer, start_token),
|
|
338
338
|
end_token.range.end.byte_pos - start_token.range.start.byte_pos,
|
|
339
|
-
parser->
|
|
339
|
+
parser->lexer->encoding
|
|
340
340
|
);
|
|
341
341
|
}
|
|
342
342
|
|
|
@@ -902,7 +902,7 @@ static bool parse_record_attributes(rbs_parser_t *parser, rbs_hash_t **fields) {
|
|
|
902
902
|
*/
|
|
903
903
|
NODISCARD
|
|
904
904
|
static bool parse_symbol(rbs_parser_t *parser, rbs_location_t *location, rbs_types_literal_t **symbol) {
|
|
905
|
-
size_t offset_bytes = parser->
|
|
905
|
+
size_t offset_bytes = parser->lexer->encoding->char_width((const uint8_t *) ":", (size_t) 1);
|
|
906
906
|
size_t bytes = rbs_token_bytes(parser->current_token) - offset_bytes;
|
|
907
907
|
|
|
908
908
|
rbs_ast_symbol_t *literal;
|
|
@@ -911,7 +911,7 @@ static bool parse_symbol(rbs_parser_t *parser, rbs_location_t *location, rbs_typ
|
|
|
911
911
|
case tSYMBOL: {
|
|
912
912
|
rbs_location_t *symbolLoc = rbs_location_current_token(parser);
|
|
913
913
|
|
|
914
|
-
char *buffer = rbs_peek_token(parser->
|
|
914
|
+
char *buffer = rbs_peek_token(parser->lexer, parser->current_token);
|
|
915
915
|
rbs_constant_id_t constant_id = rbs_constant_pool_insert_shared(
|
|
916
916
|
&parser->constant_pool,
|
|
917
917
|
(const uint8_t *) buffer + offset_bytes,
|
|
@@ -927,7 +927,7 @@ static bool parse_symbol(rbs_parser_t *parser, rbs_location_t *location, rbs_typ
|
|
|
927
927
|
|
|
928
928
|
rbs_string_t symbol = rbs_string_new(current_token.start + offset_bytes, current_token.end);
|
|
929
929
|
|
|
930
|
-
rbs_string_t unquoted_symbol = rbs_unquote_string(ALLOCATOR(), symbol);
|
|
930
|
+
rbs_string_t unquoted_symbol = rbs_unquote_string(ALLOCATOR(), symbol, parser->lexer->encoding);
|
|
931
931
|
|
|
932
932
|
rbs_constant_id_t constant_id = rbs_constant_pool_insert_string(&parser->constant_pool, unquoted_symbol);
|
|
933
933
|
|
|
@@ -951,9 +951,9 @@ static bool parse_symbol(rbs_parser_t *parser, rbs_location_t *location, rbs_typ
|
|
|
951
951
|
*/
|
|
952
952
|
NODISCARD
|
|
953
953
|
static bool parse_instance_type(rbs_parser_t *parser, bool parse_alias, rbs_node_t **type) {
|
|
954
|
-
TypeNameKind expected_kind = INTERFACE_NAME | CLASS_NAME;
|
|
954
|
+
TypeNameKind expected_kind = (TypeNameKind) (INTERFACE_NAME | CLASS_NAME);
|
|
955
955
|
if (parse_alias) {
|
|
956
|
-
expected_kind
|
|
956
|
+
expected_kind = (TypeNameKind) (expected_kind | ALIAS_NAME);
|
|
957
957
|
}
|
|
958
958
|
|
|
959
959
|
rbs_range_t name_range;
|
|
@@ -1157,7 +1157,7 @@ static bool parse_simple(rbs_parser_t *parser, rbs_node_t **type) {
|
|
|
1157
1157
|
case tDQSTRING: {
|
|
1158
1158
|
rbs_location_t *loc = rbs_location_current_token(parser);
|
|
1159
1159
|
|
|
1160
|
-
rbs_string_t unquoted_str = rbs_unquote_string(ALLOCATOR(), rbs_parser_peek_current_token(parser));
|
|
1160
|
+
rbs_string_t unquoted_str = rbs_unquote_string(ALLOCATOR(), rbs_parser_peek_current_token(parser), parser->lexer->encoding);
|
|
1161
1161
|
rbs_node_t *literal = (rbs_node_t *) rbs_ast_string_new(ALLOCATOR(), loc, unquoted_str);
|
|
1162
1162
|
*type = (rbs_node_t *) rbs_types_literal_new(ALLOCATOR(), loc, literal);
|
|
1163
1163
|
return true;
|
|
@@ -1172,7 +1172,7 @@ static bool parse_simple(rbs_parser_t *parser, rbs_node_t **type) {
|
|
|
1172
1172
|
return true;
|
|
1173
1173
|
}
|
|
1174
1174
|
case tUIDENT: {
|
|
1175
|
-
const char *name_str = rbs_peek_token(parser->
|
|
1175
|
+
const char *name_str = rbs_peek_token(parser->lexer, parser->current_token);
|
|
1176
1176
|
size_t name_len = rbs_token_bytes(parser->current_token);
|
|
1177
1177
|
|
|
1178
1178
|
rbs_constant_id_t name = rbs_constant_pool_find(&parser->constant_pool, (const uint8_t *) name_str, name_len);
|
|
@@ -1452,7 +1452,7 @@ static bool parser_pop_typevar_table(rbs_parser_t *parser) {
|
|
|
1452
1452
|
method_type ::= {} type_params <function>
|
|
1453
1453
|
*/
|
|
1454
1454
|
// TODO: Should this be NODISCARD?
|
|
1455
|
-
bool rbs_parse_method_type(rbs_parser_t *parser, rbs_method_type_t **method_type) {
|
|
1455
|
+
bool rbs_parse_method_type(rbs_parser_t *parser, rbs_method_type_t **method_type, bool require_eof) {
|
|
1456
1456
|
rbs_parser_push_typevar_table(parser, false);
|
|
1457
1457
|
|
|
1458
1458
|
rbs_range_t rg;
|
|
@@ -1468,10 +1468,18 @@ bool rbs_parse_method_type(rbs_parser_t *parser, rbs_method_type_t **method_type
|
|
|
1468
1468
|
parse_function_result *result = rbs_allocator_alloc(ALLOCATOR(), parse_function_result);
|
|
1469
1469
|
CHECK_PARSE(parse_function(parser, false, &result));
|
|
1470
1470
|
|
|
1471
|
+
CHECK_PARSE(parser_pop_typevar_table(parser));
|
|
1472
|
+
|
|
1471
1473
|
rg.end = parser->current_token.range.end;
|
|
1472
1474
|
type_range.end = rg.end;
|
|
1473
1475
|
|
|
1474
|
-
|
|
1476
|
+
if (require_eof) {
|
|
1477
|
+
rbs_parser_advance(parser);
|
|
1478
|
+
if (parser->current_token.type != pEOF) {
|
|
1479
|
+
rbs_parser_set_error(parser, parser->current_token, true, "expected a token `%s`", rbs_token_type_str(pEOF));
|
|
1480
|
+
return false;
|
|
1481
|
+
}
|
|
1482
|
+
}
|
|
1475
1483
|
|
|
1476
1484
|
rbs_location_t *loc = rbs_location_new(ALLOCATOR(), rg);
|
|
1477
1485
|
rbs_loc_alloc_children(ALLOCATOR(), loc, 2);
|
|
@@ -1598,14 +1606,16 @@ static bool parse_annotation(rbs_parser_t *parser, rbs_ast_annotation_t **annota
|
|
|
1598
1606
|
rbs_range_t rg = parser->current_token.range;
|
|
1599
1607
|
|
|
1600
1608
|
size_t offset_bytes =
|
|
1601
|
-
parser->
|
|
1602
|
-
parser->
|
|
1609
|
+
parser->lexer->encoding->char_width((const uint8_t *) "%", (size_t) 1) +
|
|
1610
|
+
parser->lexer->encoding->char_width((const uint8_t *) "a", (size_t) 1);
|
|
1603
1611
|
|
|
1604
1612
|
rbs_string_t str = rbs_string_new(
|
|
1605
|
-
parser->
|
|
1606
|
-
parser->
|
|
1613
|
+
parser->lexer->string.start + rg.start.byte_pos + offset_bytes,
|
|
1614
|
+
parser->lexer->string.end
|
|
1607
1615
|
);
|
|
1608
|
-
|
|
1616
|
+
|
|
1617
|
+
// Assumes the input is ASCII compatible
|
|
1618
|
+
unsigned int open_char = str.start[0];
|
|
1609
1619
|
|
|
1610
1620
|
unsigned int close_char;
|
|
1611
1621
|
|
|
@@ -1630,8 +1640,8 @@ static bool parse_annotation(rbs_parser_t *parser, rbs_ast_annotation_t **annota
|
|
|
1630
1640
|
return false;
|
|
1631
1641
|
}
|
|
1632
1642
|
|
|
1633
|
-
size_t open_bytes = parser->
|
|
1634
|
-
size_t close_bytes = parser->
|
|
1643
|
+
size_t open_bytes = parser->lexer->encoding->char_width((const uint8_t *) &open_char, (size_t) 1);
|
|
1644
|
+
size_t close_bytes = parser->lexer->encoding->char_width((const uint8_t *) &close_char, (size_t) 1);
|
|
1635
1645
|
|
|
1636
1646
|
rbs_string_t current_token = rbs_parser_peek_current_token(parser);
|
|
1637
1647
|
size_t total_offset = offset_bytes + open_bytes;
|
|
@@ -1695,9 +1705,9 @@ static bool parse_method_name(rbs_parser_t *parser, rbs_range_t *range, rbs_ast_
|
|
|
1695
1705
|
|
|
1696
1706
|
rbs_constant_id_t constant_id = rbs_constant_pool_insert_shared_with_encoding(
|
|
1697
1707
|
&parser->constant_pool,
|
|
1698
|
-
(const uint8_t *) parser->
|
|
1708
|
+
(const uint8_t *) parser->lexer->string.start + range->start.byte_pos,
|
|
1699
1709
|
range->end.byte_pos - range->start.byte_pos,
|
|
1700
|
-
parser->
|
|
1710
|
+
parser->lexer->encoding
|
|
1701
1711
|
);
|
|
1702
1712
|
|
|
1703
1713
|
rbs_location_t *symbolLoc = rbs_location_new(ALLOCATOR(), *range);
|
|
@@ -1718,7 +1728,7 @@ static bool parse_method_name(rbs_parser_t *parser, rbs_range_t *range, rbs_ast_
|
|
|
1718
1728
|
}
|
|
1719
1729
|
case tQIDENT: {
|
|
1720
1730
|
rbs_string_t string = rbs_parser_peek_current_token(parser);
|
|
1721
|
-
rbs_string_t unquoted_str = rbs_unquote_string(ALLOCATOR(), string);
|
|
1731
|
+
rbs_string_t unquoted_str = rbs_unquote_string(ALLOCATOR(), string, parser->lexer->encoding);
|
|
1722
1732
|
rbs_constant_id_t constant_id = rbs_constant_pool_insert_string(&parser->constant_pool, unquoted_str);
|
|
1723
1733
|
rbs_location_t *symbolLoc = rbs_location_current_token(parser);
|
|
1724
1734
|
*symbol = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, constant_id);
|
|
@@ -1879,7 +1889,7 @@ static bool parse_member_def(rbs_parser_t *parser, bool instance_only, bool acce
|
|
|
1879
1889
|
case pLBRACKET:
|
|
1880
1890
|
case pQUESTION: {
|
|
1881
1891
|
rbs_method_type_t *method_type = NULL;
|
|
1882
|
-
CHECK_PARSE(rbs_parse_method_type(parser, &method_type));
|
|
1892
|
+
CHECK_PARSE(rbs_parse_method_type(parser, &method_type, false));
|
|
1883
1893
|
|
|
1884
1894
|
overload_range.end = parser->current_token.range.end;
|
|
1885
1895
|
rbs_location_t *loc = rbs_location_new(ALLOCATOR(), overload_range);
|
|
@@ -2021,7 +2031,7 @@ static bool parse_mixin_member(rbs_parser_t *parser, bool from_interface, rbs_po
|
|
|
2021
2031
|
rbs_type_name_t *name = NULL;
|
|
2022
2032
|
CHECK_PARSE(class_instance_name(
|
|
2023
2033
|
parser,
|
|
2024
|
-
from_interface ? INTERFACE_NAME : (INTERFACE_NAME | CLASS_NAME),
|
|
2034
|
+
from_interface ? INTERFACE_NAME : (TypeNameKind) (INTERFACE_NAME | CLASS_NAME),
|
|
2025
2035
|
args,
|
|
2026
2036
|
&name_range,
|
|
2027
2037
|
&args_range,
|
|
@@ -2486,7 +2496,7 @@ static bool parse_module_self_types(rbs_parser_t *parser, rbs_node_list_t *array
|
|
|
2486
2496
|
|
|
2487
2497
|
rbs_range_t name_range;
|
|
2488
2498
|
rbs_type_name_t *module_name = NULL;
|
|
2489
|
-
CHECK_PARSE(parse_type_name(parser, CLASS_NAME | INTERFACE_NAME, &name_range, &module_name));
|
|
2499
|
+
CHECK_PARSE(parse_type_name(parser, (TypeNameKind) (CLASS_NAME | INTERFACE_NAME), &name_range, &module_name));
|
|
2490
2500
|
self_range.end = name_range.end;
|
|
2491
2501
|
|
|
2492
2502
|
rbs_node_list_t *args = rbs_node_list_new(ALLOCATOR());
|
|
@@ -2949,7 +2959,7 @@ static bool parse_decl(rbs_parser_t *parser, rbs_node_t **decl) {
|
|
|
2949
2959
|
| {} <> (empty -- returns empty namespace)
|
|
2950
2960
|
*/
|
|
2951
2961
|
NODISCARD
|
|
2952
|
-
static bool parse_namespace(rbs_parser_t *parser, rbs_range_t *rg, rbs_namespace_t **
|
|
2962
|
+
static bool parse_namespace(rbs_parser_t *parser, rbs_range_t *rg, rbs_namespace_t **out_ns) {
|
|
2953
2963
|
bool is_absolute = false;
|
|
2954
2964
|
|
|
2955
2965
|
if (parser->next_token.type == pCOLON2) {
|
|
@@ -2980,7 +2990,7 @@ static bool parse_namespace(rbs_parser_t *parser, rbs_range_t *rg, rbs_namespace
|
|
|
2980
2990
|
}
|
|
2981
2991
|
}
|
|
2982
2992
|
|
|
2983
|
-
*
|
|
2993
|
+
*out_ns = rbs_namespace_new(ALLOCATOR(), rbs_location_new(ALLOCATOR(), *rg), path, is_absolute);
|
|
2984
2994
|
return true;
|
|
2985
2995
|
}
|
|
2986
2996
|
|
|
@@ -2995,8 +3005,8 @@ NODISCARD
|
|
|
2995
3005
|
static bool parse_use_clauses(rbs_parser_t *parser, rbs_node_list_t *clauses) {
|
|
2996
3006
|
while (true) {
|
|
2997
3007
|
rbs_range_t namespace_range = NULL_RANGE;
|
|
2998
|
-
rbs_namespace_t *
|
|
2999
|
-
CHECK_PARSE(parse_namespace(parser, &namespace_range, &
|
|
3008
|
+
rbs_namespace_t *ns = NULL;
|
|
3009
|
+
CHECK_PARSE(parse_namespace(parser, &namespace_range, &ns));
|
|
3000
3010
|
|
|
3001
3011
|
switch (parser->next_token.type) {
|
|
3002
3012
|
case tLIDENT:
|
|
@@ -3010,7 +3020,7 @@ static bool parse_use_clauses(rbs_parser_t *parser, rbs_node_list_t *clauses) {
|
|
|
3010
3020
|
|
|
3011
3021
|
rbs_location_t *symbolLoc = rbs_location_current_token(parser);
|
|
3012
3022
|
rbs_ast_symbol_t *symbol = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, INTERN_TOKEN(parser, parser->current_token));
|
|
3013
|
-
rbs_type_name_t *type_name = rbs_type_name_new(ALLOCATOR(), rbs_location_new(ALLOCATOR(), type_name_range),
|
|
3023
|
+
rbs_type_name_t *type_name = rbs_type_name_new(ALLOCATOR(), rbs_location_new(ALLOCATOR(), type_name_range), ns, symbol);
|
|
3014
3024
|
|
|
3015
3025
|
rbs_range_t keyword_range = NULL_RANGE;
|
|
3016
3026
|
rbs_range_t new_name_range = NULL_RANGE;
|
|
@@ -3053,7 +3063,7 @@ static bool parse_use_clauses(rbs_parser_t *parser, rbs_node_list_t *clauses) {
|
|
|
3053
3063
|
rbs_loc_add_required_child(loc, INTERN("namespace"), namespace_range);
|
|
3054
3064
|
rbs_loc_add_required_child(loc, INTERN("star"), star_range);
|
|
3055
3065
|
|
|
3056
|
-
rbs_ast_directives_use_wildcard_clause_t *clause = rbs_ast_directives_use_wildcard_clause_new(ALLOCATOR(), loc,
|
|
3066
|
+
rbs_ast_directives_use_wildcard_clause_t *clause = rbs_ast_directives_use_wildcard_clause_new(ALLOCATOR(), loc, ns);
|
|
3057
3067
|
rbs_node_list_append(clauses, (rbs_node_t *) clause);
|
|
3058
3068
|
|
|
3059
3069
|
break;
|
|
@@ -3100,8 +3110,8 @@ static bool parse_use_directive(rbs_parser_t *parser, rbs_ast_directives_use_t *
|
|
|
3100
3110
|
}
|
|
3101
3111
|
|
|
3102
3112
|
static rbs_ast_comment_t *parse_comment_lines(rbs_parser_t *parser, rbs_comment_t *com) {
|
|
3103
|
-
size_t hash_bytes = parser->
|
|
3104
|
-
size_t space_bytes = parser->
|
|
3113
|
+
size_t hash_bytes = parser->lexer->encoding->char_width((const uint8_t *) "#", (size_t) 1);
|
|
3114
|
+
size_t space_bytes = parser->lexer->encoding->char_width((const uint8_t *) " ", (size_t) 1);
|
|
3105
3115
|
|
|
3106
3116
|
rbs_buffer_t rbs_buffer;
|
|
3107
3117
|
rbs_buffer_init(ALLOCATOR(), &rbs_buffer);
|
|
@@ -3109,14 +3119,16 @@ static rbs_ast_comment_t *parse_comment_lines(rbs_parser_t *parser, rbs_comment_
|
|
|
3109
3119
|
for (size_t i = 0; i < com->line_tokens_count; i++) {
|
|
3110
3120
|
rbs_token_t tok = com->line_tokens[i];
|
|
3111
3121
|
|
|
3112
|
-
const char *comment_start = parser->
|
|
3122
|
+
const char *comment_start = parser->lexer->string.start + tok.range.start.byte_pos + hash_bytes;
|
|
3113
3123
|
size_t comment_bytes = RBS_RANGE_BYTES(tok.range) - hash_bytes;
|
|
3114
3124
|
|
|
3115
3125
|
rbs_string_t str = rbs_string_new(
|
|
3116
3126
|
comment_start,
|
|
3117
|
-
parser->
|
|
3127
|
+
parser->lexer->string.end
|
|
3118
3128
|
);
|
|
3119
|
-
|
|
3129
|
+
|
|
3130
|
+
// Assumes the input is ASCII compatible
|
|
3131
|
+
unsigned char c = str.start[0];
|
|
3120
3132
|
|
|
3121
3133
|
if (c == ' ') {
|
|
3122
3134
|
comment_start += space_bytes;
|
|
@@ -3332,7 +3344,7 @@ void rbs_parser_advance(rbs_parser_t *parser) {
|
|
|
3332
3344
|
break;
|
|
3333
3345
|
}
|
|
3334
3346
|
|
|
3335
|
-
parser->next_token3 = rbs_lexer_next_token(parser->
|
|
3347
|
+
parser->next_token3 = rbs_lexer_next_token(parser->lexer);
|
|
3336
3348
|
|
|
3337
3349
|
if (parser->next_token3.type == tCOMMENT) {
|
|
3338
3350
|
// skip
|
|
@@ -3424,7 +3436,7 @@ rbs_parser_t *rbs_parser_new(rbs_string_t string, const rbs_encoding_t *encoding
|
|
|
3424
3436
|
rbs_parser_t *parser = rbs_allocator_alloc(allocator, rbs_parser_t);
|
|
3425
3437
|
|
|
3426
3438
|
*parser = (rbs_parser_t) {
|
|
3427
|
-
.
|
|
3439
|
+
.lexer = lexer,
|
|
3428
3440
|
|
|
3429
3441
|
.current_token = NullToken,
|
|
3430
3442
|
.next_token = NullToken,
|
data/src/string.c
CHANGED
|
@@ -1,59 +1,10 @@
|
|
|
1
1
|
#include "rbs/string.h"
|
|
2
|
-
#include "rbs/defines.h"
|
|
3
2
|
|
|
4
3
|
#include <stdlib.h>
|
|
5
4
|
#include <string.h>
|
|
6
5
|
#include <stdio.h>
|
|
7
6
|
#include <ctype.h>
|
|
8
7
|
|
|
9
|
-
unsigned int rbs_utf8_string_to_codepoint(const rbs_string_t string) {
|
|
10
|
-
unsigned int codepoint = 0;
|
|
11
|
-
int remaining_bytes = 0;
|
|
12
|
-
|
|
13
|
-
const char *s = string.start;
|
|
14
|
-
const char *end = string.end;
|
|
15
|
-
|
|
16
|
-
if (s >= end) return 0; // End of string
|
|
17
|
-
|
|
18
|
-
if (RBS_LIKELY((*s & 0x80) == 0)) {
|
|
19
|
-
// Single byte character (0xxxxxxx)
|
|
20
|
-
return *s;
|
|
21
|
-
} else if ((*s & 0xE0) == 0xC0) {
|
|
22
|
-
// Two byte character (110xxxxx 10xxxxxx)
|
|
23
|
-
codepoint = *s & 0x1F;
|
|
24
|
-
remaining_bytes = 1;
|
|
25
|
-
} else if ((*s & 0xF0) == 0xE0) {
|
|
26
|
-
// Three byte character (1110xxxx 10xxxxxx 10xxxxxx)
|
|
27
|
-
codepoint = *s & 0x0F;
|
|
28
|
-
remaining_bytes = 2;
|
|
29
|
-
} else if ((*s & 0xF8) == 0xF0) {
|
|
30
|
-
// Four byte character (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
|
|
31
|
-
codepoint = *s & 0x07;
|
|
32
|
-
remaining_bytes = 3;
|
|
33
|
-
} else {
|
|
34
|
-
// Invalid UTF-8 sequence
|
|
35
|
-
return 0xFFFD; // Unicode replacement character
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
s++;
|
|
39
|
-
while (remaining_bytes > 0 && s < end) {
|
|
40
|
-
if ((*s & 0xC0) != 0x80) {
|
|
41
|
-
// Invalid continuation byte
|
|
42
|
-
return 0xFFFD;
|
|
43
|
-
}
|
|
44
|
-
codepoint = (codepoint << 6) | (*s & 0x3F);
|
|
45
|
-
s++;
|
|
46
|
-
remaining_bytes--;
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
if (remaining_bytes > 0) {
|
|
50
|
-
// Incomplete sequence
|
|
51
|
-
return 0xFFFD;
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
return codepoint;
|
|
55
|
-
}
|
|
56
|
-
|
|
57
8
|
rbs_string_t rbs_string_new(const char *start, const char *end) {
|
|
58
9
|
return (rbs_string_t) {
|
|
59
10
|
.start = start,
|
data/src/util/rbs_allocator.c
CHANGED
|
@@ -57,7 +57,7 @@ static size_t get_system_page_size(void) {
|
|
|
57
57
|
static rbs_allocator_page_t *rbs_allocator_page_new(size_t payload_size) {
|
|
58
58
|
const size_t page_header_size = sizeof(rbs_allocator_page_t);
|
|
59
59
|
|
|
60
|
-
rbs_allocator_page_t *page = malloc(page_header_size + payload_size);
|
|
60
|
+
rbs_allocator_page_t *page = (rbs_allocator_page_t *) malloc(page_header_size + payload_size);
|
|
61
61
|
page->size = payload_size;
|
|
62
62
|
page->used = 0;
|
|
63
63
|
|
|
@@ -65,7 +65,7 @@ static rbs_allocator_page_t *rbs_allocator_page_new(size_t payload_size) {
|
|
|
65
65
|
}
|
|
66
66
|
|
|
67
67
|
rbs_allocator_t *rbs_allocator_init(void) {
|
|
68
|
-
rbs_allocator_t *allocator = malloc(sizeof(rbs_allocator_t));
|
|
68
|
+
rbs_allocator_t *allocator = (rbs_allocator_t *) malloc(sizeof(rbs_allocator_t));
|
|
69
69
|
|
|
70
70
|
const size_t system_page_size = get_system_page_size();
|
|
71
71
|
|
data/src/util/rbs_assert.c
CHANGED
data/src/util/rbs_constant_pool.c
CHANGED
|
@@ -57,8 +57,8 @@ rbs_constant_pool_resize(rbs_constant_pool_t *pool) {
|
|
|
57
57
|
void *next = calloc(next_capacity, element_size);
|
|
58
58
|
if (next == NULL) return false;
|
|
59
59
|
|
|
60
|
-
rbs_constant_pool_bucket_t *next_buckets = next;
|
|
61
|
-
rbs_constant_t *next_constants = (
|
|
60
|
+
rbs_constant_pool_bucket_t *next_buckets = (rbs_constant_pool_bucket_t *) next;
|
|
61
|
+
rbs_constant_t *next_constants = (rbs_constant_t *) (((char *) next) + next_capacity * sizeof(rbs_constant_pool_bucket_t));
|
|
62
62
|
|
|
63
63
|
// For each bucket in the current constant pool, find the index in the
|
|
64
64
|
// next constant pool, and insert it.
|
|
@@ -111,8 +111,8 @@ bool rbs_constant_pool_init(rbs_constant_pool_t *pool, uint32_t capacity) {
|
|
|
111
111
|
void *memory = calloc(capacity, element_size);
|
|
112
112
|
if (memory == NULL) return false;
|
|
113
113
|
|
|
114
|
-
pool->buckets = memory;
|
|
115
|
-
pool->constants = (
|
|
114
|
+
pool->buckets = (rbs_constant_pool_bucket_t *) memory;
|
|
115
|
+
pool->constants = (rbs_constant_t *) (((char *) memory) + capacity * sizeof(rbs_constant_pool_bucket_t));
|
|
116
116
|
pool->size = 0;
|
|
117
117
|
pool->capacity = capacity;
|
|
118
118
|
return true;
|
data/src/util/rbs_unescape.c
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#include "rbs/util/rbs_unescape.h"
|
|
2
|
+
#include "rbs/util/rbs_encoding.h"
|
|
2
3
|
#include <string.h>
|
|
3
4
|
#include <stdlib.h>
|
|
4
5
|
#include <ctype.h>
|
|
@@ -42,20 +43,44 @@ static int octal_to_int(const char *octal, int length) {
|
|
|
42
43
|
return result;
|
|
43
44
|
}
|
|
44
45
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
if (
|
|
50
|
-
|
|
46
|
+
// Fills buf starting at index 'start' with the UTF-8 encoding of 'codepoint'.
|
|
47
|
+
// Returns the number of bytes written, or 0 when the output is not changed.
|
|
48
|
+
//
|
|
49
|
+
size_t rbs_utf8_fill_codepoint(char *buf, size_t start, size_t end, unsigned int codepoint) {
|
|
50
|
+
if (start + 4 > end) {
|
|
51
|
+
return 0;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
if (codepoint <= 0x7F) {
|
|
55
|
+
buf[start] = codepoint & 0x7F;
|
|
56
|
+
return 1;
|
|
57
|
+
} else if (codepoint <= 0x7FF) {
|
|
58
|
+
buf[start + 0] = 0xC0 | ((codepoint >> 6) & 0x1F);
|
|
59
|
+
buf[start + 1] = 0x80 | (codepoint & 0x3F);
|
|
60
|
+
return 2;
|
|
61
|
+
} else if (codepoint <= 0xFFFF) {
|
|
62
|
+
buf[start + 0] = 0xE0 | ((codepoint >> 12) & 0x0F);
|
|
63
|
+
buf[start + 1] = 0x80 | ((codepoint >> 6) & 0x3F);
|
|
64
|
+
buf[start + 2] = 0x80 | (codepoint & 0x3F);
|
|
65
|
+
return 3;
|
|
66
|
+
} else if (codepoint <= 0x10FFFF) {
|
|
67
|
+
buf[start + 0] = 0xF0 | ((codepoint >> 18) & 0x07);
|
|
68
|
+
buf[start + 1] = 0x80 | ((codepoint >> 12) & 0x3F);
|
|
69
|
+
buf[start + 2] = 0x80 | ((codepoint >> 6) & 0x3F);
|
|
70
|
+
buf[start + 3] = 0x80 | (codepoint & 0x3F);
|
|
71
|
+
return 4;
|
|
72
|
+
} else {
|
|
73
|
+
return 0;
|
|
74
|
+
}
|
|
51
75
|
}
|
|
52
76
|
|
|
53
|
-
rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t string, bool is_double_quote) {
|
|
77
|
+
rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t string, bool is_double_quote, bool is_unicode) {
|
|
54
78
|
if (!string.start) return RBS_STRING_NULL;
|
|
55
79
|
|
|
56
80
|
size_t len = string.end - string.start;
|
|
57
81
|
const char *input = string.start;
|
|
58
82
|
|
|
83
|
+
// The output cannot be longer than the input even after unescaping.
|
|
59
84
|
char *output = rbs_allocator_alloc_many(allocator, len + 1, char);
|
|
60
85
|
if (!output) return RBS_STRING_NULL;
|
|
61
86
|
|
|
@@ -79,9 +104,21 @@ rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t stri
|
|
|
79
104
|
i += hex_len + 2;
|
|
80
105
|
} else if (input[i + 1] == 'u' && i + 5 < len) {
|
|
81
106
|
// Unicode escape
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
107
|
+
|
|
108
|
+
if (is_unicode) {
|
|
109
|
+
// The UTF-8 representation is at most 4 bytes, shorter than the input length.
|
|
110
|
+
int value = hex_to_int(input + i + 2, 4);
|
|
111
|
+
j += rbs_utf8_fill_codepoint(output, j, len + 1, value);
|
|
112
|
+
i += 6;
|
|
113
|
+
} else {
|
|
114
|
+
// Copy the escape sequence as-is
|
|
115
|
+
output[j++] = input[i++];
|
|
116
|
+
output[j++] = input[i++];
|
|
117
|
+
output[j++] = input[i++];
|
|
118
|
+
output[j++] = input[i++];
|
|
119
|
+
output[j++] = input[i++];
|
|
120
|
+
output[j++] = input[i++];
|
|
121
|
+
}
|
|
85
122
|
} else {
|
|
86
123
|
// Other escapes
|
|
87
124
|
int found = 0;
|
|
@@ -114,18 +151,17 @@ rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t stri
|
|
|
114
151
|
return rbs_string_new(output, output + j);
|
|
115
152
|
}
|
|
116
153
|
|
|
117
|
-
rbs_string_t rbs_unquote_string(rbs_allocator_t *allocator, rbs_string_t input) {
|
|
118
|
-
unsigned int first_char =
|
|
119
|
-
|
|
154
|
+
rbs_string_t rbs_unquote_string(rbs_allocator_t *allocator, rbs_string_t input, const rbs_encoding_t *encoding) {
|
|
155
|
+
unsigned int first_char = input.start[0];
|
|
156
|
+
|
|
157
|
+
const char *new_start = input.start;
|
|
158
|
+
const char *new_end = input.end;
|
|
120
159
|
|
|
121
|
-
ptrdiff_t start_offset = 0;
|
|
122
160
|
if (first_char == '"' || first_char == '\'' || first_char == '`') {
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
byte_length -= 2 * bs;
|
|
161
|
+
new_start += 1;
|
|
162
|
+
new_end -= 1;
|
|
126
163
|
}
|
|
127
164
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
return unescape_string(allocator, string, first_char == '"');
|
|
165
|
+
rbs_string_t string = rbs_string_new(new_start, new_end);
|
|
166
|
+
return unescape_string(allocator, string, first_char == '"', encoding == RBS_ENCODING_UTF_8_ENTRY);
|
|
131
167
|
}
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rbs
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.10.0.pre.1
|
|
4
|
+
version: 3.10.0.pre.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Soutaro Matsumoto
|
|
@@ -139,6 +139,7 @@ files:
|
|
|
139
139
|
- docs/architecture.md
|
|
140
140
|
- docs/collection.md
|
|
141
141
|
- docs/data_and_struct.md
|
|
142
|
+
- docs/encoding.md
|
|
142
143
|
- docs/gem.md
|
|
143
144
|
- docs/rbs_by_example.md
|
|
144
145
|
- docs/repo.md
|
|
@@ -560,7 +561,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
560
561
|
- !ruby/object:Gem::Version
|
|
561
562
|
version: '0'
|
|
562
563
|
requirements: []
|
|
563
|
-
rubygems_version: 4.0.
|
|
564
|
+
rubygems_version: 4.0.1
|
|
564
565
|
specification_version: 4
|
|
565
566
|
summary: Type signature for Ruby.
|
|
566
567
|
test_files: []
|