@shd101wyy/yo 0.1.25 → 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/skills/yo-async-effects/SKILL.md +4 -4
- package/.github/skills/yo-async-effects/async-effects-recipes.md +40 -40
- package/.github/skills/yo-core-patterns/SKILL.md +1 -1
- package/.github/skills/yo-core-patterns/core-patterns-cheatsheet.md +30 -26
- package/.github/skills/yo-project-workflow/SKILL.md +6 -3
- package/.github/skills/yo-project-workflow/workflow-cheatsheet.md +34 -11
- package/.github/skills/yo-syntax/SKILL.md +7 -6
- package/.github/skills/yo-syntax/syntax-cheatsheet.md +78 -60
- package/.github/skills/yo-wasm-integration/wasm-integration-cheatsheet.md +3 -3
- package/README.md +10 -8
- package/out/cjs/index.cjs +583 -567
- package/out/cjs/yo-cli.cjs +664 -632
- package/out/cjs/yo-lsp.cjs +510 -485
- package/out/esm/index.mjs +538 -522
- package/out/types/src/codegen/codegen-c.d.ts +2 -2
- package/out/types/src/codegen/functions/collection.d.ts +2 -2
- package/out/types/src/codegen/functions/context.d.ts +3 -2
- package/out/types/src/codegen/types/collection.d.ts +2 -2
- package/out/types/src/codegen/utils/index.d.ts +3 -1
- package/out/types/src/doc/builder.d.ts +2 -2
- package/out/types/src/evaluator/calls/closure-type.d.ts +2 -2
- package/out/types/src/evaluator/calls/record-type.d.ts +11 -0
- package/out/types/src/evaluator/context.d.ts +8 -9
- package/out/types/src/evaluator/index.d.ts +3 -3
- package/out/types/src/evaluator/types/record.d.ts +14 -0
- package/out/types/src/evaluator/types/validation.d.ts +2 -2
- package/out/types/src/evaluator/values/anonymous-module.d.ts +5 -5
- package/out/types/src/evaluator/values/impl.d.ts +1 -1
- package/out/types/src/expr.d.ts +1 -4
- package/out/types/src/formatter.d.ts +11 -0
- package/out/types/src/function-value.d.ts +1 -1
- package/out/types/src/lsp/document-manager.d.ts +1 -1
- package/out/types/src/lsp/formatting.d.ts +2 -0
- package/out/types/src/module-manager.d.ts +3 -3
- package/out/types/src/tests/formatter.test.d.ts +1 -0
- package/out/types/src/types/creators.d.ts +3 -4
- package/out/types/src/types/definitions.d.ts +8 -19
- package/out/types/src/types/guards.d.ts +3 -3
- package/out/types/src/types/tags.d.ts +0 -1
- package/out/types/src/types/utils.d.ts +1 -1
- package/out/types/src/value-tag.d.ts +0 -1
- package/out/types/src/value.d.ts +6 -13
- package/out/types/tsconfig.tsbuildinfo +1 -1
- package/package.json +1 -1
- package/std/alg/hash.yo +13 -21
- package/std/allocator.yo +25 -40
- package/std/async.yo +3 -7
- package/std/build.yo +105 -151
- package/std/cli/arg_parser.yo +184 -169
- package/std/collections/array_list.yo +350 -314
- package/std/collections/btree_map.yo +142 -131
- package/std/collections/deque.yo +132 -128
- package/std/collections/hash_map.yo +542 -566
- package/std/collections/hash_set.yo +623 -687
- package/std/collections/linked_list.yo +275 -293
- package/std/collections/ordered_map.yo +113 -85
- package/std/collections/priority_queue.yo +73 -73
- package/std/crypto/md5.yo +191 -95
- package/std/crypto/random.yo +56 -64
- package/std/crypto/sha256.yo +151 -107
- package/std/encoding/base64.yo +87 -81
- package/std/encoding/hex.yo +43 -50
- package/std/encoding/html.yo +56 -81
- package/std/encoding/html_char_utils.yo +7 -13
- package/std/encoding/html_entities.yo +2248 -2253
- package/std/encoding/json.yo +316 -224
- package/std/encoding/punycode.yo +86 -116
- package/std/encoding/toml.yo +67 -66
- package/std/encoding/utf16.yo +37 -44
- package/std/env.yo +62 -91
- package/std/error.yo +12 -20
- package/std/fmt/display.yo +5 -9
- package/std/fmt/index.yo +8 -14
- package/std/fmt/to_string.yo +330 -315
- package/std/fmt/writer.yo +58 -87
- package/std/fs/dir.yo +83 -102
- package/std/fs/file.yo +147 -180
- package/std/fs/metadata.yo +45 -78
- package/std/fs/temp.yo +55 -65
- package/std/fs/types.yo +27 -40
- package/std/fs/walker.yo +53 -68
- package/std/gc.yo +5 -8
- package/std/glob.yo +30 -43
- package/std/http/client.yo +107 -120
- package/std/http/http.yo +106 -96
- package/std/http/index.yo +4 -6
- package/std/imm/list.yo +88 -93
- package/std/imm/map.yo +528 -464
- package/std/imm/set.yo +52 -57
- package/std/imm/sorted_map.yo +340 -286
- package/std/imm/sorted_set.yo +57 -63
- package/std/imm/string.yo +404 -345
- package/std/imm/vec.yo +173 -181
- package/std/io/reader.yo +3 -6
- package/std/io/writer.yo +4 -8
- package/std/libc/assert.yo +5 -9
- package/std/libc/ctype.yo +32 -22
- package/std/libc/dirent.yo +26 -25
- package/std/libc/errno.yo +164 -90
- package/std/libc/fcntl.yo +52 -45
- package/std/libc/float.yo +66 -44
- package/std/libc/limits.yo +42 -33
- package/std/libc/math.yo +53 -82
- package/std/libc/signal.yo +72 -47
- package/std/libc/stdatomic.yo +217 -188
- package/std/libc/stdint.yo +5 -29
- package/std/libc/stdio.yo +5 -29
- package/std/libc/stdlib.yo +32 -39
- package/std/libc/string.yo +5 -23
- package/std/libc/sys/stat.yo +58 -56
- package/std/libc/time.yo +5 -19
- package/std/libc/unistd.yo +5 -20
- package/std/libc/wctype.yo +6 -9
- package/std/libc/windows.yo +26 -30
- package/std/log.yo +41 -55
- package/std/net/addr.yo +102 -97
- package/std/net/dns.yo +27 -28
- package/std/net/errors.yo +50 -49
- package/std/net/tcp.yo +113 -124
- package/std/net/udp.yo +55 -66
- package/std/os/env.yo +35 -33
- package/std/os/signal.yo +15 -25
- package/std/path.yo +276 -311
- package/std/prelude.yo +6316 -4333
- package/std/process/command.yo +87 -103
- package/std/process/index.yo +12 -31
- package/std/regex/compiler.yo +196 -95
- package/std/regex/flags.yo +58 -39
- package/std/regex/index.yo +157 -173
- package/std/regex/match.yo +20 -31
- package/std/regex/node.yo +134 -152
- package/std/regex/parser.yo +283 -259
- package/std/regex/unicode.yo +172 -202
- package/std/regex/vm.yo +155 -171
- package/std/string/index.yo +5 -7
- package/std/string/rune.yo +45 -55
- package/std/string/string.yo +937 -964
- package/std/string/string_builder.yo +94 -104
- package/std/string/unicode.yo +46 -64
- package/std/sync/channel.yo +72 -73
- package/std/sync/cond.yo +31 -36
- package/std/sync/mutex.yo +30 -32
- package/std/sync/once.yo +13 -16
- package/std/sync/rwlock.yo +26 -31
- package/std/sync/waitgroup.yo +20 -25
- package/std/sys/advise.yo +16 -24
- package/std/sys/bufio/buf_reader.yo +77 -93
- package/std/sys/bufio/buf_writer.yo +52 -65
- package/std/sys/clock.yo +4 -9
- package/std/sys/constants.yo +77 -61
- package/std/sys/copy.yo +4 -10
- package/std/sys/dir.yo +26 -43
- package/std/sys/dns.yo +41 -61
- package/std/sys/errors.yo +95 -103
- package/std/sys/events.yo +45 -57
- package/std/sys/externs.yo +319 -267
- package/std/sys/fallocate.yo +7 -11
- package/std/sys/fcntl.yo +14 -22
- package/std/sys/file.yo +26 -40
- package/std/sys/future.yo +5 -8
- package/std/sys/iov.yo +12 -25
- package/std/sys/lock.yo +12 -13
- package/std/sys/mmap.yo +38 -43
- package/std/sys/path.yo +3 -8
- package/std/sys/perm.yo +7 -21
- package/std/sys/pipe.yo +5 -12
- package/std/sys/process.yo +23 -29
- package/std/sys/seek.yo +10 -12
- package/std/sys/signal.yo +7 -13
- package/std/sys/signals.yo +52 -35
- package/std/sys/socket.yo +63 -58
- package/std/sys/socketpair.yo +3 -6
- package/std/sys/sockinfo.yo +11 -20
- package/std/sys/statfs.yo +11 -34
- package/std/sys/statx.yo +25 -52
- package/std/sys/sysinfo.yo +15 -20
- package/std/sys/tcp.yo +62 -92
- package/std/sys/temp.yo +5 -9
- package/std/sys/time.yo +5 -15
- package/std/sys/timer.yo +6 -11
- package/std/sys/tty.yo +10 -18
- package/std/sys/udp.yo +22 -39
- package/std/sys/umask.yo +3 -6
- package/std/sys/unix.yo +33 -52
- package/std/testing/bench.yo +49 -52
- package/std/thread.yo +10 -15
- package/std/time/datetime.yo +105 -89
- package/std/time/duration.yo +43 -56
- package/std/time/instant.yo +13 -18
- package/std/time/sleep.yo +5 -9
- package/std/url/index.yo +184 -209
- package/std/worker.yo +6 -10
- package/out/types/src/evaluator/calls/module-type.d.ts +0 -11
- package/out/types/src/evaluator/types/module.d.ts +0 -19
|
@@ -1,173 +1,163 @@
|
|
|
1
1
|
//! Mutable UTF-8 string builder for efficient incremental construction.
|
|
2
|
-
|
|
3
|
-
{
|
|
4
|
-
{
|
|
5
|
-
{ String } :: import "./string.yo";
|
|
6
|
-
|
|
2
|
+
{ ArrayList } :: import("../collections/array_list.yo");
|
|
3
|
+
{ rune } :: import("./rune.yo");
|
|
4
|
+
{ String } :: import("./string.yo");
|
|
7
5
|
/**
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
6
|
+
* Mutable buffer for building a `String` incrementally.
|
|
7
|
+
*
|
|
8
|
+
* Use `StringBuilder` when you need to construct a string from many parts,
|
|
9
|
+
* appending bytes or strings in a loop, before converting to an immutable
|
|
10
|
+
* `String` with `to_string()`.
|
|
11
|
+
*
|
|
12
|
+
* ## Example
|
|
13
|
+
* ```rust
|
|
14
|
+
* sb := StringBuilder.new();
|
|
15
|
+
* sb.write_str("Hello");
|
|
16
|
+
* sb.write_str(", ");
|
|
17
|
+
* sb.write_string(`world`);
|
|
18
|
+
* sb.write_byte(u8(33)); // '!'
|
|
19
|
+
* result := sb.to_string();
|
|
20
|
+
* assert(result == `Hello, world!`, "built string");
|
|
21
|
+
* ```
|
|
22
|
+
*/
|
|
25
23
|
StringBuilder :: object(
|
|
26
24
|
_buf : ArrayList(u8)
|
|
27
25
|
);
|
|
28
|
-
|
|
29
|
-
|
|
26
|
+
impl(
|
|
27
|
+
StringBuilder,
|
|
30
28
|
/**
|
|
31
|
-
|
|
32
|
-
|
|
29
|
+
* Create a new, empty `StringBuilder`.
|
|
30
|
+
*/
|
|
33
31
|
new : (fn() -> Self)(
|
|
34
|
-
Self(_buf: ArrayList(u8).new())
|
|
32
|
+
Self(_buf : ArrayList(u8).new())
|
|
35
33
|
),
|
|
36
|
-
|
|
37
34
|
/**
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
with_capacity : (fn(capacity: usize) -> Self)(
|
|
41
|
-
Self(_buf: ArrayList(u8).with_capacity(capacity))
|
|
35
|
+
* Create a `StringBuilder` pre-allocated for `capacity` bytes.
|
|
36
|
+
*/
|
|
37
|
+
with_capacity : (fn(capacity : usize) -> Self)(
|
|
38
|
+
Self(_buf : ArrayList(u8).with_capacity(capacity))
|
|
42
39
|
),
|
|
43
|
-
|
|
44
40
|
/**
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
len : (fn(self: *(Self)) -> usize)(
|
|
41
|
+
* Returns the current number of bytes in the buffer.
|
|
42
|
+
*/
|
|
43
|
+
len : (fn(self : *(Self)) -> usize)(
|
|
48
44
|
self._buf.len()
|
|
49
45
|
),
|
|
50
|
-
|
|
51
46
|
/**
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
is_empty : (fn(self: *(Self)) -> bool)(
|
|
55
|
-
|
|
47
|
+
* Returns true if the buffer is empty.
|
|
48
|
+
*/
|
|
49
|
+
is_empty : (fn(self : *(Self)) -> bool)(
|
|
50
|
+
self._buf.len() == usize(0)
|
|
56
51
|
),
|
|
57
|
-
|
|
58
52
|
/**
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
write_str : (fn(self: *(Self), s: str) -> unit)({
|
|
53
|
+
* Append a `str` (raw byte slice) to the buffer.
|
|
54
|
+
*/
|
|
55
|
+
write_str : (fn(self : *(Self), s : str) -> unit)({
|
|
62
56
|
byte_len := s.len();
|
|
63
|
-
if(
|
|
57
|
+
if(byte_len > usize(0), {
|
|
64
58
|
self._buf.extend_from_ptr(s.ptr(), byte_len);
|
|
65
59
|
});
|
|
66
60
|
}),
|
|
67
|
-
|
|
68
61
|
/**
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
write_string : (fn(self: *(Self), s: String) -> unit)(
|
|
72
|
-
match(
|
|
62
|
+
* Append a `String` to the buffer.
|
|
63
|
+
*/
|
|
64
|
+
write_string : (fn(self : *(Self), s : String) -> unit)(
|
|
65
|
+
match(
|
|
66
|
+
s._bytes,
|
|
73
67
|
.None => (),
|
|
74
68
|
.Some(al) => {
|
|
75
69
|
i := usize(0);
|
|
76
70
|
n := al.len();
|
|
77
|
-
while
|
|
78
|
-
(i = (i + usize(1))),
|
|
79
|
-
{
|
|
71
|
+
while(i < n, i = (i + usize(1)), {
|
|
80
72
|
byte_opt := al.get(i);
|
|
81
|
-
match(
|
|
82
|
-
|
|
73
|
+
match(
|
|
74
|
+
byte_opt,
|
|
75
|
+
.Some(b) => {
|
|
76
|
+
self._buf.push(b);
|
|
77
|
+
},
|
|
83
78
|
.None => ()
|
|
84
79
|
);
|
|
85
|
-
};
|
|
80
|
+
});
|
|
86
81
|
}
|
|
87
82
|
)
|
|
88
83
|
),
|
|
89
|
-
|
|
90
84
|
/**
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
write_byte : (fn(self: *(Self), b: u8) -> unit)({
|
|
85
|
+
* Append a single byte to the buffer.
|
|
86
|
+
*/
|
|
87
|
+
write_byte : (fn(self : *(Self), b : u8) -> unit)({
|
|
94
88
|
self._buf.push(b);
|
|
95
89
|
}),
|
|
96
|
-
|
|
97
90
|
/**
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
write_rune : (fn(self: *(Self), r: rune) -> unit)({
|
|
91
|
+
* Append a single Unicode code point, encoded as UTF-8.
|
|
92
|
+
*
|
|
93
|
+
* ## Example
|
|
94
|
+
* ```rust
|
|
95
|
+
* sb := StringBuilder.new();
|
|
96
|
+
* sb.write_rune(rune(0x1F600)); // 😀
|
|
97
|
+
* sb.write_rune(rune(0x41)); // 'A'
|
|
98
|
+
* ```
|
|
99
|
+
*/
|
|
100
|
+
write_rune : (fn(self : *(Self), r : rune) -> unit)({
|
|
108
101
|
cp := r.char;
|
|
109
102
|
cond(
|
|
110
103
|
(cp < u32(0x80)) => {
|
|
111
104
|
self._buf.push(u8(cp));
|
|
112
105
|
},
|
|
113
106
|
(cp < u32(0x800)) => {
|
|
114
|
-
self._buf.push(u8(
|
|
115
|
-
self._buf.push(u8(
|
|
107
|
+
self._buf.push(u8(u32(0xC0) | (cp >> u32(6))));
|
|
108
|
+
self._buf.push(u8(u32(0x80) | (cp & u32(0x3F))));
|
|
116
109
|
},
|
|
117
110
|
(cp < u32(0x10000)) => {
|
|
118
|
-
self._buf.push(u8(
|
|
119
|
-
self._buf.push(u8(
|
|
120
|
-
self._buf.push(u8(
|
|
111
|
+
self._buf.push(u8(u32(0xE0) | (cp >> u32(12))));
|
|
112
|
+
self._buf.push(u8(u32(0x80) | ((cp >> u32(6)) & u32(0x3F))));
|
|
113
|
+
self._buf.push(u8(u32(0x80) | (cp & u32(0x3F))));
|
|
121
114
|
},
|
|
122
115
|
true => {
|
|
123
|
-
self._buf.push(u8(
|
|
124
|
-
self._buf.push(u8(
|
|
125
|
-
self._buf.push(u8(
|
|
126
|
-
self._buf.push(u8(
|
|
116
|
+
self._buf.push(u8(u32(0xF0) | (cp >> u32(18))));
|
|
117
|
+
self._buf.push(u8(u32(0x80) | ((cp >> u32(12)) & u32(0x3F))));
|
|
118
|
+
self._buf.push(u8(u32(0x80) | ((cp >> u32(6)) & u32(0x3F))));
|
|
119
|
+
self._buf.push(u8(u32(0x80) | (cp & u32(0x3F))));
|
|
127
120
|
}
|
|
128
121
|
);
|
|
129
122
|
}),
|
|
130
|
-
|
|
131
123
|
/**
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
write_line : (fn(self: *(Self), s: String) -> unit)({
|
|
124
|
+
* Append a `String` followed by a newline byte (`\n`).
|
|
125
|
+
*/
|
|
126
|
+
write_line : (fn(self : *(Self), s : String) -> unit)({
|
|
135
127
|
self.write_string(s);
|
|
136
128
|
self._buf.push(u8(10));
|
|
137
129
|
}),
|
|
138
|
-
|
|
139
130
|
/**
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
to_string : (fn(self: *(Self)) -> String)({
|
|
131
|
+
* Copy the accumulated bytes into a new `String` and reset the builder.
|
|
132
|
+
* The builder is left empty after this call.
|
|
133
|
+
*/
|
|
134
|
+
to_string : (fn(self : *(Self)) -> String)({
|
|
144
135
|
n := self._buf.len();
|
|
145
136
|
cond(
|
|
146
|
-
(n == usize(0)) => String(_bytes
|
|
137
|
+
(n == usize(0)) => String(_bytes :.None),
|
|
147
138
|
true => {
|
|
148
139
|
buf := ArrayList(u8).with_capacity(n);
|
|
149
140
|
i := usize(0);
|
|
150
|
-
while
|
|
151
|
-
(i = (i + usize(1))),
|
|
152
|
-
{
|
|
141
|
+
while(i < n, i = (i + usize(1)), {
|
|
153
142
|
byte_opt := self._buf.get(i);
|
|
154
|
-
match(
|
|
155
|
-
|
|
143
|
+
match(
|
|
144
|
+
byte_opt,
|
|
145
|
+
.Some(b) => {
|
|
146
|
+
buf.push(b);
|
|
147
|
+
},
|
|
156
148
|
.None => ()
|
|
157
149
|
);
|
|
158
|
-
};
|
|
150
|
+
});
|
|
159
151
|
self._buf = ArrayList(u8).new();
|
|
160
|
-
String(_bytes
|
|
152
|
+
String(_bytes :.Some(buf))
|
|
161
153
|
}
|
|
162
154
|
)
|
|
163
155
|
}),
|
|
164
|
-
|
|
165
156
|
/**
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
clear : (fn(self: *(Self)) -> unit)({
|
|
157
|
+
* Clear the buffer by replacing it with a new, empty `ArrayList(u8)` (the previous storage is not reused).
|
|
158
|
+
*/
|
|
159
|
+
clear : (fn(self : *(Self)) -> unit)({
|
|
169
160
|
self._buf = ArrayList(u8).new();
|
|
170
161
|
})
|
|
171
162
|
);
|
|
172
|
-
|
|
173
|
-
export StringBuilder;
|
|
163
|
+
export(StringBuilder);
|
package/std/string/unicode.yo
CHANGED
|
@@ -8,84 +8,78 @@
|
|
|
8
8
|
//! lower := unicode_to_lowercase(`HELLO WÖRLD`); // "hello wörld"
|
|
9
9
|
//! upper := unicode_to_uppercase(`hello wörld`); // "HELLO WÖRLD"
|
|
10
10
|
//! ```
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
{ ArrayList } :: import "../collections/array_list";
|
|
14
|
-
|
|
11
|
+
open(import("../string"));
|
|
12
|
+
{ ArrayList } :: import("../collections/array_list");
|
|
15
13
|
// Declare towlower/towupper directly with i32 to avoid wint_t cast issues.
|
|
16
14
|
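// wint_t is an integer type on all major platforms, so i32 values convert implicitly. NOTE(review): it is 16-bit on Windows (MSVC), so code points above U+FFFF may not map correctly there — confirm.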
|
|
17
|
-
c_include
|
|
15
|
+
c_include(
|
|
16
|
+
"<wctype.h>",
|
|
18
17
|
towlower :
|
|
19
18
|
fn(wc : i32) -> i32,
|
|
20
19
|
towupper :
|
|
21
20
|
fn(wc : i32) -> i32
|
|
22
|
-
;
|
|
23
|
-
|
|
21
|
+
);
|
|
24
22
|
// Result of decoding a single UTF-8 codepoint
|
|
25
23
|
_DecodeResult :: struct(
|
|
26
24
|
codepoint : i32,
|
|
27
25
|
bytes_consumed : usize
|
|
28
26
|
);
|
|
29
|
-
|
|
30
27
|
// Decode a single UTF-8 codepoint from bytes at position i.
|
|
31
|
-
_decode_utf8 :: (fn(bytes: ArrayList(u8), i: usize) -> _DecodeResult)({
|
|
28
|
+
_decode_utf8 :: (fn(bytes : ArrayList(u8), i : usize) -> _DecodeResult)({
|
|
32
29
|
(b0 : i32) = i32(bytes.get(i).unwrap());
|
|
33
30
|
cond(
|
|
34
31
|
// 1-byte ASCII
|
|
35
|
-
((b0 & i32(0x80)) == i32(0)) =>
|
|
36
|
-
_DecodeResult(codepoint: b0, bytes_consumed: usize(1))
|
|
37
|
-
,
|
|
32
|
+
((b0 & i32(0x80)) == i32(0)) =>
|
|
33
|
+
_DecodeResult(codepoint : b0, bytes_consumed : usize(1)),
|
|
38
34
|
// 2-byte
|
|
39
35
|
((b0 & i32(0xE0)) == i32(0xC0)) => {
|
|
40
|
-
(b1 : i32) = i32(bytes.get(
|
|
36
|
+
(b1 : i32) = i32(bytes.get(i + usize(1)).unwrap());
|
|
41
37
|
(cp : i32) = (((b0 & i32(0x1F)) << i32(6)) | (b1 & i32(0x3F)));
|
|
42
|
-
_DecodeResult(codepoint: cp, bytes_consumed: usize(2))
|
|
38
|
+
_DecodeResult(codepoint : cp, bytes_consumed : usize(2))
|
|
43
39
|
},
|
|
44
40
|
// 3-byte
|
|
45
41
|
((b0 & i32(0xF0)) == i32(0xE0)) => {
|
|
46
|
-
(b1 : i32) = i32(bytes.get(
|
|
47
|
-
(b2 : i32) = i32(bytes.get(
|
|
42
|
+
(b1 : i32) = i32(bytes.get(i + usize(1)).unwrap());
|
|
43
|
+
(b2 : i32) = i32(bytes.get(i + usize(2)).unwrap());
|
|
48
44
|
(cp : i32) = ((((b0 & i32(0x0F)) << i32(12)) | ((b1 & i32(0x3F)) << i32(6))) | (b2 & i32(0x3F)));
|
|
49
|
-
_DecodeResult(codepoint: cp, bytes_consumed: usize(3))
|
|
45
|
+
_DecodeResult(codepoint : cp, bytes_consumed : usize(3))
|
|
50
46
|
},
|
|
51
47
|
// 4-byte (catch-all branch: any lead byte not matched above is decoded as a 4-byte sequence)
|
|
52
48
|
true => {
|
|
53
|
-
(b1 : i32) = i32(bytes.get(
|
|
54
|
-
(b2 : i32) = i32(bytes.get(
|
|
55
|
-
(b3 : i32) = i32(bytes.get(
|
|
49
|
+
(b1 : i32) = i32(bytes.get(i + usize(1)).unwrap());
|
|
50
|
+
(b2 : i32) = i32(bytes.get(i + usize(2)).unwrap());
|
|
51
|
+
(b3 : i32) = i32(bytes.get(i + usize(3)).unwrap());
|
|
56
52
|
(cp : i32) = (((((b0 & i32(0x07)) << i32(18)) | ((b1 & i32(0x3F)) << i32(12))) | ((b2 & i32(0x3F)) << i32(6))) | (b3 & i32(0x3F)));
|
|
57
|
-
_DecodeResult(codepoint: cp, bytes_consumed: usize(4))
|
|
53
|
+
_DecodeResult(codepoint : cp, bytes_consumed : usize(4))
|
|
58
54
|
}
|
|
59
55
|
)
|
|
60
56
|
});
|
|
61
|
-
|
|
62
57
|
// Encode a Unicode codepoint as UTF-8 bytes into an ArrayList.
|
|
63
|
-
_encode_utf8 :: (fn(cp: i32, out: *(ArrayList(u8))) -> unit)({
|
|
58
|
+
_encode_utf8 :: (fn(cp : i32, out : *(ArrayList(u8))) -> unit)({
|
|
64
59
|
cond(
|
|
65
60
|
(cp < i32(0x80)) => {
|
|
66
61
|
out.*.push(u8(cp));
|
|
67
62
|
},
|
|
68
63
|
(cp < i32(0x800)) => {
|
|
69
|
-
out.*.push(u8(
|
|
70
|
-
out.*.push(u8(
|
|
64
|
+
out.*.push(u8(i32(0xC0) | (cp >> i32(6))));
|
|
65
|
+
out.*.push(u8(i32(0x80) | (cp & i32(0x3F))));
|
|
71
66
|
},
|
|
72
67
|
(cp < i32(0x10000)) => {
|
|
73
|
-
out.*.push(u8(
|
|
74
|
-
out.*.push(u8(
|
|
75
|
-
out.*.push(u8(
|
|
68
|
+
out.*.push(u8(i32(0xE0) | (cp >> i32(12))));
|
|
69
|
+
out.*.push(u8(i32(0x80) | ((cp >> i32(6)) & i32(0x3F))));
|
|
70
|
+
out.*.push(u8(i32(0x80) | (cp & i32(0x3F))));
|
|
76
71
|
},
|
|
77
72
|
true => {
|
|
78
|
-
out.*.push(u8(
|
|
79
|
-
out.*.push(u8(
|
|
80
|
-
out.*.push(u8(
|
|
81
|
-
out.*.push(u8(
|
|
73
|
+
out.*.push(u8(i32(0xF0) | (cp >> i32(18))));
|
|
74
|
+
out.*.push(u8(i32(0x80) | ((cp >> i32(12)) & i32(0x3F))));
|
|
75
|
+
out.*.push(u8(i32(0x80) | ((cp >> i32(6)) & i32(0x3F))));
|
|
76
|
+
out.*.push(u8(i32(0x80) | (cp & i32(0x3F))));
|
|
82
77
|
}
|
|
83
78
|
);
|
|
84
79
|
});
|
|
85
|
-
|
|
86
80
|
// Special case folding: codepoints that expand to multiple codepoints
|
|
87
81
|
// when lowercased. These are Unicode case folding entries of type 'F' (full).
|
|
88
|
-
_special_to_lower :: (fn(cp: i32, out: *(ArrayList(u8))) -> bool)(
|
|
82
|
+
_special_to_lower :: (fn(cp : i32, out : *(ArrayList(u8))) -> bool)(
|
|
89
83
|
cond(
|
|
90
84
|
// ẞ (U+1E9E LATIN CAPITAL LETTER SHARP S) → ss
|
|
91
85
|
(cp == i32(0x1E9E)) => {
|
|
@@ -103,10 +97,9 @@ _special_to_lower :: (fn(cp: i32, out: *(ArrayList(u8))) -> bool)(
|
|
|
103
97
|
true => false
|
|
104
98
|
)
|
|
105
99
|
);
|
|
106
|
-
|
|
107
100
|
// Special case folding: codepoints that expand to multiple codepoints
|
|
108
101
|
// when uppercased.
|
|
109
|
-
_special_to_upper :: (fn(cp: i32, out: *(ArrayList(u8))) -> bool)(
|
|
102
|
+
_special_to_upper :: (fn(cp : i32, out : *(ArrayList(u8))) -> bool)(
|
|
110
103
|
cond(
|
|
111
104
|
// ß (U+00DF LATIN SMALL LETTER SHARP S) → SS
|
|
112
105
|
(cp == i32(0x00DF)) => {
|
|
@@ -161,21 +154,19 @@ _special_to_upper :: (fn(cp: i32, out: *(ArrayList(u8))) -> bool)(
|
|
|
161
154
|
true => false
|
|
162
155
|
)
|
|
163
156
|
);
|
|
164
|
-
|
|
165
157
|
// Convert a String to lowercase using Unicode-aware case mapping.
|
|
166
158
|
/// Convert a `String` to lowercase using Unicode case mapping rules.
|
|
167
159
|
/// Handles both ASCII and non-ASCII codepoints, plus special multi-char expansions (e.g., ẞ → ss).
|
|
168
|
-
unicode_to_lowercase :: (fn(input: String) -> String)({
|
|
160
|
+
unicode_to_lowercase :: (fn(input : String) -> String)({
|
|
169
161
|
(bytes : ArrayList(u8)) = input.as_bytes();
|
|
170
162
|
(out : ArrayList(u8)) = ArrayList(u8).with_capacity(bytes.len());
|
|
171
163
|
(i : usize) = usize(0);
|
|
172
|
-
|
|
173
|
-
while (i < bytes.len()), {
|
|
164
|
+
while(i < bytes.len(), {
|
|
174
165
|
(b0 : i32) = i32(bytes.get(i).unwrap());
|
|
175
166
|
// Fast path for ASCII
|
|
176
|
-
if((
|
|
177
|
-
if((
|
|
178
|
-
out.push(u8(
|
|
167
|
+
if((b0 & i32(0x80)) == i32(0), {
|
|
168
|
+
if((b0 >= i32(0x41)) && (b0 <= i32(0x5A)), {
|
|
169
|
+
out.push(u8(b0 + i32(0x20)));
|
|
179
170
|
}, {
|
|
180
171
|
out.push(u8(b0));
|
|
181
172
|
});
|
|
@@ -185,35 +176,30 @@ unicode_to_lowercase :: (fn(input: String) -> String)({
|
|
|
185
176
|
(result : _DecodeResult) = _decode_utf8(bytes, i);
|
|
186
177
|
(cp : i32) = result.codepoint;
|
|
187
178
|
(len : usize) = result.bytes_consumed;
|
|
188
|
-
|
|
189
179
|
// Try special case folding first
|
|
190
|
-
if(!(_special_to_lower(cp, (
|
|
180
|
+
if(!(_special_to_lower(cp, &(out))), {
|
|
191
181
|
// Use C towlower for standard Unicode lowercase
|
|
192
182
|
(lower : i32) = i32(towlower(cp));
|
|
193
|
-
_encode_utf8(lower, (
|
|
183
|
+
_encode_utf8(lower, &(out));
|
|
194
184
|
});
|
|
195
|
-
|
|
196
185
|
i = (i + len);
|
|
197
186
|
});
|
|
198
|
-
};
|
|
199
|
-
|
|
187
|
+
});
|
|
200
188
|
String.from_bytes(out)
|
|
201
189
|
});
|
|
202
|
-
|
|
203
190
|
// Convert a String to uppercase using Unicode-aware case mapping.
|
|
204
191
|
/// Convert a `String` to uppercase using Unicode case mapping rules.
|
|
205
192
|
/// Handles both ASCII and non-ASCII codepoints, plus special multi-char expansions (e.g., ß → SS).
|
|
206
|
-
unicode_to_uppercase :: (fn(input: String) -> String)({
|
|
193
|
+
unicode_to_uppercase :: (fn(input : String) -> String)({
|
|
207
194
|
(bytes : ArrayList(u8)) = input.as_bytes();
|
|
208
195
|
(out : ArrayList(u8)) = ArrayList(u8).with_capacity(bytes.len());
|
|
209
196
|
(i : usize) = usize(0);
|
|
210
|
-
|
|
211
|
-
while (i < bytes.len()), {
|
|
197
|
+
while(i < bytes.len(), {
|
|
212
198
|
(b0 : i32) = i32(bytes.get(i).unwrap());
|
|
213
199
|
// Fast path for ASCII
|
|
214
|
-
if((
|
|
215
|
-
if((
|
|
216
|
-
out.push(u8(
|
|
200
|
+
if((b0 & i32(0x80)) == i32(0), {
|
|
201
|
+
if((b0 >= i32(0x61)) && (b0 <= i32(0x7A)), {
|
|
202
|
+
out.push(u8(b0 - i32(0x20)));
|
|
217
203
|
}, {
|
|
218
204
|
out.push(u8(b0));
|
|
219
205
|
});
|
|
@@ -223,19 +209,15 @@ unicode_to_uppercase :: (fn(input: String) -> String)({
|
|
|
223
209
|
(result : _DecodeResult) = _decode_utf8(bytes, i);
|
|
224
210
|
(cp : i32) = result.codepoint;
|
|
225
211
|
(len : usize) = result.bytes_consumed;
|
|
226
|
-
|
|
227
212
|
// Try special case folding first
|
|
228
|
-
if(!(_special_to_upper(cp, (
|
|
213
|
+
if(!(_special_to_upper(cp, &(out))), {
|
|
229
214
|
// Use C towupper for standard Unicode uppercase
|
|
230
215
|
(upper : i32) = i32(towupper(cp));
|
|
231
|
-
_encode_utf8(upper, (
|
|
216
|
+
_encode_utf8(upper, &(out));
|
|
232
217
|
});
|
|
233
|
-
|
|
234
218
|
i = (i + len);
|
|
235
219
|
});
|
|
236
|
-
};
|
|
237
|
-
|
|
220
|
+
});
|
|
238
221
|
String.from_bytes(out)
|
|
239
222
|
});
|
|
240
|
-
|
|
241
|
-
export unicode_to_lowercase, unicode_to_uppercase;
|
|
223
|
+
export(unicode_to_lowercase, unicode_to_uppercase);
|