@shd101wyy/yo 0.1.26 → 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/skills/yo-async-effects/SKILL.md +4 -4
- package/.github/skills/yo-async-effects/async-effects-recipes.md +34 -34
- package/.github/skills/yo-core-patterns/SKILL.md +1 -1
- package/.github/skills/yo-core-patterns/core-patterns-cheatsheet.md +26 -26
- package/.github/skills/yo-project-workflow/SKILL.md +6 -3
- package/.github/skills/yo-project-workflow/workflow-cheatsheet.md +34 -11
- package/.github/skills/yo-syntax/SKILL.md +7 -6
- package/.github/skills/yo-syntax/syntax-cheatsheet.md +73 -60
- package/.github/skills/yo-wasm-integration/wasm-integration-cheatsheet.md +3 -3
- package/README.md +10 -8
- package/out/cjs/index.cjs +456 -438
- package/out/cjs/yo-cli.cjs +576 -543
- package/out/cjs/yo-lsp.cjs +559 -532
- package/out/esm/index.mjs +281 -263
- package/out/types/src/formatter.d.ts +11 -0
- package/out/types/src/lsp/formatting.d.ts +2 -0
- package/out/types/src/tests/formatter.test.d.ts +1 -0
- package/out/types/tsconfig.tsbuildinfo +1 -1
- package/package.json +1 -1
- package/std/alg/hash.yo +13 -21
- package/std/allocator.yo +25 -40
- package/std/async.yo +3 -7
- package/std/build.yo +105 -151
- package/std/cli/arg_parser.yo +184 -169
- package/std/collections/array_list.yo +350 -314
- package/std/collections/btree_map.yo +142 -131
- package/std/collections/deque.yo +132 -128
- package/std/collections/hash_map.yo +542 -566
- package/std/collections/hash_set.yo +623 -687
- package/std/collections/linked_list.yo +275 -293
- package/std/collections/ordered_map.yo +113 -85
- package/std/collections/priority_queue.yo +73 -73
- package/std/crypto/md5.yo +191 -95
- package/std/crypto/random.yo +56 -64
- package/std/crypto/sha256.yo +151 -107
- package/std/encoding/base64.yo +87 -81
- package/std/encoding/hex.yo +43 -50
- package/std/encoding/html.yo +56 -81
- package/std/encoding/html_char_utils.yo +7 -13
- package/std/encoding/html_entities.yo +2248 -2253
- package/std/encoding/json.yo +316 -224
- package/std/encoding/punycode.yo +86 -116
- package/std/encoding/toml.yo +67 -66
- package/std/encoding/utf16.yo +37 -44
- package/std/env.yo +62 -91
- package/std/error.yo +7 -15
- package/std/fmt/display.yo +5 -9
- package/std/fmt/index.yo +8 -14
- package/std/fmt/to_string.yo +330 -315
- package/std/fmt/writer.yo +58 -87
- package/std/fs/dir.yo +83 -102
- package/std/fs/file.yo +147 -180
- package/std/fs/metadata.yo +45 -78
- package/std/fs/temp.yo +55 -65
- package/std/fs/types.yo +27 -40
- package/std/fs/walker.yo +53 -68
- package/std/gc.yo +5 -8
- package/std/glob.yo +30 -43
- package/std/http/client.yo +107 -120
- package/std/http/http.yo +106 -96
- package/std/http/index.yo +4 -6
- package/std/imm/list.yo +88 -93
- package/std/imm/map.yo +528 -464
- package/std/imm/set.yo +52 -57
- package/std/imm/sorted_map.yo +340 -286
- package/std/imm/sorted_set.yo +57 -63
- package/std/imm/string.yo +404 -345
- package/std/imm/vec.yo +173 -181
- package/std/io/reader.yo +3 -6
- package/std/io/writer.yo +4 -8
- package/std/libc/assert.yo +5 -9
- package/std/libc/ctype.yo +32 -22
- package/std/libc/dirent.yo +26 -25
- package/std/libc/errno.yo +164 -90
- package/std/libc/fcntl.yo +52 -45
- package/std/libc/float.yo +66 -44
- package/std/libc/limits.yo +42 -33
- package/std/libc/math.yo +53 -82
- package/std/libc/signal.yo +72 -47
- package/std/libc/stdatomic.yo +217 -188
- package/std/libc/stdint.yo +5 -29
- package/std/libc/stdio.yo +5 -29
- package/std/libc/stdlib.yo +32 -39
- package/std/libc/string.yo +5 -23
- package/std/libc/sys/stat.yo +58 -56
- package/std/libc/time.yo +5 -19
- package/std/libc/unistd.yo +5 -20
- package/std/libc/wctype.yo +6 -9
- package/std/libc/windows.yo +26 -30
- package/std/log.yo +41 -55
- package/std/net/addr.yo +102 -97
- package/std/net/dns.yo +27 -28
- package/std/net/errors.yo +50 -49
- package/std/net/tcp.yo +113 -124
- package/std/net/udp.yo +55 -66
- package/std/os/env.yo +35 -33
- package/std/os/signal.yo +15 -25
- package/std/path.yo +276 -311
- package/std/prelude.yo +6304 -4315
- package/std/process/command.yo +87 -103
- package/std/process/index.yo +12 -31
- package/std/regex/compiler.yo +196 -95
- package/std/regex/flags.yo +58 -39
- package/std/regex/index.yo +157 -173
- package/std/regex/match.yo +20 -31
- package/std/regex/node.yo +134 -152
- package/std/regex/parser.yo +283 -259
- package/std/regex/unicode.yo +172 -202
- package/std/regex/vm.yo +155 -171
- package/std/string/index.yo +5 -7
- package/std/string/rune.yo +45 -55
- package/std/string/string.yo +937 -964
- package/std/string/string_builder.yo +94 -104
- package/std/string/unicode.yo +46 -64
- package/std/sync/channel.yo +72 -73
- package/std/sync/cond.yo +31 -36
- package/std/sync/mutex.yo +30 -32
- package/std/sync/once.yo +13 -16
- package/std/sync/rwlock.yo +26 -31
- package/std/sync/waitgroup.yo +20 -25
- package/std/sys/advise.yo +16 -24
- package/std/sys/bufio/buf_reader.yo +77 -93
- package/std/sys/bufio/buf_writer.yo +52 -65
- package/std/sys/clock.yo +4 -9
- package/std/sys/constants.yo +77 -61
- package/std/sys/copy.yo +4 -10
- package/std/sys/dir.yo +26 -43
- package/std/sys/dns.yo +41 -61
- package/std/sys/errors.yo +95 -103
- package/std/sys/events.yo +45 -57
- package/std/sys/externs.yo +319 -267
- package/std/sys/fallocate.yo +7 -11
- package/std/sys/fcntl.yo +14 -22
- package/std/sys/file.yo +26 -40
- package/std/sys/future.yo +5 -8
- package/std/sys/iov.yo +12 -25
- package/std/sys/lock.yo +12 -13
- package/std/sys/mmap.yo +38 -43
- package/std/sys/path.yo +3 -8
- package/std/sys/perm.yo +7 -21
- package/std/sys/pipe.yo +5 -12
- package/std/sys/process.yo +23 -29
- package/std/sys/seek.yo +10 -12
- package/std/sys/signal.yo +7 -13
- package/std/sys/signals.yo +52 -35
- package/std/sys/socket.yo +63 -58
- package/std/sys/socketpair.yo +3 -6
- package/std/sys/sockinfo.yo +11 -20
- package/std/sys/statfs.yo +11 -34
- package/std/sys/statx.yo +25 -52
- package/std/sys/sysinfo.yo +15 -20
- package/std/sys/tcp.yo +62 -92
- package/std/sys/temp.yo +5 -9
- package/std/sys/time.yo +5 -15
- package/std/sys/timer.yo +6 -11
- package/std/sys/tty.yo +10 -18
- package/std/sys/udp.yo +22 -39
- package/std/sys/umask.yo +3 -6
- package/std/sys/unix.yo +33 -52
- package/std/testing/bench.yo +49 -52
- package/std/thread.yo +10 -15
- package/std/time/datetime.yo +105 -89
- package/std/time/duration.yo +43 -56
- package/std/time/instant.yo +13 -18
- package/std/time/sleep.yo +5 -9
- package/std/url/index.yo +184 -209
- package/std/worker.yo +6 -10
package/std/encoding/hex.yo
CHANGED
|
@@ -10,93 +10,86 @@
|
|
|
10
10
|
//! s := hex_encode(data); // "deadbeef"
|
|
11
11
|
//! b := hex_decode("deadbeef");
|
|
12
12
|
//! ```
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
{
|
|
16
|
-
{
|
|
17
|
-
{ ToString } :: import "../fmt";
|
|
18
|
-
|
|
13
|
+
open(import("../string"));
|
|
14
|
+
{ ArrayList } :: import("../collections/array_list");
|
|
15
|
+
{ Error, AnyError, Exception } :: import("../error");
|
|
16
|
+
{ ToString } :: import("../fmt");
|
|
19
17
|
// ============================================================================
|
|
20
18
|
// Error type
|
|
21
19
|
// ============================================================================
|
|
22
|
-
|
|
23
20
|
/// Encoding/decoding error type.
|
|
24
21
|
EncodingError :: enum(
|
|
25
22
|
/// Invalid character encountered during decoding.
|
|
26
|
-
InvalidChar(ch: u8),
|
|
23
|
+
InvalidChar(ch : u8),
|
|
27
24
|
/// Input string has odd length (hex requires even length).
|
|
28
25
|
OddLength
|
|
29
26
|
);
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
27
|
+
impl(
|
|
28
|
+
EncodingError,
|
|
29
|
+
ToString(
|
|
30
|
+
to_string : (
|
|
31
|
+
self ->
|
|
32
|
+
match(
|
|
33
|
+
self,
|
|
34
|
+
.InvalidChar(ch) => `encoding error: invalid character ${ch}`,
|
|
35
|
+
.OddLength => `encoding error: odd length`
|
|
36
|
+
)
|
|
36
37
|
)
|
|
37
38
|
)
|
|
38
|
-
)
|
|
39
|
-
|
|
39
|
+
);
|
|
40
40
|
impl(EncodingError, Error());
|
|
41
|
-
|
|
42
|
-
export EncodingError;
|
|
43
|
-
|
|
41
|
+
export(EncodingError);
|
|
44
42
|
// ============================================================================
|
|
45
43
|
// Encoding
|
|
46
44
|
// ============================================================================
|
|
47
|
-
|
|
48
45
|
_HEX_CHARS :: "0123456789abcdef";
|
|
49
|
-
|
|
50
46
|
// Encode bytes as lowercase hexadecimal.
|
|
51
|
-
_hex_encode_impl :: (fn(data: ArrayList(u8), hex_chars: str) -> String)({
|
|
52
|
-
out := ArrayList(u8).with_capacity(
|
|
47
|
+
_hex_encode_impl :: (fn(data : ArrayList(u8), hex_chars : str) -> String)({
|
|
48
|
+
out := ArrayList(u8).with_capacity(data.len() * usize(2));
|
|
53
49
|
i := usize(0);
|
|
54
|
-
while
|
|
50
|
+
while(i < data.len(), i = (i + usize(1)), {
|
|
55
51
|
b := data.get(i).unwrap();
|
|
56
|
-
hi := usize((
|
|
57
|
-
lo := usize(
|
|
52
|
+
hi := usize((b >> u8(4)) & u8(0xF));
|
|
53
|
+
lo := usize(b & u8(0xF));
|
|
58
54
|
out.push(hex_chars.bytes(hi));
|
|
59
55
|
out.push(hex_chars.bytes(lo));
|
|
60
|
-
};
|
|
56
|
+
});
|
|
61
57
|
String.from_bytes(out)
|
|
62
58
|
});
|
|
63
|
-
|
|
64
59
|
/// Encode bytes as a lowercase hexadecimal string.
|
|
65
|
-
hex_encode :: (fn(data: ArrayList(u8)) -> String)(
|
|
60
|
+
hex_encode :: (fn(data : ArrayList(u8)) -> String)(
|
|
66
61
|
_hex_encode_impl(data, _HEX_CHARS)
|
|
67
62
|
);
|
|
68
|
-
|
|
69
|
-
export hex_encode;
|
|
70
|
-
|
|
63
|
+
export(hex_encode);
|
|
71
64
|
// ============================================================================
|
|
72
65
|
// Decoding
|
|
73
66
|
// ============================================================================
|
|
74
|
-
|
|
75
|
-
_hex_nibble :: (fn(c: u8, using(exn : Exception)) -> u8)(
|
|
67
|
+
_hex_nibble :: (fn(c : u8, using(exn : Exception)) -> u8)(
|
|
76
68
|
cond(
|
|
77
|
-
((c >= u8(48)) && (c <= u8(57)))
|
|
78
|
-
|
|
79
|
-
((c >= u8(
|
|
80
|
-
|
|
69
|
+
((c >= u8(48)) && (c <= u8(57))) => (c - u8(48)),
|
|
70
|
+
// '0'-'9'
|
|
71
|
+
((c >= u8(97)) && (c <= u8(102))) => ((c - u8(97)) + u8(10)),
|
|
72
|
+
// 'a'-'f'
|
|
73
|
+
((c >= u8(65)) && (c <= u8(70))) => ((c - u8(65)) + u8(10)),
|
|
74
|
+
// 'A'-'F'
|
|
75
|
+
true => exn.throw(dyn(EncodingError.InvalidChar(c)))
|
|
81
76
|
)
|
|
82
77
|
);
|
|
83
|
-
|
|
84
78
|
/// Decode a hexadecimal string to bytes. Throws via `Exception` on invalid input.
|
|
85
|
-
hex_decode :: (fn(s: str, using(exn : Exception)) -> ArrayList(u8))({
|
|
79
|
+
hex_decode :: (fn(s : str, using(exn : Exception)) -> ArrayList(u8))({
|
|
86
80
|
cond(
|
|
87
|
-
((
|
|
88
|
-
exn.throw(dyn
|
|
81
|
+
((s.len() % usize(2)) != usize(0)) => {
|
|
82
|
+
exn.throw(dyn(EncodingError.OddLength));
|
|
89
83
|
},
|
|
90
84
|
true => ()
|
|
91
85
|
);
|
|
92
|
-
out := ArrayList(u8).with_capacity(
|
|
93
|
-
i
|
|
94
|
-
while
|
|
86
|
+
out := ArrayList(u8).with_capacity(s.len() / usize(2));
|
|
87
|
+
i := usize(0);
|
|
88
|
+
while(i < s.len(), i = (i + usize(2)), {
|
|
95
89
|
hi := _hex_nibble(s.bytes(i));
|
|
96
|
-
lo := _hex_nibble(s.bytes(
|
|
97
|
-
out.push((
|
|
98
|
-
};
|
|
90
|
+
lo := _hex_nibble(s.bytes(i + usize(1)));
|
|
91
|
+
out.push((hi << u8(4)) | lo);
|
|
92
|
+
});
|
|
99
93
|
out
|
|
100
94
|
});
|
|
101
|
-
|
|
102
|
-
export hex_decode;
|
|
95
|
+
export(hex_decode);
|
package/std/encoding/html.yo
CHANGED
|
@@ -11,86 +11,74 @@
|
|
|
11
11
|
//! result := decode_html(`&amp; &lt; &#38; &#x26;`);
|
|
12
12
|
//! assert((result == `& < & &`), "decoded entities");
|
|
13
13
|
//! ```
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
{
|
|
17
|
-
{
|
|
18
|
-
{
|
|
19
|
-
{ _build_entity_map, _build_legacy_set } :: import "./html_entities";
|
|
20
|
-
|
|
14
|
+
open(import("../string"));
|
|
15
|
+
{ HashMap } :: import("../collections/hash_map");
|
|
16
|
+
{ HashSet } :: import("../collections/hash_set");
|
|
17
|
+
{ is_valid_entity_code, from_code_point } :: import("./html_char_utils");
|
|
18
|
+
{ _build_entity_map, _build_legacy_set } :: import("./html_entities");
|
|
21
19
|
// Module-level state: lazily initialized entity map and legacy set.
|
|
22
20
|
_state_initialized := false;
|
|
23
21
|
_entity_map := HashMap(String, String).new();
|
|
24
22
|
_legacy_set := HashSet(String).new();
|
|
25
|
-
|
|
26
23
|
_ensure_init :: (fn() -> unit)({
|
|
27
|
-
if(!(
|
|
24
|
+
if(!(_state_initialized), {
|
|
28
25
|
_entity_map = _build_entity_map();
|
|
29
26
|
_legacy_set = _build_legacy_set();
|
|
30
27
|
_state_initialized = true;
|
|
31
28
|
});
|
|
32
29
|
});
|
|
33
|
-
|
|
34
30
|
// Parse a hex string to i32
|
|
35
|
-
_parse_hex :: (fn(s: String) -> i32)({
|
|
31
|
+
_parse_hex :: (fn(s : String) -> i32)({
|
|
36
32
|
(result : i32) = i32(0);
|
|
37
33
|
(i : usize) = usize(0);
|
|
38
|
-
while
|
|
34
|
+
while(i < s.len(), {
|
|
39
35
|
c := s.at(i).unwrap();
|
|
40
36
|
result = (result * i32(16));
|
|
41
|
-
if((
|
|
37
|
+
if((c >= rune(u32('0'))) && (c <= rune(u32('9'))), {
|
|
42
38
|
result = (result + (i32(c.to_u32()) - i32(48)));
|
|
43
|
-
}, if((
|
|
39
|
+
}, if((c >= rune(u32('a'))) && (c <= rune(u32('f'))), {
|
|
44
40
|
result = (result + ((i32(c.to_u32()) - i32(97)) + i32(10)));
|
|
45
|
-
}, if((
|
|
41
|
+
}, if((c >= rune(u32('A'))) && (c <= rune(u32('F'))), {
|
|
46
42
|
result = (result + ((i32(c.to_u32()) - i32(65)) + i32(10)));
|
|
47
43
|
})));
|
|
48
44
|
i = (i + usize(1));
|
|
49
|
-
};
|
|
45
|
+
});
|
|
50
46
|
result
|
|
51
47
|
});
|
|
52
|
-
|
|
53
48
|
// Parse a decimal string to i32
|
|
54
|
-
_parse_dec :: (fn(s: String) -> i32)({
|
|
49
|
+
_parse_dec :: (fn(s : String) -> i32)({
|
|
55
50
|
(result : i32) = i32(0);
|
|
56
51
|
(i : usize) = usize(0);
|
|
57
|
-
while
|
|
52
|
+
while(i < s.len(), {
|
|
58
53
|
c := s.at(i).unwrap();
|
|
59
54
|
result = ((result * i32(10)) + (i32(c.to_u32()) - i32(48)));
|
|
60
55
|
i = (i + usize(1));
|
|
61
|
-
};
|
|
56
|
+
});
|
|
62
57
|
result
|
|
63
58
|
});
|
|
64
|
-
|
|
65
59
|
// Check if a character is an ASCII alphanumeric
|
|
66
|
-
_is_alpha_numeric :: (fn(c: rune) -> bool)(
|
|
67
|
-
(((
|
|
60
|
+
_is_alpha_numeric :: (fn(c : rune) -> bool)(
|
|
61
|
+
(((c >= rune(u32('a'))) && (c <= rune(u32('z')))) || ((c >= rune(u32('A'))) && (c <= rune(u32('Z'))))) || ((c >= rune(u32('0'))) && (c <= rune(u32('9'))))
|
|
68
62
|
);
|
|
69
|
-
|
|
70
63
|
/// Decode HTML entities in a string.
|
|
71
64
|
///
|
|
72
65
|
/// Supports named (`&amp;`), decimal (`&#38;`), and hexadecimal (`&#x26;`) character
|
|
73
66
|
/// references. Legacy mode — entities without trailing semicolons are also decoded.
|
|
74
|
-
decode_html :: (fn(input: String) -> String)({
|
|
67
|
+
decode_html :: (fn(input : String) -> String)({
|
|
75
68
|
_ensure_init();
|
|
76
|
-
|
|
77
69
|
(len : usize) = input.len();
|
|
78
|
-
if(
|
|
79
|
-
return
|
|
70
|
+
if(len == usize(0), {
|
|
71
|
+
return(input);
|
|
80
72
|
});
|
|
81
|
-
|
|
82
73
|
// Quick check: if no '&', return as-is
|
|
83
74
|
if(!(input.contains(`&`)), {
|
|
84
|
-
return
|
|
75
|
+
return(input);
|
|
85
76
|
});
|
|
86
|
-
|
|
87
77
|
(result : String) = ``;
|
|
88
78
|
(i : usize) = usize(0);
|
|
89
|
-
|
|
90
|
-
while ((i < len)), {
|
|
79
|
+
while(i < len, {
|
|
91
80
|
c := input.at(i).unwrap();
|
|
92
|
-
|
|
93
|
-
if((c != rune(u32('&'))), {
|
|
81
|
+
if(c != rune(u32('&')), {
|
|
94
82
|
// Not an entity start, just append the character
|
|
95
83
|
result = `${result}${from_code_point(i32(c.to_u32()))}`;
|
|
96
84
|
i = (i + usize(1));
|
|
@@ -98,26 +86,24 @@ decode_html :: (fn(input: String) -> String)({
|
|
|
98
86
|
// Found '&' — try to decode entity
|
|
99
87
|
(start : usize) = i;
|
|
100
88
|
i = (i + usize(1));
|
|
101
|
-
|
|
102
|
-
if(((i >= len)), {
|
|
89
|
+
if(i >= len, {
|
|
103
90
|
result = `${result}&`;
|
|
104
91
|
}, {
|
|
105
92
|
next := input.at(i).unwrap();
|
|
106
|
-
|
|
107
|
-
if(((next == rune(u32('#')))), {
|
|
93
|
+
if(next == rune(u32('#')), {
|
|
108
94
|
// Numeric entity: &#N; or &#xN;
|
|
109
95
|
i = (i + usize(1));
|
|
110
|
-
if(
|
|
96
|
+
if(i >= len, {
|
|
111
97
|
result = `${result}&#`;
|
|
112
98
|
}, {
|
|
113
99
|
hex_char := input.at(i).unwrap();
|
|
114
|
-
if((
|
|
100
|
+
if((hex_char == rune(u32('x'))) || (hex_char == rune(u32('X'))), {
|
|
115
101
|
// Hex: &#xHH;
|
|
116
102
|
(digit_start : usize) = (i + usize(1));
|
|
117
103
|
(digit_end : usize) = digit_start;
|
|
118
|
-
while
|
|
104
|
+
while(digit_end < len, {
|
|
119
105
|
dc := input.at(digit_end).unwrap();
|
|
120
|
-
if(((
|
|
106
|
+
if(((dc >= rune(u32('0'))) && (dc <= rune(u32('9')))) || (((dc >= rune(u32('a'))) && (dc <= rune(u32('f')))) || ((dc >= rune(u32('A'))) && (dc <= rune(u32('F'))))), {
|
|
121
107
|
digit_end = (digit_end + usize(1));
|
|
122
108
|
}, {
|
|
123
109
|
// Done with hex digits, break out
|
|
@@ -127,35 +113,32 @@ decode_html :: (fn(input: String) -> String)({
|
|
|
127
113
|
// TODO: proper break
|
|
128
114
|
digit_end = (len + usize(1));
|
|
129
115
|
});
|
|
130
|
-
};
|
|
116
|
+
});
|
|
131
117
|
// Fix digit_end if it overflowed
|
|
132
|
-
if(
|
|
118
|
+
if(digit_end > len, {
|
|
133
119
|
// We used the overflow trick - find actual end
|
|
134
120
|
digit_end = digit_start;
|
|
135
|
-
while
|
|
121
|
+
while(digit_end < len, {
|
|
136
122
|
dc2 := input.at(digit_end).unwrap();
|
|
137
|
-
if(((
|
|
123
|
+
if(((dc2 >= rune(u32('0'))) && (dc2 <= rune(u32('9')))) || (((dc2 >= rune(u32('a'))) && (dc2 <= rune(u32('f')))) || ((dc2 >= rune(u32('A'))) && (dc2 <= rune(u32('F'))))), {
|
|
138
124
|
digit_end = (digit_end + usize(1));
|
|
139
125
|
}, {
|
|
140
126
|
digit_end = ((len + digit_end) + usize(1));
|
|
141
127
|
});
|
|
142
|
-
};
|
|
143
|
-
if(
|
|
128
|
+
});
|
|
129
|
+
if(digit_end > len, {
|
|
144
130
|
digit_end = ((digit_end - len) - usize(1));
|
|
145
131
|
});
|
|
146
132
|
});
|
|
147
|
-
|
|
148
|
-
if(((digit_end > digit_start)), {
|
|
133
|
+
if(digit_end > digit_start, {
|
|
149
134
|
hex_str := input.substring(digit_start, digit_end);
|
|
150
135
|
(code : i32) = _parse_hex(hex_str);
|
|
151
|
-
|
|
152
136
|
// Check for semicolon
|
|
153
|
-
if((
|
|
137
|
+
if((digit_end < len) && (input.at(digit_end).unwrap() == rune(u32(';'))), {
|
|
154
138
|
i = (digit_end + usize(1));
|
|
155
139
|
}, {
|
|
156
140
|
i = digit_end;
|
|
157
141
|
});
|
|
158
|
-
|
|
159
142
|
if(is_valid_entity_code(code), {
|
|
160
143
|
result = `${result}${from_code_point(code)}`;
|
|
161
144
|
}, {
|
|
@@ -172,29 +155,26 @@ decode_html :: (fn(input: String) -> String)({
|
|
|
172
155
|
// Decimal: &#DD;
|
|
173
156
|
(digit_start : usize) = i;
|
|
174
157
|
(digit_end : usize) = digit_start;
|
|
175
|
-
while
|
|
158
|
+
while(digit_end < len, {
|
|
176
159
|
dc := input.at(digit_end).unwrap();
|
|
177
|
-
if((
|
|
160
|
+
if((dc >= rune(u32('0'))) && (dc <= rune(u32('9'))), {
|
|
178
161
|
digit_end = (digit_end + usize(1));
|
|
179
162
|
}, {
|
|
180
163
|
digit_end = ((len + digit_end) + usize(1));
|
|
181
164
|
});
|
|
182
|
-
};
|
|
183
|
-
if(
|
|
165
|
+
});
|
|
166
|
+
if(digit_end > len, {
|
|
184
167
|
digit_end = ((digit_end - len) - usize(1));
|
|
185
168
|
});
|
|
186
|
-
|
|
187
|
-
if(((digit_end > digit_start)), {
|
|
169
|
+
if(digit_end > digit_start, {
|
|
188
170
|
dec_str := input.substring(digit_start, digit_end);
|
|
189
171
|
(code : i32) = _parse_dec(dec_str);
|
|
190
|
-
|
|
191
172
|
// Check for semicolon
|
|
192
|
-
if((
|
|
173
|
+
if((digit_end < len) && (input.at(digit_end).unwrap() == rune(u32(';'))), {
|
|
193
174
|
i = (digit_end + usize(1));
|
|
194
175
|
}, {
|
|
195
176
|
i = digit_end;
|
|
196
177
|
});
|
|
197
|
-
|
|
198
178
|
if(is_valid_entity_code(code), {
|
|
199
179
|
result = `${result}${from_code_point(code)}`;
|
|
200
180
|
}, {
|
|
@@ -212,9 +192,9 @@ decode_html :: (fn(input: String) -> String)({
|
|
|
212
192
|
// Named entity: &name; or &name (legacy)
|
|
213
193
|
(name_start : usize) = i;
|
|
214
194
|
(name_end : usize) = name_start;
|
|
215
|
-
while
|
|
195
|
+
while(name_end < len, {
|
|
216
196
|
nc := input.at(name_end).unwrap();
|
|
217
|
-
if(
|
|
197
|
+
if(nc == rune(u32(';')), {
|
|
218
198
|
// Found semicolon — end of entity name
|
|
219
199
|
name_end = ((len + name_end) + usize(1));
|
|
220
200
|
}, if(_is_alpha_numeric(nc), {
|
|
@@ -223,22 +203,20 @@ decode_html :: (fn(input: String) -> String)({
|
|
|
223
203
|
// Non-alphanumeric, non-semicolon — end of potential entity
|
|
224
204
|
name_end = ((len + name_end) + usize(1));
|
|
225
205
|
}));
|
|
226
|
-
};
|
|
206
|
+
});
|
|
227
207
|
// Decode the overflow trick
|
|
228
208
|
(found_end : bool) = false;
|
|
229
|
-
if(
|
|
209
|
+
if(name_end > len, {
|
|
230
210
|
name_end = ((name_end - len) - usize(1));
|
|
231
211
|
found_end = true;
|
|
232
212
|
});
|
|
233
|
-
|
|
234
213
|
name_str := input.substring(name_start, name_end);
|
|
235
|
-
|
|
236
214
|
// Check for semicolon at name_end
|
|
237
|
-
(has_semi : bool) = ((
|
|
238
|
-
|
|
215
|
+
(has_semi : bool) = ((name_end < len) && (input.at(name_end).unwrap() == rune(u32(';'))));
|
|
239
216
|
if(has_semi, {
|
|
240
217
|
// Try exact match with semicolon
|
|
241
|
-
match(
|
|
218
|
+
match(
|
|
219
|
+
_entity_map.get(name_str),
|
|
242
220
|
.Some(decoded) => {
|
|
243
221
|
result = `${result}${decoded}`;
|
|
244
222
|
i = (name_end + usize(1));
|
|
@@ -253,11 +231,11 @@ decode_html :: (fn(input: String) -> String)({
|
|
|
253
231
|
// Legacy mode: try progressively shorter names
|
|
254
232
|
(matched : bool) = false;
|
|
255
233
|
(try_end : usize) = name_end;
|
|
256
|
-
|
|
257
|
-
while ((((try_end > name_start) && !(matched)))), {
|
|
234
|
+
while((try_end > name_start) && !(matched), {
|
|
258
235
|
try_name := input.substring(name_start, try_end);
|
|
259
236
|
if(_legacy_set.contains(try_name), {
|
|
260
|
-
match(
|
|
237
|
+
match(
|
|
238
|
+
_entity_map.get(try_name),
|
|
261
239
|
.Some(decoded) => {
|
|
262
240
|
result = `${result}${decoded}`;
|
|
263
241
|
i = try_end;
|
|
@@ -270,8 +248,7 @@ decode_html :: (fn(input: String) -> String)({
|
|
|
270
248
|
}, {
|
|
271
249
|
try_end = (try_end - usize(1));
|
|
272
250
|
});
|
|
273
|
-
};
|
|
274
|
-
|
|
251
|
+
});
|
|
275
252
|
if(!(matched), {
|
|
276
253
|
// No legacy match — output '&' literally and continue
|
|
277
254
|
result = `${result}&`;
|
|
@@ -281,9 +258,7 @@ decode_html :: (fn(input: String) -> String)({
|
|
|
281
258
|
});
|
|
282
259
|
});
|
|
283
260
|
});
|
|
284
|
-
};
|
|
285
|
-
|
|
261
|
+
});
|
|
286
262
|
result
|
|
287
263
|
});
|
|
288
|
-
|
|
289
|
-
export decode_html, is_valid_entity_code, from_code_point;
|
|
264
|
+
export(decode_html, is_valid_entity_code, from_code_point);
|
|
package/std/encoding/html_char_utils.yo
CHANGED
|
@@ -10,11 +10,9 @@
|
|
|
10
10
|
//! assert(is_valid_entity_code(i32(65)), "A is valid");
|
|
11
11
|
//! s := from_code_point(i32(65)); // "A"
|
|
12
12
|
//! ```
|
|
13
|
-
|
|
14
|
-
open import "../string";
|
|
15
|
-
|
|
13
|
+
open(import("../string"));
|
|
16
14
|
/// Check if a Unicode codepoint is a valid HTML entity value.
|
|
17
|
-
is_valid_entity_code :: (fn(c: i32) -> bool)(
|
|
15
|
+
is_valid_entity_code :: (fn(c : i32) -> bool)(
|
|
18
16
|
cond(
|
|
19
17
|
((c >= i32(0xD800)) && (c <= i32(0xDFFF))) => false,
|
|
20
18
|
((c >= i32(0xFDD0)) && (c <= i32(0xFDEF))) => false,
|
|
@@ -27,13 +25,9 @@ is_valid_entity_code :: (fn(c: i32) -> bool)(
|
|
|
27
25
|
true => true
|
|
28
26
|
)
|
|
29
27
|
);
|
|
30
|
-
|
|
31
28
|
/// Convert a Unicode codepoint to a `String`.
|
|
32
|
-
from_code_point :: (fn(c: i32) -> String)(
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
);
|
|
38
|
-
|
|
39
|
-
export is_valid_entity_code, from_code_point;
|
|
29
|
+
from_code_point :: (fn(c : i32) -> String)({
|
|
30
|
+
(r : rune) = rune(u32(c));
|
|
31
|
+
`${r}`
|
|
32
|
+
});
|
|
33
|
+
export(is_valid_entity_code, from_code_point);
|