@shd101wyy/yo 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -6
- package/out/cjs/index.cjs +508 -503
- package/out/cjs/yo-cli.cjs +619 -612
- package/out/esm/index.mjs +397 -392
- package/out/types/src/codegen/codegen-c.d.ts +2 -0
- package/out/types/src/codegen/functions/context.d.ts +1 -0
- package/out/types/src/codegen/functions/generation.d.ts +10 -0
- package/out/types/src/codegen/utils/index.d.ts +1 -0
- package/out/types/src/env.d.ts +1 -0
- package/out/types/src/evaluator/builtins/build.d.ts +1 -0
- package/out/types/src/evaluator/context.d.ts +1 -0
- package/out/types/src/expr.d.ts +2 -0
- package/out/types/src/target.d.ts +1 -0
- package/out/types/src/value.d.ts +2 -1
- package/out/types/tsconfig.tsbuildinfo +1 -1
- package/package.json +1 -1
- package/std/build.yo +2 -1
- package/std/collections/array_list.yo +133 -1
- package/std/encoding/html.yo +283 -0
- package/std/encoding/html_char_utils.yo +36 -0
- package/std/encoding/html_entities.yo +2262 -0
- package/std/encoding/punycode.yo +366 -0
- package/std/fmt/to_string.yo +5 -4
- package/std/glob/index.yo +2 -2
- package/std/libc/wctype.yo +55 -0
- package/std/path.yo +6 -6
- package/std/prelude.yo +8 -0
- package/std/regex/parser.yo +69 -4
- package/std/regex/vm.yo +18 -31
- package/std/string/string.yo +1388 -1337
- package/std/string/unicode.yo +242 -0
package/package.json
CHANGED
package/std/build.yo
CHANGED
|
@@ -57,6 +57,7 @@ CompilationTarget :: {
|
|
|
57
57
|
X86_64_Linux_Gnu: "x86_64-linux-gnu",
|
|
58
58
|
X86_64_Linux_Musl: "x86_64-linux-musl",
|
|
59
59
|
Aarch64_Linux_Gnu: "aarch64-linux-gnu",
|
|
60
|
+
Aarch64_Linux_Musl: "aarch64-linux-musl",
|
|
60
61
|
Aarch64_Macos: "aarch64-macos",
|
|
61
62
|
X86_64_Macos: "x86_64-macos",
|
|
62
63
|
X86_64_Windows_Msvc: "x86_64-windows-msvc",
|
|
@@ -80,7 +81,7 @@ Executable :: struct(
|
|
|
80
81
|
root : comptime_string,
|
|
81
82
|
(target : comptime_string) ?= __yo_build_target_host(),
|
|
82
83
|
(optimize : Optimize) ?= Optimize.Debug,
|
|
83
|
-
(allocator : Allocator) ?= Allocator.
|
|
84
|
+
(allocator : Allocator) ?= Allocator.Libc,
|
|
84
85
|
(sanitize : Sanitize) ?= Sanitize.None
|
|
85
86
|
);
|
|
86
87
|
export Executable;
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* - RAII for automatic cleanup
|
|
9
9
|
*/
|
|
10
10
|
{ GlobalAllocator, AllocError } :: import "../allocator.yo";
|
|
11
|
-
{ memmove } :: import "../libc/string.yo";
|
|
11
|
+
{ memmove, memcpy, memset } :: import "../libc/string.yo";
|
|
12
12
|
{ malloc, calloc, realloc, free, aligned_alloc } :: GlobalAllocator;
|
|
13
13
|
|
|
14
14
|
/**
|
|
@@ -415,12 +415,144 @@ impl(forall(T : Type), ArrayList(T),
|
|
|
415
415
|
)
|
|
416
416
|
),
|
|
417
417
|
|
|
418
|
+
/**
 * Ensure the ArrayList can hold at least `min_cap` total elements
 * without further reallocation.
 *
 * - No-op when `min_cap` does not exceed the current capacity.
 * - A list with zero capacity is sized to exactly `min_cap`.
 * - Otherwise the capacity is doubled until it reaches `min_cap`.
 *
 * Panics when the allocator returns no memory.
 */
ensure_total_capacity : (fn(self: Self, min_cap: usize) -> unit)({
  cond(
    (min_cap <= self._capacity) => (),
    true => {
      new_capacity := cond(
        (self._capacity == usize(0)) => min_cap,
        true => {
          cap := self._capacity;
          while (cap < min_cap), {
            doubled := (cap * usize(2));
            // `cap` is non-zero here, so a doubled value that is not larger
            // means the multiplication wrapped around. Fall back to the exact
            // request instead of looping forever on a shrinking value.
            cap = cond(
              (doubled <= cap) => min_cap,
              true => doubled
            );
          };
          cap
        }
      );
      // First allocation uses malloc; an existing buffer is grown with realloc.
      new_some_ptr := match(self._ptr,
        .None => GlobalAllocator.malloc((sizeof(T) * new_capacity)),
        .Some(old_ptr) => GlobalAllocator.realloc(
          .Some((*(void))(old_ptr)),
          (sizeof(T) * new_capacity)
        )
      );
      match(new_some_ptr,
        .Some(new_ptr) => {
          self._ptr = .Some((*(T))(new_ptr));
          self._capacity = new_capacity;
        },
        .None => panic("ArrayList.ensure_total_capacity: allocation failed")
      );
    }
  );
}),
|
|
453
|
+
|
|
454
|
+
/**
 * Bulk-append `count` elements read from `src` with a single memcpy.
 * `src` must reference at least `count` readable elements.
 * A `count` of zero leaves the list untouched.
 */
extend_from_ptr : (fn(self: Self, src: *(T), count: usize) -> unit)({
  cond(
    (count != usize(0)) => {
      required_len := (self._length + count);
      self.ensure_total_capacity(required_len);
      match(self._ptr,
        .None => panic("ArrayList.extend_from_ptr: no ptr after ensure_total_capacity"),
        .Some(base) => {
          // Copy into the first unused slot, right after the current contents.
          tail := (*(void))((base &+ self._length));
          _ := memcpy(tail, (*(void))(src), (count * sizeof(T)));
          self._length = required_len;
        }
      );
    },
    true => ()
  );
}),
|
|
474
|
+
|
|
418
475
|
/**
|
|
419
476
|
* Remove every element: hands the contents to `_free_elements`, then resets the length to 0. Capacity is left unchanged.
|
|
420
477
|
*/
|
|
421
478
|
clear : (fn(self: Self) -> unit)({
|
|
422
479
|
Self._free_elements(self);
|
|
423
480
|
self._length = usize(0);
|
|
481
|
+
}),
|
|
482
|
+
|
|
483
|
+
/**
 * Read the element stored at `index`, skipping the bounds check.
 * The caller is responsible for guaranteeing `index < len`.
 * Panics only when the list has no backing buffer at all.
 */
get_unchecked : (fn(self: Self, index: usize) -> T)(
  match(self._ptr,
    .Some(base) => {
      slot := (base &+ index);
      slot.*
    },
    .None => panic("ArrayList.get_unchecked: no ptr")
  )
),
|
|
493
|
+
|
|
494
|
+
/**
 * Return the address of the element at `index`; no bounds check, no copy.
 * The caller is responsible for guaranteeing `index < len`.
 * The returned pointer stays valid only until the list is modified.
 * Panics when the list has no backing buffer.
 */
get_ptr : (fn(self: Self, index: usize) -> *(T))(
  match(self._ptr,
    .Some(base) => {
      slot := (base &+ index);
      slot
    },
    .None => panic("ArrayList.get_ptr: no ptr")
  )
),
|
|
504
|
+
|
|
505
|
+
/**
 * Store `value` into the slot at `index`, skipping the bounds check.
 * The caller is responsible for guaranteeing `index < len`.
 * Panics when the list has no backing buffer.
 */
set_unchecked : (fn(self: Self, index: usize, value: T) -> unit)(
  match(self._ptr,
    .Some(base) => {
      slot := (base &+ index);
      slot.* = value;
    },
    .None => panic("ArrayList.set_unchecked: no ptr")
  )
),
|
|
518
|
+
|
|
519
|
+
/**
 * Overwrite the bytes of every stored element with `byte_val` using one memset call.
 * Handy for zeroing bool/integer arrays. The call covers `len * sizeof(T)` bytes,
 * so the work grows with the size of the contents (it is not constant time).
 * Only safe for types without RC (e.g., bool, u8, usize).
 * Does nothing when no buffer has been allocated.
 */
fill_with_byte : (fn(self: Self, byte_val: int) -> unit)(
  match(self._ptr,
    .Some(base) => {
      byte_count := (self._length * sizeof(T));
      _ := memset((*(void))(base), byte_val, byte_count);
    },
    .None => ()
  )
),
|
|
532
|
+
|
|
533
|
+
/**
 * Set the length of the ArrayList to exactly `new_len`.
 *
 * Growing: reserves capacity, then memsets the newly exposed slots with `byte_val`.
 * Shrinking (or equal): only the length field changes; removed elements are
 * not destructed.
 *
 * Only safe for trivial types (bool, u8, usize, etc.).
 */
resize_with_byte : (fn(self: Self, new_len: usize, byte_val: int) -> unit)({
  cond(
    (new_len > self._length) => {
      self.ensure_total_capacity(new_len);
      match(self._ptr,
        .None => panic("ArrayList.resize_with_byte: no ptr after ensure"),
        .Some(base) => {
          old_len := self._length;
          // First slot past the current contents.
          tail := (*(void))((base &+ old_len));
          _ := memset(tail, byte_val, ((new_len - old_len) * sizeof(T)));
          self._length = new_len;
        }
      );
    },
    true => {
      self._length = new_len;
    }
  );
})
|
|
425
557
|
);
|
|
426
558
|
impl(forall(T : Type), ArrayList(T), Dispose(
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
// HTML entity decoding
|
|
2
|
+
//
|
|
3
|
+
// Decodes named (&amp;), decimal (&#38;), and hex (&#x26;) HTML character references.
|
|
4
|
+
// Uses Legacy mode — entities without trailing semicolon are also decoded.
|
|
5
|
+
//
|
|
6
|
+
// Example:
|
|
7
|
+
// { decode_html } :: import "std/encoding/html";
|
|
8
|
+
//
|
|
9
|
+
// result := decode_html(`&amp; &lt; &#38; &#x26;`);
|
|
10
|
+
// assert((result == `& < & &`), "decoded entities");
|
|
11
|
+
|
|
12
|
+
open import "../string";
|
|
13
|
+
{ HashMap } :: import "../collections/hash_map";
|
|
14
|
+
{ HashSet } :: import "../collections/hash_set";
|
|
15
|
+
{ is_valid_entity_code, from_code_point } :: import "./html_char_utils";
|
|
16
|
+
{ _build_entity_map, _build_legacy_set } :: import "./html_entities";
|
|
17
|
+
|
|
18
|
+
// Lookup tables shared by all decode_html calls. They start out empty and are
// filled in by _ensure_init the first time it runs.
_state_initialized := false;
_entity_map := HashMap(String, String).new();
_legacy_set := HashSet(String).new();

// Build the entity map and the legacy-name set once; later calls do nothing.
_ensure_init :: (fn() -> unit)({
  cond(
    _state_initialized => (),
    true => {
      _entity_map = _build_entity_map();
      _legacy_set = _build_legacy_set();
      _state_initialized = true;
    }
  );
});
|
|
30
|
+
|
|
31
|
+
// Parse a string of hexadecimal digits into an i32.
//
// Characters that are not hex digits contribute 0 for their position (callers
// are expected to pass digits only). The result saturates: once the running
// value exceeds 0x10FFFF (the largest Unicode code point) it is no longer
// updated, so arbitrarily long digit runs cannot overflow i32 and wrap around
// into a valid-looking code point. Any returned value above 0x10FFFF means
// "out of range".
_parse_hex :: (fn(s: String) -> i32)({
  (result : i32) = i32(0);
  (i : usize) = usize(0);
  while ((i < s.len())), {
    c := s.at(i).unwrap();
    (digit : i32) = cond(
      ((c >= rune(u32('0'))) && (c <= rune(u32('9')))) => (i32(c.to_u32()) - i32(48)),
      ((c >= rune(u32('a'))) && (c <= rune(u32('f')))) => ((i32(c.to_u32()) - i32(97)) + i32(10)),
      ((c >= rune(u32('A'))) && (c <= rune(u32('F')))) => ((i32(c.to_u32()) - i32(65)) + i32(10)),
      true => i32(0)
    );
    // While result <= 0x10FFFF, result * 16 + 15 still fits in i32.
    result = cond(
      (result > i32(0x10FFFF)) => result,
      true => ((result * i32(16)) + digit)
    );
    i = (i + usize(1));
  };
  result
});
|
|
49
|
+
|
|
50
|
+
// Parse a string of decimal digits into an i32.
//
// Callers are expected to pass digits only; no validation is performed.
// The result saturates: once the running value exceeds 0x10FFFF (the largest
// Unicode code point) it is no longer updated, so arbitrarily long digit runs
// cannot overflow i32 and wrap around into a valid-looking code point. Any
// returned value above 0x10FFFF means "out of range".
_parse_dec :: (fn(s: String) -> i32)({
  (result : i32) = i32(0);
  (i : usize) = usize(0);
  while ((i < s.len())), {
    c := s.at(i).unwrap();
    // While result <= 0x10FFFF, result * 10 + 9 still fits in i32.
    result = cond(
      (result > i32(0x10FFFF)) => result,
      true => ((result * i32(10)) + (i32(c.to_u32()) - i32(48)))
    );
    i = (i + usize(1));
  };
  result
});
|
|
61
|
+
|
|
62
|
+
// True when `c` is an ASCII letter (a-z, A-Z) or an ASCII digit (0-9).
_is_alpha_numeric :: (fn(c: rune) -> bool)(
  cond(
    ((c >= rune(u32('a'))) && (c <= rune(u32('z')))) => true,
    ((c >= rune(u32('A'))) && (c <= rune(u32('Z')))) => true,
    ((c >= rune(u32('0'))) && (c <= rune(u32('9')))) => true,
    true => false
  )
);
|
|
66
|
+
|
|
67
|
+
// True when `c` is an ASCII hexadecimal digit (0-9, a-f, A-F).
_is_hex_digit :: (fn(c: rune) -> bool)(
  ((((c >= rune(u32('0'))) && (c <= rune(u32('9')))) || ((c >= rune(u32('a'))) && (c <= rune(u32('f'))))) || ((c >= rune(u32('A'))) && (c <= rune(u32('F')))))
);

// Index of the first character at or after `from_pos` that is not a hex digit,
// or `len` when the digits run to the end of `input`.
_scan_hex_digits :: (fn(input: String, from_pos: usize, len: usize) -> usize)({
  (pos : usize) = from_pos;
  (scanning : bool) = true;
  while ((scanning && (pos < len))), {
    if(_is_hex_digit(input.at(pos).unwrap()), {
      pos = (pos + usize(1));
    }, {
      scanning = false;
    });
  };
  pos
});

// Index of the first character at or after `from_pos` that is not a decimal
// digit, or `len` when the digits run to the end of `input`.
_scan_dec_digits :: (fn(input: String, from_pos: usize, len: usize) -> usize)({
  (pos : usize) = from_pos;
  (scanning : bool) = true;
  while ((scanning && (pos < len))), {
    dc := input.at(pos).unwrap();
    if(((dc >= rune(u32('0'))) && (dc <= rune(u32('9')))), {
      pos = (pos + usize(1));
    }, {
      scanning = false;
    });
  };
  pos
});

// Index of the first character at or after `from_pos` that is not ASCII
// alphanumeric (this includes ';'), or `len` when the name runs to the end.
_scan_entity_name :: (fn(input: String, from_pos: usize, len: usize) -> usize)({
  (pos : usize) = from_pos;
  (scanning : bool) = true;
  while ((scanning && (pos < len))), {
    if(_is_alpha_numeric(input.at(pos).unwrap()), {
      pos = (pos + usize(1));
    }, {
      scanning = false;
    });
  };
  pos
});

// Decode HTML entities in a string (Legacy mode — entities without ; are also decoded).
//
// Handles three reference forms after '&':
//   - hex      &#xHH;  / &#XHH;
//   - decimal  &#DD;
//   - named    &name;  (or a legacy name without the trailing ';')
// Numeric references whose value is rejected by is_valid_entity_code are kept
// as their original text. Unknown named references are emitted literally.
// Input that is empty or contains no '&' is returned unchanged.
decode_html :: (fn(input: String) -> String)({
  _ensure_init();

  (len : usize) = input.len();
  if(((len == usize(0))), {
    return input;
  });

  // Quick check: if no '&', return as-is
  if(!(input.contains(`&`)), {
    return input;
  });

  (result : String) = ``;
  (i : usize) = usize(0);

  while ((i < len)), {
    c := input.at(i).unwrap();

    if((c != rune(u32('&'))), {
      // Not an entity start, just append the character
      result = `${result}${from_code_point(i32(c.to_u32()))}`;
      i = (i + usize(1));
    }, {
      // Found '&' — try to decode entity
      (start : usize) = i;
      i = (i + usize(1));

      if(((i >= len)), {
        // Trailing '&' at end of input
        result = `${result}&`;
      }, {
        next := input.at(i).unwrap();

        if(((next == rune(u32('#')))), {
          // Numeric entity: &#N; or &#xN;
          i = (i + usize(1));
          if(((i >= len)), {
            result = `${result}&#`;
          }, {
            hex_char := input.at(i).unwrap();
            if(((hex_char == rune(u32('x'))) || (hex_char == rune(u32('X')))), {
              // Hex: &#xHH;
              (digit_start : usize) = (i + usize(1));
              (digit_end : usize) = _scan_hex_digits(input, digit_start, len);

              if(((digit_end > digit_start)), {
                hex_str := input.substring(digit_start, digit_end);
                (code : i32) = _parse_hex(hex_str);

                // Consume the optional trailing semicolon
                if((((digit_end < len) && (input.at(digit_end).unwrap() == rune(u32(';'))))), {
                  i = (digit_end + usize(1));
                }, {
                  i = digit_end;
                });

                if(is_valid_entity_code(code), {
                  result = `${result}${from_code_point(code)}`;
                }, {
                  // Invalid code (e.g., surrogates) — keep original entity text
                  (orig_hex : String) = input.substring(start, i);
                  result = `${result}${orig_hex}`;
                });
              }, {
                // No hex digits — output "&#x" / "&#X" literally
                result = `${result}&#${from_code_point(i32(hex_char.to_u32()))}`;
                i = (i + usize(1));
              });
            }, {
              // Decimal: &#DD;
              (digit_start : usize) = i;
              (digit_end : usize) = _scan_dec_digits(input, digit_start, len);

              if(((digit_end > digit_start)), {
                dec_str := input.substring(digit_start, digit_end);
                (code : i32) = _parse_dec(dec_str);

                // Consume the optional trailing semicolon
                if((((digit_end < len) && (input.at(digit_end).unwrap() == rune(u32(';'))))), {
                  i = (digit_end + usize(1));
                }, {
                  i = digit_end;
                });

                if(is_valid_entity_code(code), {
                  result = `${result}${from_code_point(code)}`;
                }, {
                  // Invalid code (e.g., surrogates) — keep original entity text
                  (orig_dec : String) = input.substring(start, i);
                  result = `${result}${orig_dec}`;
                });
              }, {
                // No decimal digits — emit "&#" and resume at the current character
                result = `${result}&#`;
              });
            });
          });
        }, {
          // Named entity: &name; or &name (legacy)
          (name_start : usize) = i;
          (name_end : usize) = _scan_entity_name(input, name_start, len);

          name_str := input.substring(name_start, name_end);

          // Check for semicolon at name_end
          (has_semi : bool) = (((name_end < len) && (input.at(name_end).unwrap() == rune(u32(';')))));

          if(has_semi, {
            // Try exact match with semicolon
            match(_entity_map.get(name_str),
              .Some(decoded) => {
                result = `${result}${decoded}`;
                i = (name_end + usize(1));
              },
              .None => {
                // Unknown entity — output literally
                result = `${result}&${name_str};`;
                i = (name_end + usize(1));
              }
            );
          }, {
            // Legacy mode: try progressively shorter names
            (matched : bool) = false;
            (try_end : usize) = name_end;

            while ((((try_end > name_start) && !(matched)))), {
              try_name := input.substring(name_start, try_end);
              if(_legacy_set.contains(try_name), {
                match(_entity_map.get(try_name),
                  .Some(decoded) => {
                    result = `${result}${decoded}`;
                    i = try_end;
                    matched = true;
                  },
                  .None => {
                    try_end = (try_end - usize(1));
                  }
                );
              }, {
                try_end = (try_end - usize(1));
              });
            };

            if(!(matched), {
              // No legacy match — output '&' literally and continue
              result = `${result}&`;
              i = name_start;
            });
          });
        });
      });
    });
  };

  result
});
|
|
282
|
+
|
|
283
|
+
export decode_html, is_valid_entity_code, from_code_point;
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
// HTML character utility functions
|
|
2
|
+
//
|
|
3
|
+
// Provides Unicode codepoint validation and conversion for HTML entity processing.
|
|
4
|
+
//
|
|
5
|
+
// Example:
|
|
6
|
+
// { is_valid_entity_code, from_code_point } :: import "std/encoding/html_char_utils";
|
|
7
|
+
//
|
|
8
|
+
// assert(is_valid_entity_code(i32(65)), "A is valid");
|
|
9
|
+
// s := from_code_point(i32(65)); // "A"
|
|
10
|
+
|
|
11
|
+
open import "../string";
|
|
12
|
+
|
|
13
|
+
// Check if a Unicode codepoint is a valid HTML entity value.
//
// Returns false for:
//   - negative values (not a code point; can arise from overflowed parsing)
//   - surrogates              0xD800..0xDFFF
//   - 0xFDD0..0xFDEF
//   - any value whose low 16 bits are 0xFFFE or 0xFFFF
//   - control codes           0x00..0x08, 0x0B, 0x0E..0x1F, 0x7F..0x9F
//   - values above 0x10FFFF
// Everything else is accepted.
is_valid_entity_code :: (fn(c: i32) -> bool)(
  cond(
    // Must come first: a negative value slips past every range test below.
    (c < i32(0)) => false,
    ((c >= i32(0xD800)) && (c <= i32(0xDFFF))) => false,
    ((c >= i32(0xFDD0)) && (c <= i32(0xFDEF))) => false,
    (((c & i32(0xFFFF)) == i32(0xFFFF)) || ((c & i32(0xFFFF)) == i32(0xFFFE))) => false,
    ((c >= i32(0x00)) && (c <= i32(0x08))) => false,
    (c == i32(0x0B)) => false,
    ((c >= i32(0x0E)) && (c <= i32(0x1F))) => false,
    ((c >= i32(0x7F)) && (c <= i32(0x9F))) => false,
    (c > i32(0x10FFFF)) => false,
    true => true
  )
);
|
|
27
|
+
|
|
28
|
+
// Build a String holding the single character whose code point is `c`.
// The value is converted to a rune as-is; no validity check is performed here.
from_code_point :: (fn(c: i32) -> String)({
  (code_unit : u32) = u32(c);
  (ch : rune) = rune(code_unit);
  `${ch}`
});
|
|
35
|
+
|
|
36
|
+
export is_valid_entity_code, from_code_point;
|