@shd101wyy/yo 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +8 -6
  2. package/out/cjs/index.cjs +691 -636
  3. package/out/cjs/yo-cli.cjs +710 -653
  4. package/out/esm/index.mjs +649 -594
  5. package/out/types/src/build-runner.d.ts +1 -1
  6. package/out/types/src/codegen/async/runtime-io-common.d.ts +2 -1
  7. package/out/types/src/codegen/async/runtime.d.ts +5 -1
  8. package/out/types/src/codegen/codegen-c.d.ts +2 -0
  9. package/out/types/src/codegen/functions/collection.d.ts +1 -1
  10. package/out/types/src/codegen/functions/context.d.ts +1 -0
  11. package/out/types/src/codegen/functions/generation.d.ts +10 -0
  12. package/out/types/src/codegen/utils/index.d.ts +4 -0
  13. package/out/types/src/env.d.ts +1 -0
  14. package/out/types/src/evaluator/builtins/build.d.ts +1 -0
  15. package/out/types/src/evaluator/builtins/comptime-index-fns.d.ts +17 -0
  16. package/out/types/src/evaluator/calls/index-trait.d.ts +17 -0
  17. package/out/types/src/evaluator/context.d.ts +19 -14
  18. package/out/types/src/evaluator/index.d.ts +3 -1
  19. package/out/types/src/evaluator/trait-checking.d.ts +1 -0
  20. package/out/types/src/evaluator/values/anonymous-module.d.ts +3 -2
  21. package/out/types/src/expr.d.ts +22 -1
  22. package/out/types/src/module-manager.d.ts +1 -0
  23. package/out/types/src/target.d.ts +1 -0
  24. package/out/types/src/value.d.ts +4 -1
  25. package/out/types/tsconfig.tsbuildinfo +1 -1
  26. package/package.json +1 -1
  27. package/std/build.yo +2 -1
  28. package/std/collections/array_list.yo +114 -26
  29. package/std/collections/btree_map.yo +13 -3
  30. package/std/collections/deque.yo +10 -0
  31. package/std/collections/hash_map.yo +15 -0
  32. package/std/collections/priority_queue.yo +5 -5
  33. package/std/encoding/html.yo +283 -0
  34. package/std/encoding/html_char_utils.yo +36 -0
  35. package/std/encoding/html_entities.yo +2262 -0
  36. package/std/encoding/punycode.yo +366 -0
  37. package/std/encoding/toml.yo +1 -1
  38. package/std/fmt/to_string.yo +5 -4
  39. package/std/glob/index.yo +2 -2
  40. package/std/libc/wctype.yo +55 -0
  41. package/std/path.yo +6 -6
  42. package/std/prelude.yo +826 -205
  43. package/std/process.yo +1 -1
  44. package/std/regex/compiler.yo +11 -11
  45. package/std/regex/index.yo +2 -4
  46. package/std/regex/parser.yo +69 -4
  47. package/std/regex/vm.yo +53 -46
  48. package/std/string/string.yo +1424 -1339
  49. package/std/string/unicode.yo +242 -0
  50. package/out/types/src/evaluator/calls/array.d.ts +0 -14
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@shd101wyy/yo",
3
3
  "displayName": "Yo",
4
- "version": "0.1.5",
4
+ "version": "0.1.7",
5
5
  "main": "./out/cjs/index.cjs",
6
6
  "module": "./out/esm/index.mjs",
7
7
  "types": "./out/types/src/index.d.ts",
package/std/build.yo CHANGED
@@ -57,6 +57,7 @@ CompilationTarget :: {
57
57
  X86_64_Linux_Gnu: "x86_64-linux-gnu",
58
58
  X86_64_Linux_Musl: "x86_64-linux-musl",
59
59
  Aarch64_Linux_Gnu: "aarch64-linux-gnu",
60
+ Aarch64_Linux_Musl: "aarch64-linux-musl",
60
61
  Aarch64_Macos: "aarch64-macos",
61
62
  X86_64_Macos: "x86_64-macos",
62
63
  X86_64_Windows_Msvc: "x86_64-windows-msvc",
@@ -80,7 +81,7 @@ Executable :: struct(
80
81
  root : comptime_string,
81
82
  (target : comptime_string) ?= __yo_build_target_host(),
82
83
  (optimize : Optimize) ?= Optimize.Debug,
83
- (allocator : Allocator) ?= Allocator.Mimalloc,
84
+ (allocator : Allocator) ?= Allocator.Libc,
84
85
  (sanitize : Sanitize) ?= Sanitize.None
85
86
  );
86
87
  export Executable;
@@ -8,7 +8,7 @@
8
8
  * - RAII for automatic cleanup
9
9
  */
10
10
  { GlobalAllocator, AllocError } :: import "../allocator.yo";
11
- { memmove } :: import "../libc/string.yo";
11
+ { memmove, memcpy, memset } :: import "../libc/string.yo";
12
12
  { malloc, calloc, realloc, free, aligned_alloc } :: GlobalAllocator;
13
13
 
14
14
  /**
@@ -195,26 +195,6 @@ impl(forall(T : Type), ArrayList(T),
195
195
  )
196
196
  ),
197
197
 
198
- /**
199
- * Set an element at a specific index (bounds checked)
200
- * Returns Ok(()) on success, or Error with bounds information
201
- */
202
- set : (fn(self: Self, index: usize, value: T) -> Result(unit, ArrayListError))(
203
- cond(
204
- (index >= self._length) =>
205
- .Err(.IndexOutOfBounds(index: index, length: self._length)),
206
- true =>
207
- match(self._ptr,
208
- .None => panic("ArrayList has length but no ptr"),
209
- .Some(_ptr) => {
210
- target_ptr := (_ptr &+ index);
211
- target_ptr.* = value;
212
- .Ok(())
213
- }
214
- )
215
- )
216
- ),
217
-
218
198
  /**
219
199
  * Shrink the capacity to match the current length
220
200
  * This reduces memory usage but may cause reallocation on next push
@@ -262,11 +242,13 @@ impl(forall(T : Type), ArrayList(T),
262
242
  _free_elements : (fn(self : Self) -> unit)(
263
243
  cond(
264
244
  Type.contains_rc_type(T) => {
265
- i := usize(0);
266
- base_ptr := self._ptr.unwrap();
267
- while(i < self._length, i = (i + usize(1)), {
268
- element_ptr := (base_ptr &+ i);
269
- unsafe.drop(element_ptr.*);
245
+ if((self._length > usize(0)), {
246
+ i := usize(0);
247
+ base_ptr := self._ptr.unwrap();
248
+ while(i < self._length, i = (i + usize(1)), {
249
+ element_ptr := (base_ptr &+ i);
250
+ unsafe.drop(element_ptr.*);
251
+ });
270
252
  });
271
253
  },
272
254
  true => ()
@@ -415,14 +397,120 @@ impl(forall(T : Type), ArrayList(T),
415
397
  )
416
398
  ),
417
399
 
400
/**
 * Grow the backing storage so that it can hold at least `min_cap` elements.
 *
 * Does nothing when the current capacity is already sufficient. Otherwise the
 * new capacity is `min_cap` itself for a list with zero capacity, or the
 * current capacity doubled repeatedly until it reaches `min_cap`.
 * Panics if the allocator returns no pointer.
 */
ensure_total_capacity : (fn(self: Self, min_cap: usize) -> unit)({
  if((min_cap > self._capacity), {
    // Pick the target capacity: exact fit when starting from zero,
    // otherwise keep doubling the existing capacity.
    grown := self._capacity;
    if((grown == usize(0)), {
      grown = min_cap;
    });
    while (grown < min_cap), {
      grown = (grown * usize(2));
    };

    // Fresh allocation when there is no buffer yet, realloc otherwise.
    byte_count := (sizeof(T) * grown);
    allocated := match(self._ptr,
      .Some(current) => GlobalAllocator.realloc(.Some((*(void))(current)), byte_count),
      .None => GlobalAllocator.malloc(byte_count)
    );

    match(allocated,
      .None => panic("ArrayList.ensure_total_capacity: allocation failed"),
      .Some(raw) => {
        self._ptr = .Some((*(T))(raw));
        self._capacity = grown;
      }
    );
  });
}),
435
+
436
/**
 * Append `count` elements read from the raw pointer `src`.
 *
 * The elements are copied with a single memcpy of `count * sizeof(T)` bytes
 * onto the end of the list, after capacity has been ensured.
 * The caller must guarantee that `src` points to at least `count` valid elements.
 * A `count` of zero is a no-op.
 */
extend_from_ptr : (fn(self: Self, src: *(T), count: usize) -> unit)({
  if((count != usize(0)), {
    self.ensure_total_capacity((self._length + count));
    match(self._ptr,
      .None => panic("ArrayList.extend_from_ptr: no ptr after ensure_total_capacity"),
      .Some(base) => {
        // First free slot, just past the current elements.
        tail := (*(void))((base &+ self._length));
        byte_count := (count * sizeof(T));
        _ := memcpy(tail, (*(void))(src), byte_count);
        self._length = (self._length + count);
      }
    );
  });
}),
456
+
418
457
  /**
419
458
  * Clear all elements but keep capacity
420
459
  */
421
460
  clear : (fn(self: Self) -> unit)({
422
461
  Self._free_elements(self);
423
462
  self._length = usize(0);
463
+ }),
464
+
465
/**
 * Overwrite the storage of all current elements with a byte pattern.
 *
 * Issues a single memset over `_length * sizeof(T)` bytes, so the cost is
 * linear in the number of bytes filled (one bulk call, not O(1)).
 * Useful for zeroing bool/integer arrays without a per-element loop.
 * Only safe for types without RC (e.g., bool, u8, usize).
 * Does nothing when the list has no backing pointer.
 *
 * NOTE(review): C's memset uses only the low 8 bits of `byte_val` — confirm
 * the imported binding behaves the same.
 */
fill_with_byte : (fn(self: Self, byte_val: int) -> unit)(
  match(self._ptr,
    .Some(base) => {
      _ := memset((*(void))(base), byte_val, (self._length * sizeof(T)));
    },
    .None => ()
  )
),
478
+
479
/**
 * Set the length of the list to exactly `new_len`.
 *
 * Shrinking (or keeping the same length) only updates the length field; no
 * destructors are called on the removed elements. Growing ensures capacity
 * and fills the newly exposed slots with `byte_val` via memset.
 * Only safe for trivial types (bool, u8, usize, etc.).
 */
resize_with_byte : (fn(self: Self, new_len: usize, byte_val: int) -> unit)({
  if((new_len > self._length), {
    self.ensure_total_capacity(new_len);
    match(self._ptr,
      .None => panic("ArrayList.resize_with_byte: no ptr after ensure"),
      .Some(base) => {
        // Fill only the region between the old and the new length.
        first_new := (*(void))((base &+ self._length));
        added_bytes := ((new_len - self._length) * sizeof(T));
        _ := memset(first_new, byte_val, added_bytes);
        self._length = new_len;
      }
    );
  }, {
    self._length = new_len;
  });
})
425
503
  );
504
// Index(usize) for ArrayList: yields a pointer to the element at `idx`.
// Asserts that `idx` is below the current length.
impl(forall(T : Type), ArrayList(T), Index(usize)(
  Output : T,
  index : (fn(self: *(Self), idx: usize) -> *(Self.Output))({
    assert((idx < self.*._length), "ArrayList: index out of bounds");
    match(self.*._ptr,
      .None => panic("ArrayList: index on empty list"),
      .Some(base) => (base &+ idx)
    )
  })
));
426
514
  impl(forall(T : Type), ArrayList(T), Dispose(
427
515
  /**
428
516
  * RAII destructor - automatically called when ArrayList goes out of scope
@@ -86,7 +86,7 @@ impl(forall(K : Type, V : Type), BTreeMap(K, V),
86
86
  cond(
87
87
  r.found => {
88
88
  entry := self._entries.get(r.idx).unwrap();
89
- self._entries.set(r.idx, BTreeEntry(K, V)(key: entry.key, value: v));
89
+ &(self._entries(r.idx)).* = BTreeEntry(K, V)(key: entry.key, value: v);
90
90
  },
91
91
  true => {
92
92
  // Append new entry then bubble left to sorted position
@@ -95,8 +95,8 @@ impl(forall(K : Type, V : Type), BTreeMap(K, V),
95
95
  while (pos > r.idx), (pos = (pos - usize(1))), {
96
96
  curr := self._entries.get(pos).unwrap();
97
97
  prev := self._entries.get((pos - usize(1))).unwrap();
98
- self._entries.set(pos, prev);
99
- self._entries.set((pos - usize(1)), curr);
98
+ &(self._entries(pos)).* = prev;
99
+ &(self._entries((pos - usize(1)))).* = curr;
100
100
  };
101
101
  }
102
102
  );
@@ -246,6 +246,16 @@ impl(forall(K : Type, V : Type), BTreeMap(K, V),
246
246
  )
247
247
  );
248
248
 
249
// Index(K) for BTreeMap: yields a pointer to the value stored under `idx`.
// Asserts that the key is present.
impl(forall(K : Type, V : Type), BTreeMap(K, V), Index(K)(
  Output : V,
  index : (fn(self: *(Self), idx: K, where(K <: Ord(K))) -> *(Self.Output))({
    lookup := self.*._find(idx);
    assert(lookup.found, "BTreeMap: key not found");
    // Take the address of the entry in place, then of its value field.
    slot := &(self.*._entries(lookup.idx));
    &(slot.*.value)
  })
));
258
+
249
259
  export
250
260
  BTreeMap,
251
261
  BTreeMapIter,
@@ -250,6 +250,16 @@ impl(forall(T : Type), Deque(T),
250
250
  )
251
251
  );
252
252
 
253
// Index(usize) for Deque: yields a pointer to the element at logical position `idx`.
// Asserts that `idx` is below the current length.
impl(forall(T : Type), Deque(T), Index(usize)(
  Output : T,
  index : (fn(self: *(Self), idx: usize) -> *(Self.Output))({
    assert((idx < self.*._len), "Deque: index out of bounds");
    storage := self.*._buf.unwrap();
    // Logical index -> buffer slot: offset from the head, wrapped by capacity.
    slot := ((self.*._head + idx) % self.*._capacity);
    (storage &+ slot)
  })
));
262
+
253
263
  export
254
264
  Deque,
255
265
  DequeIter,
@@ -697,6 +697,21 @@ impl(forall(K : Type, V : Type), where(K <: (Eq(K), Hash)), HashMap(K, V),
697
697
  )
698
698
  );
699
699
 
700
// Index(K) for HashMap: yields a pointer to the value stored under `idx`.
// Panics when the key has no bucket.
impl(forall(K : Type, V : Type), HashMap(K, V), Index(K)(
  Output : V,
  index : (fn(self: *(Self), idx: K, where(K <: (Eq(K), Hash))) -> *(Self.Output))({
    key_hash := idx.hash();
    found := Self._find_bucket(self.*, idx, key_hash);
    match(found,
      .None => panic("HashMap: key not found"),
      .Some(bucket) => {
        entries := Self._data_ptr(self.*);
        &((entries &+ bucket).*.value)
      }
    )
  })
));
714
+
700
715
  export
701
716
  HashMap,
702
717
  HashMapError,
@@ -59,8 +59,8 @@ impl(forall(T : Type), PriorityQueue(T),
59
59
  parent_val := self._data.get(parent).unwrap();
60
60
  cond(
61
61
  (child_val < parent_val) => {
62
- self._data.set(i, parent_val);
63
- self._data.set(parent, child_val);
62
+ &(self._data(i)).* = parent_val;
63
+ &(self._data(parent)).* = child_val;
64
64
  i = parent;
65
65
  },
66
66
  true => {
@@ -78,7 +78,7 @@ impl(forall(T : Type), PriorityQueue(T),
78
78
  true => {
79
79
  top := self._data.get(usize(0)).unwrap();
80
80
  last := self._data.get((n - usize(1))).unwrap();
81
- self._data.set(usize(0), last);
81
+ &(self._data(usize(0))).* = last;
82
82
  self._data.pop();
83
83
  // Sift down
84
84
  i := usize(0);
@@ -106,8 +106,8 @@ impl(forall(T : Type), PriorityQueue(T),
106
106
  true => {
107
107
  a := self._data.get(i).unwrap();
108
108
  b := self._data.get(smallest).unwrap();
109
- self._data.set(i, b);
110
- self._data.set(smallest, a);
109
+ &(self._data(i)).* = b;
110
+ &(self._data(smallest)).* = a;
111
111
  i = smallest;
112
112
  }
113
113
  );
@@ -0,0 +1,283 @@
1
+ // HTML entity decoding
2
+ //
3
+ // Decodes named (&amp;), decimal (&#38;), and hex (&#x26;) HTML character references.
4
+ // Uses Legacy mode — entities without trailing semicolon are also decoded.
5
+ //
6
+ // Example:
7
+ // { decode_html } :: import "std/encoding/html";
8
+ //
9
+ // result := decode_html(`&amp; &lt; &#38; &#x26;`);
10
+ // assert((result == `& < & &`), "decoded entities");
11
+
12
+ open import "../string";
13
+ { HashMap } :: import "../collections/hash_map";
14
+ { HashSet } :: import "../collections/hash_set";
15
+ { is_valid_entity_code, from_code_point } :: import "./html_char_utils";
16
+ { _build_entity_map, _build_legacy_set } :: import "./html_entities";
17
+
18
// Module-level state. The entity map and legacy set start out empty and are
// replaced with the real tables the first time _ensure_init runs.
_state_initialized := false;
_entity_map := HashMap(String, String).new();
_legacy_set := HashSet(String).new();

// Build the entity tables once; later calls are no-ops.
_ensure_init :: (fn() -> unit)(
  cond(
    _state_initialized => (),
    true => {
      _entity_map = _build_entity_map();
      _legacy_set = _build_legacy_set();
      _state_initialized = true;
    }
  )
);
30
+
31
// Parse a string of hexadecimal digits to i32.
//
// Characters outside 0-9 / a-f / A-F contribute a digit value of 0 (the
// caller is expected to pass hex digits only).
//
// The accumulator stops growing once it exceeds 0x10FFFF (the largest
// Unicode code point). Without this, long digit runs overflow i32 and can
// wrap to a small, seemingly valid code point. Any input whose value is
// above 0x10FFFF therefore returns some value above 0x10FFFF, not
// necessarily the exact number.
_parse_hex :: (fn(s: String) -> i32)({
  (result : i32) = i32(0);
  (i : usize) = usize(0);
  while ((i < s.len())), {
    // result <= 0x10FFFF here, so (result * 16 + 15) always fits in i32.
    if((result <= i32(0x10FFFF)), {
      c := s.at(i).unwrap();
      (digit : i32) = i32(0);
      if(((c >= rune(u32('0'))) && (c <= rune(u32('9')))), {
        digit = (i32(c.to_u32()) - i32(48));
      }, if(((c >= rune(u32('a'))) && (c <= rune(u32('f')))), {
        digit = ((i32(c.to_u32()) - i32(97)) + i32(10));
      }, if(((c >= rune(u32('A'))) && (c <= rune(u32('F')))), {
        digit = ((i32(c.to_u32()) - i32(65)) + i32(10));
      })));
      result = ((result * i32(16)) + digit);
    });
    i = (i + usize(1));
  };
  result
});
49
+
50
// Parse a string of decimal digits to i32.
//
// The caller is expected to pass ASCII digits only; each character is
// converted by subtracting 48 ('0') without further validation.
//
// The accumulator stops growing once it exceeds 0x10FFFF (the largest
// Unicode code point). Without this, long digit runs overflow i32 and can
// wrap to a small, seemingly valid code point. Any input whose value is
// above 0x10FFFF therefore returns some value above 0x10FFFF, not
// necessarily the exact number.
_parse_dec :: (fn(s: String) -> i32)({
  (result : i32) = i32(0);
  (i : usize) = usize(0);
  while ((i < s.len())), {
    // result <= 0x10FFFF here, so (result * 10 + 9) always fits in i32.
    if((result <= i32(0x10FFFF)), {
      c := s.at(i).unwrap();
      result = ((result * i32(10)) + (i32(c.to_u32()) - i32(48)));
    });
    i = (i + usize(1));
  };
  result
});
61
+
62
// True when `c` is an ASCII digit or an ASCII letter of either case.
_is_alpha_numeric :: (fn(c: rune) -> bool)(
  cond(
    ((c >= rune(u32('0'))) && (c <= rune(u32('9')))) => true,
    ((c >= rune(u32('A'))) && (c <= rune(u32('Z')))) => true,
    ((c >= rune(u32('a'))) && (c <= rune(u32('z')))) => true,
    true => false
  )
);
66
+
67
// True when `c` is an ASCII decimal digit (0-9).
_is_dec_digit :: (fn(c: rune) -> bool)(
  ((c >= rune(u32('0'))) && (c <= rune(u32('9'))))
);

// True when `c` is an ASCII hexadecimal digit (0-9, a-f, A-F).
_is_hex_digit :: (fn(c: rune) -> bool)(
  cond(
    ((c >= rune(u32('0'))) && (c <= rune(u32('9')))) => true,
    ((c >= rune(u32('a'))) && (c <= rune(u32('f')))) => true,
    ((c >= rune(u32('A'))) && (c <= rune(u32('F')))) => true,
    true => false
  )
);

// Decode HTML entities in a string (Legacy mode — entities without ; are also decoded).
//
// Handling per reference kind:
//   - Numeric (&#DD; / &#xHH;): decoded when is_valid_entity_code accepts the
//     value; otherwise the original reference text is kept. A trailing ';'
//     is consumed when present but is not required.
//   - Named with ';': looked up in the entity map; unknown names are emitted
//     literally, including the '&' and ';'.
//   - Named without ';': progressively shorter prefixes of the name are tried;
//     a prefix decodes only if it is in the legacy set and the entity map.
//     With no match, '&' is emitted and scanning resumes right after it.
//
// Input that is empty or contains no '&' is returned unchanged.
decode_html :: (fn(input: String) -> String)({
  _ensure_init();

  (len : usize) = input.len();
  if((len == usize(0)), {
    return input;
  });

  // Fast path: without any '&' there is nothing to decode.
  if(!(input.contains(`&`)), {
    return input;
  });

  (result : String) = ``;
  (i : usize) = usize(0);

  while ((i < len)), {
    c := input.at(i).unwrap();

    if((c != rune(u32('&'))), {
      // Ordinary character: copy it through.
      result = `${result}${from_code_point(i32(c.to_u32()))}`;
      i = (i + usize(1));
    }, {
      // Found '&'. `start` marks it so an invalid reference can be echoed verbatim.
      (start : usize) = i;
      i = (i + usize(1));

      if((i >= len), {
        // '&' was the last character of the input.
        result = `${result}&`;
      }, {
        next := input.at(i).unwrap();

        if((next == rune(u32('#'))), {
          // Numeric entity: &#N; or &#xN;
          i = (i + usize(1));
          if((i >= len), {
            result = `${result}&#`;
          }, {
            hex_char := input.at(i).unwrap();
            if(((hex_char == rune(u32('x'))) || (hex_char == rune(u32('X')))), {
              // Hex: &#xHH; — scan the run of hex digits after the 'x'.
              (digit_start : usize) = (i + usize(1));
              (digit_end : usize) = digit_start;
              (scanning : bool) = true;
              while (((digit_end < len) && scanning)), {
                if(_is_hex_digit(input.at(digit_end).unwrap()), {
                  digit_end = (digit_end + usize(1));
                }, {
                  scanning = false;
                });
              };

              if((digit_end > digit_start), {
                hex_str := input.substring(digit_start, digit_end);
                (code : i32) = _parse_hex(hex_str);

                // Consume the terminating semicolon when present.
                if(((digit_end < len) && (input.at(digit_end).unwrap() == rune(u32(';')))), {
                  i = (digit_end + usize(1));
                }, {
                  i = digit_end;
                });

                if(is_valid_entity_code(code), {
                  result = `${result}${from_code_point(code)}`;
                }, {
                  // Invalid code (e.g., surrogates) — keep original entity text
                  (orig_hex : String) = input.substring(start, i);
                  result = `${result}${orig_hex}`;
                });
              }, {
                // No hex digits — output "&#" plus the x/X literally
                result = `${result}&#${from_code_point(i32(hex_char.to_u32()))}`;
                i = (i + usize(1));
              });
            }, {
              // Decimal: &#DD; — scan the run of decimal digits.
              (digit_start : usize) = i;
              (digit_end : usize) = digit_start;
              (scanning : bool) = true;
              while (((digit_end < len) && scanning)), {
                if(_is_dec_digit(input.at(digit_end).unwrap()), {
                  digit_end = (digit_end + usize(1));
                }, {
                  scanning = false;
                });
              };

              if((digit_end > digit_start), {
                dec_str := input.substring(digit_start, digit_end);
                (code : i32) = _parse_dec(dec_str);

                // Consume the terminating semicolon when present.
                if(((digit_end < len) && (input.at(digit_end).unwrap() == rune(u32(';')))), {
                  i = (digit_end + usize(1));
                }, {
                  i = digit_end;
                });

                if(is_valid_entity_code(code), {
                  result = `${result}${from_code_point(code)}`;
                }, {
                  // Invalid code (e.g., surrogates) — keep original entity text
                  (orig_dec : String) = input.substring(start, i);
                  result = `${result}${orig_dec}`;
                });
              }, {
                // No decimal digits: emit "&#"; `i` stays on the current
                // character so the outer loop handles it next.
                result = `${result}&#`;
              });
            });
          });
        }, {
          // Named entity: &name; or &name (legacy).
          // The candidate name is the run of ASCII alphanumerics after '&'.
          (name_start : usize) = i;
          (name_end : usize) = name_start;
          (scanning : bool) = true;
          while (((name_end < len) && scanning)), {
            if(_is_alpha_numeric(input.at(name_end).unwrap()), {
              name_end = (name_end + usize(1));
            }, {
              scanning = false;
            });
          };

          name_str := input.substring(name_start, name_end);

          // Check for semicolon at name_end
          (has_semi : bool) = ((name_end < len) && (input.at(name_end).unwrap() == rune(u32(';'))));

          if(has_semi, {
            // Try exact match with semicolon
            match(_entity_map.get(name_str),
              .Some(decoded) => {
                result = `${result}${decoded}`;
                i = (name_end + usize(1));
              },
              .None => {
                // Unknown entity — output literally
                result = `${result}&${name_str};`;
                i = (name_end + usize(1));
              }
            );
          }, {
            // Legacy mode: try progressively shorter names
            (matched : bool) = false;
            (try_end : usize) = name_end;

            while (((try_end > name_start) && !(matched))), {
              try_name := input.substring(name_start, try_end);
              if(_legacy_set.contains(try_name), {
                match(_entity_map.get(try_name),
                  .Some(decoded) => {
                    result = `${result}${decoded}`;
                    i = try_end;
                    matched = true;
                  },
                  .None => {
                    try_end = (try_end - usize(1));
                  }
                );
              }, {
                try_end = (try_end - usize(1));
              });
            };

            if(!(matched), {
              // No legacy match — output '&' literally and continue
              result = `${result}&`;
              i = name_start;
            });
          });
        });
      });
    });
  };

  result
});
282
+
283
+ export decode_html, is_valid_entity_code, from_code_point;
@@ -0,0 +1,36 @@
1
+ // HTML character utility functions
2
+ //
3
+ // Provides Unicode codepoint validation and conversion for HTML entity processing.
4
+ //
5
+ // Example:
6
+ // { is_valid_entity_code, from_code_point } :: import "std/encoding/html_char_utils";
7
+ //
8
+ // assert(is_valid_entity_code(i32(65)), "A is valid");
9
+ // s := from_code_point(i32(65)); // "A"
10
+
11
+ open import "../string";
12
+
13
// Check if a Unicode codepoint is a valid HTML entity value.
//
// Returns false for:
//   - negative values (not a codepoint; previously some, e.g. -3, slipped
//     through every range test and were reported as valid)
//   - surrogates (0xD800-0xDFFF)
//   - 0xFDD0-0xFDEF, and any value whose low 16 bits are 0xFFFE or 0xFFFF
//   - control codes 0x00-0x08, 0x0B, 0x0E-0x1F, 0x7F-0x9F
//   - values above 0x10FFFF
// Everything else is accepted.
is_valid_entity_code :: (fn(c: i32) -> bool)(
  cond(
    (c < i32(0)) => false,
    ((c >= i32(0xD800)) && (c <= i32(0xDFFF))) => false,
    ((c >= i32(0xFDD0)) && (c <= i32(0xFDEF))) => false,
    (((c & i32(0xFFFF)) == i32(0xFFFF)) || ((c & i32(0xFFFF)) == i32(0xFFFE))) => false,
    ((c >= i32(0x00)) && (c <= i32(0x08))) => false,
    (c == i32(0x0B)) => false,
    ((c >= i32(0x0E)) && (c <= i32(0x1F))) => false,
    ((c >= i32(0x7F)) && (c <= i32(0x9F))) => false,
    (c > i32(0x10FFFF)) => false,
    true => true
  )
);
27
+
28
// Render a codepoint as a String: the value is cast to u32, wrapped in a
// rune, and interpolated into a template string. No validation is done here.
from_code_point :: (fn(c: i32) -> String)(
  `${rune(u32(c))}`
);
35
+
36
+ export is_valid_entity_code, from_code_point;