@shd101wyy/yo 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@shd101wyy/yo",
3
3
  "displayName": "Yo",
4
- "version": "0.1.5",
4
+ "version": "0.1.6",
5
5
  "main": "./out/cjs/index.cjs",
6
6
  "module": "./out/esm/index.mjs",
7
7
  "types": "./out/types/src/index.d.ts",
package/std/build.yo CHANGED
@@ -57,6 +57,7 @@ CompilationTarget :: {
57
57
  X86_64_Linux_Gnu: "x86_64-linux-gnu",
58
58
  X86_64_Linux_Musl: "x86_64-linux-musl",
59
59
  Aarch64_Linux_Gnu: "aarch64-linux-gnu",
60
+ Aarch64_Linux_Musl: "aarch64-linux-musl",
60
61
  Aarch64_Macos: "aarch64-macos",
61
62
  X86_64_Macos: "x86_64-macos",
62
63
  X86_64_Windows_Msvc: "x86_64-windows-msvc",
@@ -80,7 +81,7 @@ Executable :: struct(
80
81
  root : comptime_string,
81
82
  (target : comptime_string) ?= __yo_build_target_host(),
82
83
  (optimize : Optimize) ?= Optimize.Debug,
83
- (allocator : Allocator) ?= Allocator.Mimalloc,
84
+ (allocator : Allocator) ?= Allocator.Libc,
84
85
  (sanitize : Sanitize) ?= Sanitize.None
85
86
  );
86
87
  export Executable;
@@ -8,7 +8,7 @@
8
8
  * - RAII for automatic cleanup
9
9
  */
10
10
  { GlobalAllocator, AllocError } :: import "../allocator.yo";
11
- { memmove } :: import "../libc/string.yo";
11
+ { memmove, memcpy, memset } :: import "../libc/string.yo";
12
12
  { malloc, calloc, realloc, free, aligned_alloc } :: GlobalAllocator;
13
13
 
14
14
  /**
@@ -415,12 +415,144 @@ impl(forall(T : Type), ArrayList(T),
415
415
  )
416
416
  ),
417
417
 
418
/**
 * Grow the backing storage so that at least `min_cap` elements fit.
 *
 * Returns immediately when the current capacity already covers `min_cap`.
 * A zero-capacity list jumps straight to `min_cap`; otherwise the capacity
 * is doubled until it reaches `min_cap`. The buffer is obtained with
 * `GlobalAllocator.malloc` when there is no pointer yet and with
 * `GlobalAllocator.realloc` otherwise. Panics if the allocator yields `.None`.
 */
ensure_total_capacity : (fn(self: Self, min_cap: usize) -> unit)({
  if((self._capacity < min_cap), {
    grown := self._capacity;
    if((grown == usize(0)), {
      // Empty list: allocate exactly what was asked for.
      grown = min_cap;
    });
    while (grown < min_cap), {
      grown = (grown * usize(2));
    };
    allocation := match(self._ptr,
      .Some(current) => GlobalAllocator.realloc(
        .Some((*(void))(current)),
        (sizeof(T) * grown)
      ),
      .None => GlobalAllocator.malloc((sizeof(T) * grown))
    );
    match(allocation,
      .None => panic("ArrayList.ensure_total_capacity: allocation failed"),
      .Some(fresh) => {
        self._ptr = .Some((*(T))(fresh));
        self._capacity = grown;
      }
    );
  });
}),
453
+
454
/**
 * Bulk-append `count` elements read from `src` with a single memcpy.
 *
 * `src` must reference at least `count` readable elements. Capacity is
 * reserved through `ensure_total_capacity` before the copy; that call may
 * realloc the buffer, so `src` should not point into this list's own storage.
 * A `count` of zero is a no-op.
 * NOTE(review): elements are copied bytewise — presumably only appropriate
 * for types without RC; confirm against callers.
 */
extend_from_ptr : (fn(self: Self, src: *(T), count: usize) -> unit)({
  if((count != usize(0)), {
    self.ensure_total_capacity((self._length + count));
    match(self._ptr,
      .None => panic("ArrayList.extend_from_ptr: no ptr after ensure_total_capacity"),
      .Some(base) => {
        // Destination is the first slot past the current elements.
        tail := (*(void))((base &+ self._length));
        _ := memcpy(tail, (*(void))(src), (count * sizeof(T)));
        self._length = (self._length + count);
      }
    );
  });
}),
474
+
418
475
  /**
419
476
  * Clear all elements but keep capacity
420
477
  */
421
478
  clear : (fn(self: Self) -> unit)({
422
479
  Self._free_elements(self);
423
480
  self._length = usize(0);
481
+ }),
482
+
483
/**
 * Read the element stored at `index` with no bounds check.
 * The caller is responsible for guaranteeing `index < len`.
 * Panics only when the list has no backing pointer.
 */
get_unchecked : (fn(self: Self, index: usize) -> T)(
  match(self._ptr,
    .Some(base) => (base &+ index).*,
    .None => panic("ArrayList.get_unchecked: no ptr")
  )
),
493
+
494
/**
 * Return the address of the element at `index`; no bounds check, no copy.
 * The caller must guarantee `index < len`. The returned pointer stays
 * usable only until the list is modified.
 * Panics when the list has no backing pointer.
 */
get_ptr : (fn(self: Self, index: usize) -> *(T))(
  match(self._ptr,
    .Some(base) => (base &+ index),
    .None => panic("ArrayList.get_ptr: no ptr")
  )
),
504
+
505
/**
 * Store `value` into the slot at `index` with no bounds check.
 * The caller must guarantee `index < len`.
 * Panics when the list has no backing pointer.
 */
set_unchecked : (fn(self: Self, index: usize, value: T) -> unit)(
  match(self._ptr,
    .Some(base) => {
      slot := (base &+ index);
      slot.* = value;
    },
    .None => panic("ArrayList.set_unchecked: no ptr")
  )
),
518
+
519
/**
 * Overwrite the storage of all current elements with `byte_val` using one
 * memset call over `_length * sizeof(T)` bytes.
 *
 * The cost is linear in that byte count (a single library call, not
 * constant time). Every byte of every element receives the same value, so
 * for multi-byte element types only patterns such as 0 give a meaningful
 * per-element value.
 * Does nothing when the list has no backing pointer.
 * Only safe for types without RC (e.g., bool, u8, usize).
 */
fill_with_byte : (fn(self: Self, byte_val: int) -> unit)(
  match(self._ptr,
    .Some(base) => {
      _ := memset((*(void))(base), byte_val, (self._length * sizeof(T)));
    },
    .None => ()
  )
),
532
+
533
/**
 * Set the length to exactly `new_len`.
 *
 * Shrinking (or keeping the same length) only rewrites the length field;
 * no destructors run for the dropped elements. Growing reserves capacity
 * through `ensure_total_capacity` and memsets the newly exposed slots with
 * `byte_val`.
 * Only safe for trivial types (bool, u8, usize, etc.).
 */
resize_with_byte : (fn(self: Self, new_len: usize, byte_val: int) -> unit)({
  if((new_len > self._length), {
    self.ensure_total_capacity(new_len);
    match(self._ptr,
      .None => panic("ArrayList.resize_with_byte: no ptr after ensure"),
      .Some(base) => {
        // Fill starts at the first slot past the current elements.
        tail := (*(void))((base &+ self._length));
        added_bytes := ((new_len - self._length) * sizeof(T));
        _ := memset(tail, byte_val, added_bytes);
        self._length = new_len;
      }
    );
  }, {
    self._length = new_len;
  });
})
425
557
  );
426
558
  impl(forall(T : Type), ArrayList(T), Dispose(
@@ -0,0 +1,283 @@
1
+ // HTML entity decoding
2
+ //
3
+ // Decodes named (&amp;), decimal (&#38;), and hex (&#x26;) HTML character references.
4
+ // Uses Legacy mode — entities without trailing semicolon are also decoded.
5
+ //
6
+ // Example:
7
+ // { decode_html } :: import "std/encoding/html";
8
+ //
9
+ // result := decode_html(`&amp; &lt; &#38; &#x26;`);
10
+ // assert((result == `& < & &`), "decoded entities");
11
+
12
+ open import "../string";
13
+ { HashMap } :: import "../collections/hash_map";
14
+ { HashSet } :: import "../collections/hash_set";
15
+ { is_valid_entity_code, from_code_point } :: import "./html_char_utils";
16
+ { _build_entity_map, _build_legacy_set } :: import "./html_entities";
17
+
18
+ // Module-level state: lazily initialized entity map and legacy set.
19
+ _state_initialized := false;
20
+ _entity_map := HashMap(String, String).new();
21
+ _legacy_set := HashSet(String).new();
22
+
23
// Populate the module-level entity map and legacy set on first use.
// Once `_state_initialized` is true, later calls do nothing.
_ensure_init :: (fn() -> unit)({
  cond(
    (_state_initialized) => (),
    true => {
      _entity_map = _build_entity_map();
      _legacy_set = _build_legacy_set();
      _state_initialized = true;
    }
  );
});
30
+
31
// Parse a run of hexadecimal digits into an i32 code point candidate.
//
// Characters outside 0-9 / a-f / A-F contribute a digit value of 0 (the
// accumulator is still shifted), matching the previous behaviour.
//
// The result saturates at 0x110000 (one past the largest Unicode code point)
// so that an over-long digit run such as "100000041" can no longer overflow
// the i32 accumulator and wrap around to a valid-looking code point.
_parse_hex :: (fn(s: String) -> i32)({
  (out_of_range : i32) = i32(0x110000);
  (result : i32) = i32(0);
  (i : usize) = usize(0);
  while ((i < s.len())), {
    c := s.at(i).unwrap();
    (digit : i32) = cond(
      ((c >= rune(u32('0'))) && (c <= rune(u32('9')))) => (i32(c.to_u32()) - i32(48)),
      ((c >= rune(u32('a'))) && (c <= rune(u32('f')))) => ((i32(c.to_u32()) - i32(97)) + i32(10)),
      ((c >= rune(u32('A'))) && (c <= rune(u32('F')))) => ((i32(c.to_u32()) - i32(65)) + i32(10)),
      true => i32(0)
    );
    // `result` never exceeds 0x110000 here, so this product cannot overflow i32.
    widened := ((result * i32(16)) + digit);
    result = cond(
      (widened > i32(0x10FFFF)) => out_of_range,
      true => widened
    );
    i = (i + usize(1));
  };
  result
});
49
+
50
// Parse a run of decimal digits into an i32 code point candidate.
//
// The caller is expected to pass ASCII digits only; each character is
// converted with `c - '0'` without validation, as before.
//
// The result saturates at 0x110000 (one past the largest Unicode code point)
// so that an over-long digit run such as "4294967361" can no longer overflow
// the i32 accumulator and wrap around to a valid-looking code point.
_parse_dec :: (fn(s: String) -> i32)({
  (out_of_range : i32) = i32(0x110000);
  (result : i32) = i32(0);
  (i : usize) = usize(0);
  while ((i < s.len())), {
    c := s.at(i).unwrap();
    (digit : i32) = (i32(c.to_u32()) - i32(48));
    // `result` never exceeds 0x110000 here, so this product cannot overflow i32.
    widened := ((result * i32(10)) + digit);
    result = cond(
      (widened > i32(0x10FFFF)) => out_of_range,
      true => widened
    );
    i = (i + usize(1));
  };
  result
});
61
+
62
// True when `c` is an ASCII letter (a-z, A-Z) or an ASCII digit (0-9).
_is_alpha_numeric :: (fn(c: rune) -> bool)(
  cond(
    ((c >= rune(u32('a'))) && (c <= rune(u32('z')))) => true,
    ((c >= rune(u32('A'))) && (c <= rune(u32('Z')))) => true,
    ((c >= rune(u32('0'))) && (c <= rune(u32('9')))) => true,
    true => false
  )
);
66
+
67
// True when `c` is an ASCII hexadecimal digit (0-9, a-f, A-F).
_is_hex_digit :: (fn(c: rune) -> bool)(
  ((((c >= rune(u32('0'))) && (c <= rune(u32('9')))) || ((c >= rune(u32('a'))) && (c <= rune(u32('f'))))) || ((c >= rune(u32('A'))) && (c <= rune(u32('F')))))
);

// Decode HTML entities in a string (Legacy mode — entities without ; are also decoded).
//
// Handles three reference forms:
//   - decimal  `&#DD;`   — digits parsed by `_parse_dec`
//   - hex      `&#xHH;`  — digits parsed by `_parse_hex`
//   - named    `&name;`  — looked up in `_entity_map`; without a trailing ';'
//                          progressively shorter prefixes are tried against
//                          `_legacy_set`
// Numeric references whose code fails `is_valid_entity_code` are copied
// through verbatim. Anything that does not form a reference is emitted
// literally. Returns `input` itself when it is empty or contains no '&'.
//
// NOTE(review): an unknown `&name;` (with semicolon) is emitted literally and
// no legacy prefix match is attempted — confirm this is the intended
// legacy-mode behaviour.
decode_html :: (fn(input: String) -> String)({
  _ensure_init();

  (len : usize) = input.len();
  if(((len == usize(0))), {
    return input;
  });

  // Quick check: if no '&', return as-is
  if(!(input.contains(`&`)), {
    return input;
  });

  (result : String) = ``;
  (i : usize) = usize(0);

  while ((i < len)), {
    c := input.at(i).unwrap();

    if((c != rune(u32('&'))), {
      // Not an entity start, just append the character
      result = `${result}${from_code_point(i32(c.to_u32()))}`;
      i = (i + usize(1));
    }, {
      // Found '&' — remember its position so invalid numeric references
      // can be echoed back unchanged.
      (start : usize) = i;
      i = (i + usize(1));

      if(((i >= len)), {
        // '&' was the last character of the input.
        result = `${result}&`;
      }, {
        next := input.at(i).unwrap();

        if(((next == rune(u32('#')))), {
          // Numeric entity: &#N; or &#xN;
          i = (i + usize(1));
          if(((i >= len)), {
            result = `${result}&#`;
          }, {
            hex_char := input.at(i).unwrap();
            if(((hex_char == rune(u32('x'))) || (hex_char == rune(u32('X')))), {
              // Hex: &#xHH;
              // Advance digit_end to the first non-hex character (or len).
              (digit_start : usize) = (i + usize(1));
              (digit_end : usize) = digit_start;
              (in_digits : bool) = true;
              while (((digit_end < len) && in_digits)), {
                dc := input.at(digit_end).unwrap();
                if(_is_hex_digit(dc), {
                  digit_end = (digit_end + usize(1));
                }, {
                  in_digits = false;
                });
              };

              if(((digit_end > digit_start)), {
                hex_str := input.substring(digit_start, digit_end);
                (code : i32) = _parse_hex(hex_str);

                // Consume the optional ';' terminator
                if((((digit_end < len) && (input.at(digit_end).unwrap() == rune(u32(';'))))), {
                  i = (digit_end + usize(1));
                }, {
                  i = digit_end;
                });

                if(is_valid_entity_code(code), {
                  result = `${result}${from_code_point(code)}`;
                }, {
                  // Invalid code (e.g., surrogates) — keep original entity text
                  (orig_hex : String) = input.substring(start, i);
                  result = `${result}${orig_hex}`;
                });
              }, {
                // No hex digits — output "&#x"/"&#X" literally and step past the x
                result = `${result}&#${from_code_point(i32(hex_char.to_u32()))}`;
                i = (i + usize(1));
              });
            }, {
              // Decimal: &#DD;
              // Advance digit_end to the first non-digit character (or len).
              (digit_start : usize) = i;
              (digit_end : usize) = digit_start;
              (in_digits : bool) = true;
              while (((digit_end < len) && in_digits)), {
                dc := input.at(digit_end).unwrap();
                if((((dc >= rune(u32('0'))) && (dc <= rune(u32('9'))))), {
                  digit_end = (digit_end + usize(1));
                }, {
                  in_digits = false;
                });
              };

              if(((digit_end > digit_start)), {
                dec_str := input.substring(digit_start, digit_end);
                (code : i32) = _parse_dec(dec_str);

                // Consume the optional ';' terminator
                if((((digit_end < len) && (input.at(digit_end).unwrap() == rune(u32(';'))))), {
                  i = (digit_end + usize(1));
                }, {
                  i = digit_end;
                });

                if(is_valid_entity_code(code), {
                  result = `${result}${from_code_point(code)}`;
                }, {
                  // Invalid code (e.g., surrogates) — keep original entity text
                  (orig_dec : String) = input.substring(start, i);
                  result = `${result}${orig_dec}`;
                });
              }, {
                // No decimal digits — emit "&#" and let the main loop
                // process the following character normally.
                result = `${result}&#`;
              });
            });
          });
        }, {
          // Named entity: &name; or &name (legacy)
          // Advance name_end to the first non-alphanumeric character
          // (a ';' also stops the scan) or to len.
          (name_start : usize) = i;
          (name_end : usize) = name_start;
          (in_name : bool) = true;
          while (((name_end < len) && in_name)), {
            nc := input.at(name_end).unwrap();
            if(_is_alpha_numeric(nc), {
              name_end = (name_end + usize(1));
            }, {
              in_name = false;
            });
          };

          name_str := input.substring(name_start, name_end);

          // Check for semicolon at name_end
          (has_semi : bool) = (((name_end < len) && (input.at(name_end).unwrap() == rune(u32(';')))));

          if(has_semi, {
            // Try exact match with semicolon
            match(_entity_map.get(name_str),
              .Some(decoded) => {
                result = `${result}${decoded}`;
                i = (name_end + usize(1));
              },
              .None => {
                // Unknown entity — output literally
                result = `${result}&${name_str};`;
                i = (name_end + usize(1));
              }
            );
          }, {
            // Legacy mode: try progressively shorter names
            (matched : bool) = false;
            (try_end : usize) = name_end;

            while ((((try_end > name_start) && !(matched)))), {
              try_name := input.substring(name_start, try_end);
              if(_legacy_set.contains(try_name), {
                match(_entity_map.get(try_name),
                  .Some(decoded) => {
                    result = `${result}${decoded}`;
                    i = try_end;
                    matched = true;
                  },
                  .None => {
                    try_end = (try_end - usize(1));
                  }
                );
              }, {
                try_end = (try_end - usize(1));
              });
            };

            if(!(matched), {
              // No legacy match — output '&' literally and continue
              result = `${result}&`;
              i = name_start;
            });
          });
        });
      });
    });
  };

  result
});
282
+
283
+ export decode_html, is_valid_entity_code, from_code_point;
@@ -0,0 +1,36 @@
1
+ // HTML character utility functions
2
+ //
3
+ // Provides Unicode codepoint validation and conversion for HTML entity processing.
4
+ //
5
+ // Example:
6
+ // { is_valid_entity_code, from_code_point } :: import "std/encoding/html_char_utils";
7
+ //
8
+ // assert(is_valid_entity_code(i32(65)), "A is valid");
9
+ // s := from_code_point(i32(65)); // "A"
10
+
11
+ open import "../string";
12
+
13
// Check if a Unicode codepoint is a valid HTML entity value.
//
// Rejected:
//   - negative values (not code points at all)
//   - surrogates U+D800..U+DFFF
//   - U+FDD0..U+FDEF
//   - any value whose low 16 bits are 0xFFFE or 0xFFFF
//   - control codes 0x00..0x08, 0x0B, 0x0E..0x1F and 0x7F..0x9F
//   - values above U+10FFFF
// Everything else is accepted.
is_valid_entity_code :: (fn(c: i32) -> bool)(
  cond(
    // Without this arm a negative input such as -3 matched no rejection
    // rule below and was reported as valid.
    (c < i32(0)) => false,
    ((c >= i32(0xD800)) && (c <= i32(0xDFFF))) => false,
    ((c >= i32(0xFDD0)) && (c <= i32(0xFDEF))) => false,
    (((c & i32(0xFFFF)) == i32(0xFFFF)) || ((c & i32(0xFFFF)) == i32(0xFFFE))) => false,
    ((c >= i32(0x00)) && (c <= i32(0x08))) => false,
    (c == i32(0x0B)) => false,
    ((c >= i32(0x0E)) && (c <= i32(0x1F))) => false,
    ((c >= i32(0x7F)) && (c <= i32(0x9F))) => false,
    (c > i32(0x10FFFF)) => false,
    true => true
  )
);
27
+
28
// Render a single Unicode codepoint as a String.
// The i32 is converted through u32 into a rune and interpolated into a
// template string; no validation of `c` happens here.
from_code_point :: (fn(c: i32) -> String)(
  `${rune(u32(c))}`
);
35
+
36
+ export is_valid_entity_code, from_code_point;