@shd101wyy/yo 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/README.md +8 -6
  2. package/out/cjs/index.cjs +691 -636
  3. package/out/cjs/yo-cli.cjs +710 -653
  4. package/out/esm/index.mjs +649 -594
  5. package/out/types/src/build-runner.d.ts +1 -1
  6. package/out/types/src/codegen/async/runtime-io-common.d.ts +2 -1
  7. package/out/types/src/codegen/async/runtime.d.ts +5 -1
  8. package/out/types/src/codegen/codegen-c.d.ts +2 -0
  9. package/out/types/src/codegen/functions/collection.d.ts +1 -1
  10. package/out/types/src/codegen/functions/context.d.ts +1 -0
  11. package/out/types/src/codegen/functions/generation.d.ts +10 -0
  12. package/out/types/src/codegen/utils/index.d.ts +4 -0
  13. package/out/types/src/env.d.ts +1 -0
  14. package/out/types/src/evaluator/builtins/build.d.ts +1 -0
  15. package/out/types/src/evaluator/builtins/comptime-index-fns.d.ts +17 -0
  16. package/out/types/src/evaluator/calls/index-trait.d.ts +17 -0
  17. package/out/types/src/evaluator/context.d.ts +19 -14
  18. package/out/types/src/evaluator/index.d.ts +3 -1
  19. package/out/types/src/evaluator/trait-checking.d.ts +1 -0
  20. package/out/types/src/evaluator/values/anonymous-module.d.ts +3 -2
  21. package/out/types/src/expr.d.ts +22 -1
  22. package/out/types/src/module-manager.d.ts +1 -0
  23. package/out/types/src/target.d.ts +1 -0
  24. package/out/types/src/value.d.ts +4 -1
  25. package/out/types/tsconfig.tsbuildinfo +1 -1
  26. package/package.json +1 -1
  27. package/std/build.yo +2 -1
  28. package/std/collections/array_list.yo +114 -26
  29. package/std/collections/btree_map.yo +13 -3
  30. package/std/collections/deque.yo +10 -0
  31. package/std/collections/hash_map.yo +15 -0
  32. package/std/collections/priority_queue.yo +5 -5
  33. package/std/encoding/html.yo +283 -0
  34. package/std/encoding/html_char_utils.yo +36 -0
  35. package/std/encoding/html_entities.yo +2262 -0
  36. package/std/encoding/punycode.yo +366 -0
  37. package/std/encoding/toml.yo +1 -1
  38. package/std/fmt/to_string.yo +5 -4
  39. package/std/glob/index.yo +2 -2
  40. package/std/libc/wctype.yo +55 -0
  41. package/std/path.yo +6 -6
  42. package/std/prelude.yo +826 -205
  43. package/std/process.yo +1 -1
  44. package/std/regex/compiler.yo +11 -11
  45. package/std/regex/index.yo +2 -4
  46. package/std/regex/parser.yo +69 -4
  47. package/std/regex/vm.yo +53 -46
  48. package/std/string/string.yo +1424 -1339
  49. package/std/string/unicode.yo +242 -0
  50. package/out/types/src/evaluator/calls/array.d.ts +0 -14
package/std/process.yo CHANGED
@@ -66,7 +66,7 @@ extern "Yo",
66
66
  // The first element is the program name
67
67
  // Example: ["./program", "arg1", "arg2"]
68
68
  raw_args :: (fn() -> [*(u8)]) {
69
- return __yo_args(:);
69
+ return __yo_args;
70
70
  };
71
71
  export raw_args;
72
72
 
package/std/regex/compiler.yo CHANGED
@@ -183,14 +183,14 @@ impl(NfaCompiler,
183
183
  right := node.children.get(usize(1)).unwrap();
184
184
  split_pc := self._emit(Instr.split_instr(usize(0), usize(0)));
185
185
  left_start := self._current_pc();
186
- self._program.instructions.set(split_pc, Instr.split_instr(left_start, usize(0)));
186
+ &(self._program.instructions(split_pc)).* = Instr.split_instr(left_start, usize(0));
187
187
  recur(self, left);
188
188
  jump_pc := self._emit(Instr.jump_instr(usize(0)));
189
189
  right_start := self._current_pc();
190
- self._program.instructions.set(split_pc, Instr.split_instr(left_start, right_start));
190
+ &(self._program.instructions(split_pc)).* = Instr.split_instr(left_start, right_start);
191
191
  recur(self, right);
192
192
  end_pc := self._current_pc();
193
- self._program.instructions.set(jump_pc, Instr.jump_instr(end_pc));
193
+ &(self._program.instructions(jump_pc)).* = Instr.jump_instr(end_pc);
194
194
  },
195
195
  .Quantifier => {
196
196
  child := node.children.get(usize(0)).unwrap();
@@ -217,10 +217,10 @@ impl(NfaCompiler,
217
217
  l3 := self._current_pc();
218
218
  cond(
219
219
  greedy => {
220
- self._program.instructions.set(split_pc, Instr.split_instr(l2, l3));
220
+ &(self._program.instructions(split_pc)).* = Instr.split_instr(l2, l3);
221
221
  },
222
222
  true => {
223
- self._program.instructions.set(split_pc, Instr.split_instr(l3, l2));
223
+ &(self._program.instructions(split_pc)).* = Instr.split_instr(l3, l2);
224
224
  }
225
225
  );
226
226
  },
@@ -230,10 +230,10 @@ impl(NfaCompiler,
230
230
  l2 := self._current_pc();
231
231
  cond(
232
232
  greedy => {
233
- self._program.instructions.set(split_pc, Instr.split_instr(last_body_start, l2));
233
+ &(self._program.instructions(split_pc)).* = Instr.split_instr(last_body_start, l2);
234
234
  },
235
235
  true => {
236
- self._program.instructions.set(split_pc, Instr.split_instr(l2, last_body_start));
236
+ &(self._program.instructions(split_pc)).* = Instr.split_instr(l2, last_body_start);
237
237
  }
238
238
  );
239
239
  },
@@ -248,10 +248,10 @@ impl(NfaCompiler,
248
248
  after := self._current_pc();
249
249
  cond(
250
250
  greedy => {
251
- self._program.instructions.set(split_pc, Instr.split_instr(body_start, after));
251
+ &(self._program.instructions(split_pc)).* = Instr.split_instr(body_start, after);
252
252
  },
253
253
  true => {
254
- self._program.instructions.set(split_pc, Instr.split_instr(after, body_start));
254
+ &(self._program.instructions(split_pc)).* = Instr.split_instr(after, body_start);
255
255
  }
256
256
  );
257
257
  };
@@ -282,7 +282,7 @@ impl(NfaCompiler,
282
282
  recur(self, child);
283
283
  self._emit(Instr.match_instr());
284
284
  sub_end := self._current_pc();
285
- self._program.instructions.set(la_pc, Instr.lookahead_instr(sub_start, sub_end, positive));
285
+ &(self._program.instructions(la_pc)).* = Instr.lookahead_instr(sub_start, sub_end, positive);
286
286
  },
287
287
  .Lookbehind => {
288
288
  child := node.children.get(usize(0)).unwrap();
@@ -292,7 +292,7 @@ impl(NfaCompiler,
292
292
  recur(self, child);
293
293
  self._emit(Instr.match_instr());
294
294
  sub_end := self._current_pc();
295
- self._program.instructions.set(lb_pc, Instr.lookbehind_instr(sub_start, sub_end, positive));
295
+ &(self._program.instructions(lb_pc)).* = Instr.lookbehind_instr(sub_start, sub_end, positive);
296
296
  }
297
297
  );
298
298
  })
package/std/regex/index.yo CHANGED
@@ -186,11 +186,11 @@ impl(Regex,
186
186
  exec : (fn(self : Self, input : String) -> Option(RegexMatch))({
187
187
  bytes := input.as_bytes();
188
188
  input_len := bytes.len();
189
+ vm := NfaVm.new(self._program, self._flags, input);
189
190
 
190
191
  // Sticky flag: only try matching at position 0
191
192
  cond(
192
193
  self._flags.sticky => {
193
- vm := NfaVm.new(self._program, self._flags, input);
194
194
  result := vm.exec_at(usize(0));
195
195
  cond(
196
196
  result.matched => .Some(self._build_match(result.slots, input)),
@@ -205,7 +205,6 @@ impl(Regex,
205
205
  );
206
206
 
207
207
  while (byte_pos <= input_len), {
208
- vm := NfaVm.new(self._program, self._flags, input);
209
208
  result := vm.exec_at(byte_pos);
210
209
 
211
210
  cond(
@@ -244,13 +243,13 @@ impl(Regex,
244
243
  matches := ArrayList(RegexMatch).new();
245
244
  bytes := input.as_bytes();
246
245
  input_len := bytes.len();
246
+ vm := NfaVm.new(self._program, self._flags, input);
247
247
 
248
248
  cond(
249
249
  self._flags.sticky => {
250
250
  // Sticky: only try at position 0, then at end of each match
251
251
  (byte_pos : usize) = usize(0);
252
252
  while (byte_pos <= input_len), {
253
- vm := NfaVm.new(self._program, self._flags, input);
254
253
  result := vm.exec_at(byte_pos);
255
254
 
256
255
  cond(
@@ -293,7 +292,6 @@ impl(Regex,
293
292
  );
294
293
 
295
294
  while (byte_pos <= input_len), {
296
- vm := NfaVm.new(self._program, self._flags, input);
297
295
  result := vm.exec_at(byte_pos);
298
296
 
299
297
  cond(
package/std/regex/parser.yo CHANGED
@@ -189,6 +189,27 @@ impl(RegexParser,
189
189
  r
190
190
  }),
191
191
 
192
+ // Parse \xHH hex escape — reads exactly 2 hex digits and returns the codepoint.
193
+ _parse_hex_byte : (fn(self : Self) -> Option(u32))({
194
+ if(((self._pos + usize(2)) > self._bytes.len()), { return .None; });
195
+ (h1 : u8) = self._bytes.get(self._pos).unwrap();
196
+ (h2 : u8) = self._bytes.get((self._pos + usize(1))).unwrap();
197
+ (v1 : i32) = cond(
198
+ ((h1 >= u8(48)) && (h1 <= u8(57))) => (i32(h1) - i32(48)),
199
+ ((h1 >= u8(65)) && (h1 <= u8(70))) => ((i32(h1) - i32(65)) + i32(10)),
200
+ ((h1 >= u8(97)) && (h1 <= u8(102))) => ((i32(h1) - i32(97)) + i32(10)),
201
+ true => { return .None; }
202
+ );
203
+ (v2 : i32) = cond(
204
+ ((h2 >= u8(48)) && (h2 <= u8(57))) => (i32(h2) - i32(48)),
205
+ ((h2 >= u8(65)) && (h2 <= u8(70))) => ((i32(h2) - i32(65)) + i32(10)),
206
+ ((h2 >= u8(97)) && (h2 <= u8(102))) => ((i32(h2) - i32(97)) + i32(10)),
207
+ true => { return .None; }
208
+ );
209
+ self._pos = (self._pos + usize(2));
210
+ .Some(u32(((v1 << i32(4)) | v2)))
211
+ }),
212
+
192
213
  _parse_class_escape : (fn(self : Self) -> Result(ArrayList(CharRange), String))({
193
214
  b := self._advance();
194
215
  match(b,
@@ -219,6 +240,14 @@ impl(RegexParser,
219
240
  r.push(CharRange(low: u32(33), high: u32(0x10FFFF)));
220
241
  .Ok(r)
221
242
  },
243
+ (ch == u8(120)) => {
244
+ r := ArrayList(CharRange).new();
245
+ match(self._parse_hex_byte(),
246
+ .Some(v) => r.push(CharRange(low: v, high: v)),
247
+ .None => r.push(CharRange(low: u32(ch), high: u32(ch)))
248
+ );
249
+ .Ok(r)
250
+ },
222
251
  true => {
223
252
  r := ArrayList(CharRange).new();
224
253
  codepoint := self._escape_char_codepoint(ch);
@@ -246,6 +275,31 @@ impl(RegexParser,
246
275
  (end_first == u8(93)) => {
247
276
  ranges.push(CharRange(low: low, high: low));
248
277
  },
278
+ (end_first == u8(92)) => {
279
+ // High end is an escape sequence (e.g. \x20, \0, \n)
280
+ self._pos = (self._pos + usize(1));
281
+ self._pos = (self._pos + usize(1));
282
+ esc := self._parse_class_escape();
283
+ match(esc,
284
+ .Ok(esc_ranges) => {
285
+ if(((esc_ranges.len() == usize(1)) && (esc_ranges.get(usize(0)).unwrap().low == esc_ranges.get(usize(0)).unwrap().high)), {
286
+ (high : u32) = esc_ranges.get(usize(0)).unwrap().low;
287
+ ranges.push(CharRange(low: low, high: high));
288
+ }, {
289
+ // Multi-range escape like \d can't be range endpoint; treat dash as literal
290
+ ranges.push(CharRange(low: low, high: low));
291
+ ranges.push(CharRange(low: u32(45), high: u32(45)));
292
+ j := usize(0);
293
+ while (j < esc_ranges.len()), (j = (j + usize(1))), {
294
+ ranges.push(esc_ranges.get(j).unwrap());
295
+ };
296
+ });
297
+ },
298
+ .Err(_e) => {
299
+ ranges.push(CharRange(low: low, high: low));
300
+ }
301
+ );
302
+ },
249
303
  true => {
250
304
  // Consume dash
251
305
  self._pos = (self._pos + usize(1));
@@ -284,10 +338,16 @@ impl(RegexParser,
284
338
  esc := self._parse_class_escape();
285
339
  match(esc,
286
340
  .Ok(esc_ranges) => {
287
- j := usize(0);
288
- while (j < esc_ranges.len()), (j = (j + usize(1))), {
289
- ranges.push(esc_ranges.get(j).unwrap());
290
- };
341
+ // If escape produced a single codepoint, check for range (e.g. \0-\x20)
342
+ if(((esc_ranges.len() == usize(1)) && (esc_ranges.get(usize(0)).unwrap().low == esc_ranges.get(usize(0)).unwrap().high)), {
343
+ (low : u32) = esc_ranges.get(usize(0)).unwrap().low;
344
+ self._try_parse_char_range(ranges, low);
345
+ }, {
346
+ j := usize(0);
347
+ while (j < esc_ranges.len()), (j = (j + usize(1))), {
348
+ ranges.push(esc_ranges.get(j).unwrap());
349
+ };
350
+ });
291
351
  },
292
352
  .Err(e) => { return .Err(e); }
293
353
  );
@@ -452,6 +512,11 @@ impl(RegexParser,
452
512
  (ch == u8(112)) => self._parse_unicode_property(false),
453
513
  // Negated unicode property \P{Name}
454
514
  (ch == u8(80)) => self._parse_unicode_property(true),
515
+ // Hex escape \xHH
516
+ (ch == u8(120)) => match(self._parse_hex_byte(),
517
+ .Some(v) => .Ok(RegexNode.literal(v)),
518
+ .None => .Ok(RegexNode.literal(u32(ch)))
519
+ ),
455
520
  true => .Ok(RegexNode.literal(self._escape_char_codepoint(ch)))
456
521
  ),
457
522
  .None => .Err(`Unexpected end of pattern after backslash`)
package/std/regex/vm.yo CHANGED
@@ -22,21 +22,18 @@ NfaThread :: object(
22
22
  impl(NfaThread,
23
23
  new : (fn(pc : usize, n_slots : usize) -> Self)({
24
24
  s := ArrayList(usize).with_capacity(n_slots);
25
- i := usize(0);
26
- while (i < n_slots), (i = (i + usize(1))), {
27
- // usize.MAX signals "unset"
28
- s.push(usize.MAX);
29
- };
25
+ // usize.MAX = 0xFF..FF, so memset with 0xFF fills each byte
26
+ s.resize_with_byte(n_slots, int(255));
30
27
  Self(pc: pc, slots: s)
31
28
  }),
32
29
 
33
30
  // Clone a thread with a new PC
34
31
  fork : (fn(self : Self, new_pc : usize) -> Self)({
35
32
  new_slots := ArrayList(usize).with_capacity(self.slots.len());
36
- i := usize(0);
37
- while (i < self.slots.len()), (i = (i + usize(1))), {
38
- new_slots.push(self.slots.get(i).unwrap());
39
- };
33
+ match(self.slots.ptr(),
34
+ .Some(src) => new_slots.extend_from_ptr(src, self.slots.len()),
35
+ .None => ()
36
+ );
40
37
  Self(pc: new_pc, slots: new_slots)
41
38
  })
42
39
  );
@@ -47,15 +44,6 @@ VmMatch :: struct(
47
44
  slots : ArrayList(usize)
48
45
  );
49
46
 
50
- // The NFA virtual machine
51
- NfaVm :: object(
52
- _program : NfaProgram,
53
- _flags : RegexFlags,
54
- _input : String,
55
- _bytes : ArrayList(u8),
56
- _n_slots : usize
57
- );
58
-
59
47
  // Helper struct for decoded characters
60
48
  DecodedChar :: struct(
61
49
  codepoint : u32,
@@ -68,16 +56,34 @@ DeferredThread :: struct(
68
56
  thread : NfaThread
69
57
  );
70
58
 
59
+ // The NFA virtual machine
60
+ NfaVm :: object(
61
+ _program : NfaProgram,
62
+ _flags : RegexFlags,
63
+ _input : String,
64
+ _bytes : ArrayList(u8),
65
+ _n_slots : usize,
66
+ _seen : ArrayList(bool),
67
+ _next_seen : ArrayList(bool)
68
+ );
69
+
71
70
  // Block 1: Constructor and leaf helpers (no method dependencies)
72
71
  impl(NfaVm,
73
72
  new : (fn(program : NfaProgram, flags : RegexFlags, input : String) -> Self)({
74
73
  n_slots := ((program.n_groups + usize(1)) * usize(2));
74
+ n_instr := program.instructions.len();
75
+ seen := ArrayList(bool).with_capacity(n_instr);
76
+ seen.resize_with_byte(n_instr, int(0));
77
+ next_seen := ArrayList(bool).with_capacity(n_instr);
78
+ next_seen.resize_with_byte(n_instr, int(0));
75
79
  Self(
76
80
  _program: program,
77
81
  _flags: flags,
78
82
  _input: input,
79
83
  _bytes: input.as_bytes(),
80
- _n_slots: n_slots
84
+ _n_slots: n_slots,
85
+ _seen: seen,
86
+ _next_seen: next_seen
81
87
  )
82
88
  }),
83
89
 
@@ -233,14 +239,15 @@ impl(NfaVm,
233
239
  true => ()
234
240
  );
235
241
 
236
- is_seen := seen.*.get(thread.pc).unwrap();
242
+ // Already checked bounds above: thread.pc < instructions.len()
243
+ is_seen := (seen.*)(thread.pc);
237
244
  cond(
238
245
  is_seen => { return (); },
239
246
  true => ()
240
247
  );
241
- seen.*.set(thread.pc, true);
248
+ &((seen.*)(thread.pc)).* = true;
242
249
 
243
- instr := self._program.instructions.get(thread.pc).unwrap();
250
+ instr := self._program.instructions(thread.pc);
244
251
 
245
252
  match(instr.kind,
246
253
  .Split => {
@@ -256,7 +263,7 @@ impl(NfaVm,
256
263
  .Save => {
257
264
  cond(
258
265
  (instr.slot < thread.slots.len()) => {
259
- thread.slots.set(instr.slot, byte_pos);
266
+ &(thread.slots(instr.slot)).* = byte_pos;
260
267
  },
261
268
  true => ()
262
269
  );
@@ -326,10 +333,7 @@ impl(NfaVm,
326
333
  sub_next := ArrayList(NfaThread).new();
327
334
 
328
335
  sub_seen := ArrayList(bool).with_capacity(self._program.instructions.len());
329
- si := usize(0);
330
- while (si < self._program.instructions.len()), (si = (si + usize(1))), {
331
- sub_seen.push(false);
332
- };
336
+ sub_seen.resize_with_byte(self._program.instructions.len(), int(0));
333
337
 
334
338
  initial := NfaThread.new(sub_start_pc, self._n_slots);
335
339
  self._add_thread(&(sub_current), initial, start_byte, &(sub_seen));
@@ -376,10 +380,7 @@ impl(NfaVm,
376
380
  sub_blen := decoded.byte_len;
377
381
 
378
382
  // Clear seen
379
- sj := usize(0);
380
- while (sj < sub_seen.len()), (sj = (sj + usize(1))), {
381
- sub_seen.set(sj, false);
382
- };
383
+ sub_seen.fill_with_byte(int(0));
383
384
 
384
385
  // Process consuming instructions
385
386
  st2 := usize(0);
@@ -428,8 +429,11 @@ impl(NfaVm,
428
429
  );
429
430
  };
430
431
 
432
+ // Swap sub_current/sub_next and clear for reuse
433
+ tmp_sub := sub_current;
431
434
  sub_current = sub_next;
432
- sub_next = ArrayList(NfaThread).new();
435
+ sub_next = tmp_sub;
436
+ sub_next.clear();
433
437
  sub_pos = (sub_pos + sub_blen);
434
438
  };
435
439
 
@@ -440,17 +444,16 @@ impl(NfaVm,
440
444
  // Block 5: exec_at (depends on Block 1+2+3+4)
441
445
  impl(NfaVm,
442
446
  exec_at : (fn(self : Self, start_byte : usize) -> VmMatch)({
447
+ // Reuse pre-allocated seen buffers from the VM
443
448
  current := ArrayList(NfaThread).new();
444
449
  next := ArrayList(NfaThread).new();
450
+ seen := self._seen;
451
+ next_seen := self._next_seen;
445
452
  deferred := ArrayList(DeferredThread).new();
446
453
 
447
- seen := ArrayList(bool).with_capacity(self._program.instructions.len());
448
- next_seen := ArrayList(bool).with_capacity(self._program.instructions.len());
449
- i := usize(0);
450
- while (i < self._program.instructions.len()), (i = (i + usize(1))), {
451
- seen.push(false);
452
- next_seen.push(false);
453
- };
454
+ // Clear for this execution
455
+ seen.fill_with_byte(int(0));
456
+ next_seen.fill_with_byte(int(0));
454
457
 
455
458
  initial := NfaThread.new(usize(0), self._n_slots);
456
459
  self._add_thread(&(current), initial, start_byte, &(seen));
@@ -474,12 +477,9 @@ impl(NfaVm,
474
477
  true => ()
475
478
  );
476
479
 
477
- // Clear seen flags for this generation
478
- j := usize(0);
479
- while (j < seen.len()), (j = (j + usize(1))), {
480
- seen.set(j, false);
481
- next_seen.set(j, false);
482
- };
480
+ // Clear seen flags for this generation using memset
481
+ seen.fill_with_byte(int(0));
482
+ next_seen.fill_with_byte(int(0));
483
483
 
484
484
  // Process deferred threads targeting this byte_pos
485
485
  new_deferred := ArrayList(DeferredThread).new();
@@ -714,8 +714,11 @@ impl(NfaVm,
714
714
  );
715
715
  };
716
716
 
717
+ // Swap current/next and clear for reuse
718
+ tmp_c := current;
717
719
  current = next;
718
- next = ArrayList(NfaThread).new();
720
+ next = tmp_c;
721
+ next.clear();
719
722
 
720
723
  cond(
721
724
  at_end => { break; },
@@ -725,6 +728,10 @@ impl(NfaVm,
725
728
  );
726
729
  };
727
730
 
731
+ // Store seen buffers back for reuse in next exec_at call
732
+ self._seen = seen;
733
+ self._next_seen = next_seen;
734
+
728
735
  best_match
729
736
  })
730
737
  );