@shd101wyy/yo 0.0.28 → 0.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +20 -1
  2. package/out/cjs/index.cjs +7554 -7826
  3. package/out/cjs/yo-cli.cjs +7604 -7876
  4. package/out/esm/index.mjs +7453 -7725
  5. package/out/types/src/codegen/async/runtime-core.d.ts +2 -1
  6. package/out/types/src/codegen/async/runtime-io-common.d.ts +3 -1
  7. package/out/types/src/codegen/async/runtime-io-linux.d.ts +1 -0
  8. package/out/types/src/codegen/async/runtime-io-macos.d.ts +1 -0
  9. package/out/types/src/codegen/async/runtime-io-windows.d.ts +1 -0
  10. package/out/types/src/codegen/async/runtime.d.ts +2 -1
  11. package/out/types/src/codegen/async/state-machine.d.ts +18 -2
  12. package/out/types/src/codegen/functions/context.d.ts +5 -0
  13. package/out/types/src/codegen/parallelism/runtime.d.ts +2 -1
  14. package/out/types/src/codegen/utils/index.d.ts +2 -0
  15. package/out/types/src/target.d.ts +1 -0
  16. package/out/types/tsconfig.tsbuildinfo +1 -1
  17. package/package.json +1 -1
  18. package/std/cli/arg_parser.yo +365 -0
  19. package/std/collections/array_list.yo +108 -0
  20. package/std/collections/hash_map.yo +1 -1
  21. package/std/collections/hash_set.yo +7 -7
  22. package/std/collections/linked_list.yo +1 -1
  23. package/std/encoding/base64.yo +73 -0
  24. package/std/fs/file.yo +113 -6
  25. package/std/fs/types.yo +21 -0
  26. package/std/glob/glob.yo +206 -0
  27. package/std/http/http.yo +196 -0
  28. package/std/io/reader.yo +17 -0
  29. package/std/io/writer.yo +19 -0
  30. package/std/net/tcp.yo +1 -1
  31. package/std/prelude.yo +69 -0
  32. package/std/regex/compiler.yo +355 -0
  33. package/std/regex/flags.yo +104 -0
  34. package/std/regex/match.yo +83 -0
  35. package/std/regex/node.yo +283 -0
  36. package/std/regex/parser.yo +847 -0
  37. package/std/regex/regex.yo +714 -0
  38. package/std/regex/unicode.yo +365 -0
  39. package/std/regex/vm.yo +737 -0
  40. package/std/string/string.yo +398 -4
  41. package/std/sync/cond.yo +19 -19
  42. package/std/sync/mutex.yo +16 -16
  43. package/std/sys/bufio/buf_reader.yo +2 -2
  44. package/std/sys/future.yo +3 -3
  45. package/std/time/sleep.yo +18 -0
  46. package/std/toml/toml.yo +179 -0
  47. package/std/testing/assert.yo +0 -173
  48. package/std/time.yo +0 -13
@@ -0,0 +1,737 @@
1
+ // std/regex/vm.yo - NFA virtual machine (Thompson simulation)
2
+ //
3
+ // Executes a compiled NFA program against an input string.
4
+ // Uses Thompson's NFA simulation with parallel state tracking
5
+ // for O(n*m) time on patterns without backreferences or lookaround (those re-read input).
6
+
7
+ open import "std/collections/array_list";
8
+ open import "std/string";
9
+ { NfaProgram, Instr, InstrKind, ClassEntry, GroupNameEntry } :: import "./compiler.yo";
10
+ { CharRange } :: import "./node.yo";
11
+ { RegexFlags } :: import "./flags.yo";
12
+
13
+ // Max number of capture slots (group 0 + up to 99 groups = 200 slots); not referenced elsewhere in this file — NfaVm.new derives the slot count from program.n_groups
14
+ MAX_SLOTS :: 200;
15
+
16
+ // A single NFA thread: an instruction index into the program (pc) plus capture slots holding byte offsets (0xFFFFFFFFFFFFFFFF = unset)
17
+ NfaThread :: object(
18
+ pc : usize,
19
+ slots : ArrayList(usize)
20
+ );
21
+ // NfaThread constructors: `new` starts with n_slots slots, all unset; `fork` copies an existing thread
22
+ impl(NfaThread,
23
+ new : (fn(pc : usize, n_slots : usize) -> Self)({
24
+ s := ArrayList(usize).with_capacity(n_slots);
25
+ i := usize(0);
26
+ while (i < n_slots), (i = (i + usize(1))), {
27
+ // 0xFFFFFFFFFFFFFFFF signals "unset"
28
+ s.push(usize(0xFFFFFFFFFFFFFFFF));
29
+ };
30
+ Self(pc: pc, slots: s)
31
+ }),
32
+
33
+ // Clone a thread with a new PC; the slot values are copied one by one into a freshly built list
34
+ fork : (fn(self : Self, new_pc : usize) -> Self)({
35
+ new_slots := ArrayList(usize).with_capacity(self.slots.len());
36
+ i := usize(0);
37
+ while (i < self.slots.len()), (i = (i + usize(1))), {
38
+ new_slots.push(self.slots.get(i).unwrap());
39
+ };
40
+ Self(pc: new_pc, slots: new_slots)
41
+ })
42
+ );
43
+
44
+ // VM execution result: whether a match was found and the capture slots of the matching thread (exec_at returns an empty list when matched is false)
45
+ VmMatch :: struct(
46
+ matched : bool,
47
+ slots : ArrayList(usize)
48
+ );
49
+
50
+ // The NFA virtual machine: a compiled program, its flags, the input (as a String and as bytes) and the per-thread capture slot count
51
+ NfaVm :: object(
52
+ _program : NfaProgram,
53
+ _flags : RegexFlags,
54
+ _input : String,
55
+ _bytes : ArrayList(u8),
56
+ _n_slots : usize
57
+ );
58
+
59
+ // Helper struct for decoded characters: the code point and the number of input bytes it was decoded from
60
+ DecodedChar :: struct(
61
+ codepoint : u32,
62
+ byte_len : usize
63
+ );
64
+
65
+ // A thread deferred to a future byte position; exec_at creates one when a non-empty backreference matches and resumes it once byte_pos equals target_byte_pos
66
+ DeferredThread :: struct(
67
+ target_byte_pos : usize,
68
+ thread : NfaThread
69
+ );
70
+
71
+ // Block 1: Constructor and leaf helpers (no method dependencies). `new` reserves two slots per group for (n_groups + 1) groups
72
+ impl(NfaVm,
73
+ new : (fn(program : NfaProgram, flags : RegexFlags, input : String) -> Self)({
74
+ n_slots := ((program.n_groups + usize(1)) * usize(2));
75
+ Self(
76
+ _program: program,
77
+ _flags: flags,
78
+ _input: input,
79
+ _bytes: input.as_bytes(),
80
+ _n_slots: n_slots
81
+ )
82
+ }),
83
+ // Decode the UTF-8 sequence starting at byte `pos`. No validation: any lead byte outside the first three ranges (including 0x80..0xBF) is read as a 4-byte sequence, and the .unwrap() calls assume the following bytes exist
84
+ _decode_codepoint : (fn(self : Self, pos : usize) -> DecodedChar)({
85
+ first := self._bytes.get(pos).unwrap();
86
+ cond(
87
+ (first < u8(0x80)) =>
88
+ DecodedChar(codepoint: u32(first), byte_len: usize(1)),
89
+ ((first >= u8(0xC0)) && (first < u8(0xE0))) => {
90
+ second := self._bytes.get((pos + usize(1))).unwrap();
91
+ cp := (((u32(first) & u32(0x1F)) << u32(6)) | (u32(second) & u32(0x3F)));
92
+ DecodedChar(codepoint: cp, byte_len: usize(2))
93
+ },
94
+ ((first >= u8(0xE0)) && (first < u8(0xF0))) => {
95
+ second := self._bytes.get((pos + usize(1))).unwrap();
96
+ third := self._bytes.get((pos + usize(2))).unwrap();
97
+ cp := ((((u32(first) & u32(0x0F)) << u32(12)) | ((u32(second) & u32(0x3F)) << u32(6))) | (u32(third) & u32(0x3F)));
98
+ DecodedChar(codepoint: cp, byte_len: usize(3))
99
+ },
100
+ true => {
101
+ second := self._bytes.get((pos + usize(1))).unwrap();
102
+ third := self._bytes.get((pos + usize(2))).unwrap();
103
+ fourth := self._bytes.get((pos + usize(3))).unwrap();
104
+ cp := (((((u32(first) & u32(0x07)) << u32(18)) | ((u32(second) & u32(0x3F)) << u32(12))) | ((u32(third) & u32(0x3F)) << u32(6))) | (u32(fourth) & u32(0x3F)));
105
+ DecodedChar(codepoint: cp, byte_len: usize(4))
106
+ }
107
+ )
108
+ }),
109
+ // ASCII-only lowercase: maps 'A'..'Z' (65..90) to 'a'..'z' by adding 32; every other code point is returned unchanged
110
+ _to_lower : (fn(self : Self, cp : u32) -> u32)(
111
+ cond(
112
+ ((cp >= u32(65)) && (cp <= u32(90))) => (cp + u32(32)),
113
+ true => cp
114
+ )
115
+ ),
116
+ // True for ASCII digits (48..57), ASCII letters (65..90, 97..122) and '_' (95)
117
+ _is_word_char : (fn(self : Self, cp : u32) -> bool)(
118
+ ((((cp >= u32(48)) && (cp <= u32(57))) || ((cp >= u32(65)) && (cp <= u32(90)))) || (((cp >= u32(97)) && (cp <= u32(122))) || (cp == u32(95))))
119
+ ),
120
+ // Walk back from pos - 1 to the nearest byte that is not a UTF-8 continuation byte (10xxxxxx); assumes pos > 0 because pos - 1 is computed unconditionally
121
+ _find_prev_char_start : (fn(self : Self, pos : usize) -> usize)({
122
+ p := (pos - usize(1));
123
+ while (p > usize(0)), (p = (p - usize(1))), {
124
+ b := self._bytes.get(p).unwrap();
125
+ cond(
126
+ ((b < u8(0x80)) || (b >= u8(0xC0))) => { return p; },
127
+ true => ()
128
+ );
129
+ };
130
+ p
131
+ }),
132
+ // True when the byte just before pos is '\n' (10); false at pos 0
133
+ _prev_byte_is_newline : (fn(self : Self, pos : usize) -> bool)(
134
+ cond(
135
+ (pos == usize(0)) => false,
136
+ true => {
137
+ b := self._bytes.get((pos - usize(1))).unwrap();
138
+ (b == u8(10))
139
+ }
140
+ )
141
+ ),
142
+ // True when the byte at pos is '\n' (10); false at or past the end of the input
143
+ _cur_byte_is_newline : (fn(self : Self, pos : usize) -> bool)(
144
+ cond(
145
+ (pos >= self._bytes.len()) => false,
146
+ true => {
147
+ b := self._bytes.get(pos).unwrap();
148
+ (b == u8(10))
149
+ }
150
+ )
151
+ )
152
+ );
153
+
154
+ // Block 2: Methods that depend on Block 1. _char_matches compares code points exactly, or via _to_lower (ASCII only) when ignore_case is set
155
+ impl(NfaVm,
156
+ _char_matches : (fn(self : Self, expected : u32, actual : u32) -> bool)(
157
+ cond(
158
+ (expected == actual) => true,
159
+ self._flags.ignore_case => (self._to_lower(expected) == self._to_lower(actual)),
160
+ true => false
161
+ )
162
+ ),
163
+ // Test cp against the inclusive ranges of cls, honoring ignore_case and cls.negated
164
+ _codepoint_in_class : (fn(self : Self, cp : u32, cls : ClassEntry) -> bool)({
165
+ check_cp := cond(
166
+ self._flags.ignore_case => self._to_lower(cp),
167
+ true => cp
168
+ );
169
+ // First pass: compare the (possibly lowercased) code point with the (possibly lowercased) range bounds
170
+ (found : bool) = false;
171
+ i := usize(0);
172
+ while ((i < cls.ranges.len()) && (!(found))), (i = (i + usize(1))), {
173
+ r := cls.ranges.get(i).unwrap();
174
+ (low : u32) = cond(
175
+ self._flags.ignore_case => self._to_lower(r.low),
176
+ true => r.low
177
+ );
178
+ (high : u32) = cond(
179
+ self._flags.ignore_case => self._to_lower(r.high),
180
+ true => r.high
181
+ );
182
+ cond(
183
+ ((check_cp >= low) && (check_cp <= high)) => { found = true; },
184
+ true => ()
185
+ );
186
+ };
187
+ // Second pass, ignore_case only: retry with the original code point against the original bounds
188
+ cond(
189
+ ((!(found)) && self._flags.ignore_case) => {
190
+ i2 := usize(0);
191
+ while ((i2 < cls.ranges.len()) && (!(found))), (i2 = (i2 + usize(1))), {
192
+ r := cls.ranges.get(i2).unwrap();
193
+ cond(
194
+ ((cp >= r.low) && (cp <= r.high)) => { found = true; },
195
+ true => ()
196
+ );
197
+ };
198
+ },
199
+ true => ()
200
+ );
201
+ // Negated classes invert the result
202
+ cond(
203
+ cls.negated => (!(found)),
204
+ true => found
205
+ )
206
+ }),
207
+ // True when exactly one of the characters on either side of pos is a word character; positions outside the input count as non-word
208
+ _is_word_boundary : (fn(self : Self, pos : usize) -> bool)({
209
+ left_is_word := cond(
210
+ (pos == usize(0)) => false,
211
+ true => {
212
+ prev_pos := self._find_prev_char_start(pos);
213
+ prev := self._decode_codepoint(prev_pos);
214
+ self._is_word_char(prev.codepoint)
215
+ }
216
+ );
217
+ right_is_word := cond(
218
+ (pos >= self._bytes.len()) => false,
219
+ true => {
220
+ cur := self._decode_codepoint(pos);
221
+ self._is_word_char(cur.codepoint)
222
+ }
223
+ );
224
+ (left_is_word != right_is_word)
225
+ })
226
+ );
227
+
228
+ // Block 3: _add_thread (recursive, depends on Block 1+2): follows zero-width instructions from thread.pc and pushes onto list every thread that stops on any other instruction
229
+ impl(NfaVm,
230
+ _add_thread : (fn(self : Self, list : *(ArrayList(NfaThread)), thread : NfaThread, byte_pos : usize, seen : *(ArrayList(bool))) -> unit)({
231
+ cond(
232
+ (thread.pc >= self._program.instructions.len()) => { return (); },
233
+ true => ()
234
+ );
235
+ // seen is indexed by pc: each instruction is expanded at most once until the caller clears the flags
236
+ is_seen := seen.*.get(thread.pc).unwrap();
237
+ cond(
238
+ is_seen => { return (); },
239
+ true => ()
240
+ );
241
+ seen.*.set(thread.pc, true);
242
+
243
+ instr := self._program.instructions.get(thread.pc).unwrap();
244
+ // Split/Jump/Save and the assertions recurse without consuming input; every other kind (the `_` arm) is queued for the caller
245
+ match(instr.kind,
246
+ .Split => {
247
+ t1 := thread.fork(instr.target_a);
248
+ t2 := thread.fork(instr.target_b);
249
+ recur(self, list, t1, byte_pos, seen);
250
+ recur(self, list, t2, byte_pos, seen);
251
+ },
252
+ .Jump => {
253
+ new_t := thread.fork(instr.target);
254
+ recur(self, list, new_t, byte_pos, seen);
255
+ },
256
+ .Save => {
257
+ cond(
258
+ (instr.slot < thread.slots.len()) => {
259
+ thread.slots.set(instr.slot, byte_pos);
260
+ },
261
+ true => ()
262
+ );
263
+ new_t := NfaThread(pc: (thread.pc + usize(1)), slots: thread.slots);
264
+ recur(self, list, new_t, byte_pos, seen);
265
+ },
266
+ .AssertStart => {
267
+ passes := cond(
268
+ self._flags.multiline =>
269
+ ((byte_pos == usize(0)) || self._prev_byte_is_newline(byte_pos)),
270
+ true => (byte_pos == usize(0))
271
+ );
272
+ cond(
273
+ passes => {
274
+ new_t := thread.fork((thread.pc + usize(1)));
275
+ recur(self, list, new_t, byte_pos, seen);
276
+ },
277
+ true => ()
278
+ );
279
+ },
280
+ .AssertEnd => {
281
+ passes := cond(
282
+ self._flags.multiline =>
283
+ ((byte_pos >= self._bytes.len()) || self._cur_byte_is_newline(byte_pos)),
284
+ true => (byte_pos >= self._bytes.len())
285
+ );
286
+ cond(
287
+ passes => {
288
+ new_t := thread.fork((thread.pc + usize(1)));
289
+ recur(self, list, new_t, byte_pos, seen);
290
+ },
291
+ true => ()
292
+ );
293
+ },
294
+ .AssertWordBoundary => {
295
+ cond(
296
+ self._is_word_boundary(byte_pos) => {
297
+ new_t := thread.fork((thread.pc + usize(1)));
298
+ recur(self, list, new_t, byte_pos, seen);
299
+ },
300
+ true => ()
301
+ );
302
+ },
303
+ .AssertNonWordBoundary => {
304
+ cond(
305
+ (!(self._is_word_boundary(byte_pos))) => {
306
+ new_t := thread.fork((thread.pc + usize(1)));
307
+ recur(self, list, new_t, byte_pos, seen);
308
+ },
309
+ true => ()
310
+ );
311
+ },
312
+ _ => {
313
+ list.*.push(thread);
314
+ }
315
+ );
316
+ })
317
+ );
318
+
319
+ // Block 4: Sub-VM for lookahead/lookbehind (depends on Block 1+2+3)
320
+ impl(NfaVm,
321
+ // Run a sub-VM starting at sub_start_pc from start_byte.
322
+ // If required_end is not UNSET, only succeed when Match is found at exactly required_end.
323
+ _run_sub_vm : (fn(self : Self, sub_start_pc : usize, start_byte : usize, required_end : usize) -> bool)({
324
+ unset := usize(0xFFFFFFFFFFFFFFFF);
325
+ sub_current := ArrayList(NfaThread).new();
326
+ sub_next := ArrayList(NfaThread).new();
327
+ // One seen flag per instruction, all initially false
328
+ sub_seen := ArrayList(bool).with_capacity(self._program.instructions.len());
329
+ si := usize(0);
330
+ while (si < self._program.instructions.len()), (si = (si + usize(1))), {
331
+ sub_seen.push(false);
332
+ };
333
+ // Seed the current list with every thread reachable from sub_start_pc without consuming input
334
+ initial := NfaThread.new(sub_start_pc, self._n_slots);
335
+ self._add_thread(&(sub_current), initial, start_byte, &(sub_seen));
336
+
337
+ sub_pos := start_byte;
338
+ input_len := self._bytes.len();
339
+
340
+ while (sub_pos <= input_len), {
341
+ cond(
342
+ (sub_current.len() == usize(0)) => { break; },
343
+ true => ()
344
+ );
345
+
346
+ // Check for Match in current threads
347
+ st := usize(0);
348
+ while (st < sub_current.len()), (st = (st + usize(1))), {
349
+ st_thread := sub_current.get(st).unwrap();
350
+ st_instr := self._program.instructions.get(st_thread.pc).unwrap();
351
+ match(st_instr.kind,
352
+ .Match => {
353
+ cond(
354
+ (required_end == unset) => { return true; },
355
+ (sub_pos == required_end) => { return true; },
356
+ true => ()
357
+ );
358
+ },
359
+ _ => ()
360
+ );
361
+ };
362
+ // At end of input there is nothing left to consume
363
+ cond(
364
+ (sub_pos >= input_len) => { break; },
365
+ true => ()
366
+ );
367
+
368
+ // Stop if we've passed the required end position
369
+ cond(
370
+ ((required_end != unset) && (sub_pos > required_end)) => { break; },
371
+ true => ()
372
+ );
373
+
374
+ decoded := self._decode_codepoint(sub_pos);
375
+ sub_cp := decoded.codepoint;
376
+ sub_blen := decoded.byte_len;
377
+
378
+ // Clear seen
379
+ sj := usize(0);
380
+ while (sj < sub_seen.len()), (sj = (sj + usize(1))), {
381
+ sub_seen.set(sj, false);
382
+ };
383
+
384
+ // Process consuming instructions (only Char, AnyChar and CharClass; threads on any other kind are dropped here)
385
+ st2 := usize(0);
386
+ while (st2 < sub_current.len()), (st2 = (st2 + usize(1))), {
387
+ st_thread := sub_current.get(st2).unwrap();
388
+ st_instr := self._program.instructions.get(st_thread.pc).unwrap();
389
+ match(st_instr.kind,
390
+ .Char => {
391
+ cond(
392
+ self._char_matches(st_instr.codepoint, sub_cp) => {
393
+ new_t := st_thread.fork((st_thread.pc + usize(1)));
394
+ self._add_thread(&(sub_next), new_t, (sub_pos + sub_blen), &(sub_seen));
395
+ },
396
+ true => ()
397
+ );
398
+ },
399
+ .AnyChar => {
400
+ should_match := cond(
401
+ self._flags.dot_all => true,
402
+ true => (sub_cp != u32(10))
403
+ );
404
+ cond(
405
+ should_match => {
406
+ new_t := st_thread.fork((st_thread.pc + usize(1)));
407
+ self._add_thread(&(sub_next), new_t, (sub_pos + sub_blen), &(sub_seen));
408
+ },
409
+ true => ()
410
+ );
411
+ },
412
+ .CharClass => {
413
+ class_opt := self._program.classes.get(st_instr.class_idx);
414
+ match(class_opt,
415
+ .Some(cls) => {
416
+ cond(
417
+ self._codepoint_in_class(sub_cp, cls) => {
418
+ new_t := st_thread.fork((st_thread.pc + usize(1)));
419
+ self._add_thread(&(sub_next), new_t, (sub_pos + sub_blen), &(sub_seen));
420
+ },
421
+ true => ()
422
+ );
423
+ },
424
+ .None => ()
425
+ );
426
+ },
427
+ _ => ()
428
+ );
429
+ };
430
+ // Advance one character: the next list becomes the current list
431
+ sub_current = sub_next;
432
+ sub_next = ArrayList(NfaThread).new();
433
+ sub_pos = (sub_pos + sub_blen);
434
+ };
435
+ // No thread reached Match (at required_end, when one was given)
436
+ false
437
+ })
438
+ );
439
+
440
+ // Block 5: exec_at (depends on Block 1+2+3+4): run the program from start_byte and return the last match recorded (matched = false with empty slots if none)
441
+ impl(NfaVm,
442
+ exec_at : (fn(self : Self, start_byte : usize) -> VmMatch)({
443
+ current := ArrayList(NfaThread).new();
444
+ next := ArrayList(NfaThread).new();
445
+ deferred := ArrayList(DeferredThread).new();
446
+ // seen guards additions to `current`, next_seen guards additions to `next`; one flag per instruction
447
+ seen := ArrayList(bool).with_capacity(self._program.instructions.len());
448
+ next_seen := ArrayList(bool).with_capacity(self._program.instructions.len());
449
+ i := usize(0);
450
+ while (i < self._program.instructions.len()), (i = (i + usize(1))), {
451
+ seen.push(false);
452
+ next_seen.push(false);
453
+ };
454
+ // Every attempt starts at instruction 0 with all capture slots unset
455
+ initial := NfaThread.new(usize(0), self._n_slots);
456
+ self._add_thread(&(current), initial, start_byte, &(seen));
457
+
458
+ best_match := VmMatch(matched: false, slots: ArrayList(usize).new());
459
+ byte_pos := start_byte;
460
+ input_len := self._bytes.len();
461
+ unset := usize(0xFFFFFFFFFFFFFFFF);
462
+ // One iteration per input character, plus a final iteration at end of input
463
+ while (byte_pos <= input_len), {
464
+ (cur_cp : u32) = u32(0);
465
+ (char_byte_len : usize) = usize(0);
466
+ (at_end : bool) = (byte_pos >= input_len);
467
+
468
+ cond(
469
+ (!(at_end)) => {
470
+ decoded := self._decode_codepoint(byte_pos);
471
+ cur_cp = decoded.codepoint;
472
+ char_byte_len = decoded.byte_len;
473
+ },
474
+ true => ()
475
+ );
476
+
477
+ // Clear seen flags for this generation
478
+ j := usize(0);
479
+ while (j < seen.len()), (j = (j + usize(1))), {
480
+ seen.set(j, false);
481
+ next_seen.set(j, false);
482
+ };
483
+
484
+ // Process deferred threads targeting this byte_pos
485
+ new_deferred := ArrayList(DeferredThread).new();
486
+ di := usize(0);
487
+ while (di < deferred.len()), (di = (di + usize(1))), {
488
+ d := deferred.get(di).unwrap();
489
+ cond(
490
+ (d.target_byte_pos == byte_pos) => {
491
+ self._add_thread(&(current), d.thread, byte_pos, &(seen));
492
+ },
493
+ true => {
494
+ new_deferred.push(d);
495
+ }
496
+ );
497
+ };
498
+ deferred = new_deferred;
499
+
500
+ // Break if no threads and no deferred
501
+ cond(
502
+ ((current.len() == usize(0)) && (deferred.len() == usize(0))) => { break; },
503
+ true => ()
504
+ );
505
+
506
+ // Process each thread (priority order — first match wins in gen)
507
+ (found_match_in_gen : bool) = false;
508
+ t := usize(0);
509
+ while ((t < current.len()) && (!(found_match_in_gen))), (t = (t + usize(1))), {
510
+ thread := current.get(t).unwrap();
511
+ instr := self._program.instructions.get(thread.pc).unwrap();
512
+
513
+ match(instr.kind,
514
+ .Char => {
515
+ cond(
516
+ ((!(at_end)) && (self._char_matches(instr.codepoint, cur_cp))) => {
517
+ new_thread := thread.fork((thread.pc + usize(1)));
518
+ self._add_thread(&(next), new_thread, (byte_pos + char_byte_len), &(next_seen));
519
+ },
520
+ true => ()
521
+ );
522
+ },
523
+ .AnyChar => {
524
+ cond(
525
+ (!(at_end)) => {
526
+ should_match := cond(
527
+ self._flags.dot_all => true,
528
+ true => (cur_cp != u32(10))
529
+ );
530
+ cond(
531
+ should_match => {
532
+ new_thread := thread.fork((thread.pc + usize(1)));
533
+ self._add_thread(&(next), new_thread, (byte_pos + char_byte_len), &(next_seen));
534
+ },
535
+ true => ()
536
+ );
537
+ },
538
+ true => ()
539
+ );
540
+ },
541
+ .CharClass => {
542
+ cond(
543
+ (!(at_end)) => {
544
+ class_opt := self._program.classes.get(instr.class_idx);
545
+ match(class_opt,
546
+ .Some(cls) => {
547
+ cond(
548
+ self._codepoint_in_class(cur_cp, cls) => {
549
+ new_thread := thread.fork((thread.pc + usize(1)));
550
+ self._add_thread(&(next), new_thread, (byte_pos + char_byte_len), &(next_seen));
551
+ },
552
+ true => ()
553
+ );
554
+ },
555
+ .None => ()
556
+ );
557
+ },
558
+ true => ()
559
+ );
560
+ },
561
+ .Backref => {
562
+ group_start_slot := (instr.slot * usize(2));
563
+ group_end_slot := ((instr.slot * usize(2)) + usize(1));
564
+ cond(
565
+ ((group_start_slot < thread.slots.len()) && (group_end_slot < thread.slots.len())) => {
566
+ gs := thread.slots.get(group_start_slot).unwrap();
567
+ ge := thread.slots.get(group_end_slot).unwrap();
568
+ cond(
569
+ ((gs != unset) && (ge != unset)) => {
570
+ captured_len := (ge - gs);
571
+ cond(
572
+ ((byte_pos + captured_len) <= input_len) => {
573
+ // Compare captured bytes against input at current position
574
+ (bytes_match : bool) = true;
575
+ bi := usize(0);
576
+ while (bi < captured_len), (bi = (bi + usize(1))), {
577
+ expected := self._bytes.get((gs + bi)).unwrap();
578
+ actual := self._bytes.get((byte_pos + bi)).unwrap();
579
+ cond(
580
+ self._flags.ignore_case => {
581
+ el := cond(
582
+ ((expected >= u8(65)) && (expected <= u8(90))) => (expected + u8(32)),
583
+ true => expected
584
+ );
585
+ al := cond(
586
+ ((actual >= u8(65)) && (actual <= u8(90))) => (actual + u8(32)),
587
+ true => actual
588
+ );
589
+ cond(
590
+ (el != al) => { bytes_match = false; },
591
+ true => ()
592
+ );
593
+ },
594
+ true => {
595
+ cond(
596
+ (expected != actual) => { bytes_match = false; },
597
+ true => ()
598
+ );
599
+ }
600
+ );
601
+ };
602
+ cond(
603
+ bytes_match => {
604
+ new_pos := (byte_pos + captured_len);
605
+ new_thread := thread.fork((thread.pc + usize(1)));
606
+ cond(
607
+ (captured_len == usize(0)) => {
608
+ // Empty capture — epsilon-like, process in current gen
609
+ self._add_thread(&(current), new_thread, byte_pos, &(seen));
610
+ },
611
+ true => {
612
+ // Defer to the target byte position
613
+ deferred.push(DeferredThread(target_byte_pos: new_pos, thread: new_thread));
614
+ }
615
+ );
616
+ },
617
+ true => ()
618
+ );
619
+ },
620
+ true => ()
621
+ );
622
+ },
623
+ true => ()
624
+ );
625
+ },
626
+ true => ()
627
+ );
628
+ },
629
+ .Lookahead => {
630
+ // Zero-width assertion: run sub-VM at current position
631
+ sub_start := instr.target_a;
632
+ positive := (instr.slot == usize(1));
633
+ sub_matched := self._run_sub_vm(sub_start, byte_pos, unset);
634
+ cond(
635
+ (sub_matched == positive) => {
636
+ sub_end := instr.target_b;
637
+ new_thread := thread.fork(sub_end);
638
+ self._add_thread(&(current), new_thread, byte_pos, &(seen));
639
+ },
640
+ true => ()
641
+ );
642
+ },
643
+ .Lookbehind => {
644
+ // Zero-width assertion: check if sub-pattern matches ending at current position
645
+ sub_start := instr.target_a;
646
+ positive := (instr.slot == usize(1));
647
+ (lb_matched : bool) = self._run_sub_vm(sub_start, byte_pos, byte_pos);
648
+ // lb_matched is seeded with the empty-width attempt (start == end == byte_pos); the scan below only covers earlier starts
649
+ // Try each byte position backwards from byte_pos
650
+ cond(
651
+ (byte_pos == usize(0)) => {
652
+ // No earlier start exists at position 0; this repeats the empty-width attempt
653
+ cond(
654
+ self._run_sub_vm(sub_start, usize(0), usize(0)) => {
655
+ lb_matched = true;
656
+ },
657
+ true => ()
658
+ );
659
+ },
660
+ true => {
661
+ // Try from byte_pos backwards to 0
662
+ (try_pos : usize) = byte_pos;
663
+ while (!(lb_matched)), {
664
+ cond(
665
+ (try_pos == usize(0)) => {
666
+ cond(
667
+ self._run_sub_vm(sub_start, usize(0), byte_pos) => {
668
+ lb_matched = true;
669
+ },
670
+ true => ()
671
+ );
672
+ break;
673
+ },
674
+ true => {
675
+ try_pos = (try_pos - usize(1));
676
+ // Skip UTF-8 continuation bytes (10xxxxxx)
677
+ cond(
678
+ (try_pos > usize(0)) => {
679
+ tb := self._bytes.get(try_pos).unwrap();
680
+ while (((tb >= u8(0x80)) && (tb < u8(0xC0))) && (try_pos > usize(0))), {
681
+ try_pos = (try_pos - usize(1));
682
+ tb = self._bytes.get(try_pos).unwrap();
683
+ };
684
+ },
685
+ true => ()
686
+ );
687
+ cond(
688
+ self._run_sub_vm(sub_start, try_pos, byte_pos) => {
689
+ lb_matched = true;
690
+ },
691
+ true => ()
692
+ );
693
+ }
694
+ );
695
+ };
696
+ }
697
+ );
698
+ // slot == 1 marks a positive assertion; continue at target_b when the outcome agrees with the polarity
699
+ cond(
700
+ (lb_matched == positive) => {
701
+ sub_end := instr.target_b;
702
+ new_thread := thread.fork(sub_end);
703
+ self._add_thread(&(current), new_thread, byte_pos, &(seen));
704
+ },
705
+ true => ()
706
+ );
707
+ },
708
+ .Match => {
709
+ // Record match and kill lower-priority threads
710
+ best_match = VmMatch(matched: true, slots: thread.slots);
711
+ found_match_in_gen = true;
712
+ },
713
+ _ => ()
714
+ );
715
+ };
716
+ // Threads queued for the next character become the current generation
717
+ current = next;
718
+ next = ArrayList(NfaThread).new();
719
+
720
+ cond(
721
+ at_end => { break; },
722
+ true => {
723
+ byte_pos = (byte_pos + char_byte_len);
724
+ }
725
+ );
726
+ };
727
+
728
+ best_match
729
+ })
730
+ );
731
+
732
+ export
733
+ NfaVm,
734
+ NfaThread,
735
+ VmMatch,
736
+ DecodedChar
737
+ ;