@shd101wyy/yo 0.0.27 → 0.0.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +2 -1
  2. package/out/cjs/index.cjs +513 -513
  3. package/out/cjs/yo-cli.cjs +677 -552
  4. package/out/esm/index.mjs +478 -478
  5. package/out/types/src/build-runner.d.ts +22 -0
  6. package/out/types/src/cache.d.ts +3 -0
  7. package/out/types/src/codegen/codegen-c.d.ts +3 -0
  8. package/out/types/src/codegen/index.d.ts +4 -0
  9. package/out/types/src/codegen/utils/index.d.ts +3 -0
  10. package/out/types/src/evaluator/builtins/build.d.ts +135 -0
  11. package/out/types/src/expr.d.ts +17 -0
  12. package/out/types/src/fetch-command.d.ts +6 -0
  13. package/out/types/src/fetch.d.ts +10 -0
  14. package/out/types/src/init.d.ts +5 -0
  15. package/out/types/src/install-command.d.ts +6 -0
  16. package/out/types/src/lock-file.d.ts +16 -0
  17. package/out/types/src/module-manager.d.ts +3 -1
  18. package/out/types/src/pkg-config.d.ts +11 -0
  19. package/out/types/src/target.d.ts +28 -0
  20. package/out/types/src/tests/build-system.test.d.ts +1 -0
  21. package/out/types/tsconfig.tsbuildinfo +1 -1
  22. package/package.json +1 -1
  23. package/std/build.yo +287 -0
  24. package/std/crypto/random.yo +2 -2
  25. package/std/fs/dir.yo +1 -1
  26. package/std/fs/temp.yo +1 -1
  27. package/std/os/env.yo +5 -5
  28. package/std/os/signal.yo +8 -8
  29. package/std/path.yo +2 -2
  30. package/std/process.yo +23 -43
  31. package/std/regex/compiler.yo +355 -0
  32. package/std/regex/flags.yo +104 -0
  33. package/std/regex/match.yo +83 -0
  34. package/std/regex/node.yo +283 -0
  35. package/std/regex/parser.yo +847 -0
  36. package/std/regex/regex.yo +714 -0
  37. package/std/regex/unicode.yo +365 -0
  38. package/std/regex/vm.yo +737 -0
  39. package/std/sys/clock.yo +1 -1
  40. package/std/sys/constants.yo +3 -3
  41. package/std/sys/mmap.yo +2 -2
  42. package/std/sys/signals.yo +4 -4
  43. package/std/sys/socket.yo +25 -25
  44. package/std/sys/sysinfo.yo +4 -4
  45. package/std/time/sleep.yo +18 -0
  46. package/std/time.yo +0 -13
@@ -0,0 +1,714 @@
1
+ // std/regex/regex.yo - Main Regex type
2
+ //
3
+ // High-level regex API similar to JavaScript's RegExp.
4
+ //
5
+ // Example:
6
+ // { Regex } :: import "std/regex";
7
+ // re := Regex.new(`\\d+`, ``).unwrap(); // second argument is the flags string
8
+ // m := re.exec(`abc123def`);
9
+ // match(m,
10
+ // .Some(result) => println(result.value()),
11
+ // .None => println(`no match`)
12
+ // );
13
+
14
+ open import "std/collections/array_list";
15
+ open import "std/string";
16
+ { RegexParser } :: import "./parser.yo";
17
+ { NfaCompiler, NfaProgram, Instr, InstrKind, ClassEntry, GroupNameEntry } :: import "./compiler.yo";
18
+ { NfaVm, VmMatch } :: import "./vm.yo";
19
+ { RegexFlags } :: import "./flags.yo";
20
+ { RegexMatch } :: import "./match.yo";
21
+
22
+ // The Regex type: a compiled regular expression
23
+ Regex :: object(
24
+ _program : NfaProgram,
25
+ _flags : RegexFlags,
26
+ _pattern : String,
27
+ _n_groups : usize,
28
+ _group_names : ArrayList(GroupNameEntry)
29
+ );
30
+
31
+ // Block 1: Constructor and leaf methods
32
+ impl(Regex,
33
+ new : (fn(pattern : String, flags_str : String) -> Result(Self, String))({
34
+ flags_result := RegexFlags.parse(flags_str);
35
+ match(flags_result,
36
+ .Err(e) => .Err(e),
37
+ .Ok(flags) => {
38
+ parser := RegexParser.new(pattern);
39
+ ast_result := parser.parse();
40
+ match(ast_result,
41
+ .Err(e) => .Err(e),
42
+ .Ok(ast) => {
43
+ n_groups := parser.group_count();
44
+ gnames := parser.group_names();
45
+ compiler := NfaCompiler.new();
46
+ program := compiler.compile(ast, n_groups, gnames);
47
+ // Literal prefix scan is case-sensitive; when ignore_case is set, clear the prefix so exec/match_all skip the fast scan
48
+ cond(
49
+ flags.ignore_case => { program.literal_prefix = ArrayList(u8).new(); },
50
+ true => ()
51
+ );
52
+ .Ok(Self(
53
+ _program: program,
54
+ _flags: flags,
55
+ _pattern: pattern,
56
+ _n_groups: n_groups,
57
+ _group_names: gnames
58
+ ))
59
+ }
60
+ )
61
+ }
62
+ )
63
+ }),
64
+
65
+ source : (fn(self : Self) -> String)(
66
+ self._pattern
67
+ ),
68
+
69
+ _extract_substring : (fn(self : Self, bytes : ArrayList(u8), start : usize, end_pos : usize) -> String)({
70
+ result_bytes := ArrayList(u8).with_capacity((end_pos - start));
71
+ i := start;
72
+ while (i < end_pos), (i = (i + usize(1))), {
73
+ result_bytes.push(bytes.get(i).unwrap());
74
+ };
75
+ String.from_bytes(result_bytes)
76
+ }),
77
+
78
+ _byte_to_char_index : (fn(self : Self, bytes : ArrayList(u8), byte_pos : usize) -> usize)({
79
+ char_idx := usize(0);
80
+ i := usize(0);
81
+ while (i < byte_pos), {
82
+ b := bytes.get(i).unwrap();
83
+ char_len := cond(
84
+ (b < u8(0x80)) => usize(1),
85
+ ((b >= u8(0xC0)) && (b < u8(0xE0))) => usize(2),
86
+ ((b >= u8(0xE0)) && (b < u8(0xF0))) => usize(3),
87
+ true => usize(4)
88
+ );
89
+ i = (i + char_len);
90
+ char_idx = (char_idx + usize(1));
91
+ };
92
+ char_idx
93
+ }),
94
+
95
+ // Fast-scan: find the next byte position where the literal prefix matches.
96
+ // Returns the byte position of the prefix, from_byte when the prefix is empty, or input_len + 1 if not found (callers loop while pos <= input_len).
97
+ _find_prefix_pos : (fn(self : Self, input_bytes : ArrayList(u8), from_byte : usize) -> usize)({
98
+ prefix := self._program.literal_prefix;
99
+ prefix_len := prefix.len();
100
+ input_len := input_bytes.len();
101
+
102
+ cond(
103
+ (prefix_len == usize(0)) => from_byte,
104
+ (input_len < prefix_len) => (input_len + usize(1)),
105
+ true => {
106
+ first_byte := prefix.get(usize(0)).unwrap();
107
+ (pos : usize) = from_byte;
108
+ (found : bool) = false;
109
+
110
+ while ((pos <= (input_len - prefix_len)) && (!(found))), {
111
+ cond(
112
+ (input_bytes.get(pos).unwrap() == first_byte) => {
113
+ // Check remaining prefix bytes
114
+ (match_ok : bool) = true;
115
+ pi := usize(1);
116
+ while ((pi < prefix_len) && match_ok), (pi = (pi + usize(1))), {
117
+ cond(
118
+ (input_bytes.get((pos + pi)).unwrap() != prefix.get(pi).unwrap()) => {
119
+ match_ok = false;
120
+ },
121
+ true => ()
122
+ );
123
+ };
124
+ cond(
125
+ match_ok => { found = true; },
126
+ true => { pos = (pos + usize(1)); }
127
+ );
128
+ },
129
+ true => { pos = (pos + usize(1)); }
130
+ );
131
+ };
132
+
133
+ cond(
134
+ found => pos,
135
+ true => (input_len + usize(1))
136
+ )
137
+ }
138
+ )
139
+ })
140
+ );
141
+
142
+ // Block 2: _build_match (depends on Block 1)
143
+ impl(Regex,
144
+ _build_match : (fn(self : Self, slots : ArrayList(usize), input : String) -> RegexMatch)({
145
+ bytes := input.as_bytes();
146
+ unset := usize(0xFFFFFFFFFFFFFFFF);
147
+
148
+ match_start_byte := slots.get(usize(0)).unwrap();
149
+ match_end_byte := slots.get(usize(1)).unwrap();
150
+
151
+ match_text := self._extract_substring(bytes, match_start_byte, match_end_byte);
152
+ match_char_index := self._byte_to_char_index(bytes, match_start_byte);
153
+
154
+ groups := ArrayList(Option(String)).new();
155
+ g := usize(1);
156
+ while (g <= self._n_groups), (g = (g + usize(1))), {
157
+ start_slot := (g * usize(2));
158
+ end_slot := ((g * usize(2)) + usize(1));
159
+
160
+ cond(
161
+ ((start_slot < slots.len()) && (end_slot < slots.len())) => {
162
+ gs := slots.get(start_slot).unwrap();
163
+ ge := slots.get(end_slot).unwrap();
164
+ cond(
165
+ ((gs != unset) && (ge != unset)) => {
166
+ group_text := self._extract_substring(bytes, gs, ge);
167
+ groups.push(.Some(group_text));
168
+ },
169
+ true => {
170
+ groups.push(.None);
171
+ }
172
+ );
173
+ },
174
+ true => {
175
+ groups.push(.None);
176
+ }
177
+ );
178
+ };
179
+
180
+ RegexMatch.new(match_text, match_char_index, input, groups, self._group_names)
181
+ })
182
+ );
183
+
184
+ // Block 3: exec, match_all (depend on Block 2)
185
+ impl(Regex,
186
+ exec : (fn(self : Self, input : String) -> Option(RegexMatch))({
187
+ bytes := input.as_bytes();
188
+ input_len := bytes.len();
189
+
190
+ // Sticky flag: only try matching at position 0
191
+ cond(
192
+ self._flags.sticky => {
193
+ vm := NfaVm.new(self._program, self._flags, input);
194
+ result := vm.exec_at(usize(0));
195
+ cond(
196
+ result.matched => .Some(self._build_match(result.slots, input)),
197
+ true => .None
198
+ )
199
+ },
200
+ true => {
201
+ has_prefix := (self._program.literal_prefix.len() > usize(0));
202
+ byte_pos := cond(
203
+ has_prefix => self._find_prefix_pos(bytes, usize(0)),
204
+ true => usize(0)
205
+ );
206
+
207
+ while (byte_pos <= input_len), {
208
+ vm := NfaVm.new(self._program, self._flags, input);
209
+ result := vm.exec_at(byte_pos);
210
+
211
+ cond(
212
+ result.matched => {
213
+ m := self._build_match(result.slots, input);
214
+ return .Some(m);
215
+ },
216
+ true => ()
217
+ );
218
+
219
+ cond(
220
+ (byte_pos >= input_len) => { break; },
221
+ true => {
222
+ b := bytes.get(byte_pos).unwrap();
223
+ char_len := cond(
224
+ (b < u8(0x80)) => usize(1),
225
+ ((b >= u8(0xC0)) && (b < u8(0xE0))) => usize(2),
226
+ ((b >= u8(0xE0)) && (b < u8(0xF0))) => usize(3),
227
+ true => usize(4)
228
+ );
229
+ next_pos := (byte_pos + char_len);
230
+ byte_pos = cond(
231
+ has_prefix => self._find_prefix_pos(bytes, next_pos),
232
+ true => next_pos
233
+ );
234
+ }
235
+ );
236
+ };
237
+
238
+ .None
239
+ }
240
+ )
241
+ }),
242
+
243
+ match_all : (fn(self : Self, input : String) -> ArrayList(RegexMatch))({
244
+ matches := ArrayList(RegexMatch).new();
245
+ bytes := input.as_bytes();
246
+ input_len := bytes.len();
247
+
248
+ cond(
249
+ self._flags.sticky => {
250
+ // Sticky: try at position 0, then at the end of each match (one char further after an empty match); stop at the first failed attempt
251
+ (byte_pos : usize) = usize(0);
252
+ while (byte_pos <= input_len), {
253
+ vm := NfaVm.new(self._program, self._flags, input);
254
+ result := vm.exec_at(byte_pos);
255
+
256
+ cond(
257
+ result.matched => {
258
+ m := self._build_match(result.slots, input);
259
+ matches.push(m);
260
+ match_start := result.slots.get(usize(0)).unwrap();
261
+ match_end := result.slots.get(usize(1)).unwrap();
262
+ cond(
263
+ (match_end == match_start) => {
264
+ // Empty match: advance one char to avoid infinite loop
265
+ cond(
266
+ (byte_pos >= input_len) => { break; },
267
+ true => {
268
+ b := bytes.get(byte_pos).unwrap();
269
+ char_len := cond(
270
+ (b < u8(0x80)) => usize(1),
271
+ ((b >= u8(0xC0)) && (b < u8(0xE0))) => usize(2),
272
+ ((b >= u8(0xE0)) && (b < u8(0xF0))) => usize(3),
273
+ true => usize(4)
274
+ );
275
+ byte_pos = (byte_pos + char_len);
276
+ }
277
+ );
278
+ },
279
+ true => {
280
+ byte_pos = match_end;
281
+ }
282
+ );
283
+ },
284
+ true => { break; }
285
+ );
286
+ };
287
+ },
288
+ true => {
289
+ has_prefix := (self._program.literal_prefix.len() > usize(0));
290
+ byte_pos := cond(
291
+ has_prefix => self._find_prefix_pos(bytes, usize(0)),
292
+ true => usize(0)
293
+ );
294
+
295
+ while (byte_pos <= input_len), {
296
+ vm := NfaVm.new(self._program, self._flags, input);
297
+ result := vm.exec_at(byte_pos);
298
+
299
+ cond(
300
+ result.matched => {
301
+ m := self._build_match(result.slots, input);
302
+ matches.push(m);
303
+
304
+ match_start := result.slots.get(usize(0)).unwrap();
305
+ match_end := result.slots.get(usize(1)).unwrap();
306
+
307
+ cond(
308
+ (match_end == match_start) => {
309
+ cond(
310
+ (byte_pos >= input_len) => { break; },
311
+ true => {
312
+ b := bytes.get(byte_pos).unwrap();
313
+ char_len := cond(
314
+ (b < u8(0x80)) => usize(1),
315
+ ((b >= u8(0xC0)) && (b < u8(0xE0))) => usize(2),
316
+ ((b >= u8(0xE0)) && (b < u8(0xF0))) => usize(3),
317
+ true => usize(4)
318
+ );
319
+ next_pos := (byte_pos + char_len);
320
+ byte_pos = cond(
321
+ has_prefix => self._find_prefix_pos(bytes, next_pos),
322
+ true => next_pos
323
+ );
324
+ }
325
+ );
326
+ },
327
+ true => {
328
+ byte_pos = cond(
329
+ has_prefix => self._find_prefix_pos(bytes, match_end),
330
+ true => match_end
331
+ );
332
+ }
333
+ );
334
+ },
335
+ true => {
336
+ cond(
337
+ (byte_pos >= input_len) => { break; },
338
+ true => {
339
+ b := bytes.get(byte_pos).unwrap();
340
+ char_len := cond(
341
+ (b < u8(0x80)) => usize(1),
342
+ ((b >= u8(0xC0)) && (b < u8(0xE0))) => usize(2),
343
+ ((b >= u8(0xE0)) && (b < u8(0xF0))) => usize(3),
344
+ true => usize(4)
345
+ );
346
+ next_pos := (byte_pos + char_len);
347
+ byte_pos = cond(
348
+ has_prefix => self._find_prefix_pos(bytes, next_pos),
349
+ true => next_pos
350
+ );
351
+ }
352
+ );
353
+ }
354
+ );
355
+ };
356
+ }
357
+ );
358
+
359
+ matches
360
+ })
361
+ );
362
+
363
+ // Block 4: test, search (depends on Block 3)
364
+ impl(Regex,
365
+ test : (fn(self : Self, input : String) -> bool)({
366
+ result := self.exec(input);
367
+ result.is_some()
368
+ }),
369
+
370
+ search : (fn(self : Self, input : String) -> Option(usize))({
371
+ result := self.exec(input);
372
+ match(result,
373
+ .Some(m) => .Some(m.index()),
374
+ .None => .None
375
+ )
376
+ })
377
+ );
378
+
379
+ // Block 5: _apply_replacement helper (depends on Block 2)
380
+ impl(Regex,
381
+ // Process replacement patterns: $& (full match), $0-$9 (groups by index),
382
+ // ${name} (named groups), $` (pre-match), $' (post-match), $$ (literal $)
383
+ _apply_replacement : (fn(self : Self, replacement : String, m : RegexMatch) -> String)({
384
+ rep_bytes := replacement.as_bytes();
385
+ rep_len := rep_bytes.len();
386
+ result := ArrayList(u8).new();
387
+ i := usize(0);
388
+
389
+ while (i < rep_len), {
390
+ b := rep_bytes.get(i).unwrap();
391
+ cond(
392
+ ((b == u8(36)) && ((i + usize(1)) < rep_len)) => {
393
+ // '$' character — check next char
394
+ next_b := rep_bytes.get((i + usize(1))).unwrap();
395
+ cond(
396
+ (next_b == u8(36)) => {
397
+ // $$ → literal $
398
+ result.push(u8(36));
399
+ i = (i + usize(2));
400
+ },
401
+ (next_b == u8(38)) => {
402
+ // $& → full match
403
+ match_bytes := m.value().as_bytes();
404
+ mi := usize(0);
405
+ while (mi < match_bytes.len()), (mi = (mi + usize(1))), {
406
+ result.push(match_bytes.get(mi).unwrap());
407
+ };
408
+ i = (i + usize(2));
409
+ },
410
+ (next_b == u8(96)) => {
411
+ // $` → pre-match (text before match)
412
+ input_bytes := m.input().as_bytes();
413
+ // Convert char index to byte index
414
+ char_idx := m.index();
415
+ byte_idx := usize(0);
416
+ ci := usize(0);
417
+ while (ci < char_idx), {
418
+ cb := input_bytes.get(byte_idx).unwrap();
419
+ cbl := cond(
420
+ (cb < u8(0x80)) => usize(1),
421
+ ((cb >= u8(0xC0)) && (cb < u8(0xE0))) => usize(2),
422
+ ((cb >= u8(0xE0)) && (cb < u8(0xF0))) => usize(3),
423
+ true => usize(4)
424
+ );
425
+ byte_idx = (byte_idx + cbl);
426
+ ci = (ci + usize(1));
427
+ };
428
+ pi := usize(0);
429
+ while (pi < byte_idx), (pi = (pi + usize(1))), {
430
+ result.push(input_bytes.get(pi).unwrap());
431
+ };
432
+ i = (i + usize(2));
433
+ },
434
+ (next_b == u8(39)) => {
435
+ // $' → post-match (text after match)
436
+ input_bytes := m.input().as_bytes();
437
+ // Convert the match's char index to the byte position of the match start
438
+ char_idx := m.index();
439
+ byte_idx := usize(0);
440
+ ci := usize(0);
441
+ while (ci < char_idx), {
442
+ cb := input_bytes.get(byte_idx).unwrap();
443
+ cbl := cond(
444
+ (cb < u8(0x80)) => usize(1),
445
+ ((cb >= u8(0xC0)) && (cb < u8(0xE0))) => usize(2),
446
+ ((cb >= u8(0xE0)) && (cb < u8(0xF0))) => usize(3),
447
+ true => usize(4)
448
+ );
449
+ byte_idx = (byte_idx + cbl);
450
+ ci = (ci + usize(1));
451
+ };
452
+ // Advance past the matched text
453
+ match_bytes := m.value().as_bytes();
454
+ byte_idx = (byte_idx + match_bytes.len());
455
+ pi := byte_idx;
456
+ while (pi < input_bytes.len()), (pi = (pi + usize(1))), {
457
+ result.push(input_bytes.get(pi).unwrap());
458
+ };
459
+ i = (i + usize(2));
460
+ },
461
+ ((next_b >= u8(48)) && (next_b <= u8(57))) => {
462
+ // $0-$9 → group reference
463
+ group_idx := usize((next_b - u8(48)));
464
+ grp := m.group(group_idx);
465
+ match(grp,
466
+ .Some(g) => {
467
+ g_bytes := g.as_bytes();
468
+ gi := usize(0);
469
+ while (gi < g_bytes.len()), (gi = (gi + usize(1))), {
470
+ result.push(g_bytes.get(gi).unwrap());
471
+ };
472
+ },
473
+ .None => ()
474
+ );
475
+ i = (i + usize(2));
476
+ },
477
+ (next_b == u8(123)) => {
478
+ // ${ → named group reference ${name}
479
+ name_start := (i + usize(2));
480
+ (name_end : usize) = name_start;
481
+ (found_close : bool) = false;
482
+ while (name_end < rep_len), {
483
+ nb := rep_bytes.get(name_end).unwrap();
484
+ cond(
485
+ (nb == u8(125)) => {
486
+ found_close = true;
487
+ break;
488
+ },
489
+ true => {
490
+ name_end = (name_end + usize(1));
491
+ }
492
+ );
493
+ };
494
+ cond(
495
+ found_close => {
496
+ name_bytes := ArrayList(u8).with_capacity((name_end - name_start));
497
+ ni := name_start;
498
+ while (ni < name_end), (ni = (ni + usize(1))), {
499
+ name_bytes.push(rep_bytes.get(ni).unwrap());
500
+ };
501
+ name := String.from_bytes(name_bytes);
502
+ grp := m.named_group(name);
503
+ match(grp,
504
+ .Some(g) => {
505
+ g_bytes := g.as_bytes();
506
+ gi := usize(0);
507
+ while (gi < g_bytes.len()), (gi = (gi + usize(1))), {
508
+ result.push(g_bytes.get(gi).unwrap());
509
+ };
510
+ },
511
+ .None => ()
512
+ );
513
+ i = (name_end + usize(1));
514
+ },
515
+ true => {
516
+ // No closing } — emit the literal $ here; the following { is copied as a plain byte on the next iteration
517
+ result.push(u8(36));
518
+ i = (i + usize(1));
519
+ }
520
+ );
521
+ },
522
+ true => {
523
+ // Unknown $ sequence — emit literal $
524
+ result.push(u8(36));
525
+ i = (i + usize(1));
526
+ }
527
+ );
528
+ },
529
+ true => {
530
+ result.push(b);
531
+ i = (i + usize(1));
532
+ }
533
+ );
534
+ };
535
+
536
+ String.from_bytes(result)
537
+ })
538
+ );
539
+
540
+ // Block 6: replace, replace_all, split (depends on Block 3+5)
541
+ impl(Regex,
542
+ replace : (fn(self : Self, input : String, replacement : String) -> String)({
543
+ result := self.exec(input);
544
+ match(result,
545
+ .None => input,
546
+ .Some(m) => {
547
+ input_bytes := input.as_bytes();
548
+ // Convert char index to byte index for match start
549
+ char_idx := m.index();
550
+ (match_start_byte : usize) = usize(0);
551
+ ci := usize(0);
552
+ while (ci < char_idx), {
553
+ cb := input_bytes.get(match_start_byte).unwrap();
554
+ cbl := cond(
555
+ (cb < u8(0x80)) => usize(1),
556
+ ((cb >= u8(0xC0)) && (cb < u8(0xE0))) => usize(2),
557
+ ((cb >= u8(0xE0)) && (cb < u8(0xF0))) => usize(3),
558
+ true => usize(4)
559
+ );
560
+ match_start_byte = (match_start_byte + cbl);
561
+ ci = (ci + usize(1));
562
+ };
563
+ match_end_byte := (match_start_byte + m.value().as_bytes().len());
564
+
565
+ // Build result: pre-match + replacement + post-match
566
+ out := ArrayList(u8).new();
567
+ pi := usize(0);
568
+ while (pi < match_start_byte), (pi = (pi + usize(1))), {
569
+ out.push(input_bytes.get(pi).unwrap());
570
+ };
571
+ rep := self._apply_replacement(replacement, m);
572
+ rep_bytes := rep.as_bytes();
573
+ ri := usize(0);
574
+ while (ri < rep_bytes.len()), (ri = (ri + usize(1))), {
575
+ out.push(rep_bytes.get(ri).unwrap());
576
+ };
577
+ pi = match_end_byte;
578
+ while (pi < input_bytes.len()), (pi = (pi + usize(1))), {
579
+ out.push(input_bytes.get(pi).unwrap());
580
+ };
581
+ String.from_bytes(out)
582
+ }
583
+ )
584
+ }),
585
+
586
+ replace_all : (fn(self : Self, input : String, replacement : String) -> String)({
587
+ all_matches := self.match_all(input);
588
+ cond(
589
+ (all_matches.len() == usize(0)) => input,
590
+ true => {
591
+ input_bytes := input.as_bytes();
592
+ out := ArrayList(u8).new();
593
+ (last_end_byte : usize) = usize(0);
594
+
595
+ mi := usize(0);
596
+ while (mi < all_matches.len()), (mi = (mi + usize(1))), {
597
+ m := all_matches.get(mi).unwrap();
598
+ // Convert char index to byte index for match start
599
+ char_idx := m.index();
600
+ (match_start_byte : usize) = usize(0);
601
+ ci := usize(0);
602
+ while (ci < char_idx), {
603
+ cb := input_bytes.get(match_start_byte).unwrap();
604
+ cbl := cond(
605
+ (cb < u8(0x80)) => usize(1),
606
+ ((cb >= u8(0xC0)) && (cb < u8(0xE0))) => usize(2),
607
+ ((cb >= u8(0xE0)) && (cb < u8(0xF0))) => usize(3),
608
+ true => usize(4)
609
+ );
610
+ match_start_byte = (match_start_byte + cbl);
611
+ ci = (ci + usize(1));
612
+ };
613
+ match_end_byte := (match_start_byte + m.value().as_bytes().len());
614
+
615
+ // Copy text between last match end and this match start
616
+ pi := last_end_byte;
617
+ while (pi < match_start_byte), (pi = (pi + usize(1))), {
618
+ out.push(input_bytes.get(pi).unwrap());
619
+ };
620
+
621
+ // Apply replacement
622
+ rep := self._apply_replacement(replacement, m);
623
+ rep_bytes := rep.as_bytes();
624
+ ri := usize(0);
625
+ while (ri < rep_bytes.len()), (ri = (ri + usize(1))), {
626
+ out.push(rep_bytes.get(ri).unwrap());
627
+ };
628
+
629
+ last_end_byte = match_end_byte;
630
+ };
631
+
632
+ // Copy remaining text after last match
633
+ pi := last_end_byte;
634
+ while (pi < input_bytes.len()), (pi = (pi + usize(1))), {
635
+ out.push(input_bytes.get(pi).unwrap());
636
+ };
637
+
638
+ String.from_bytes(out)
639
+ }
640
+ )
641
+ }),
642
+
643
+ split : (fn(self : Self, input : String) -> ArrayList(String))({
644
+ parts := ArrayList(String).new();
645
+ all_matches := self.match_all(input);
646
+
647
+ cond(
648
+ (all_matches.len() == usize(0)) => {
649
+ parts.push(input);
650
+ },
651
+ true => {
652
+ input_bytes := input.as_bytes();
653
+ (last_end_byte : usize) = usize(0);
654
+
655
+ mi := usize(0);
656
+ while (mi < all_matches.len()), (mi = (mi + usize(1))), {
657
+ m := all_matches.get(mi).unwrap();
658
+ // Convert char index to byte index for match start
659
+ char_idx := m.index();
660
+ (match_start_byte : usize) = usize(0);
661
+ ci := usize(0);
662
+ while (ci < char_idx), {
663
+ cb := input_bytes.get(match_start_byte).unwrap();
664
+ cbl := cond(
665
+ (cb < u8(0x80)) => usize(1),
666
+ ((cb >= u8(0xC0)) && (cb < u8(0xE0))) => usize(2),
667
+ ((cb >= u8(0xE0)) && (cb < u8(0xF0))) => usize(3),
668
+ true => usize(4)
669
+ );
670
+ match_start_byte = (match_start_byte + cbl);
671
+ ci = (ci + usize(1));
672
+ };
673
+ match_end_byte := (match_start_byte + m.value().as_bytes().len());
674
+
675
+ // Extract text from last_end_byte to match_start_byte
676
+ part_bytes := ArrayList(u8).new();
677
+ pi := last_end_byte;
678
+ while (pi < match_start_byte), (pi = (pi + usize(1))), {
679
+ part_bytes.push(input_bytes.get(pi).unwrap());
680
+ };
681
+ parts.push(String.from_bytes(part_bytes));
682
+
683
+ // Include capture groups in split result (like JS); an unmatched group is pushed as the string `undefined`
684
+ gi := usize(1);
685
+ while (gi <= m.group_count()), (gi = (gi + usize(1))), {
686
+ grp := m.group(gi);
687
+ match(grp,
688
+ .Some(g) => { parts.push(g); },
689
+ .None => { parts.push(`undefined`); }
690
+ );
691
+ };
692
+
693
+ last_end_byte = match_end_byte;
694
+ };
695
+
696
+ // Add remaining text after last match
697
+ remaining := ArrayList(u8).new();
698
+ pi := last_end_byte;
699
+ while (pi < input_bytes.len()), (pi = (pi + usize(1))), {
700
+ remaining.push(input_bytes.get(pi).unwrap());
701
+ };
702
+ parts.push(String.from_bytes(remaining));
703
+ }
704
+ );
705
+
706
+ parts
707
+ })
708
+ );
709
+
710
+ export
711
+ Regex,
712
+ RegexMatch,
713
+ RegexFlags
714
+ ;