@shd101wyy/yo 0.1.28 → 0.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/skills/yo-async-effects/SKILL.md +15 -15
- package/.github/skills/yo-async-effects/async-effects-recipes.md +118 -121
- package/.github/skills/yo-core-patterns/core-patterns-cheatsheet.md +33 -13
- package/.github/skills/yo-project-workflow/workflow-cheatsheet.md +1 -1
- package/.github/skills/yo-syntax/SKILL.md +2 -2
- package/.github/skills/yo-syntax/syntax-cheatsheet.md +108 -96
- package/README.md +6 -3
- package/out/cjs/index.cjs +812 -706
- package/out/cjs/yo-cli.cjs +1023 -907
- package/out/cjs/yo-lsp.cjs +836 -730
- package/out/esm/index.mjs +757 -651
- package/out/types/src/codegen/exprs/async.d.ts +2 -0
- package/out/types/src/codegen/exprs/await.d.ts +1 -0
- package/out/types/src/codegen/exprs/closures.d.ts +4 -0
- package/out/types/src/codegen/functions/context.d.ts +6 -0
- package/out/types/src/codegen/functions/declarations.d.ts +1 -1
- package/out/types/src/doc/model.d.ts +0 -1
- package/out/types/src/env.d.ts +2 -2
- package/out/types/src/evaluator/builtins/pragma.d.ts +9 -0
- package/out/types/src/evaluator/builtins/unsafe.d.ts +8 -0
- package/out/types/src/evaluator/context.d.ts +3 -1
- package/out/types/src/evaluator/exprs/{escape.d.ts → unwind.d.ts} +1 -1
- package/out/types/src/evaluator/index.d.ts +1 -1
- package/out/types/src/evaluator/memory-safety.d.ts +14 -0
- package/out/types/src/evaluator/types/flowability.d.ts +6 -0
- package/out/types/src/evaluator/types/function.d.ts +1 -2
- package/out/types/src/evaluator/utils.d.ts +0 -1
- package/out/types/src/expr-traversal.d.ts +1 -0
- package/out/types/src/expr.d.ts +9 -7
- package/out/types/src/public-safe-report.d.ts +19 -0
- package/out/types/src/tests/comptime-ref-gate.test.d.ts +1 -0
- package/out/types/src/tests/pragma-validation.test.d.ts +1 -0
- package/out/types/src/tests/public-safe-report.test.d.ts +1 -0
- package/out/types/src/tests/type-representation-pointer.test.d.ts +1 -0
- package/out/types/src/tests/unsafe-gate.test.d.ts +1 -0
- package/out/types/src/tests/unsafe-report-classify.test.d.ts +1 -0
- package/out/types/src/types/creators.d.ts +4 -6
- package/out/types/src/types/definitions.d.ts +9 -16
- package/out/types/src/types/guards.d.ts +1 -2
- package/out/types/src/types/tags.d.ts +0 -1
- package/out/types/src/types/utils.d.ts +5 -0
- package/out/types/src/unsafe-report.d.ts +29 -0
- package/out/types/src/value.d.ts +1 -0
- package/out/types/tsconfig.tsbuildinfo +1 -1
- package/package.json +1 -1
- package/scripts/add-pragma-for-pointer-decls.ts +134 -0
- package/scripts/add-pragma.ts +58 -0
- package/scripts/migrate-amp-method-calls.ts +186 -0
- package/scripts/migrate-clone-calls.ts +93 -0
- package/scripts/migrate-get-unwrap.ts +166 -0
- package/scripts/migrate-index-patterns.ts +210 -0
- package/scripts/migrate-index-trait.ts +142 -0
- package/scripts/migrate-iterator.ts +150 -0
- package/scripts/migrate-self-ptr.ts +220 -0
- package/scripts/migrate-skip-pragmas.ts +109 -0
- package/scripts/migrate-tostring.ts +134 -0
- package/scripts/trim-pragma.ts +130 -0
- package/scripts/wrap-extern-calls.ts +161 -0
- package/std/alg/hash.yo +3 -2
- package/std/allocator.yo +6 -5
- package/std/async.yo +2 -2
- package/std/collections/array_list.yo +59 -40
- package/std/collections/btree_map.yo +19 -18
- package/std/collections/deque.yo +9 -8
- package/std/collections/hash_map.yo +101 -13
- package/std/collections/hash_set.yo +5 -4
- package/std/collections/linked_list.yo +39 -4
- package/std/collections/ordered_map.yo +3 -3
- package/std/collections/priority_queue.yo +14 -13
- package/std/crypto/md5.yo +2 -1
- package/std/crypto/random.yo +21 -20
- package/std/crypto/sha256.yo +2 -1
- package/std/encoding/base64.yo +18 -18
- package/std/encoding/hex.yo +5 -5
- package/std/encoding/json.yo +62 -13
- package/std/encoding/punycode.yo +24 -23
- package/std/encoding/toml.yo +4 -3
- package/std/encoding/utf16.yo +3 -3
- package/std/env.yo +43 -28
- package/std/error.yo +15 -3
- package/std/fmt/display.yo +2 -2
- package/std/fmt/index.yo +6 -5
- package/std/fmt/to_string.yo +39 -38
- package/std/fmt/writer.yo +9 -8
- package/std/fs/dir.yo +61 -66
- package/std/fs/file.yo +121 -126
- package/std/fs/metadata.yo +13 -18
- package/std/fs/temp.yo +35 -30
- package/std/fs/walker.yo +14 -19
- package/std/gc.yo +1 -0
- package/std/glob.yo +7 -7
- package/std/http/client.yo +33 -36
- package/std/http/http.yo +6 -6
- package/std/http/index.yo +4 -4
- package/std/imm/list.yo +33 -0
- package/std/imm/map.yo +2 -1
- package/std/imm/set.yo +1 -0
- package/std/imm/sorted_map.yo +1 -0
- package/std/imm/sorted_set.yo +1 -0
- package/std/imm/string.yo +27 -23
- package/std/imm/vec.yo +18 -2
- package/std/io/reader.yo +2 -1
- package/std/io/writer.yo +3 -2
- package/std/libc/assert.yo +1 -0
- package/std/libc/ctype.yo +1 -0
- package/std/libc/dirent.yo +1 -0
- package/std/libc/errno.yo +1 -0
- package/std/libc/fcntl.yo +1 -0
- package/std/libc/float.yo +1 -0
- package/std/libc/limits.yo +1 -0
- package/std/libc/math.yo +1 -0
- package/std/libc/signal.yo +1 -0
- package/std/libc/stdatomic.yo +1 -0
- package/std/libc/stdint.yo +1 -0
- package/std/libc/stdio.yo +1 -0
- package/std/libc/stdlib.yo +1 -0
- package/std/libc/string.yo +1 -0
- package/std/libc/sys/stat.yo +1 -0
- package/std/libc/time.yo +1 -0
- package/std/libc/unistd.yo +1 -0
- package/std/libc/wctype.yo +1 -0
- package/std/libc/windows.yo +2 -0
- package/std/log.yo +7 -6
- package/std/net/addr.yo +6 -5
- package/std/net/dns.yo +13 -16
- package/std/net/errors.yo +9 -9
- package/std/net/tcp.yo +71 -74
- package/std/net/udp.yo +40 -43
- package/std/os/signal.yo +5 -5
- package/std/path.yo +1 -0
- package/std/prelude.yo +377 -200
- package/std/process/command.yo +57 -46
- package/std/process/index.yo +2 -1
- package/std/regex/compiler.yo +10 -9
- package/std/regex/index.yo +41 -41
- package/std/regex/match.yo +2 -2
- package/std/regex/parser.yo +31 -31
- package/std/regex/vm.yo +42 -41
- package/std/string/string.yo +95 -40
- package/std/string/string_builder.yo +9 -9
- package/std/string/unicode.yo +50 -49
- package/std/sync/channel.yo +2 -1
- package/std/sync/cond.yo +5 -4
- package/std/sync/mutex.yo +4 -3
- package/std/sys/advise.yo +1 -0
- package/std/sys/bufio/buf_reader.yo +27 -26
- package/std/sys/bufio/buf_writer.yo +22 -21
- package/std/sys/clock.yo +1 -0
- package/std/sys/copy.yo +1 -0
- package/std/sys/dir.yo +10 -9
- package/std/sys/dns.yo +6 -5
- package/std/sys/errors.yo +12 -12
- package/std/sys/events.yo +1 -0
- package/std/sys/externs.yo +38 -37
- package/std/sys/file.yo +17 -16
- package/std/sys/future.yo +4 -3
- package/std/sys/iov.yo +1 -0
- package/std/sys/mmap.yo +1 -0
- package/std/sys/path.yo +1 -0
- package/std/sys/perm.yo +2 -1
- package/std/sys/pipe.yo +1 -0
- package/std/sys/process.yo +5 -4
- package/std/sys/signal.yo +1 -0
- package/std/sys/socketpair.yo +1 -0
- package/std/sys/sockinfo.yo +1 -0
- package/std/sys/statfs.yo +2 -1
- package/std/sys/statx.yo +1 -0
- package/std/sys/sysinfo.yo +1 -0
- package/std/sys/tcp.yo +15 -14
- package/std/sys/temp.yo +1 -0
- package/std/sys/time.yo +2 -1
- package/std/sys/timer.yo +6 -6
- package/std/sys/tty.yo +2 -1
- package/std/sys/udp.yo +13 -12
- package/std/sys/unix.yo +12 -11
- package/std/testing/bench.yo +4 -3
- package/std/thread.yo +7 -6
- package/std/time/datetime.yo +18 -15
- package/std/time/duration.yo +11 -10
- package/std/time/instant.yo +4 -4
- package/std/time/sleep.yo +1 -0
- package/std/url/index.yo +5 -5
- package/std/worker.yo +4 -3
package/std/regex/parser.yo
CHANGED
|
@@ -57,20 +57,20 @@ impl(
|
|
|
57
57
|
cond(
|
|
58
58
|
(first < u8(0x80)) => u32(first),
|
|
59
59
|
((first >= u8(0xC0)) && (first < u8(0xE0))) => {
|
|
60
|
-
second := self._bytes
|
|
60
|
+
second := self._bytes(self._pos);
|
|
61
61
|
self._pos = (self._pos + usize(1));
|
|
62
62
|
((u32(first) & u32(0x1F)) << u32(6)) | (u32(second) & u32(0x3F))
|
|
63
63
|
},
|
|
64
64
|
((first >= u8(0xE0)) && (first < u8(0xF0))) => {
|
|
65
|
-
second := self._bytes
|
|
66
|
-
third := self._bytes
|
|
65
|
+
second := self._bytes(self._pos);
|
|
66
|
+
third := self._bytes(self._pos + usize(1));
|
|
67
67
|
self._pos = (self._pos + usize(2));
|
|
68
68
|
(((u32(first) & u32(0x0F)) << u32(12)) | ((u32(second) & u32(0x3F)) << u32(6))) | (u32(third) & u32(0x3F))
|
|
69
69
|
},
|
|
70
70
|
true => {
|
|
71
|
-
second := self._bytes
|
|
72
|
-
third := self._bytes
|
|
73
|
-
fourth := self._bytes
|
|
71
|
+
second := self._bytes(self._pos);
|
|
72
|
+
third := self._bytes(self._pos + usize(1));
|
|
73
|
+
fourth := self._bytes(self._pos + usize(2));
|
|
74
74
|
self._pos = (self._pos + usize(3));
|
|
75
75
|
((((u32(first) & u32(0x07)) << u32(18)) | ((u32(second) & u32(0x3F)) << u32(12))) | ((u32(third) & u32(0x3F)) << u32(6))) | (u32(fourth) & u32(0x3F))
|
|
76
76
|
}
|
|
@@ -88,7 +88,7 @@ impl(
|
|
|
88
88
|
_lookup_group_name : (fn(self : Self, name : String) -> Option(usize))({
|
|
89
89
|
i := usize(0);
|
|
90
90
|
while(i < self._group_names.len(), i = (i + usize(1)), {
|
|
91
|
-
entry := self._group_names
|
|
91
|
+
entry := self._group_names(i);
|
|
92
92
|
cond(
|
|
93
93
|
(entry.name == name) => {
|
|
94
94
|
return(.Some(entry.index));
|
|
@@ -136,7 +136,7 @@ impl(
|
|
|
136
136
|
result := usize(0);
|
|
137
137
|
i := start;
|
|
138
138
|
while(i < self._pos, i = (i + usize(1)), {
|
|
139
|
-
d := self._bytes
|
|
139
|
+
d := self._bytes(i);
|
|
140
140
|
result = ((result * usize(10)) + usize(d - u8(48)));
|
|
141
141
|
});
|
|
142
142
|
.Some(result)
|
|
@@ -176,13 +176,13 @@ impl(
|
|
|
176
176
|
r.push(CharRange(low : u32(32), high : u32(32)));
|
|
177
177
|
r
|
|
178
178
|
}),
|
|
179
|
-
// Parse \xHH hex
|
|
179
|
+
// Parse \xHH hex escape — reads exactly 2 hex digits and returns the codepoint.
|
|
180
180
|
_parse_hex_byte : (fn(self : Self) -> Option(u32))({
|
|
181
181
|
if((self._pos + usize(2)) > self._bytes.len(), {
|
|
182
182
|
return(.None);
|
|
183
183
|
});
|
|
184
|
-
(h1 : u8) = self._bytes
|
|
185
|
-
(h2 : u8) = self._bytes
|
|
184
|
+
(h1 : u8) = self._bytes(self._pos);
|
|
185
|
+
(h2 : u8) = self._bytes(self._pos + usize(1));
|
|
186
186
|
(v1 : i32) = cond(
|
|
187
187
|
((h1 >= u8(48)) && (h1 <= u8(57))) => (i32(h1) - i32(48)),
|
|
188
188
|
((h1 >= u8(65)) && (h1 <= u8(70))) => ((i32(h1) - i32(65)) + i32(10)),
|
|
@@ -202,7 +202,7 @@ impl(
|
|
|
202
202
|
self._pos = (self._pos + usize(2));
|
|
203
203
|
.Some(u32((v1 << i32(4)) | v2))
|
|
204
204
|
}),
|
|
205
|
-
|
|
205
|
+
_parse_class_unwind : (fn(self : Self) -> Result(ArrayList(CharRange), String))({
|
|
206
206
|
b := self._advance();
|
|
207
207
|
match(
|
|
208
208
|
b,
|
|
@@ -264,7 +264,7 @@ impl(
|
|
|
264
264
|
has_end := ((self._pos + usize(1)) < self._bytes.len());
|
|
265
265
|
cond(
|
|
266
266
|
has_end => {
|
|
267
|
-
end_first := self._bytes
|
|
267
|
+
end_first := self._bytes(self._pos + usize(1));
|
|
268
268
|
cond(
|
|
269
269
|
(end_first == u8(93)) => {
|
|
270
270
|
ranges.push(CharRange(low : low, high : low));
|
|
@@ -273,20 +273,20 @@ impl(
|
|
|
273
273
|
// High end is an escape sequence (e.g. \x20, \0, \n)
|
|
274
274
|
self._pos = (self._pos + usize(1));
|
|
275
275
|
self._pos = (self._pos + usize(1));
|
|
276
|
-
esc := self.
|
|
276
|
+
esc := self._parse_class_unwind();
|
|
277
277
|
match(
|
|
278
278
|
esc,
|
|
279
279
|
.Ok(esc_ranges) => {
|
|
280
|
-
if((esc_ranges.len() == usize(1)) && (esc_ranges
|
|
281
|
-
(high : u32) = esc_ranges
|
|
280
|
+
if((esc_ranges.len() == usize(1)) && (esc_ranges(usize(0)).low == esc_ranges(usize(0)).high), {
|
|
281
|
+
(high : u32) = esc_ranges(usize(0)).low;
|
|
282
282
|
ranges.push(CharRange(low : low, high : high));
|
|
283
283
|
}, {
|
|
284
|
-
// Multi-range
|
|
284
|
+
// A multi-range escape like \d can't be a range endpoint; treat the dash as a literal
|
|
285
285
|
ranges.push(CharRange(low : low, high : low));
|
|
286
286
|
ranges.push(CharRange(low : u32(45), high : u32(45)));
|
|
287
287
|
j := usize(0);
|
|
288
288
|
while(j < esc_ranges.len(), j = (j + usize(1)), {
|
|
289
|
-
ranges.push(esc_ranges
|
|
289
|
+
ranges.push(esc_ranges(j));
|
|
290
290
|
});
|
|
291
291
|
});
|
|
292
292
|
},
|
|
@@ -331,18 +331,18 @@ impl(
|
|
|
331
331
|
},
|
|
332
332
|
(b == u8(92)) => {
|
|
333
333
|
self._pos = (self._pos + usize(1));
|
|
334
|
-
esc := self.
|
|
334
|
+
esc := self._parse_class_unwind();
|
|
335
335
|
match(
|
|
336
336
|
esc,
|
|
337
337
|
.Ok(esc_ranges) => {
|
|
338
|
-
// If
|
|
339
|
-
if((esc_ranges.len() == usize(1)) && (esc_ranges
|
|
340
|
-
(low : u32) = esc_ranges
|
|
338
|
+
// If the escape produced a single codepoint, check for a range (e.g. \0-\x20)
|
|
339
|
+
if((esc_ranges.len() == usize(1)) && (esc_ranges(usize(0)).low == esc_ranges(usize(0)).high), {
|
|
340
|
+
(low : u32) = esc_ranges(usize(0)).low;
|
|
341
341
|
self._try_parse_char_range(ranges, low);
|
|
342
342
|
}, {
|
|
343
343
|
j := usize(0);
|
|
344
344
|
while(j < esc_ranges.len(), j = (j + usize(1)), {
|
|
345
|
-
ranges.push(esc_ranges
|
|
345
|
+
ranges.push(esc_ranges(j));
|
|
346
346
|
});
|
|
347
347
|
});
|
|
348
348
|
},
|
|
@@ -402,7 +402,7 @@ impl(
|
|
|
402
402
|
});
|
|
403
403
|
.Err(`Unterminated character class`)
|
|
404
404
|
}),
|
|
405
|
-
// Parse \p{PropertyName} or \P{PropertyName} unicode property
|
|
405
|
+
// Parse \p{PropertyName} or \P{PropertyName} unicode property escape.
|
|
406
406
|
// negated=true for \P (inverted match).
|
|
407
407
|
_parse_unicode_property : (fn(self : Self, negated : bool) -> Result(RegexNode, String))({
|
|
408
408
|
// Expect opening '{'
|
|
@@ -451,7 +451,7 @@ impl(
|
|
|
451
451
|
});
|
|
452
452
|
.Err(`Unterminated Unicode property \\p{...}`)
|
|
453
453
|
}),
|
|
454
|
-
|
|
454
|
+
_parse_unwind : (fn(self : Self) -> Result(RegexNode, String))({
|
|
455
455
|
b := self._advance();
|
|
456
456
|
match(
|
|
457
457
|
b,
|
|
@@ -524,7 +524,7 @@ impl(
|
|
|
524
524
|
(ch == u8(112)) => self._parse_unicode_property(false),
|
|
525
525
|
// Negated unicode property \P{Name}
|
|
526
526
|
(ch == u8(80)) => self._parse_unicode_property(true),
|
|
527
|
-
// Hex
|
|
527
|
+
// Hex escape \xHH
|
|
528
528
|
(ch == u8(120)) => match(
|
|
529
529
|
self._parse_hex_byte(),
|
|
530
530
|
.Some(v) =>.Ok(RegexNode.literal(v)),
|
|
@@ -627,7 +627,7 @@ impl(
|
|
|
627
627
|
(ch == u8(94)) =>.Ok(RegexNode.anchor_node(.Start)),
|
|
628
628
|
(ch == u8(36)) =>.Ok(RegexNode.anchor_node(.End)),
|
|
629
629
|
(ch == u8(91)) => self._parse_char_class(),
|
|
630
|
-
(ch == u8(92)) => self.
|
|
630
|
+
(ch == u8(92)) => self._parse_unwind(),
|
|
631
631
|
true =>.Ok(RegexNode.literal(self._read_codepoint(ch)))
|
|
632
632
|
),
|
|
633
633
|
.None =>.Err(`Unexpected end of pattern`)
|
|
@@ -677,7 +677,7 @@ impl(
|
|
|
677
677
|
_make_sequence : (fn(self : Self, nodes : ArrayList(RegexNode)) -> RegexNode)(
|
|
678
678
|
cond(
|
|
679
679
|
(nodes.len() == usize(0)) => RegexNode.sequence(ArrayList(RegexNode).new()),
|
|
680
|
-
(nodes.len() == usize(1)) => nodes
|
|
680
|
+
(nodes.len() == usize(1)) => nodes(usize(0)),
|
|
681
681
|
true => RegexNode.sequence(nodes)
|
|
682
682
|
)
|
|
683
683
|
),
|
|
@@ -689,10 +689,10 @@ impl(
|
|
|
689
689
|
true => ()
|
|
690
690
|
);
|
|
691
691
|
alts.push(seq);
|
|
692
|
-
result := self._make_sequence(alts
|
|
692
|
+
result := self._make_sequence(alts(usize(0)));
|
|
693
693
|
i := usize(1);
|
|
694
694
|
while(i < alts.len(), i = (i + usize(1)), {
|
|
695
|
-
right := self._make_sequence(alts
|
|
695
|
+
right := self._make_sequence(alts(i));
|
|
696
696
|
result = RegexNode.alternation(result, right);
|
|
697
697
|
});
|
|
698
698
|
result
|
|
@@ -873,7 +873,7 @@ impl(
|
|
|
873
873
|
cur_non_cap => RegexNode.non_capturing_group(inner),
|
|
874
874
|
true => RegexNode.group(inner, cur_group_idx)
|
|
875
875
|
);
|
|
876
|
-
parent := stack
|
|
876
|
+
parent := stack(stack.len() - usize(1));
|
|
877
877
|
stack.pop();
|
|
878
878
|
cur_alts = parent.alternatives;
|
|
879
879
|
cur_seq = parent.current;
|
package/std/regex/vm.yo
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
//! Executes a compiled NFA program against an input string.
|
|
4
4
|
//! Uses Thompson's NFA simulation with parallel state tracking
|
|
5
5
|
//! for O(n×m) worst-case time complexity.
|
|
6
|
+
pragma(Pragma.AllowUnsafe);
|
|
6
7
|
open(import("std/collections/array_list"));
|
|
7
8
|
open(import("std/string"));
|
|
8
9
|
{ NfaProgram, Instr, InstrKind, ClassEntry, GroupNameEntry } :: import("./compiler.yo");
|
|
@@ -80,25 +81,25 @@ impl(
|
|
|
80
81
|
)
|
|
81
82
|
}),
|
|
82
83
|
_decode_codepoint : (fn(self : Self, pos : usize) -> DecodedChar)({
|
|
83
|
-
first := self._bytes
|
|
84
|
+
first := self._bytes(pos);
|
|
84
85
|
cond(
|
|
85
86
|
(first < u8(0x80)) =>
|
|
86
87
|
DecodedChar(codepoint : u32(first), byte_len : usize(1)),
|
|
87
88
|
((first >= u8(0xC0)) && (first < u8(0xE0))) => {
|
|
88
|
-
second := self._bytes
|
|
89
|
+
second := self._bytes(pos + usize(1));
|
|
89
90
|
cp := (((u32(first) & u32(0x1F)) << u32(6)) | (u32(second) & u32(0x3F)));
|
|
90
91
|
DecodedChar(codepoint : cp, byte_len : usize(2))
|
|
91
92
|
},
|
|
92
93
|
((first >= u8(0xE0)) && (first < u8(0xF0))) => {
|
|
93
|
-
second := self._bytes
|
|
94
|
-
third := self._bytes
|
|
94
|
+
second := self._bytes(pos + usize(1));
|
|
95
|
+
third := self._bytes(pos + usize(2));
|
|
95
96
|
cp := ((((u32(first) & u32(0x0F)) << u32(12)) | ((u32(second) & u32(0x3F)) << u32(6))) | (u32(third) & u32(0x3F)));
|
|
96
97
|
DecodedChar(codepoint : cp, byte_len : usize(3))
|
|
97
98
|
},
|
|
98
99
|
true => {
|
|
99
|
-
second := self._bytes
|
|
100
|
-
third := self._bytes
|
|
101
|
-
fourth := self._bytes
|
|
100
|
+
second := self._bytes(pos + usize(1));
|
|
101
|
+
third := self._bytes(pos + usize(2));
|
|
102
|
+
fourth := self._bytes(pos + usize(3));
|
|
102
103
|
cp := (((((u32(first) & u32(0x07)) << u32(18)) | ((u32(second) & u32(0x3F)) << u32(12))) | ((u32(third) & u32(0x3F)) << u32(6))) | (u32(fourth) & u32(0x3F)));
|
|
103
104
|
DecodedChar(codepoint : cp, byte_len : usize(4))
|
|
104
105
|
}
|
|
@@ -116,7 +117,7 @@ impl(
|
|
|
116
117
|
_find_prev_char_start : (fn(self : Self, pos : usize) -> usize)({
|
|
117
118
|
p := (pos - usize(1));
|
|
118
119
|
while(p > usize(0), p = (p - usize(1)), {
|
|
119
|
-
b := self._bytes
|
|
120
|
+
b := self._bytes(p);
|
|
120
121
|
cond(
|
|
121
122
|
((b < u8(0x80)) || (b >= u8(0xC0))) => {
|
|
122
123
|
return(p);
|
|
@@ -130,7 +131,7 @@ impl(
|
|
|
130
131
|
cond(
|
|
131
132
|
(pos == usize(0)) => false,
|
|
132
133
|
true => {
|
|
133
|
-
b := self._bytes
|
|
134
|
+
b := self._bytes(pos - usize(1));
|
|
134
135
|
b == u8(10)
|
|
135
136
|
}
|
|
136
137
|
)
|
|
@@ -139,7 +140,7 @@ impl(
|
|
|
139
140
|
cond(
|
|
140
141
|
(pos >= self._bytes.len()) => false,
|
|
141
142
|
true => {
|
|
142
|
-
b := self._bytes
|
|
143
|
+
b := self._bytes(pos);
|
|
143
144
|
b == u8(10)
|
|
144
145
|
}
|
|
145
146
|
)
|
|
@@ -163,7 +164,7 @@ impl(
|
|
|
163
164
|
(found : bool) = false;
|
|
164
165
|
i := usize(0);
|
|
165
166
|
while((i < cls.ranges.len()) && (!(found)), i = (i + usize(1)), {
|
|
166
|
-
r := cls.ranges
|
|
167
|
+
r := cls.ranges(i);
|
|
167
168
|
(low : u32) = cond(
|
|
168
169
|
self._flags.ignore_case => self._to_lower(r.low),
|
|
169
170
|
true => r.low
|
|
@@ -183,7 +184,7 @@ impl(
|
|
|
183
184
|
((!(found)) && self._flags.ignore_case) => {
|
|
184
185
|
i2 := usize(0);
|
|
185
186
|
while((i2 < cls.ranges.len()) && (!(found)), i2 = (i2 + usize(1)), {
|
|
186
|
-
r := cls.ranges
|
|
187
|
+
r := cls.ranges(i2);
|
|
187
188
|
cond(
|
|
188
189
|
((cp >= r.low) && (cp <= r.high)) => {
|
|
189
190
|
found = true;
|
|
@@ -221,7 +222,7 @@ impl(
|
|
|
221
222
|
// Block 3: _add_thread (recursive, depends on Block 1+2)
|
|
222
223
|
impl(
|
|
223
224
|
NfaVm,
|
|
224
|
-
_add_thread : (fn(self : Self, list :
|
|
225
|
+
_add_thread : (fn(self : Self, list : ArrayList(NfaThread), thread : NfaThread, byte_pos : usize, seen : ArrayList(bool)) -> unit)({
|
|
225
226
|
cond(
|
|
226
227
|
(thread.pc >= self._program.instructions.len()) => {
|
|
227
228
|
return(());
|
|
@@ -229,14 +230,14 @@ impl(
|
|
|
229
230
|
true => ()
|
|
230
231
|
);
|
|
231
232
|
// Already checked bounds above: thread.pc < instructions.len()
|
|
232
|
-
is_seen :=
|
|
233
|
+
is_seen := seen(thread.pc);
|
|
233
234
|
cond(
|
|
234
235
|
is_seen => {
|
|
235
236
|
return(());
|
|
236
237
|
},
|
|
237
238
|
true => ()
|
|
238
239
|
);
|
|
239
|
-
|
|
240
|
+
seen(thread.pc) = true;
|
|
240
241
|
instr := self._program.instructions(thread.pc);
|
|
241
242
|
match(
|
|
242
243
|
instr.kind,
|
|
@@ -307,7 +308,7 @@ impl(
|
|
|
307
308
|
);
|
|
308
309
|
},
|
|
309
310
|
_ => {
|
|
310
|
-
list
|
|
311
|
+
list.push(thread);
|
|
311
312
|
}
|
|
312
313
|
);
|
|
313
314
|
})
|
|
@@ -324,7 +325,7 @@ impl(
|
|
|
324
325
|
sub_seen := ArrayList(bool).with_capacity(self._program.instructions.len());
|
|
325
326
|
sub_seen.resize_with_byte(self._program.instructions.len(), int(0));
|
|
326
327
|
initial := NfaThread.new(sub_start_pc, self._n_slots);
|
|
327
|
-
self._add_thread(
|
|
328
|
+
self._add_thread(sub_current, initial, start_byte, sub_seen);
|
|
328
329
|
sub_pos := start_byte;
|
|
329
330
|
input_len := self._bytes.len();
|
|
330
331
|
while(sub_pos <= input_len, {
|
|
@@ -337,8 +338,8 @@ impl(
|
|
|
337
338
|
// Check for Match in current threads
|
|
338
339
|
st := usize(0);
|
|
339
340
|
while(st < sub_current.len(), st = (st + usize(1)), {
|
|
340
|
-
st_thread := sub_current
|
|
341
|
-
st_instr := self._program.instructions
|
|
341
|
+
st_thread := sub_current(st);
|
|
342
|
+
st_instr := self._program.instructions(st_thread.pc);
|
|
342
343
|
match(
|
|
343
344
|
st_instr.kind,
|
|
344
345
|
.Match => {
|
|
@@ -376,15 +377,15 @@ impl(
|
|
|
376
377
|
// Process consuming instructions
|
|
377
378
|
st2 := usize(0);
|
|
378
379
|
while(st2 < sub_current.len(), st2 = (st2 + usize(1)), {
|
|
379
|
-
st_thread := sub_current
|
|
380
|
-
st_instr := self._program.instructions
|
|
380
|
+
st_thread := sub_current(st2);
|
|
381
|
+
st_instr := self._program.instructions(st_thread.pc);
|
|
381
382
|
match(
|
|
382
383
|
st_instr.kind,
|
|
383
384
|
.Char => {
|
|
384
385
|
cond(
|
|
385
386
|
self._char_matches(st_instr.codepoint, sub_cp) => {
|
|
386
387
|
new_t := st_thread.fork(st_thread.pc + usize(1));
|
|
387
|
-
self._add_thread(
|
|
388
|
+
self._add_thread(sub_next, new_t, sub_pos + sub_blen, sub_seen);
|
|
388
389
|
},
|
|
389
390
|
true => ()
|
|
390
391
|
);
|
|
@@ -397,7 +398,7 @@ impl(
|
|
|
397
398
|
cond(
|
|
398
399
|
should_match => {
|
|
399
400
|
new_t := st_thread.fork(st_thread.pc + usize(1));
|
|
400
|
-
self._add_thread(
|
|
401
|
+
self._add_thread(sub_next, new_t, sub_pos + sub_blen, sub_seen);
|
|
401
402
|
},
|
|
402
403
|
true => ()
|
|
403
404
|
);
|
|
@@ -410,7 +411,7 @@ impl(
|
|
|
410
411
|
cond(
|
|
411
412
|
self._codepoint_in_class(sub_cp, cls) => {
|
|
412
413
|
new_t := st_thread.fork(st_thread.pc + usize(1));
|
|
413
|
-
self._add_thread(
|
|
414
|
+
self._add_thread(sub_next, new_t, sub_pos + sub_blen, sub_seen);
|
|
414
415
|
},
|
|
415
416
|
true => ()
|
|
416
417
|
);
|
|
@@ -445,7 +446,7 @@ impl(
|
|
|
445
446
|
seen.fill_with_byte(int(0));
|
|
446
447
|
next_seen.fill_with_byte(int(0));
|
|
447
448
|
initial := NfaThread.new(usize(0), self._n_slots);
|
|
448
|
-
self._add_thread(
|
|
449
|
+
self._add_thread(current, initial, start_byte, seen);
|
|
449
450
|
best_match := VmMatch(matched : false, slots : ArrayList(usize).new());
|
|
450
451
|
byte_pos := start_byte;
|
|
451
452
|
input_len := self._bytes.len();
|
|
@@ -469,10 +470,10 @@ impl(
|
|
|
469
470
|
new_deferred := ArrayList(DeferredThread).new();
|
|
470
471
|
di := usize(0);
|
|
471
472
|
while(di < deferred.len(), di = (di + usize(1)), {
|
|
472
|
-
d := deferred
|
|
473
|
+
d := deferred(di);
|
|
473
474
|
cond(
|
|
474
475
|
(d.target_byte_pos == byte_pos) => {
|
|
475
|
-
self._add_thread(
|
|
476
|
+
self._add_thread(current, d.thread, byte_pos, seen);
|
|
476
477
|
},
|
|
477
478
|
true => {
|
|
478
479
|
new_deferred.push(d);
|
|
@@ -491,15 +492,15 @@ impl(
|
|
|
491
492
|
(found_match_in_gen : bool) = false;
|
|
492
493
|
t := usize(0);
|
|
493
494
|
while((t < current.len()) && (!(found_match_in_gen)), t = (t + usize(1)), {
|
|
494
|
-
thread := current
|
|
495
|
-
instr := self._program.instructions
|
|
495
|
+
thread := current(t);
|
|
496
|
+
instr := self._program.instructions(thread.pc);
|
|
496
497
|
match(
|
|
497
498
|
instr.kind,
|
|
498
499
|
.Char => {
|
|
499
500
|
cond(
|
|
500
501
|
((!(at_end)) && (self._char_matches(instr.codepoint, cur_cp))) => {
|
|
501
502
|
new_thread := thread.fork(thread.pc + usize(1));
|
|
502
|
-
self._add_thread(
|
|
503
|
+
self._add_thread(next, new_thread, byte_pos + char_byte_len, next_seen);
|
|
503
504
|
},
|
|
504
505
|
true => ()
|
|
505
506
|
);
|
|
@@ -514,7 +515,7 @@ impl(
|
|
|
514
515
|
cond(
|
|
515
516
|
should_match => {
|
|
516
517
|
new_thread := thread.fork(thread.pc + usize(1));
|
|
517
|
-
self._add_thread(
|
|
518
|
+
self._add_thread(next, new_thread, byte_pos + char_byte_len, next_seen);
|
|
518
519
|
},
|
|
519
520
|
true => ()
|
|
520
521
|
);
|
|
@@ -532,7 +533,7 @@ impl(
|
|
|
532
533
|
cond(
|
|
533
534
|
self._codepoint_in_class(cur_cp, cls) => {
|
|
534
535
|
new_thread := thread.fork(thread.pc + usize(1));
|
|
535
|
-
self._add_thread(
|
|
536
|
+
self._add_thread(next, new_thread, byte_pos + char_byte_len, next_seen);
|
|
536
537
|
},
|
|
537
538
|
true => ()
|
|
538
539
|
);
|
|
@@ -548,8 +549,8 @@ impl(
|
|
|
548
549
|
group_end_slot := ((instr.slot * usize(2)) + usize(1));
|
|
549
550
|
cond(
|
|
550
551
|
((group_start_slot < thread.slots.len()) && (group_end_slot < thread.slots.len())) => {
|
|
551
|
-
gs := thread.slots
|
|
552
|
-
ge := thread.slots
|
|
552
|
+
gs := thread.slots(group_start_slot);
|
|
553
|
+
ge := thread.slots(group_end_slot);
|
|
553
554
|
cond(
|
|
554
555
|
((gs != unset) && (ge != unset)) => {
|
|
555
556
|
captured_len := (ge - gs);
|
|
@@ -559,8 +560,8 @@ impl(
|
|
|
559
560
|
(bytes_match : bool) = true;
|
|
560
561
|
bi := usize(0);
|
|
561
562
|
while(bi < captured_len, bi = (bi + usize(1)), {
|
|
562
|
-
expected := self._bytes
|
|
563
|
-
actual := self._bytes
|
|
563
|
+
expected := self._bytes(gs + bi);
|
|
564
|
+
actual := self._bytes(byte_pos + bi);
|
|
564
565
|
cond(
|
|
565
566
|
self._flags.ignore_case => {
|
|
566
567
|
el := cond(
|
|
@@ -595,7 +596,7 @@ impl(
|
|
|
595
596
|
cond(
|
|
596
597
|
(captured_len == usize(0)) => {
|
|
597
598
|
// Empty capture — epsilon-like, process in current gen
|
|
598
|
-
self._add_thread(
|
|
599
|
+
self._add_thread(current, new_thread, byte_pos, seen);
|
|
599
600
|
},
|
|
600
601
|
true => {
|
|
601
602
|
// Defer to the target byte position
|
|
@@ -624,7 +625,7 @@ impl(
|
|
|
624
625
|
(sub_matched == positive) => {
|
|
625
626
|
sub_end := instr.target_b;
|
|
626
627
|
new_thread := thread.fork(sub_end);
|
|
627
|
-
self._add_thread(
|
|
628
|
+
self._add_thread(current, new_thread, byte_pos, seen);
|
|
628
629
|
},
|
|
629
630
|
true => ()
|
|
630
631
|
);
|
|
@@ -664,10 +665,10 @@ impl(
|
|
|
664
665
|
// Skip UTF-8 continuation bytes (10xxxxxx)
|
|
665
666
|
cond(
|
|
666
667
|
(try_pos > usize(0)) => {
|
|
667
|
-
tb := self._bytes
|
|
668
|
+
tb := self._bytes(try_pos);
|
|
668
669
|
while(((tb >= u8(0x80)) && (tb < u8(0xC0))) && (try_pos > usize(0)), {
|
|
669
670
|
try_pos = (try_pos - usize(1));
|
|
670
|
-
tb = self._bytes
|
|
671
|
+
tb = self._bytes(try_pos);
|
|
671
672
|
});
|
|
672
673
|
},
|
|
673
674
|
true => ()
|
|
@@ -687,7 +688,7 @@ impl(
|
|
|
687
688
|
(lb_matched == positive) => {
|
|
688
689
|
sub_end := instr.target_b;
|
|
689
690
|
new_thread := thread.fork(sub_end);
|
|
690
|
-
self._add_thread(
|
|
691
|
+
self._add_thread(current, new_thread, byte_pos, seen);
|
|
691
692
|
},
|
|
692
693
|
true => ()
|
|
693
694
|
);
|