@shd101wyy/yo 0.1.26 → 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/skills/yo-async-effects/SKILL.md +4 -4
- package/.github/skills/yo-async-effects/async-effects-recipes.md +34 -34
- package/.github/skills/yo-core-patterns/SKILL.md +1 -1
- package/.github/skills/yo-core-patterns/core-patterns-cheatsheet.md +26 -26
- package/.github/skills/yo-project-workflow/SKILL.md +6 -3
- package/.github/skills/yo-project-workflow/workflow-cheatsheet.md +34 -11
- package/.github/skills/yo-syntax/SKILL.md +7 -6
- package/.github/skills/yo-syntax/syntax-cheatsheet.md +73 -60
- package/.github/skills/yo-wasm-integration/wasm-integration-cheatsheet.md +3 -3
- package/README.md +10 -8
- package/out/cjs/index.cjs +456 -438
- package/out/cjs/yo-cli.cjs +576 -543
- package/out/cjs/yo-lsp.cjs +559 -532
- package/out/esm/index.mjs +281 -263
- package/out/types/src/formatter.d.ts +11 -0
- package/out/types/src/lsp/formatting.d.ts +2 -0
- package/out/types/src/tests/formatter.test.d.ts +1 -0
- package/out/types/tsconfig.tsbuildinfo +1 -1
- package/package.json +1 -1
- package/std/alg/hash.yo +13 -21
- package/std/allocator.yo +25 -40
- package/std/async.yo +3 -7
- package/std/build.yo +105 -151
- package/std/cli/arg_parser.yo +184 -169
- package/std/collections/array_list.yo +350 -314
- package/std/collections/btree_map.yo +142 -131
- package/std/collections/deque.yo +132 -128
- package/std/collections/hash_map.yo +542 -566
- package/std/collections/hash_set.yo +623 -687
- package/std/collections/linked_list.yo +275 -293
- package/std/collections/ordered_map.yo +113 -85
- package/std/collections/priority_queue.yo +73 -73
- package/std/crypto/md5.yo +191 -95
- package/std/crypto/random.yo +56 -64
- package/std/crypto/sha256.yo +151 -107
- package/std/encoding/base64.yo +87 -81
- package/std/encoding/hex.yo +43 -50
- package/std/encoding/html.yo +56 -81
- package/std/encoding/html_char_utils.yo +7 -13
- package/std/encoding/html_entities.yo +2248 -2253
- package/std/encoding/json.yo +316 -224
- package/std/encoding/punycode.yo +86 -116
- package/std/encoding/toml.yo +67 -66
- package/std/encoding/utf16.yo +37 -44
- package/std/env.yo +62 -91
- package/std/error.yo +7 -15
- package/std/fmt/display.yo +5 -9
- package/std/fmt/index.yo +8 -14
- package/std/fmt/to_string.yo +330 -315
- package/std/fmt/writer.yo +58 -87
- package/std/fs/dir.yo +83 -102
- package/std/fs/file.yo +147 -180
- package/std/fs/metadata.yo +45 -78
- package/std/fs/temp.yo +55 -65
- package/std/fs/types.yo +27 -40
- package/std/fs/walker.yo +53 -68
- package/std/gc.yo +5 -8
- package/std/glob.yo +30 -43
- package/std/http/client.yo +107 -120
- package/std/http/http.yo +106 -96
- package/std/http/index.yo +4 -6
- package/std/imm/list.yo +88 -93
- package/std/imm/map.yo +528 -464
- package/std/imm/set.yo +52 -57
- package/std/imm/sorted_map.yo +340 -286
- package/std/imm/sorted_set.yo +57 -63
- package/std/imm/string.yo +404 -345
- package/std/imm/vec.yo +173 -181
- package/std/io/reader.yo +3 -6
- package/std/io/writer.yo +4 -8
- package/std/libc/assert.yo +5 -9
- package/std/libc/ctype.yo +32 -22
- package/std/libc/dirent.yo +26 -25
- package/std/libc/errno.yo +164 -90
- package/std/libc/fcntl.yo +52 -45
- package/std/libc/float.yo +66 -44
- package/std/libc/limits.yo +42 -33
- package/std/libc/math.yo +53 -82
- package/std/libc/signal.yo +72 -47
- package/std/libc/stdatomic.yo +217 -188
- package/std/libc/stdint.yo +5 -29
- package/std/libc/stdio.yo +5 -29
- package/std/libc/stdlib.yo +32 -39
- package/std/libc/string.yo +5 -23
- package/std/libc/sys/stat.yo +58 -56
- package/std/libc/time.yo +5 -19
- package/std/libc/unistd.yo +5 -20
- package/std/libc/wctype.yo +6 -9
- package/std/libc/windows.yo +26 -30
- package/std/log.yo +41 -55
- package/std/net/addr.yo +102 -97
- package/std/net/dns.yo +27 -28
- package/std/net/errors.yo +50 -49
- package/std/net/tcp.yo +113 -124
- package/std/net/udp.yo +55 -66
- package/std/os/env.yo +35 -33
- package/std/os/signal.yo +15 -25
- package/std/path.yo +276 -311
- package/std/prelude.yo +6304 -4315
- package/std/process/command.yo +87 -103
- package/std/process/index.yo +12 -31
- package/std/regex/compiler.yo +196 -95
- package/std/regex/flags.yo +58 -39
- package/std/regex/index.yo +157 -173
- package/std/regex/match.yo +20 -31
- package/std/regex/node.yo +134 -152
- package/std/regex/parser.yo +283 -259
- package/std/regex/unicode.yo +172 -202
- package/std/regex/vm.yo +155 -171
- package/std/string/index.yo +5 -7
- package/std/string/rune.yo +45 -55
- package/std/string/string.yo +937 -964
- package/std/string/string_builder.yo +94 -104
- package/std/string/unicode.yo +46 -64
- package/std/sync/channel.yo +72 -73
- package/std/sync/cond.yo +31 -36
- package/std/sync/mutex.yo +30 -32
- package/std/sync/once.yo +13 -16
- package/std/sync/rwlock.yo +26 -31
- package/std/sync/waitgroup.yo +20 -25
- package/std/sys/advise.yo +16 -24
- package/std/sys/bufio/buf_reader.yo +77 -93
- package/std/sys/bufio/buf_writer.yo +52 -65
- package/std/sys/clock.yo +4 -9
- package/std/sys/constants.yo +77 -61
- package/std/sys/copy.yo +4 -10
- package/std/sys/dir.yo +26 -43
- package/std/sys/dns.yo +41 -61
- package/std/sys/errors.yo +95 -103
- package/std/sys/events.yo +45 -57
- package/std/sys/externs.yo +319 -267
- package/std/sys/fallocate.yo +7 -11
- package/std/sys/fcntl.yo +14 -22
- package/std/sys/file.yo +26 -40
- package/std/sys/future.yo +5 -8
- package/std/sys/iov.yo +12 -25
- package/std/sys/lock.yo +12 -13
- package/std/sys/mmap.yo +38 -43
- package/std/sys/path.yo +3 -8
- package/std/sys/perm.yo +7 -21
- package/std/sys/pipe.yo +5 -12
- package/std/sys/process.yo +23 -29
- package/std/sys/seek.yo +10 -12
- package/std/sys/signal.yo +7 -13
- package/std/sys/signals.yo +52 -35
- package/std/sys/socket.yo +63 -58
- package/std/sys/socketpair.yo +3 -6
- package/std/sys/sockinfo.yo +11 -20
- package/std/sys/statfs.yo +11 -34
- package/std/sys/statx.yo +25 -52
- package/std/sys/sysinfo.yo +15 -20
- package/std/sys/tcp.yo +62 -92
- package/std/sys/temp.yo +5 -9
- package/std/sys/time.yo +5 -15
- package/std/sys/timer.yo +6 -11
- package/std/sys/tty.yo +10 -18
- package/std/sys/udp.yo +22 -39
- package/std/sys/umask.yo +3 -6
- package/std/sys/unix.yo +33 -52
- package/std/testing/bench.yo +49 -52
- package/std/thread.yo +10 -15
- package/std/time/datetime.yo +105 -89
- package/std/time/duration.yo +43 -56
- package/std/time/instant.yo +13 -18
- package/std/time/sleep.yo +5 -9
- package/std/url/index.yo +184 -209
- package/std/worker.yo +6 -10
package/std/regex/vm.yo
CHANGED
|
@@ -3,72 +3,65 @@
|
|
|
3
3
|
//! Executes a compiled NFA program against an input string.
|
|
4
4
|
//! Uses Thompson's NFA simulation with parallel state tracking
|
|
5
5
|
//! for O(n×m) worst-case time complexity.
|
|
6
|
-
|
|
7
|
-
open
|
|
8
|
-
|
|
9
|
-
{
|
|
10
|
-
{
|
|
11
|
-
{ RegexFlags } :: import "./flags.yo";
|
|
12
|
-
|
|
6
|
+
open(import("std/collections/array_list"));
|
|
7
|
+
open(import("std/string"));
|
|
8
|
+
{ NfaProgram, Instr, InstrKind, ClassEntry, GroupNameEntry } :: import("./compiler.yo");
|
|
9
|
+
{ CharRange } :: import("./node.yo");
|
|
10
|
+
{ RegexFlags } :: import("./flags.yo");
|
|
13
11
|
// Max number of capture slots (group 0 + up to 99 groups = 200 slots)
|
|
14
12
|
MAX_SLOTS :: 200;
|
|
15
|
-
|
|
16
13
|
// A single NFA thread: an instruction pointer + capture slots
|
|
17
14
|
NfaThread :: object(
|
|
18
|
-
pc
|
|
15
|
+
pc : usize,
|
|
19
16
|
slots : ArrayList(usize)
|
|
20
17
|
);
|
|
21
|
-
|
|
22
|
-
|
|
18
|
+
impl(
|
|
19
|
+
NfaThread,
|
|
23
20
|
new : (fn(pc : usize, n_slots : usize) -> Self)({
|
|
24
21
|
s := ArrayList(usize).with_capacity(n_slots);
|
|
25
22
|
// usize.MAX = 0xFF..FF, so memset with 0xFF fills each byte
|
|
26
23
|
s.resize_with_byte(n_slots, int(255));
|
|
27
|
-
Self(pc: pc, slots: s)
|
|
24
|
+
Self(pc : pc, slots : s)
|
|
28
25
|
}),
|
|
29
|
-
|
|
30
26
|
// Clone a thread with a new PC
|
|
31
27
|
fork : (fn(self : Self, new_pc : usize) -> Self)({
|
|
32
28
|
new_slots := ArrayList(usize).with_capacity(self.slots.len());
|
|
33
|
-
match(
|
|
29
|
+
match(
|
|
30
|
+
self.slots.ptr(),
|
|
34
31
|
.Some(src) => new_slots.extend_from_ptr(src, self.slots.len()),
|
|
35
32
|
.None => ()
|
|
36
33
|
);
|
|
37
|
-
Self(pc: new_pc, slots: new_slots)
|
|
34
|
+
Self(pc : new_pc, slots : new_slots)
|
|
38
35
|
})
|
|
39
36
|
);
|
|
40
|
-
|
|
41
37
|
// VM execution result
|
|
42
38
|
VmMatch :: struct(
|
|
43
39
|
matched : bool,
|
|
44
|
-
slots
|
|
40
|
+
slots : ArrayList(usize)
|
|
45
41
|
);
|
|
46
|
-
|
|
47
42
|
// Helper struct for decoded characters
|
|
48
43
|
DecodedChar :: struct(
|
|
49
44
|
codepoint : u32,
|
|
50
|
-
byte_len
|
|
45
|
+
byte_len : usize
|
|
51
46
|
);
|
|
52
|
-
|
|
53
47
|
// A thread deferred to a future byte position (for multi-byte backrefs)
|
|
54
48
|
DeferredThread :: struct(
|
|
55
49
|
target_byte_pos : usize,
|
|
56
50
|
thread : NfaThread
|
|
57
51
|
);
|
|
58
|
-
|
|
59
52
|
// The NFA virtual machine
|
|
60
53
|
NfaVm :: object(
|
|
61
|
-
_program
|
|
62
|
-
_flags
|
|
63
|
-
_input
|
|
64
|
-
_bytes
|
|
65
|
-
_n_slots
|
|
66
|
-
_seen
|
|
54
|
+
_program : NfaProgram,
|
|
55
|
+
_flags : RegexFlags,
|
|
56
|
+
_input : String,
|
|
57
|
+
_bytes : ArrayList(u8),
|
|
58
|
+
_n_slots : usize,
|
|
59
|
+
_seen : ArrayList(bool),
|
|
67
60
|
_next_seen : ArrayList(bool)
|
|
68
61
|
);
|
|
69
|
-
|
|
70
62
|
// Block 1: Constructor and leaf helpers (no method dependencies)
|
|
71
|
-
impl(
|
|
63
|
+
impl(
|
|
64
|
+
NfaVm,
|
|
72
65
|
new : (fn(program : NfaProgram, flags : RegexFlags, input : String) -> Self)({
|
|
73
66
|
n_slots := ((program.n_groups + usize(1)) * usize(2));
|
|
74
67
|
n_instr := program.instructions.len();
|
|
@@ -77,88 +70,84 @@ impl(NfaVm,
|
|
|
77
70
|
next_seen := ArrayList(bool).with_capacity(n_instr);
|
|
78
71
|
next_seen.resize_with_byte(n_instr, int(0));
|
|
79
72
|
Self(
|
|
80
|
-
_program: program,
|
|
81
|
-
_flags: flags,
|
|
82
|
-
_input: input,
|
|
83
|
-
_bytes: input.as_bytes(),
|
|
84
|
-
_n_slots: n_slots,
|
|
85
|
-
_seen: seen,
|
|
86
|
-
_next_seen: next_seen
|
|
73
|
+
_program : program,
|
|
74
|
+
_flags : flags,
|
|
75
|
+
_input : input,
|
|
76
|
+
_bytes : input.as_bytes(),
|
|
77
|
+
_n_slots : n_slots,
|
|
78
|
+
_seen : seen,
|
|
79
|
+
_next_seen : next_seen
|
|
87
80
|
)
|
|
88
81
|
}),
|
|
89
|
-
|
|
90
82
|
_decode_codepoint : (fn(self : Self, pos : usize) -> DecodedChar)({
|
|
91
83
|
first := self._bytes.get(pos).unwrap();
|
|
92
84
|
cond(
|
|
93
85
|
(first < u8(0x80)) =>
|
|
94
|
-
DecodedChar(codepoint: u32(first), byte_len: usize(1)),
|
|
86
|
+
DecodedChar(codepoint : u32(first), byte_len : usize(1)),
|
|
95
87
|
((first >= u8(0xC0)) && (first < u8(0xE0))) => {
|
|
96
|
-
second := self._bytes.get(
|
|
88
|
+
second := self._bytes.get(pos + usize(1)).unwrap();
|
|
97
89
|
cp := (((u32(first) & u32(0x1F)) << u32(6)) | (u32(second) & u32(0x3F)));
|
|
98
|
-
DecodedChar(codepoint: cp, byte_len: usize(2))
|
|
90
|
+
DecodedChar(codepoint : cp, byte_len : usize(2))
|
|
99
91
|
},
|
|
100
92
|
((first >= u8(0xE0)) && (first < u8(0xF0))) => {
|
|
101
|
-
second := self._bytes.get(
|
|
102
|
-
third := self._bytes.get(
|
|
93
|
+
second := self._bytes.get(pos + usize(1)).unwrap();
|
|
94
|
+
third := self._bytes.get(pos + usize(2)).unwrap();
|
|
103
95
|
cp := ((((u32(first) & u32(0x0F)) << u32(12)) | ((u32(second) & u32(0x3F)) << u32(6))) | (u32(third) & u32(0x3F)));
|
|
104
|
-
DecodedChar(codepoint: cp, byte_len: usize(3))
|
|
96
|
+
DecodedChar(codepoint : cp, byte_len : usize(3))
|
|
105
97
|
},
|
|
106
98
|
true => {
|
|
107
|
-
second := self._bytes.get(
|
|
108
|
-
third := self._bytes.get(
|
|
109
|
-
fourth := self._bytes.get(
|
|
99
|
+
second := self._bytes.get(pos + usize(1)).unwrap();
|
|
100
|
+
third := self._bytes.get(pos + usize(2)).unwrap();
|
|
101
|
+
fourth := self._bytes.get(pos + usize(3)).unwrap();
|
|
110
102
|
cp := (((((u32(first) & u32(0x07)) << u32(18)) | ((u32(second) & u32(0x3F)) << u32(12))) | ((u32(third) & u32(0x3F)) << u32(6))) | (u32(fourth) & u32(0x3F)));
|
|
111
|
-
DecodedChar(codepoint: cp, byte_len: usize(4))
|
|
103
|
+
DecodedChar(codepoint : cp, byte_len : usize(4))
|
|
112
104
|
}
|
|
113
105
|
)
|
|
114
106
|
}),
|
|
115
|
-
|
|
116
107
|
_to_lower : (fn(self : Self, cp : u32) -> u32)(
|
|
117
108
|
cond(
|
|
118
109
|
((cp >= u32(65)) && (cp <= u32(90))) => (cp + u32(32)),
|
|
119
110
|
true => cp
|
|
120
111
|
)
|
|
121
112
|
),
|
|
122
|
-
|
|
123
113
|
_is_word_char : (fn(self : Self, cp : u32) -> bool)(
|
|
124
|
-
(((
|
|
114
|
+
(((cp >= u32(48)) && (cp <= u32(57))) || ((cp >= u32(65)) && (cp <= u32(90)))) || (((cp >= u32(97)) && (cp <= u32(122))) || (cp == u32(95)))
|
|
125
115
|
),
|
|
126
|
-
|
|
127
116
|
_find_prev_char_start : (fn(self : Self, pos : usize) -> usize)({
|
|
128
117
|
p := (pos - usize(1));
|
|
129
|
-
while
|
|
118
|
+
while(p > usize(0), p = (p - usize(1)), {
|
|
130
119
|
b := self._bytes.get(p).unwrap();
|
|
131
120
|
cond(
|
|
132
|
-
((b < u8(0x80)) || (b >= u8(0xC0))) => {
|
|
121
|
+
((b < u8(0x80)) || (b >= u8(0xC0))) => {
|
|
122
|
+
return(p);
|
|
123
|
+
},
|
|
133
124
|
true => ()
|
|
134
125
|
);
|
|
135
|
-
};
|
|
126
|
+
});
|
|
136
127
|
p
|
|
137
128
|
}),
|
|
138
|
-
|
|
139
129
|
_prev_byte_is_newline : (fn(self : Self, pos : usize) -> bool)(
|
|
140
130
|
cond(
|
|
141
131
|
(pos == usize(0)) => false,
|
|
142
132
|
true => {
|
|
143
|
-
b := self._bytes.get(
|
|
144
|
-
|
|
133
|
+
b := self._bytes.get(pos - usize(1)).unwrap();
|
|
134
|
+
b == u8(10)
|
|
145
135
|
}
|
|
146
136
|
)
|
|
147
137
|
),
|
|
148
|
-
|
|
149
138
|
_cur_byte_is_newline : (fn(self : Self, pos : usize) -> bool)(
|
|
150
139
|
cond(
|
|
151
140
|
(pos >= self._bytes.len()) => false,
|
|
152
141
|
true => {
|
|
153
142
|
b := self._bytes.get(pos).unwrap();
|
|
154
|
-
|
|
143
|
+
b == u8(10)
|
|
155
144
|
}
|
|
156
145
|
)
|
|
157
146
|
)
|
|
158
147
|
);
|
|
159
|
-
|
|
160
148
|
// Block 2: Methods that depend on Block 1
|
|
161
|
-
impl(
|
|
149
|
+
impl(
|
|
150
|
+
NfaVm,
|
|
162
151
|
_char_matches : (fn(self : Self, expected : u32, actual : u32) -> bool)(
|
|
163
152
|
cond(
|
|
164
153
|
(expected == actual) => true,
|
|
@@ -166,16 +155,14 @@ impl(NfaVm,
|
|
|
166
155
|
true => false
|
|
167
156
|
)
|
|
168
157
|
),
|
|
169
|
-
|
|
170
158
|
_codepoint_in_class : (fn(self : Self, cp : u32, cls : ClassEntry) -> bool)({
|
|
171
159
|
check_cp := cond(
|
|
172
160
|
self._flags.ignore_case => self._to_lower(cp),
|
|
173
161
|
true => cp
|
|
174
162
|
);
|
|
175
|
-
|
|
176
163
|
(found : bool) = false;
|
|
177
164
|
i := usize(0);
|
|
178
|
-
while
|
|
165
|
+
while((i < cls.ranges.len()) && (!(found)), i = (i + usize(1)), {
|
|
179
166
|
r := cls.ranges.get(i).unwrap();
|
|
180
167
|
(low : u32) = cond(
|
|
181
168
|
self._flags.ignore_case => self._to_lower(r.low),
|
|
@@ -186,31 +173,32 @@ impl(NfaVm,
|
|
|
186
173
|
true => r.high
|
|
187
174
|
);
|
|
188
175
|
cond(
|
|
189
|
-
((check_cp >= low) && (check_cp <= high)) => {
|
|
176
|
+
((check_cp >= low) && (check_cp <= high)) => {
|
|
177
|
+
found = true;
|
|
178
|
+
},
|
|
190
179
|
true => ()
|
|
191
180
|
);
|
|
192
|
-
};
|
|
193
|
-
|
|
181
|
+
});
|
|
194
182
|
cond(
|
|
195
183
|
((!(found)) && self._flags.ignore_case) => {
|
|
196
184
|
i2 := usize(0);
|
|
197
|
-
while
|
|
185
|
+
while((i2 < cls.ranges.len()) && (!(found)), i2 = (i2 + usize(1)), {
|
|
198
186
|
r := cls.ranges.get(i2).unwrap();
|
|
199
187
|
cond(
|
|
200
|
-
((cp >= r.low) && (cp <= r.high)) => {
|
|
188
|
+
((cp >= r.low) && (cp <= r.high)) => {
|
|
189
|
+
found = true;
|
|
190
|
+
},
|
|
201
191
|
true => ()
|
|
202
192
|
);
|
|
203
|
-
};
|
|
193
|
+
});
|
|
204
194
|
},
|
|
205
195
|
true => ()
|
|
206
196
|
);
|
|
207
|
-
|
|
208
197
|
cond(
|
|
209
198
|
cls.negated => (!(found)),
|
|
210
199
|
true => found
|
|
211
200
|
)
|
|
212
201
|
}),
|
|
213
|
-
|
|
214
202
|
_is_word_boundary : (fn(self : Self, pos : usize) -> bool)({
|
|
215
203
|
left_is_word := cond(
|
|
216
204
|
(pos == usize(0)) => false,
|
|
@@ -227,29 +215,31 @@ impl(NfaVm,
|
|
|
227
215
|
self._is_word_char(cur.codepoint)
|
|
228
216
|
}
|
|
229
217
|
);
|
|
230
|
-
|
|
218
|
+
left_is_word != right_is_word
|
|
231
219
|
})
|
|
232
220
|
);
|
|
233
|
-
|
|
234
221
|
// Block 3: _add_thread (recursive, depends on Block 1+2)
|
|
235
|
-
impl(
|
|
222
|
+
impl(
|
|
223
|
+
NfaVm,
|
|
236
224
|
_add_thread : (fn(self : Self, list : *(ArrayList(NfaThread)), thread : NfaThread, byte_pos : usize, seen : *(ArrayList(bool))) -> unit)({
|
|
237
225
|
cond(
|
|
238
|
-
(thread.pc >= self._program.instructions.len()) => {
|
|
226
|
+
(thread.pc >= self._program.instructions.len()) => {
|
|
227
|
+
return(());
|
|
228
|
+
},
|
|
239
229
|
true => ()
|
|
240
230
|
);
|
|
241
|
-
|
|
242
231
|
// Already checked bounds above: thread.pc < instructions.len()
|
|
243
232
|
is_seen := (seen.*)(thread.pc);
|
|
244
233
|
cond(
|
|
245
|
-
is_seen => {
|
|
234
|
+
is_seen => {
|
|
235
|
+
return(());
|
|
236
|
+
},
|
|
246
237
|
true => ()
|
|
247
238
|
);
|
|
248
239
|
&((seen.*)(thread.pc)).* = true;
|
|
249
|
-
|
|
250
240
|
instr := self._program.instructions(thread.pc);
|
|
251
|
-
|
|
252
|
-
|
|
241
|
+
match(
|
|
242
|
+
instr.kind,
|
|
253
243
|
.Split => {
|
|
254
244
|
t1 := thread.fork(instr.target_a);
|
|
255
245
|
t2 := thread.fork(instr.target_b);
|
|
@@ -267,7 +257,7 @@ impl(NfaVm,
|
|
|
267
257
|
},
|
|
268
258
|
true => ()
|
|
269
259
|
);
|
|
270
|
-
new_t := NfaThread(pc: (thread.pc + usize(1)), slots: thread.slots);
|
|
260
|
+
new_t := NfaThread(pc : (thread.pc + usize(1)), slots : thread.slots);
|
|
271
261
|
recur(self, list, new_t, byte_pos, seen);
|
|
272
262
|
},
|
|
273
263
|
.AssertStart => {
|
|
@@ -278,7 +268,7 @@ impl(NfaVm,
|
|
|
278
268
|
);
|
|
279
269
|
cond(
|
|
280
270
|
passes => {
|
|
281
|
-
new_t := thread.fork(
|
|
271
|
+
new_t := thread.fork(thread.pc + usize(1));
|
|
282
272
|
recur(self, list, new_t, byte_pos, seen);
|
|
283
273
|
},
|
|
284
274
|
true => ()
|
|
@@ -292,7 +282,7 @@ impl(NfaVm,
|
|
|
292
282
|
);
|
|
293
283
|
cond(
|
|
294
284
|
passes => {
|
|
295
|
-
new_t := thread.fork(
|
|
285
|
+
new_t := thread.fork(thread.pc + usize(1));
|
|
296
286
|
recur(self, list, new_t, byte_pos, seen);
|
|
297
287
|
},
|
|
298
288
|
true => ()
|
|
@@ -301,7 +291,7 @@ impl(NfaVm,
|
|
|
301
291
|
.AssertWordBoundary => {
|
|
302
292
|
cond(
|
|
303
293
|
self._is_word_boundary(byte_pos) => {
|
|
304
|
-
new_t := thread.fork(
|
|
294
|
+
new_t := thread.fork(thread.pc + usize(1));
|
|
305
295
|
recur(self, list, new_t, byte_pos, seen);
|
|
306
296
|
},
|
|
307
297
|
true => ()
|
|
@@ -310,7 +300,7 @@ impl(NfaVm,
|
|
|
310
300
|
.AssertNonWordBoundary => {
|
|
311
301
|
cond(
|
|
312
302
|
(!(self._is_word_boundary(byte_pos))) => {
|
|
313
|
-
new_t := thread.fork(
|
|
303
|
+
new_t := thread.fork(thread.pc + usize(1));
|
|
314
304
|
recur(self, list, new_t, byte_pos, seen);
|
|
315
305
|
},
|
|
316
306
|
true => ()
|
|
@@ -322,77 +312,79 @@ impl(NfaVm,
|
|
|
322
312
|
);
|
|
323
313
|
})
|
|
324
314
|
);
|
|
325
|
-
|
|
326
315
|
// Block 4: Sub-VM for lookahead/lookbehind (depends on Block 1+2+3)
|
|
327
|
-
impl(
|
|
316
|
+
impl(
|
|
317
|
+
NfaVm,
|
|
328
318
|
// Run a sub-VM starting at sub_start_pc from start_byte.
|
|
329
319
|
// If required_end is not UNSET, only succeed when Match is found at exactly required_end.
|
|
330
320
|
_run_sub_vm : (fn(self : Self, sub_start_pc : usize, start_byte : usize, required_end : usize) -> bool)({
|
|
331
321
|
unset := usize.MAX;
|
|
332
322
|
sub_current := ArrayList(NfaThread).new();
|
|
333
323
|
sub_next := ArrayList(NfaThread).new();
|
|
334
|
-
|
|
335
324
|
sub_seen := ArrayList(bool).with_capacity(self._program.instructions.len());
|
|
336
325
|
sub_seen.resize_with_byte(self._program.instructions.len(), int(0));
|
|
337
|
-
|
|
338
326
|
initial := NfaThread.new(sub_start_pc, self._n_slots);
|
|
339
327
|
self._add_thread(&(sub_current), initial, start_byte, &(sub_seen));
|
|
340
|
-
|
|
341
328
|
sub_pos := start_byte;
|
|
342
329
|
input_len := self._bytes.len();
|
|
343
|
-
|
|
344
|
-
while (sub_pos <= input_len), {
|
|
330
|
+
while(sub_pos <= input_len, {
|
|
345
331
|
cond(
|
|
346
|
-
(sub_current.len() == usize(0)) => {
|
|
332
|
+
(sub_current.len() == usize(0)) => {
|
|
333
|
+
break;
|
|
334
|
+
},
|
|
347
335
|
true => ()
|
|
348
336
|
);
|
|
349
|
-
|
|
350
337
|
// Check for Match in current threads
|
|
351
338
|
st := usize(0);
|
|
352
|
-
while
|
|
339
|
+
while(st < sub_current.len(), st = (st + usize(1)), {
|
|
353
340
|
st_thread := sub_current.get(st).unwrap();
|
|
354
341
|
st_instr := self._program.instructions.get(st_thread.pc).unwrap();
|
|
355
|
-
match(
|
|
342
|
+
match(
|
|
343
|
+
st_instr.kind,
|
|
356
344
|
.Match => {
|
|
357
345
|
cond(
|
|
358
|
-
(required_end == unset) => {
|
|
359
|
-
|
|
346
|
+
(required_end == unset) => {
|
|
347
|
+
return(true);
|
|
348
|
+
},
|
|
349
|
+
(sub_pos == required_end) => {
|
|
350
|
+
return(true);
|
|
351
|
+
},
|
|
360
352
|
true => ()
|
|
361
353
|
);
|
|
362
354
|
},
|
|
363
355
|
_ => ()
|
|
364
356
|
);
|
|
365
|
-
};
|
|
366
|
-
|
|
357
|
+
});
|
|
367
358
|
cond(
|
|
368
|
-
(sub_pos >= input_len) => {
|
|
359
|
+
(sub_pos >= input_len) => {
|
|
360
|
+
break;
|
|
361
|
+
},
|
|
369
362
|
true => ()
|
|
370
363
|
);
|
|
371
|
-
|
|
372
364
|
// Stop if we've passed the required end position
|
|
373
365
|
cond(
|
|
374
|
-
((required_end != unset) && (sub_pos > required_end)) => {
|
|
366
|
+
((required_end != unset) && (sub_pos > required_end)) => {
|
|
367
|
+
break;
|
|
368
|
+
},
|
|
375
369
|
true => ()
|
|
376
370
|
);
|
|
377
|
-
|
|
378
371
|
decoded := self._decode_codepoint(sub_pos);
|
|
379
372
|
sub_cp := decoded.codepoint;
|
|
380
373
|
sub_blen := decoded.byte_len;
|
|
381
|
-
|
|
382
374
|
// Clear seen
|
|
383
375
|
sub_seen.fill_with_byte(int(0));
|
|
384
|
-
|
|
385
376
|
// Process consuming instructions
|
|
386
377
|
st2 := usize(0);
|
|
387
|
-
while
|
|
378
|
+
while(st2 < sub_current.len(), st2 = (st2 + usize(1)), {
|
|
388
379
|
st_thread := sub_current.get(st2).unwrap();
|
|
389
380
|
st_instr := self._program.instructions.get(st_thread.pc).unwrap();
|
|
390
|
-
match(
|
|
381
|
+
match(
|
|
382
|
+
st_instr.kind,
|
|
391
383
|
.Char => {
|
|
392
384
|
cond(
|
|
393
385
|
self._char_matches(st_instr.codepoint, sub_cp) => {
|
|
394
|
-
new_t := st_thread.fork(
|
|
395
|
-
self._add_thread(&(sub_next), new_t,
|
|
386
|
+
new_t := st_thread.fork(st_thread.pc + usize(1));
|
|
387
|
+
self._add_thread(&(sub_next), new_t, sub_pos + sub_blen, &(sub_seen));
|
|
396
388
|
},
|
|
397
389
|
true => ()
|
|
398
390
|
);
|
|
@@ -404,20 +396,21 @@ impl(NfaVm,
|
|
|
404
396
|
);
|
|
405
397
|
cond(
|
|
406
398
|
should_match => {
|
|
407
|
-
new_t := st_thread.fork(
|
|
408
|
-
self._add_thread(&(sub_next), new_t,
|
|
399
|
+
new_t := st_thread.fork(st_thread.pc + usize(1));
|
|
400
|
+
self._add_thread(&(sub_next), new_t, sub_pos + sub_blen, &(sub_seen));
|
|
409
401
|
},
|
|
410
402
|
true => ()
|
|
411
403
|
);
|
|
412
404
|
},
|
|
413
405
|
.CharClass => {
|
|
414
406
|
class_opt := self._program.classes.get(st_instr.class_idx);
|
|
415
|
-
match(
|
|
407
|
+
match(
|
|
408
|
+
class_opt,
|
|
416
409
|
.Some(cls) => {
|
|
417
410
|
cond(
|
|
418
411
|
self._codepoint_in_class(sub_cp, cls) => {
|
|
419
|
-
new_t := st_thread.fork(
|
|
420
|
-
self._add_thread(&(sub_next), new_t,
|
|
412
|
+
new_t := st_thread.fork(st_thread.pc + usize(1));
|
|
413
|
+
self._add_thread(&(sub_next), new_t, sub_pos + sub_blen, &(sub_seen));
|
|
421
414
|
},
|
|
422
415
|
true => ()
|
|
423
416
|
);
|
|
@@ -427,22 +420,20 @@ impl(NfaVm,
|
|
|
427
420
|
},
|
|
428
421
|
_ => ()
|
|
429
422
|
);
|
|
430
|
-
};
|
|
431
|
-
|
|
423
|
+
});
|
|
432
424
|
// Swap sub_current/sub_next and clear for reuse
|
|
433
425
|
tmp_sub := sub_current;
|
|
434
426
|
sub_current = sub_next;
|
|
435
427
|
sub_next = tmp_sub;
|
|
436
428
|
sub_next.clear();
|
|
437
429
|
sub_pos = (sub_pos + sub_blen);
|
|
438
|
-
};
|
|
439
|
-
|
|
430
|
+
});
|
|
440
431
|
false
|
|
441
432
|
})
|
|
442
433
|
);
|
|
443
|
-
|
|
444
434
|
// Block 5: exec_at (depends on Block 1+2+3+4)
|
|
445
|
-
impl(
|
|
435
|
+
impl(
|
|
436
|
+
NfaVm,
|
|
446
437
|
exec_at : (fn(self : Self, start_byte : usize) -> VmMatch)({
|
|
447
438
|
// Reuse pre-allocated seen buffers from the VM
|
|
448
439
|
current := ArrayList(NfaThread).new();
|
|
@@ -450,24 +441,19 @@ impl(NfaVm,
|
|
|
450
441
|
seen := self._seen;
|
|
451
442
|
next_seen := self._next_seen;
|
|
452
443
|
deferred := ArrayList(DeferredThread).new();
|
|
453
|
-
|
|
454
444
|
// Clear for this execution
|
|
455
445
|
seen.fill_with_byte(int(0));
|
|
456
446
|
next_seen.fill_with_byte(int(0));
|
|
457
|
-
|
|
458
447
|
initial := NfaThread.new(usize(0), self._n_slots);
|
|
459
448
|
self._add_thread(&(current), initial, start_byte, &(seen));
|
|
460
|
-
|
|
461
|
-
best_match := VmMatch(matched: false, slots: ArrayList(usize).new());
|
|
449
|
+
best_match := VmMatch(matched : false, slots : ArrayList(usize).new());
|
|
462
450
|
byte_pos := start_byte;
|
|
463
451
|
input_len := self._bytes.len();
|
|
464
452
|
unset := usize.MAX;
|
|
465
|
-
|
|
466
|
-
while (byte_pos <= input_len), {
|
|
453
|
+
while(byte_pos <= input_len, {
|
|
467
454
|
(cur_cp : u32) = u32(0);
|
|
468
455
|
(char_byte_len : usize) = usize(0);
|
|
469
456
|
(at_end : bool) = (byte_pos >= input_len);
|
|
470
|
-
|
|
471
457
|
cond(
|
|
472
458
|
(!(at_end)) => {
|
|
473
459
|
decoded := self._decode_codepoint(byte_pos);
|
|
@@ -476,15 +462,13 @@ impl(NfaVm,
|
|
|
476
462
|
},
|
|
477
463
|
true => ()
|
|
478
464
|
);
|
|
479
|
-
|
|
480
465
|
// Clear seen flags for this generation using memset
|
|
481
466
|
seen.fill_with_byte(int(0));
|
|
482
467
|
next_seen.fill_with_byte(int(0));
|
|
483
|
-
|
|
484
468
|
// Process deferred threads targeting this byte_pos
|
|
485
469
|
new_deferred := ArrayList(DeferredThread).new();
|
|
486
470
|
di := usize(0);
|
|
487
|
-
while
|
|
471
|
+
while(di < deferred.len(), di = (di + usize(1)), {
|
|
488
472
|
d := deferred.get(di).unwrap();
|
|
489
473
|
cond(
|
|
490
474
|
(d.target_byte_pos == byte_pos) => {
|
|
@@ -494,28 +478,28 @@ impl(NfaVm,
|
|
|
494
478
|
new_deferred.push(d);
|
|
495
479
|
}
|
|
496
480
|
);
|
|
497
|
-
};
|
|
481
|
+
});
|
|
498
482
|
deferred = new_deferred;
|
|
499
|
-
|
|
500
483
|
// Break if no threads and no deferred
|
|
501
484
|
cond(
|
|
502
|
-
((current.len() == usize(0)) && (deferred.len() == usize(0))) => {
|
|
485
|
+
((current.len() == usize(0)) && (deferred.len() == usize(0))) => {
|
|
486
|
+
break;
|
|
487
|
+
},
|
|
503
488
|
true => ()
|
|
504
489
|
);
|
|
505
|
-
|
|
506
490
|
// Process each thread (priority order — first match wins in gen)
|
|
507
491
|
(found_match_in_gen : bool) = false;
|
|
508
492
|
t := usize(0);
|
|
509
|
-
while
|
|
493
|
+
while((t < current.len()) && (!(found_match_in_gen)), t = (t + usize(1)), {
|
|
510
494
|
thread := current.get(t).unwrap();
|
|
511
495
|
instr := self._program.instructions.get(thread.pc).unwrap();
|
|
512
|
-
|
|
513
|
-
|
|
496
|
+
match(
|
|
497
|
+
instr.kind,
|
|
514
498
|
.Char => {
|
|
515
499
|
cond(
|
|
516
500
|
((!(at_end)) && (self._char_matches(instr.codepoint, cur_cp))) => {
|
|
517
|
-
new_thread := thread.fork(
|
|
518
|
-
self._add_thread(&(next), new_thread,
|
|
501
|
+
new_thread := thread.fork(thread.pc + usize(1));
|
|
502
|
+
self._add_thread(&(next), new_thread, byte_pos + char_byte_len, &(next_seen));
|
|
519
503
|
},
|
|
520
504
|
true => ()
|
|
521
505
|
);
|
|
@@ -529,8 +513,8 @@ impl(NfaVm,
|
|
|
529
513
|
);
|
|
530
514
|
cond(
|
|
531
515
|
should_match => {
|
|
532
|
-
new_thread := thread.fork(
|
|
533
|
-
self._add_thread(&(next), new_thread,
|
|
516
|
+
new_thread := thread.fork(thread.pc + usize(1));
|
|
517
|
+
self._add_thread(&(next), new_thread, byte_pos + char_byte_len, &(next_seen));
|
|
534
518
|
},
|
|
535
519
|
true => ()
|
|
536
520
|
);
|
|
@@ -542,12 +526,13 @@ impl(NfaVm,
|
|
|
542
526
|
cond(
|
|
543
527
|
(!(at_end)) => {
|
|
544
528
|
class_opt := self._program.classes.get(instr.class_idx);
|
|
545
|
-
match(
|
|
529
|
+
match(
|
|
530
|
+
class_opt,
|
|
546
531
|
.Some(cls) => {
|
|
547
532
|
cond(
|
|
548
533
|
self._codepoint_in_class(cur_cp, cls) => {
|
|
549
|
-
new_thread := thread.fork(
|
|
550
|
-
self._add_thread(&(next), new_thread,
|
|
534
|
+
new_thread := thread.fork(thread.pc + usize(1));
|
|
535
|
+
self._add_thread(&(next), new_thread, byte_pos + char_byte_len, &(next_seen));
|
|
551
536
|
},
|
|
552
537
|
true => ()
|
|
553
538
|
);
|
|
@@ -573,9 +558,9 @@ impl(NfaVm,
|
|
|
573
558
|
// Compare captured bytes against input at current position
|
|
574
559
|
(bytes_match : bool) = true;
|
|
575
560
|
bi := usize(0);
|
|
576
|
-
while
|
|
577
|
-
expected := self._bytes.get(
|
|
578
|
-
actual := self._bytes.get(
|
|
561
|
+
while(bi < captured_len, bi = (bi + usize(1)), {
|
|
562
|
+
expected := self._bytes.get(gs + bi).unwrap();
|
|
563
|
+
actual := self._bytes.get(byte_pos + bi).unwrap();
|
|
579
564
|
cond(
|
|
580
565
|
self._flags.ignore_case => {
|
|
581
566
|
el := cond(
|
|
@@ -587,22 +572,26 @@ impl(NfaVm,
|
|
|
587
572
|
true => actual
|
|
588
573
|
);
|
|
589
574
|
cond(
|
|
590
|
-
(el != al) => {
|
|
575
|
+
(el != al) => {
|
|
576
|
+
bytes_match = false;
|
|
577
|
+
},
|
|
591
578
|
true => ()
|
|
592
579
|
);
|
|
593
580
|
},
|
|
594
581
|
true => {
|
|
595
582
|
cond(
|
|
596
|
-
(expected != actual) => {
|
|
583
|
+
(expected != actual) => {
|
|
584
|
+
bytes_match = false;
|
|
585
|
+
},
|
|
597
586
|
true => ()
|
|
598
587
|
);
|
|
599
588
|
}
|
|
600
589
|
);
|
|
601
|
-
};
|
|
590
|
+
});
|
|
602
591
|
cond(
|
|
603
592
|
bytes_match => {
|
|
604
593
|
new_pos := (byte_pos + captured_len);
|
|
605
|
-
new_thread := thread.fork(
|
|
594
|
+
new_thread := thread.fork(thread.pc + usize(1));
|
|
606
595
|
cond(
|
|
607
596
|
(captured_len == usize(0)) => {
|
|
608
597
|
// Empty capture — epsilon-like, process in current gen
|
|
@@ -610,7 +599,7 @@ impl(NfaVm,
|
|
|
610
599
|
},
|
|
611
600
|
true => {
|
|
612
601
|
// Defer to the target byte position
|
|
613
|
-
deferred.push(DeferredThread(target_byte_pos: new_pos, thread: new_thread));
|
|
602
|
+
deferred.push(DeferredThread(target_byte_pos : new_pos, thread : new_thread));
|
|
614
603
|
}
|
|
615
604
|
);
|
|
616
605
|
},
|
|
@@ -645,7 +634,6 @@ impl(NfaVm,
|
|
|
645
634
|
sub_start := instr.target_a;
|
|
646
635
|
positive := (instr.slot == usize(1));
|
|
647
636
|
(lb_matched : bool) = false;
|
|
648
|
-
|
|
649
637
|
// Try each byte position backwards from byte_pos
|
|
650
638
|
cond(
|
|
651
639
|
(byte_pos == usize(0)) => {
|
|
@@ -660,7 +648,7 @@ impl(NfaVm,
|
|
|
660
648
|
true => {
|
|
661
649
|
// Try from byte_pos backwards to 0
|
|
662
650
|
(try_pos : usize) = byte_pos;
|
|
663
|
-
while
|
|
651
|
+
while(!(lb_matched), {
|
|
664
652
|
cond(
|
|
665
653
|
(try_pos == usize(0)) => {
|
|
666
654
|
cond(
|
|
@@ -677,10 +665,10 @@ impl(NfaVm,
|
|
|
677
665
|
cond(
|
|
678
666
|
(try_pos > usize(0)) => {
|
|
679
667
|
tb := self._bytes.get(try_pos).unwrap();
|
|
680
|
-
while
|
|
668
|
+
while(((tb >= u8(0x80)) && (tb < u8(0xC0))) && (try_pos > usize(0)), {
|
|
681
669
|
try_pos = (try_pos - usize(1));
|
|
682
670
|
tb = self._bytes.get(try_pos).unwrap();
|
|
683
|
-
};
|
|
671
|
+
});
|
|
684
672
|
},
|
|
685
673
|
true => ()
|
|
686
674
|
);
|
|
@@ -692,10 +680,9 @@ impl(NfaVm,
|
|
|
692
680
|
);
|
|
693
681
|
}
|
|
694
682
|
);
|
|
695
|
-
};
|
|
683
|
+
});
|
|
696
684
|
}
|
|
697
685
|
);
|
|
698
|
-
|
|
699
686
|
cond(
|
|
700
687
|
(lb_matched == positive) => {
|
|
701
688
|
sub_end := instr.target_b;
|
|
@@ -707,38 +694,35 @@ impl(NfaVm,
|
|
|
707
694
|
},
|
|
708
695
|
.Match => {
|
|
709
696
|
// Record match and kill lower-priority threads
|
|
710
|
-
best_match = VmMatch(matched: true, slots: thread.slots);
|
|
697
|
+
best_match = VmMatch(matched : true, slots : thread.slots);
|
|
711
698
|
found_match_in_gen = true;
|
|
712
699
|
},
|
|
713
700
|
_ => ()
|
|
714
701
|
);
|
|
715
|
-
};
|
|
716
|
-
|
|
702
|
+
});
|
|
717
703
|
// Swap current/next and clear for reuse
|
|
718
704
|
tmp_c := current;
|
|
719
705
|
current = next;
|
|
720
706
|
next = tmp_c;
|
|
721
707
|
next.clear();
|
|
722
|
-
|
|
723
708
|
cond(
|
|
724
|
-
at_end => {
|
|
709
|
+
at_end => {
|
|
710
|
+
break;
|
|
711
|
+
},
|
|
725
712
|
true => {
|
|
726
713
|
byte_pos = (byte_pos + char_byte_len);
|
|
727
714
|
}
|
|
728
715
|
);
|
|
729
|
-
};
|
|
730
|
-
|
|
716
|
+
});
|
|
731
717
|
// Store seen buffers back for reuse in next exec_at call
|
|
732
718
|
self._seen = seen;
|
|
733
719
|
self._next_seen = next_seen;
|
|
734
|
-
|
|
735
720
|
best_match
|
|
736
721
|
})
|
|
737
722
|
);
|
|
738
|
-
|
|
739
|
-
export
|
|
723
|
+
export(
|
|
740
724
|
NfaVm,
|
|
741
725
|
NfaThread,
|
|
742
726
|
VmMatch,
|
|
743
727
|
DecodedChar
|
|
744
|
-
;
|
|
728
|
+
);
|