@shd101wyy/yo 0.1.25 → 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/skills/yo-async-effects/SKILL.md +4 -4
- package/.github/skills/yo-async-effects/async-effects-recipes.md +40 -40
- package/.github/skills/yo-core-patterns/SKILL.md +1 -1
- package/.github/skills/yo-core-patterns/core-patterns-cheatsheet.md +30 -26
- package/.github/skills/yo-project-workflow/SKILL.md +6 -3
- package/.github/skills/yo-project-workflow/workflow-cheatsheet.md +34 -11
- package/.github/skills/yo-syntax/SKILL.md +7 -6
- package/.github/skills/yo-syntax/syntax-cheatsheet.md +78 -60
- package/.github/skills/yo-wasm-integration/wasm-integration-cheatsheet.md +3 -3
- package/README.md +10 -8
- package/out/cjs/index.cjs +583 -567
- package/out/cjs/yo-cli.cjs +664 -632
- package/out/cjs/yo-lsp.cjs +510 -485
- package/out/esm/index.mjs +538 -522
- package/out/types/src/codegen/codegen-c.d.ts +2 -2
- package/out/types/src/codegen/functions/collection.d.ts +2 -2
- package/out/types/src/codegen/functions/context.d.ts +3 -2
- package/out/types/src/codegen/types/collection.d.ts +2 -2
- package/out/types/src/codegen/utils/index.d.ts +3 -1
- package/out/types/src/doc/builder.d.ts +2 -2
- package/out/types/src/evaluator/calls/closure-type.d.ts +2 -2
- package/out/types/src/evaluator/calls/record-type.d.ts +11 -0
- package/out/types/src/evaluator/context.d.ts +8 -9
- package/out/types/src/evaluator/index.d.ts +3 -3
- package/out/types/src/evaluator/types/record.d.ts +14 -0
- package/out/types/src/evaluator/types/validation.d.ts +2 -2
- package/out/types/src/evaluator/values/anonymous-module.d.ts +5 -5
- package/out/types/src/evaluator/values/impl.d.ts +1 -1
- package/out/types/src/expr.d.ts +1 -4
- package/out/types/src/formatter.d.ts +11 -0
- package/out/types/src/function-value.d.ts +1 -1
- package/out/types/src/lsp/document-manager.d.ts +1 -1
- package/out/types/src/lsp/formatting.d.ts +2 -0
- package/out/types/src/module-manager.d.ts +3 -3
- package/out/types/src/tests/formatter.test.d.ts +1 -0
- package/out/types/src/types/creators.d.ts +3 -4
- package/out/types/src/types/definitions.d.ts +8 -19
- package/out/types/src/types/guards.d.ts +3 -3
- package/out/types/src/types/tags.d.ts +0 -1
- package/out/types/src/types/utils.d.ts +1 -1
- package/out/types/src/value-tag.d.ts +0 -1
- package/out/types/src/value.d.ts +6 -13
- package/out/types/tsconfig.tsbuildinfo +1 -1
- package/package.json +1 -1
- package/std/alg/hash.yo +13 -21
- package/std/allocator.yo +25 -40
- package/std/async.yo +3 -7
- package/std/build.yo +105 -151
- package/std/cli/arg_parser.yo +184 -169
- package/std/collections/array_list.yo +350 -314
- package/std/collections/btree_map.yo +142 -131
- package/std/collections/deque.yo +132 -128
- package/std/collections/hash_map.yo +542 -566
- package/std/collections/hash_set.yo +623 -687
- package/std/collections/linked_list.yo +275 -293
- package/std/collections/ordered_map.yo +113 -85
- package/std/collections/priority_queue.yo +73 -73
- package/std/crypto/md5.yo +191 -95
- package/std/crypto/random.yo +56 -64
- package/std/crypto/sha256.yo +151 -107
- package/std/encoding/base64.yo +87 -81
- package/std/encoding/hex.yo +43 -50
- package/std/encoding/html.yo +56 -81
- package/std/encoding/html_char_utils.yo +7 -13
- package/std/encoding/html_entities.yo +2248 -2253
- package/std/encoding/json.yo +316 -224
- package/std/encoding/punycode.yo +86 -116
- package/std/encoding/toml.yo +67 -66
- package/std/encoding/utf16.yo +37 -44
- package/std/env.yo +62 -91
- package/std/error.yo +12 -20
- package/std/fmt/display.yo +5 -9
- package/std/fmt/index.yo +8 -14
- package/std/fmt/to_string.yo +330 -315
- package/std/fmt/writer.yo +58 -87
- package/std/fs/dir.yo +83 -102
- package/std/fs/file.yo +147 -180
- package/std/fs/metadata.yo +45 -78
- package/std/fs/temp.yo +55 -65
- package/std/fs/types.yo +27 -40
- package/std/fs/walker.yo +53 -68
- package/std/gc.yo +5 -8
- package/std/glob.yo +30 -43
- package/std/http/client.yo +107 -120
- package/std/http/http.yo +106 -96
- package/std/http/index.yo +4 -6
- package/std/imm/list.yo +88 -93
- package/std/imm/map.yo +528 -464
- package/std/imm/set.yo +52 -57
- package/std/imm/sorted_map.yo +340 -286
- package/std/imm/sorted_set.yo +57 -63
- package/std/imm/string.yo +404 -345
- package/std/imm/vec.yo +173 -181
- package/std/io/reader.yo +3 -6
- package/std/io/writer.yo +4 -8
- package/std/libc/assert.yo +5 -9
- package/std/libc/ctype.yo +32 -22
- package/std/libc/dirent.yo +26 -25
- package/std/libc/errno.yo +164 -90
- package/std/libc/fcntl.yo +52 -45
- package/std/libc/float.yo +66 -44
- package/std/libc/limits.yo +42 -33
- package/std/libc/math.yo +53 -82
- package/std/libc/signal.yo +72 -47
- package/std/libc/stdatomic.yo +217 -188
- package/std/libc/stdint.yo +5 -29
- package/std/libc/stdio.yo +5 -29
- package/std/libc/stdlib.yo +32 -39
- package/std/libc/string.yo +5 -23
- package/std/libc/sys/stat.yo +58 -56
- package/std/libc/time.yo +5 -19
- package/std/libc/unistd.yo +5 -20
- package/std/libc/wctype.yo +6 -9
- package/std/libc/windows.yo +26 -30
- package/std/log.yo +41 -55
- package/std/net/addr.yo +102 -97
- package/std/net/dns.yo +27 -28
- package/std/net/errors.yo +50 -49
- package/std/net/tcp.yo +113 -124
- package/std/net/udp.yo +55 -66
- package/std/os/env.yo +35 -33
- package/std/os/signal.yo +15 -25
- package/std/path.yo +276 -311
- package/std/prelude.yo +6316 -4333
- package/std/process/command.yo +87 -103
- package/std/process/index.yo +12 -31
- package/std/regex/compiler.yo +196 -95
- package/std/regex/flags.yo +58 -39
- package/std/regex/index.yo +157 -173
- package/std/regex/match.yo +20 -31
- package/std/regex/node.yo +134 -152
- package/std/regex/parser.yo +283 -259
- package/std/regex/unicode.yo +172 -202
- package/std/regex/vm.yo +155 -171
- package/std/string/index.yo +5 -7
- package/std/string/rune.yo +45 -55
- package/std/string/string.yo +937 -964
- package/std/string/string_builder.yo +94 -104
- package/std/string/unicode.yo +46 -64
- package/std/sync/channel.yo +72 -73
- package/std/sync/cond.yo +31 -36
- package/std/sync/mutex.yo +30 -32
- package/std/sync/once.yo +13 -16
- package/std/sync/rwlock.yo +26 -31
- package/std/sync/waitgroup.yo +20 -25
- package/std/sys/advise.yo +16 -24
- package/std/sys/bufio/buf_reader.yo +77 -93
- package/std/sys/bufio/buf_writer.yo +52 -65
- package/std/sys/clock.yo +4 -9
- package/std/sys/constants.yo +77 -61
- package/std/sys/copy.yo +4 -10
- package/std/sys/dir.yo +26 -43
- package/std/sys/dns.yo +41 -61
- package/std/sys/errors.yo +95 -103
- package/std/sys/events.yo +45 -57
- package/std/sys/externs.yo +319 -267
- package/std/sys/fallocate.yo +7 -11
- package/std/sys/fcntl.yo +14 -22
- package/std/sys/file.yo +26 -40
- package/std/sys/future.yo +5 -8
- package/std/sys/iov.yo +12 -25
- package/std/sys/lock.yo +12 -13
- package/std/sys/mmap.yo +38 -43
- package/std/sys/path.yo +3 -8
- package/std/sys/perm.yo +7 -21
- package/std/sys/pipe.yo +5 -12
- package/std/sys/process.yo +23 -29
- package/std/sys/seek.yo +10 -12
- package/std/sys/signal.yo +7 -13
- package/std/sys/signals.yo +52 -35
- package/std/sys/socket.yo +63 -58
- package/std/sys/socketpair.yo +3 -6
- package/std/sys/sockinfo.yo +11 -20
- package/std/sys/statfs.yo +11 -34
- package/std/sys/statx.yo +25 -52
- package/std/sys/sysinfo.yo +15 -20
- package/std/sys/tcp.yo +62 -92
- package/std/sys/temp.yo +5 -9
- package/std/sys/time.yo +5 -15
- package/std/sys/timer.yo +6 -11
- package/std/sys/tty.yo +10 -18
- package/std/sys/udp.yo +22 -39
- package/std/sys/umask.yo +3 -6
- package/std/sys/unix.yo +33 -52
- package/std/testing/bench.yo +49 -52
- package/std/thread.yo +10 -15
- package/std/time/datetime.yo +105 -89
- package/std/time/duration.yo +43 -56
- package/std/time/instant.yo +13 -18
- package/std/time/sleep.yo +5 -9
- package/std/url/index.yo +184 -209
- package/std/worker.yo +6 -10
- package/out/types/src/evaluator/calls/module-type.d.ts +0 -11
- package/out/types/src/evaluator/types/module.d.ts +0 -19
package/std/regex/index.yo
CHANGED
|
@@ -1,33 +1,33 @@
|
|
|
1
1
|
//! Regular expression engine with an NFA-based virtual machine.
|
|
2
|
-
|
|
3
|
-
open
|
|
4
|
-
|
|
5
|
-
{
|
|
6
|
-
{
|
|
7
|
-
{
|
|
8
|
-
{
|
|
9
|
-
{ RegexMatch } :: import "./match.yo";
|
|
10
|
-
|
|
2
|
+
open(import("std/collections/array_list"));
|
|
3
|
+
open(import("std/string"));
|
|
4
|
+
{ RegexParser } :: import("./parser.yo");
|
|
5
|
+
{ NfaCompiler, NfaProgram, Instr, InstrKind, ClassEntry, GroupNameEntry } :: import("./compiler.yo");
|
|
6
|
+
{ NfaVm, VmMatch } :: import("./vm.yo");
|
|
7
|
+
{ RegexFlags } :: import("./flags.yo");
|
|
8
|
+
{ RegexMatch } :: import("./match.yo");
|
|
11
9
|
/// Compiled regular expression backed by an NFA program.
|
|
12
10
|
Regex :: object(
|
|
13
|
-
_program
|
|
14
|
-
_flags
|
|
15
|
-
_pattern
|
|
16
|
-
_n_groups
|
|
11
|
+
_program : NfaProgram,
|
|
12
|
+
_flags : RegexFlags,
|
|
13
|
+
_pattern : String,
|
|
14
|
+
_n_groups : usize,
|
|
17
15
|
_group_names : ArrayList(GroupNameEntry)
|
|
18
16
|
);
|
|
19
|
-
|
|
20
17
|
// Block 1: Constructor and leaf methods
|
|
21
|
-
impl(
|
|
18
|
+
impl(
|
|
19
|
+
Regex,
|
|
22
20
|
new : (fn(pattern : String, flags_str : String) -> Result(Self, String))({
|
|
23
21
|
flags_result := RegexFlags.parse(flags_str);
|
|
24
|
-
match(
|
|
25
|
-
|
|
22
|
+
match(
|
|
23
|
+
flags_result,
|
|
24
|
+
.Err(e) =>.Err(e),
|
|
26
25
|
.Ok(flags) => {
|
|
27
26
|
parser := RegexParser.new(pattern);
|
|
28
27
|
ast_result := parser.parse();
|
|
29
|
-
match(
|
|
30
|
-
|
|
28
|
+
match(
|
|
29
|
+
ast_result,
|
|
30
|
+
.Err(e) =>.Err(e),
|
|
31
31
|
.Ok(ast) => {
|
|
32
32
|
n_groups := parser.group_count();
|
|
33
33
|
gnames := parser.group_names();
|
|
@@ -35,39 +35,40 @@ impl(Regex,
|
|
|
35
35
|
program := compiler.compile(ast, n_groups, gnames);
|
|
36
36
|
// Literal prefix scan is case-sensitive; disable when ignoreCase
|
|
37
37
|
cond(
|
|
38
|
-
flags.ignore_case => {
|
|
38
|
+
flags.ignore_case => {
|
|
39
|
+
program.literal_prefix = ArrayList(u8).new();
|
|
40
|
+
},
|
|
39
41
|
true => ()
|
|
40
42
|
);
|
|
41
|
-
.Ok(
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
43
|
+
.Ok(
|
|
44
|
+
Self(
|
|
45
|
+
_program : program,
|
|
46
|
+
_flags : flags,
|
|
47
|
+
_pattern : pattern,
|
|
48
|
+
_n_groups : n_groups,
|
|
49
|
+
_group_names : gnames
|
|
50
|
+
)
|
|
51
|
+
)
|
|
48
52
|
}
|
|
49
53
|
)
|
|
50
54
|
}
|
|
51
55
|
)
|
|
52
56
|
}),
|
|
53
|
-
|
|
54
57
|
source : (fn(self : Self) -> String)(
|
|
55
58
|
self._pattern
|
|
56
59
|
),
|
|
57
|
-
|
|
58
60
|
_extract_substring : (fn(self : Self, bytes : ArrayList(u8), start : usize, end_pos : usize) -> String)({
|
|
59
|
-
result_bytes := ArrayList(u8).with_capacity(
|
|
61
|
+
result_bytes := ArrayList(u8).with_capacity(end_pos - start);
|
|
60
62
|
i := start;
|
|
61
|
-
while
|
|
63
|
+
while(i < end_pos, i = (i + usize(1)), {
|
|
62
64
|
result_bytes.push(bytes.get(i).unwrap());
|
|
63
|
-
};
|
|
65
|
+
});
|
|
64
66
|
String.from_bytes(result_bytes)
|
|
65
67
|
}),
|
|
66
|
-
|
|
67
68
|
_byte_to_char_index : (fn(self : Self, bytes : ArrayList(u8), byte_pos : usize) -> usize)({
|
|
68
69
|
char_idx := usize(0);
|
|
69
70
|
i := usize(0);
|
|
70
|
-
while
|
|
71
|
+
while(i < byte_pos, {
|
|
71
72
|
b := bytes.get(i).unwrap();
|
|
72
73
|
char_len := cond(
|
|
73
74
|
(b < u8(0x80)) => usize(1),
|
|
@@ -77,17 +78,15 @@ impl(Regex,
|
|
|
77
78
|
);
|
|
78
79
|
i = (i + char_len);
|
|
79
80
|
char_idx = (char_idx + usize(1));
|
|
80
|
-
};
|
|
81
|
+
});
|
|
81
82
|
char_idx
|
|
82
83
|
}),
|
|
83
|
-
|
|
84
84
|
// Fast-scan: find the next byte position where the literal prefix matches.
|
|
85
85
|
// Returns that byte position (from_byte when the prefix is empty), or input_len + 1 if not found.
|
|
86
86
|
_find_prefix_pos : (fn(self : Self, input_bytes : ArrayList(u8), from_byte : usize) -> usize)({
|
|
87
87
|
prefix := self._program.literal_prefix;
|
|
88
88
|
prefix_len := prefix.len();
|
|
89
89
|
input_len := input_bytes.len();
|
|
90
|
-
|
|
91
90
|
cond(
|
|
92
91
|
(prefix_len == usize(0)) => from_byte,
|
|
93
92
|
(input_len < prefix_len) => (input_len + usize(1)),
|
|
@@ -95,30 +94,34 @@ impl(Regex,
|
|
|
95
94
|
first_byte := prefix.get(usize(0)).unwrap();
|
|
96
95
|
(pos : usize) = from_byte;
|
|
97
96
|
(found : bool) = false;
|
|
98
|
-
|
|
99
|
-
while ((pos <= (input_len - prefix_len)) && (!(found))), {
|
|
97
|
+
while((pos <= (input_len - prefix_len)) && (!(found)), {
|
|
100
98
|
cond(
|
|
101
99
|
(input_bytes.get(pos).unwrap() == first_byte) => {
|
|
102
100
|
// Check remaining prefix bytes
|
|
103
101
|
(match_ok : bool) = true;
|
|
104
102
|
pi := usize(1);
|
|
105
|
-
while
|
|
103
|
+
while((pi < prefix_len) && match_ok, pi = (pi + usize(1)), {
|
|
106
104
|
cond(
|
|
107
|
-
(input_bytes.get(
|
|
105
|
+
(input_bytes.get(pos + pi).unwrap() != prefix.get(pi).unwrap()) => {
|
|
108
106
|
match_ok = false;
|
|
109
107
|
},
|
|
110
108
|
true => ()
|
|
111
109
|
);
|
|
112
|
-
};
|
|
110
|
+
});
|
|
113
111
|
cond(
|
|
114
|
-
match_ok => {
|
|
115
|
-
|
|
112
|
+
match_ok => {
|
|
113
|
+
found = true;
|
|
114
|
+
},
|
|
115
|
+
true => {
|
|
116
|
+
pos = (pos + usize(1));
|
|
117
|
+
}
|
|
116
118
|
);
|
|
117
119
|
},
|
|
118
|
-
true => {
|
|
120
|
+
true => {
|
|
121
|
+
pos = (pos + usize(1));
|
|
122
|
+
}
|
|
119
123
|
);
|
|
120
|
-
};
|
|
121
|
-
|
|
124
|
+
});
|
|
122
125
|
cond(
|
|
123
126
|
found => pos,
|
|
124
127
|
true => (input_len + usize(1))
|
|
@@ -127,25 +130,21 @@ impl(Regex,
|
|
|
127
130
|
)
|
|
128
131
|
})
|
|
129
132
|
);
|
|
130
|
-
|
|
131
133
|
// Block 2: _build_match (depends on Block 1)
|
|
132
|
-
impl(
|
|
134
|
+
impl(
|
|
135
|
+
Regex,
|
|
133
136
|
_build_match : (fn(self : Self, slots : ArrayList(usize), input : String) -> RegexMatch)({
|
|
134
137
|
bytes := input.as_bytes();
|
|
135
138
|
unset := usize.MAX;
|
|
136
|
-
|
|
137
139
|
match_start_byte := slots.get(usize(0)).unwrap();
|
|
138
140
|
match_end_byte := slots.get(usize(1)).unwrap();
|
|
139
|
-
|
|
140
141
|
match_text := self._extract_substring(bytes, match_start_byte, match_end_byte);
|
|
141
142
|
match_char_index := self._byte_to_char_index(bytes, match_start_byte);
|
|
142
|
-
|
|
143
143
|
groups := ArrayList(Option(String)).new();
|
|
144
144
|
g := usize(1);
|
|
145
|
-
while
|
|
145
|
+
while(g <= self._n_groups, g = (g + usize(1)), {
|
|
146
146
|
start_slot := (g * usize(2));
|
|
147
147
|
end_slot := ((g * usize(2)) + usize(1));
|
|
148
|
-
|
|
149
148
|
cond(
|
|
150
149
|
((start_slot < slots.len()) && (end_slot < slots.len())) => {
|
|
151
150
|
gs := slots.get(start_slot).unwrap();
|
|
@@ -164,26 +163,24 @@ impl(Regex,
|
|
|
164
163
|
groups.push(.None);
|
|
165
164
|
}
|
|
166
165
|
);
|
|
167
|
-
};
|
|
168
|
-
|
|
166
|
+
});
|
|
169
167
|
RegexMatch.new(match_text, match_char_index, input, groups, self._group_names)
|
|
170
168
|
})
|
|
171
169
|
);
|
|
172
|
-
|
|
173
170
|
// Block 3: exec, match_all (depend on Block 2)
|
|
174
|
-
impl(
|
|
171
|
+
impl(
|
|
172
|
+
Regex,
|
|
175
173
|
exec : (fn(self : Self, input : String) -> Option(RegexMatch))({
|
|
176
174
|
bytes := input.as_bytes();
|
|
177
175
|
input_len := bytes.len();
|
|
178
176
|
vm := NfaVm.new(self._program, self._flags, input);
|
|
179
|
-
|
|
180
177
|
// Sticky flag: only try matching at position 0
|
|
181
178
|
cond(
|
|
182
179
|
self._flags.sticky => {
|
|
183
180
|
result := vm.exec_at(usize(0));
|
|
184
181
|
cond(
|
|
185
|
-
result.matched
|
|
186
|
-
true
|
|
182
|
+
result.matched =>.Some(self._build_match(result.slots, input)),
|
|
183
|
+
true =>.None
|
|
187
184
|
)
|
|
188
185
|
},
|
|
189
186
|
true => {
|
|
@@ -192,20 +189,19 @@ impl(Regex,
|
|
|
192
189
|
has_prefix => self._find_prefix_pos(bytes, usize(0)),
|
|
193
190
|
true => usize(0)
|
|
194
191
|
);
|
|
195
|
-
|
|
196
|
-
while (byte_pos <= input_len), {
|
|
192
|
+
while(byte_pos <= input_len, {
|
|
197
193
|
result := vm.exec_at(byte_pos);
|
|
198
|
-
|
|
199
194
|
cond(
|
|
200
195
|
result.matched => {
|
|
201
196
|
m := self._build_match(result.slots, input);
|
|
202
|
-
return
|
|
197
|
+
return(.Some(m));
|
|
203
198
|
},
|
|
204
199
|
true => ()
|
|
205
200
|
);
|
|
206
|
-
|
|
207
201
|
cond(
|
|
208
|
-
(byte_pos >= input_len) => {
|
|
202
|
+
(byte_pos >= input_len) => {
|
|
203
|
+
break;
|
|
204
|
+
},
|
|
209
205
|
true => {
|
|
210
206
|
b := bytes.get(byte_pos).unwrap();
|
|
211
207
|
char_len := cond(
|
|
@@ -221,26 +217,22 @@ impl(Regex,
|
|
|
221
217
|
);
|
|
222
218
|
}
|
|
223
219
|
);
|
|
224
|
-
};
|
|
225
|
-
|
|
220
|
+
});
|
|
226
221
|
.None
|
|
227
222
|
}
|
|
228
223
|
)
|
|
229
224
|
}),
|
|
230
|
-
|
|
231
225
|
match_all : (fn(self : Self, input : String) -> ArrayList(RegexMatch))({
|
|
232
226
|
matches := ArrayList(RegexMatch).new();
|
|
233
227
|
bytes := input.as_bytes();
|
|
234
228
|
input_len := bytes.len();
|
|
235
229
|
vm := NfaVm.new(self._program, self._flags, input);
|
|
236
|
-
|
|
237
230
|
cond(
|
|
238
231
|
self._flags.sticky => {
|
|
239
232
|
// Sticky: only try at position 0, then at end of each match
|
|
240
233
|
(byte_pos : usize) = usize(0);
|
|
241
|
-
while
|
|
234
|
+
while(byte_pos <= input_len, {
|
|
242
235
|
result := vm.exec_at(byte_pos);
|
|
243
|
-
|
|
244
236
|
cond(
|
|
245
237
|
result.matched => {
|
|
246
238
|
m := self._build_match(result.slots, input);
|
|
@@ -251,7 +243,9 @@ impl(Regex,
|
|
|
251
243
|
(match_end == match_start) => {
|
|
252
244
|
// Empty match: advance one char to avoid infinite loop
|
|
253
245
|
cond(
|
|
254
|
-
(byte_pos >= input_len) => {
|
|
246
|
+
(byte_pos >= input_len) => {
|
|
247
|
+
break;
|
|
248
|
+
},
|
|
255
249
|
true => {
|
|
256
250
|
b := bytes.get(byte_pos).unwrap();
|
|
257
251
|
char_len := cond(
|
|
@@ -269,9 +263,11 @@ impl(Regex,
|
|
|
269
263
|
}
|
|
270
264
|
);
|
|
271
265
|
},
|
|
272
|
-
true => {
|
|
266
|
+
true => {
|
|
267
|
+
break;
|
|
268
|
+
}
|
|
273
269
|
);
|
|
274
|
-
};
|
|
270
|
+
});
|
|
275
271
|
},
|
|
276
272
|
true => {
|
|
277
273
|
has_prefix := (self._program.literal_prefix.len() > usize(0));
|
|
@@ -279,22 +275,20 @@ impl(Regex,
|
|
|
279
275
|
has_prefix => self._find_prefix_pos(bytes, usize(0)),
|
|
280
276
|
true => usize(0)
|
|
281
277
|
);
|
|
282
|
-
|
|
283
|
-
while (byte_pos <= input_len), {
|
|
278
|
+
while(byte_pos <= input_len, {
|
|
284
279
|
result := vm.exec_at(byte_pos);
|
|
285
|
-
|
|
286
280
|
cond(
|
|
287
281
|
result.matched => {
|
|
288
282
|
m := self._build_match(result.slots, input);
|
|
289
283
|
matches.push(m);
|
|
290
|
-
|
|
291
284
|
match_start := result.slots.get(usize(0)).unwrap();
|
|
292
285
|
match_end := result.slots.get(usize(1)).unwrap();
|
|
293
|
-
|
|
294
286
|
cond(
|
|
295
287
|
(match_end == match_start) => {
|
|
296
288
|
cond(
|
|
297
|
-
(byte_pos >= input_len) => {
|
|
289
|
+
(byte_pos >= input_len) => {
|
|
290
|
+
break;
|
|
291
|
+
},
|
|
298
292
|
true => {
|
|
299
293
|
b := bytes.get(byte_pos).unwrap();
|
|
300
294
|
char_len := cond(
|
|
@@ -321,7 +315,9 @@ impl(Regex,
|
|
|
321
315
|
},
|
|
322
316
|
true => {
|
|
323
317
|
cond(
|
|
324
|
-
(byte_pos >= input_len) => {
|
|
318
|
+
(byte_pos >= input_len) => {
|
|
319
|
+
break;
|
|
320
|
+
},
|
|
325
321
|
true => {
|
|
326
322
|
b := bytes.get(byte_pos).unwrap();
|
|
327
323
|
char_len := cond(
|
|
@@ -339,32 +335,31 @@ impl(Regex,
|
|
|
339
335
|
);
|
|
340
336
|
}
|
|
341
337
|
);
|
|
342
|
-
};
|
|
338
|
+
});
|
|
343
339
|
}
|
|
344
340
|
);
|
|
345
|
-
|
|
346
341
|
matches
|
|
347
342
|
})
|
|
348
343
|
);
|
|
349
|
-
|
|
350
344
|
// Block 4: test, search (depends on Block 3)
|
|
351
|
-
impl(
|
|
345
|
+
impl(
|
|
346
|
+
Regex,
|
|
352
347
|
test : (fn(self : Self, input : String) -> bool)({
|
|
353
348
|
result := self.exec(input);
|
|
354
349
|
result.is_some()
|
|
355
350
|
}),
|
|
356
|
-
|
|
357
351
|
search : (fn(self : Self, input : String) -> Option(usize))({
|
|
358
352
|
result := self.exec(input);
|
|
359
|
-
match(
|
|
360
|
-
|
|
361
|
-
.
|
|
353
|
+
match(
|
|
354
|
+
result,
|
|
355
|
+
.Some(m) =>.Some(m.index()),
|
|
356
|
+
.None =>.None
|
|
362
357
|
)
|
|
363
358
|
})
|
|
364
359
|
);
|
|
365
|
-
|
|
366
360
|
// Block 5: _apply_replacement helper (depends on Block 2)
|
|
367
|
-
impl(
|
|
361
|
+
impl(
|
|
362
|
+
Regex,
|
|
368
363
|
// Process replacement patterns: $& (full match), $0-$9 (groups),
|
|
369
364
|
// ${name} (named groups), $` (pre-match), $' (post-match), $$ (literal $)
|
|
370
365
|
_apply_replacement : (fn(self : Self, replacement : String, m : RegexMatch) -> String)({
|
|
@@ -372,13 +367,12 @@ impl(Regex,
|
|
|
372
367
|
rep_len := rep_bytes.len();
|
|
373
368
|
result := ArrayList(u8).new();
|
|
374
369
|
i := usize(0);
|
|
375
|
-
|
|
376
|
-
while (i < rep_len), {
|
|
370
|
+
while(i < rep_len, {
|
|
377
371
|
b := rep_bytes.get(i).unwrap();
|
|
378
372
|
cond(
|
|
379
373
|
((b == u8(36)) && ((i + usize(1)) < rep_len)) => {
|
|
380
374
|
// '$' character — check next char
|
|
381
|
-
next_b := rep_bytes.get(
|
|
375
|
+
next_b := rep_bytes.get(i + usize(1)).unwrap();
|
|
382
376
|
cond(
|
|
383
377
|
(next_b == u8(36)) => {
|
|
384
378
|
// $$ → literal $
|
|
@@ -389,9 +383,9 @@ impl(Regex,
|
|
|
389
383
|
// $& → full match
|
|
390
384
|
match_bytes := m.value().as_bytes();
|
|
391
385
|
mi := usize(0);
|
|
392
|
-
while
|
|
386
|
+
while(mi < match_bytes.len(), mi = (mi + usize(1)), {
|
|
393
387
|
result.push(match_bytes.get(mi).unwrap());
|
|
394
|
-
};
|
|
388
|
+
});
|
|
395
389
|
i = (i + usize(2));
|
|
396
390
|
},
|
|
397
391
|
(next_b == u8(96)) => {
|
|
@@ -401,7 +395,7 @@ impl(Regex,
|
|
|
401
395
|
char_idx := m.index();
|
|
402
396
|
byte_idx := usize(0);
|
|
403
397
|
ci := usize(0);
|
|
404
|
-
while
|
|
398
|
+
while(ci < char_idx, {
|
|
405
399
|
cb := input_bytes.get(byte_idx).unwrap();
|
|
406
400
|
cbl := cond(
|
|
407
401
|
(cb < u8(0x80)) => usize(1),
|
|
@@ -411,11 +405,11 @@ impl(Regex,
|
|
|
411
405
|
);
|
|
412
406
|
byte_idx = (byte_idx + cbl);
|
|
413
407
|
ci = (ci + usize(1));
|
|
414
|
-
};
|
|
408
|
+
});
|
|
415
409
|
pi := usize(0);
|
|
416
|
-
while
|
|
410
|
+
while(pi < byte_idx, pi = (pi + usize(1)), {
|
|
417
411
|
result.push(input_bytes.get(pi).unwrap());
|
|
418
|
-
};
|
|
412
|
+
});
|
|
419
413
|
i = (i + usize(2));
|
|
420
414
|
},
|
|
421
415
|
(next_b == u8(39)) => {
|
|
@@ -425,7 +419,7 @@ impl(Regex,
|
|
|
425
419
|
char_idx := m.index();
|
|
426
420
|
byte_idx := usize(0);
|
|
427
421
|
ci := usize(0);
|
|
428
|
-
while
|
|
422
|
+
while(ci < char_idx, {
|
|
429
423
|
cb := input_bytes.get(byte_idx).unwrap();
|
|
430
424
|
cbl := cond(
|
|
431
425
|
(cb < u8(0x80)) => usize(1),
|
|
@@ -435,27 +429,28 @@ impl(Regex,
|
|
|
435
429
|
);
|
|
436
430
|
byte_idx = (byte_idx + cbl);
|
|
437
431
|
ci = (ci + usize(1));
|
|
438
|
-
};
|
|
432
|
+
});
|
|
439
433
|
// Advance past the matched text
|
|
440
434
|
match_bytes := m.value().as_bytes();
|
|
441
435
|
byte_idx = (byte_idx + match_bytes.len());
|
|
442
436
|
pi := byte_idx;
|
|
443
|
-
while
|
|
437
|
+
while(pi < input_bytes.len(), pi = (pi + usize(1)), {
|
|
444
438
|
result.push(input_bytes.get(pi).unwrap());
|
|
445
|
-
};
|
|
439
|
+
});
|
|
446
440
|
i = (i + usize(2));
|
|
447
441
|
},
|
|
448
442
|
((next_b >= u8(48)) && (next_b <= u8(57))) => {
|
|
449
443
|
// $0-$9 → group reference
|
|
450
|
-
group_idx := usize(
|
|
444
|
+
group_idx := usize(next_b - u8(48));
|
|
451
445
|
grp := m.group(group_idx);
|
|
452
|
-
match(
|
|
446
|
+
match(
|
|
447
|
+
grp,
|
|
453
448
|
.Some(g) => {
|
|
454
449
|
g_bytes := g.as_bytes();
|
|
455
450
|
gi := usize(0);
|
|
456
|
-
while
|
|
451
|
+
while(gi < g_bytes.len(), gi = (gi + usize(1)), {
|
|
457
452
|
result.push(g_bytes.get(gi).unwrap());
|
|
458
|
-
};
|
|
453
|
+
});
|
|
459
454
|
},
|
|
460
455
|
.None => ()
|
|
461
456
|
);
|
|
@@ -466,7 +461,7 @@ impl(Regex,
|
|
|
466
461
|
name_start := (i + usize(2));
|
|
467
462
|
(name_end : usize) = name_start;
|
|
468
463
|
(found_close : bool) = false;
|
|
469
|
-
while
|
|
464
|
+
while(name_end < rep_len, {
|
|
470
465
|
nb := rep_bytes.get(name_end).unwrap();
|
|
471
466
|
cond(
|
|
472
467
|
(nb == u8(125)) => {
|
|
@@ -477,23 +472,24 @@ impl(Regex,
|
|
|
477
472
|
name_end = (name_end + usize(1));
|
|
478
473
|
}
|
|
479
474
|
);
|
|
480
|
-
};
|
|
475
|
+
});
|
|
481
476
|
cond(
|
|
482
477
|
found_close => {
|
|
483
|
-
name_bytes := ArrayList(u8).with_capacity(
|
|
478
|
+
name_bytes := ArrayList(u8).with_capacity(name_end - name_start);
|
|
484
479
|
ni := name_start;
|
|
485
|
-
while
|
|
480
|
+
while(ni < name_end, ni = (ni + usize(1)), {
|
|
486
481
|
name_bytes.push(rep_bytes.get(ni).unwrap());
|
|
487
|
-
};
|
|
482
|
+
});
|
|
488
483
|
name := String.from_bytes(name_bytes);
|
|
489
484
|
grp := m.named_group(name);
|
|
490
|
-
match(
|
|
485
|
+
match(
|
|
486
|
+
grp,
|
|
491
487
|
.Some(g) => {
|
|
492
488
|
g_bytes := g.as_bytes();
|
|
493
489
|
gi := usize(0);
|
|
494
|
-
while
|
|
490
|
+
while(gi < g_bytes.len(), gi = (gi + usize(1)), {
|
|
495
491
|
result.push(g_bytes.get(gi).unwrap());
|
|
496
|
-
};
|
|
492
|
+
});
|
|
497
493
|
},
|
|
498
494
|
.None => ()
|
|
499
495
|
);
|
|
@@ -518,17 +514,17 @@ impl(Regex,
|
|
|
518
514
|
i = (i + usize(1));
|
|
519
515
|
}
|
|
520
516
|
);
|
|
521
|
-
};
|
|
522
|
-
|
|
517
|
+
});
|
|
523
518
|
String.from_bytes(result)
|
|
524
519
|
})
|
|
525
520
|
);
|
|
526
|
-
|
|
527
521
|
// Block 6: replace, replace_all, split (depends on Block 3+5)
|
|
528
|
-
impl(
|
|
522
|
+
impl(
|
|
523
|
+
Regex,
|
|
529
524
|
replace : (fn(self : Self, input : String, replacement : String) -> String)({
|
|
530
525
|
result := self.exec(input);
|
|
531
|
-
match(
|
|
526
|
+
match(
|
|
527
|
+
result,
|
|
532
528
|
.None => input,
|
|
533
529
|
.Some(m) => {
|
|
534
530
|
input_bytes := input.as_bytes();
|
|
@@ -536,7 +532,7 @@ impl(Regex,
|
|
|
536
532
|
char_idx := m.index();
|
|
537
533
|
(match_start_byte : usize) = usize(0);
|
|
538
534
|
ci := usize(0);
|
|
539
|
-
while
|
|
535
|
+
while(ci < char_idx, {
|
|
540
536
|
cb := input_bytes.get(match_start_byte).unwrap();
|
|
541
537
|
cbl := cond(
|
|
542
538
|
(cb < u8(0x80)) => usize(1),
|
|
@@ -546,30 +542,28 @@ impl(Regex,
|
|
|
546
542
|
);
|
|
547
543
|
match_start_byte = (match_start_byte + cbl);
|
|
548
544
|
ci = (ci + usize(1));
|
|
549
|
-
};
|
|
545
|
+
});
|
|
550
546
|
match_end_byte := (match_start_byte + m.value().as_bytes().len());
|
|
551
|
-
|
|
552
547
|
// Build result: pre-match + replacement + post-match
|
|
553
548
|
out := ArrayList(u8).new();
|
|
554
549
|
pi := usize(0);
|
|
555
|
-
while
|
|
550
|
+
while(pi < match_start_byte, pi = (pi + usize(1)), {
|
|
556
551
|
out.push(input_bytes.get(pi).unwrap());
|
|
557
|
-
};
|
|
552
|
+
});
|
|
558
553
|
rep := self._apply_replacement(replacement, m);
|
|
559
554
|
rep_bytes := rep.as_bytes();
|
|
560
555
|
ri := usize(0);
|
|
561
|
-
while
|
|
556
|
+
while(ri < rep_bytes.len(), ri = (ri + usize(1)), {
|
|
562
557
|
out.push(rep_bytes.get(ri).unwrap());
|
|
563
|
-
};
|
|
558
|
+
});
|
|
564
559
|
pi = match_end_byte;
|
|
565
|
-
while
|
|
560
|
+
while(pi < input_bytes.len(), pi = (pi + usize(1)), {
|
|
566
561
|
out.push(input_bytes.get(pi).unwrap());
|
|
567
|
-
};
|
|
562
|
+
});
|
|
568
563
|
String.from_bytes(out)
|
|
569
564
|
}
|
|
570
565
|
)
|
|
571
566
|
}),
|
|
572
|
-
|
|
573
567
|
replace_all : (fn(self : Self, input : String, replacement : String) -> String)({
|
|
574
568
|
all_matches := self.match_all(input);
|
|
575
569
|
cond(
|
|
@@ -578,15 +572,14 @@ impl(Regex,
|
|
|
578
572
|
input_bytes := input.as_bytes();
|
|
579
573
|
out := ArrayList(u8).new();
|
|
580
574
|
(last_end_byte : usize) = usize(0);
|
|
581
|
-
|
|
582
575
|
mi := usize(0);
|
|
583
|
-
while
|
|
576
|
+
while(mi < all_matches.len(), mi = (mi + usize(1)), {
|
|
584
577
|
m := all_matches.get(mi).unwrap();
|
|
585
578
|
// Convert char index to byte index for match start
|
|
586
579
|
char_idx := m.index();
|
|
587
580
|
(match_start_byte : usize) = usize(0);
|
|
588
581
|
ci := usize(0);
|
|
589
|
-
while
|
|
582
|
+
while(ci < char_idx, {
|
|
590
583
|
cb := input_bytes.get(match_start_byte).unwrap();
|
|
591
584
|
cbl := cond(
|
|
592
585
|
(cb < u8(0x80)) => usize(1),
|
|
@@ -596,41 +589,34 @@ impl(Regex,
|
|
|
596
589
|
);
|
|
597
590
|
match_start_byte = (match_start_byte + cbl);
|
|
598
591
|
ci = (ci + usize(1));
|
|
599
|
-
};
|
|
592
|
+
});
|
|
600
593
|
match_end_byte := (match_start_byte + m.value().as_bytes().len());
|
|
601
|
-
|
|
602
594
|
// Copy text between last match end and this match start
|
|
603
595
|
pi := last_end_byte;
|
|
604
|
-
while
|
|
596
|
+
while(pi < match_start_byte, pi = (pi + usize(1)), {
|
|
605
597
|
out.push(input_bytes.get(pi).unwrap());
|
|
606
|
-
};
|
|
607
|
-
|
|
598
|
+
});
|
|
608
599
|
// Apply replacement
|
|
609
600
|
rep := self._apply_replacement(replacement, m);
|
|
610
601
|
rep_bytes := rep.as_bytes();
|
|
611
602
|
ri := usize(0);
|
|
612
|
-
while
|
|
603
|
+
while(ri < rep_bytes.len(), ri = (ri + usize(1)), {
|
|
613
604
|
out.push(rep_bytes.get(ri).unwrap());
|
|
614
|
-
};
|
|
615
|
-
|
|
605
|
+
});
|
|
616
606
|
last_end_byte = match_end_byte;
|
|
617
|
-
};
|
|
618
|
-
|
|
607
|
+
});
|
|
619
608
|
// Copy remaining text after last match
|
|
620
609
|
pi := last_end_byte;
|
|
621
|
-
while
|
|
610
|
+
while(pi < input_bytes.len(), pi = (pi + usize(1)), {
|
|
622
611
|
out.push(input_bytes.get(pi).unwrap());
|
|
623
|
-
};
|
|
624
|
-
|
|
612
|
+
});
|
|
625
613
|
String.from_bytes(out)
|
|
626
614
|
}
|
|
627
615
|
)
|
|
628
616
|
}),
|
|
629
|
-
|
|
630
617
|
split : (fn(self : Self, input : String) -> ArrayList(String))({
|
|
631
618
|
parts := ArrayList(String).new();
|
|
632
619
|
all_matches := self.match_all(input);
|
|
633
|
-
|
|
634
620
|
cond(
|
|
635
621
|
(all_matches.len() == usize(0)) => {
|
|
636
622
|
parts.push(input);
|
|
@@ -638,15 +624,14 @@ impl(Regex,
|
|
|
638
624
|
true => {
|
|
639
625
|
input_bytes := input.as_bytes();
|
|
640
626
|
(last_end_byte : usize) = usize(0);
|
|
641
|
-
|
|
642
627
|
mi := usize(0);
|
|
643
|
-
while
|
|
628
|
+
while(mi < all_matches.len(), mi = (mi + usize(1)), {
|
|
644
629
|
m := all_matches.get(mi).unwrap();
|
|
645
630
|
// Convert char index to byte index for match start
|
|
646
631
|
char_idx := m.index();
|
|
647
632
|
(match_start_byte : usize) = usize(0);
|
|
648
633
|
ci := usize(0);
|
|
649
|
-
while
|
|
634
|
+
while(ci < char_idx, {
|
|
650
635
|
cb := input_bytes.get(match_start_byte).unwrap();
|
|
651
636
|
cbl := cond(
|
|
652
637
|
(cb < u8(0x80)) => usize(1),
|
|
@@ -656,46 +641,45 @@ impl(Regex,
|
|
|
656
641
|
);
|
|
657
642
|
match_start_byte = (match_start_byte + cbl);
|
|
658
643
|
ci = (ci + usize(1));
|
|
659
|
-
};
|
|
644
|
+
});
|
|
660
645
|
match_end_byte := (match_start_byte + m.value().as_bytes().len());
|
|
661
|
-
|
|
662
646
|
// Extract text from last_end_byte to match_start_byte
|
|
663
647
|
part_bytes := ArrayList(u8).new();
|
|
664
648
|
pi := last_end_byte;
|
|
665
|
-
while
|
|
649
|
+
while(pi < match_start_byte, pi = (pi + usize(1)), {
|
|
666
650
|
part_bytes.push(input_bytes.get(pi).unwrap());
|
|
667
|
-
};
|
|
651
|
+
});
|
|
668
652
|
parts.push(String.from_bytes(part_bytes));
|
|
669
|
-
|
|
670
653
|
// Include capture groups in split result (like JS)
|
|
671
654
|
gi := usize(1);
|
|
672
|
-
while
|
|
655
|
+
while(gi <= m.group_count(), gi = (gi + usize(1)), {
|
|
673
656
|
grp := m.group(gi);
|
|
674
|
-
match(
|
|
675
|
-
|
|
676
|
-
.
|
|
657
|
+
match(
|
|
658
|
+
grp,
|
|
659
|
+
.Some(g) => {
|
|
660
|
+
parts.push(g);
|
|
661
|
+
},
|
|
662
|
+
.None => {
|
|
663
|
+
parts.push(`undefined`);
|
|
664
|
+
}
|
|
677
665
|
);
|
|
678
|
-
};
|
|
679
|
-
|
|
666
|
+
});
|
|
680
667
|
last_end_byte = match_end_byte;
|
|
681
|
-
};
|
|
682
|
-
|
|
668
|
+
});
|
|
683
669
|
// Add remaining text after last match
|
|
684
670
|
remaining := ArrayList(u8).new();
|
|
685
671
|
pi := last_end_byte;
|
|
686
|
-
while
|
|
672
|
+
while(pi < input_bytes.len(), pi = (pi + usize(1)), {
|
|
687
673
|
remaining.push(input_bytes.get(pi).unwrap());
|
|
688
|
-
};
|
|
674
|
+
});
|
|
689
675
|
parts.push(String.from_bytes(remaining));
|
|
690
676
|
}
|
|
691
677
|
);
|
|
692
|
-
|
|
693
678
|
parts
|
|
694
679
|
})
|
|
695
680
|
);
|
|
696
|
-
|
|
697
|
-
export
|
|
681
|
+
export(
|
|
698
682
|
Regex,
|
|
699
683
|
RegexMatch,
|
|
700
684
|
RegexFlags
|
|
701
|
-
;
|
|
685
|
+
);
|