@shd101wyy/yo 0.1.26 → 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/skills/yo-async-effects/SKILL.md +4 -4
- package/.github/skills/yo-async-effects/async-effects-recipes.md +34 -34
- package/.github/skills/yo-core-patterns/SKILL.md +1 -1
- package/.github/skills/yo-core-patterns/core-patterns-cheatsheet.md +26 -26
- package/.github/skills/yo-project-workflow/SKILL.md +6 -3
- package/.github/skills/yo-project-workflow/workflow-cheatsheet.md +34 -11
- package/.github/skills/yo-syntax/SKILL.md +7 -6
- package/.github/skills/yo-syntax/syntax-cheatsheet.md +73 -60
- package/.github/skills/yo-wasm-integration/wasm-integration-cheatsheet.md +3 -3
- package/README.md +10 -8
- package/out/cjs/index.cjs +456 -438
- package/out/cjs/yo-cli.cjs +576 -543
- package/out/cjs/yo-lsp.cjs +559 -532
- package/out/esm/index.mjs +281 -263
- package/out/types/src/formatter.d.ts +11 -0
- package/out/types/src/lsp/formatting.d.ts +2 -0
- package/out/types/src/tests/formatter.test.d.ts +1 -0
- package/out/types/tsconfig.tsbuildinfo +1 -1
- package/package.json +1 -1
- package/std/alg/hash.yo +13 -21
- package/std/allocator.yo +25 -40
- package/std/async.yo +3 -7
- package/std/build.yo +105 -151
- package/std/cli/arg_parser.yo +184 -169
- package/std/collections/array_list.yo +350 -314
- package/std/collections/btree_map.yo +142 -131
- package/std/collections/deque.yo +132 -128
- package/std/collections/hash_map.yo +542 -566
- package/std/collections/hash_set.yo +623 -687
- package/std/collections/linked_list.yo +275 -293
- package/std/collections/ordered_map.yo +113 -85
- package/std/collections/priority_queue.yo +73 -73
- package/std/crypto/md5.yo +191 -95
- package/std/crypto/random.yo +56 -64
- package/std/crypto/sha256.yo +151 -107
- package/std/encoding/base64.yo +87 -81
- package/std/encoding/hex.yo +43 -50
- package/std/encoding/html.yo +56 -81
- package/std/encoding/html_char_utils.yo +7 -13
- package/std/encoding/html_entities.yo +2248 -2253
- package/std/encoding/json.yo +316 -224
- package/std/encoding/punycode.yo +86 -116
- package/std/encoding/toml.yo +67 -66
- package/std/encoding/utf16.yo +37 -44
- package/std/env.yo +62 -91
- package/std/error.yo +7 -15
- package/std/fmt/display.yo +5 -9
- package/std/fmt/index.yo +8 -14
- package/std/fmt/to_string.yo +330 -315
- package/std/fmt/writer.yo +58 -87
- package/std/fs/dir.yo +83 -102
- package/std/fs/file.yo +147 -180
- package/std/fs/metadata.yo +45 -78
- package/std/fs/temp.yo +55 -65
- package/std/fs/types.yo +27 -40
- package/std/fs/walker.yo +53 -68
- package/std/gc.yo +5 -8
- package/std/glob.yo +30 -43
- package/std/http/client.yo +107 -120
- package/std/http/http.yo +106 -96
- package/std/http/index.yo +4 -6
- package/std/imm/list.yo +88 -93
- package/std/imm/map.yo +528 -464
- package/std/imm/set.yo +52 -57
- package/std/imm/sorted_map.yo +340 -286
- package/std/imm/sorted_set.yo +57 -63
- package/std/imm/string.yo +404 -345
- package/std/imm/vec.yo +173 -181
- package/std/io/reader.yo +3 -6
- package/std/io/writer.yo +4 -8
- package/std/libc/assert.yo +5 -9
- package/std/libc/ctype.yo +32 -22
- package/std/libc/dirent.yo +26 -25
- package/std/libc/errno.yo +164 -90
- package/std/libc/fcntl.yo +52 -45
- package/std/libc/float.yo +66 -44
- package/std/libc/limits.yo +42 -33
- package/std/libc/math.yo +53 -82
- package/std/libc/signal.yo +72 -47
- package/std/libc/stdatomic.yo +217 -188
- package/std/libc/stdint.yo +5 -29
- package/std/libc/stdio.yo +5 -29
- package/std/libc/stdlib.yo +32 -39
- package/std/libc/string.yo +5 -23
- package/std/libc/sys/stat.yo +58 -56
- package/std/libc/time.yo +5 -19
- package/std/libc/unistd.yo +5 -20
- package/std/libc/wctype.yo +6 -9
- package/std/libc/windows.yo +26 -30
- package/std/log.yo +41 -55
- package/std/net/addr.yo +102 -97
- package/std/net/dns.yo +27 -28
- package/std/net/errors.yo +50 -49
- package/std/net/tcp.yo +113 -124
- package/std/net/udp.yo +55 -66
- package/std/os/env.yo +35 -33
- package/std/os/signal.yo +15 -25
- package/std/path.yo +276 -311
- package/std/prelude.yo +6304 -4315
- package/std/process/command.yo +87 -103
- package/std/process/index.yo +12 -31
- package/std/regex/compiler.yo +196 -95
- package/std/regex/flags.yo +58 -39
- package/std/regex/index.yo +157 -173
- package/std/regex/match.yo +20 -31
- package/std/regex/node.yo +134 -152
- package/std/regex/parser.yo +283 -259
- package/std/regex/unicode.yo +172 -202
- package/std/regex/vm.yo +155 -171
- package/std/string/index.yo +5 -7
- package/std/string/rune.yo +45 -55
- package/std/string/string.yo +937 -964
- package/std/string/string_builder.yo +94 -104
- package/std/string/unicode.yo +46 -64
- package/std/sync/channel.yo +72 -73
- package/std/sync/cond.yo +31 -36
- package/std/sync/mutex.yo +30 -32
- package/std/sync/once.yo +13 -16
- package/std/sync/rwlock.yo +26 -31
- package/std/sync/waitgroup.yo +20 -25
- package/std/sys/advise.yo +16 -24
- package/std/sys/bufio/buf_reader.yo +77 -93
- package/std/sys/bufio/buf_writer.yo +52 -65
- package/std/sys/clock.yo +4 -9
- package/std/sys/constants.yo +77 -61
- package/std/sys/copy.yo +4 -10
- package/std/sys/dir.yo +26 -43
- package/std/sys/dns.yo +41 -61
- package/std/sys/errors.yo +95 -103
- package/std/sys/events.yo +45 -57
- package/std/sys/externs.yo +319 -267
- package/std/sys/fallocate.yo +7 -11
- package/std/sys/fcntl.yo +14 -22
- package/std/sys/file.yo +26 -40
- package/std/sys/future.yo +5 -8
- package/std/sys/iov.yo +12 -25
- package/std/sys/lock.yo +12 -13
- package/std/sys/mmap.yo +38 -43
- package/std/sys/path.yo +3 -8
- package/std/sys/perm.yo +7 -21
- package/std/sys/pipe.yo +5 -12
- package/std/sys/process.yo +23 -29
- package/std/sys/seek.yo +10 -12
- package/std/sys/signal.yo +7 -13
- package/std/sys/signals.yo +52 -35
- package/std/sys/socket.yo +63 -58
- package/std/sys/socketpair.yo +3 -6
- package/std/sys/sockinfo.yo +11 -20
- package/std/sys/statfs.yo +11 -34
- package/std/sys/statx.yo +25 -52
- package/std/sys/sysinfo.yo +15 -20
- package/std/sys/tcp.yo +62 -92
- package/std/sys/temp.yo +5 -9
- package/std/sys/time.yo +5 -15
- package/std/sys/timer.yo +6 -11
- package/std/sys/tty.yo +10 -18
- package/std/sys/udp.yo +22 -39
- package/std/sys/umask.yo +3 -6
- package/std/sys/unix.yo +33 -52
- package/std/testing/bench.yo +49 -52
- package/std/thread.yo +10 -15
- package/std/time/datetime.yo +105 -89
- package/std/time/duration.yo +43 -56
- package/std/time/instant.yo +13 -18
- package/std/time/sleep.yo +5 -9
- package/std/url/index.yo +184 -209
- package/std/worker.yo +6 -10
package/std/regex/compiler.yo
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
//! NFA compiler — compiles a `RegexNode` AST into a flat list of NFA instructions
|
|
2
2
|
//! using Thompson's construction algorithm.
|
|
3
|
-
|
|
4
|
-
open
|
|
5
|
-
|
|
6
|
-
{ RegexNode, NodeKind, CharRange, AnchorKind, GroupNameEntry } :: import "./node.yo";
|
|
7
|
-
|
|
3
|
+
open(import("std/collections/array_list"));
|
|
4
|
+
open(import("std/string"));
|
|
5
|
+
{ RegexNode, NodeKind, CharRange, AnchorKind, GroupNameEntry } :: import("./node.yo");
|
|
8
6
|
/// NFA instruction types.
|
|
9
7
|
InstrKind :: enum(
|
|
10
8
|
Char,
|
|
@@ -22,136 +20,226 @@ InstrKind :: enum(
|
|
|
22
20
|
Lookahead,
|
|
23
21
|
Lookbehind
|
|
24
22
|
);
|
|
25
|
-
|
|
26
23
|
// A single NFA instruction
|
|
27
24
|
Instr :: struct(
|
|
28
|
-
kind
|
|
25
|
+
kind : InstrKind,
|
|
29
26
|
codepoint : u32,
|
|
30
27
|
class_idx : usize,
|
|
31
|
-
target_a
|
|
32
|
-
target_b
|
|
33
|
-
target
|
|
34
|
-
slot
|
|
28
|
+
target_a : usize,
|
|
29
|
+
target_b : usize,
|
|
30
|
+
target : usize,
|
|
31
|
+
slot : usize
|
|
35
32
|
);
|
|
36
|
-
|
|
37
|
-
|
|
33
|
+
impl(
|
|
34
|
+
Instr,
|
|
38
35
|
char_instr : (fn(cp : u32) -> Self)(
|
|
39
|
-
Self(
|
|
40
|
-
|
|
36
|
+
Self(
|
|
37
|
+
kind :.Char,
|
|
38
|
+
codepoint : cp,
|
|
39
|
+
class_idx : usize(0),
|
|
40
|
+
target_a : usize(0),
|
|
41
|
+
target_b : usize(0),
|
|
42
|
+
target : usize(0),
|
|
43
|
+
slot : usize(0)
|
|
44
|
+
)
|
|
41
45
|
),
|
|
42
46
|
any_char_instr : (fn() -> Self)(
|
|
43
|
-
Self(
|
|
44
|
-
|
|
47
|
+
Self(
|
|
48
|
+
kind :.AnyChar,
|
|
49
|
+
codepoint : u32(0),
|
|
50
|
+
class_idx : usize(0),
|
|
51
|
+
target_a : usize(0),
|
|
52
|
+
target_b : usize(0),
|
|
53
|
+
target : usize(0),
|
|
54
|
+
slot : usize(0)
|
|
55
|
+
)
|
|
45
56
|
),
|
|
46
57
|
char_class_instr : (fn(idx : usize) -> Self)(
|
|
47
|
-
Self(
|
|
48
|
-
|
|
58
|
+
Self(
|
|
59
|
+
kind :.CharClass,
|
|
60
|
+
codepoint : u32(0),
|
|
61
|
+
class_idx : idx,
|
|
62
|
+
target_a : usize(0),
|
|
63
|
+
target_b : usize(0),
|
|
64
|
+
target : usize(0),
|
|
65
|
+
slot : usize(0)
|
|
66
|
+
)
|
|
49
67
|
),
|
|
50
68
|
split_instr : (fn(a : usize, b : usize) -> Self)(
|
|
51
|
-
Self(
|
|
52
|
-
|
|
69
|
+
Self(
|
|
70
|
+
kind :.Split,
|
|
71
|
+
codepoint : u32(0),
|
|
72
|
+
class_idx : usize(0),
|
|
73
|
+
target_a : a,
|
|
74
|
+
target_b : b,
|
|
75
|
+
target : usize(0),
|
|
76
|
+
slot : usize(0)
|
|
77
|
+
)
|
|
53
78
|
),
|
|
54
79
|
jump_instr : (fn(t : usize) -> Self)(
|
|
55
|
-
Self(
|
|
56
|
-
|
|
80
|
+
Self(
|
|
81
|
+
kind :.Jump,
|
|
82
|
+
codepoint : u32(0),
|
|
83
|
+
class_idx : usize(0),
|
|
84
|
+
target_a : usize(0),
|
|
85
|
+
target_b : usize(0),
|
|
86
|
+
target : t,
|
|
87
|
+
slot : usize(0)
|
|
88
|
+
)
|
|
57
89
|
),
|
|
58
90
|
save_instr : (fn(s : usize) -> Self)(
|
|
59
|
-
Self(
|
|
60
|
-
|
|
91
|
+
Self(
|
|
92
|
+
kind :.Save,
|
|
93
|
+
codepoint : u32(0),
|
|
94
|
+
class_idx : usize(0),
|
|
95
|
+
target_a : usize(0),
|
|
96
|
+
target_b : usize(0),
|
|
97
|
+
target : usize(0),
|
|
98
|
+
slot : s
|
|
99
|
+
)
|
|
61
100
|
),
|
|
62
101
|
match_instr : (fn() -> Self)(
|
|
63
|
-
Self(
|
|
64
|
-
|
|
102
|
+
Self(
|
|
103
|
+
kind :.Match,
|
|
104
|
+
codepoint : u32(0),
|
|
105
|
+
class_idx : usize(0),
|
|
106
|
+
target_a : usize(0),
|
|
107
|
+
target_b : usize(0),
|
|
108
|
+
target : usize(0),
|
|
109
|
+
slot : usize(0)
|
|
110
|
+
)
|
|
65
111
|
),
|
|
66
112
|
assert_start_instr : (fn() -> Self)(
|
|
67
|
-
Self(
|
|
68
|
-
|
|
113
|
+
Self(
|
|
114
|
+
kind :.AssertStart,
|
|
115
|
+
codepoint : u32(0),
|
|
116
|
+
class_idx : usize(0),
|
|
117
|
+
target_a : usize(0),
|
|
118
|
+
target_b : usize(0),
|
|
119
|
+
target : usize(0),
|
|
120
|
+
slot : usize(0)
|
|
121
|
+
)
|
|
69
122
|
),
|
|
70
123
|
assert_end_instr : (fn() -> Self)(
|
|
71
|
-
Self(
|
|
72
|
-
|
|
124
|
+
Self(
|
|
125
|
+
kind :.AssertEnd,
|
|
126
|
+
codepoint : u32(0),
|
|
127
|
+
class_idx : usize(0),
|
|
128
|
+
target_a : usize(0),
|
|
129
|
+
target_b : usize(0),
|
|
130
|
+
target : usize(0),
|
|
131
|
+
slot : usize(0)
|
|
132
|
+
)
|
|
73
133
|
),
|
|
74
134
|
assert_word_boundary_instr : (fn() -> Self)(
|
|
75
|
-
Self(
|
|
76
|
-
|
|
135
|
+
Self(
|
|
136
|
+
kind :.AssertWordBoundary,
|
|
137
|
+
codepoint : u32(0),
|
|
138
|
+
class_idx : usize(0),
|
|
139
|
+
target_a : usize(0),
|
|
140
|
+
target_b : usize(0),
|
|
141
|
+
target : usize(0),
|
|
142
|
+
slot : usize(0)
|
|
143
|
+
)
|
|
77
144
|
),
|
|
78
145
|
assert_non_word_boundary_instr : (fn() -> Self)(
|
|
79
|
-
Self(
|
|
80
|
-
|
|
146
|
+
Self(
|
|
147
|
+
kind :.AssertNonWordBoundary,
|
|
148
|
+
codepoint : u32(0),
|
|
149
|
+
class_idx : usize(0),
|
|
150
|
+
target_a : usize(0),
|
|
151
|
+
target_b : usize(0),
|
|
152
|
+
target : usize(0),
|
|
153
|
+
slot : usize(0)
|
|
154
|
+
)
|
|
81
155
|
),
|
|
82
156
|
backref_instr : (fn(group_idx : usize) -> Self)(
|
|
83
|
-
Self(
|
|
84
|
-
|
|
157
|
+
Self(
|
|
158
|
+
kind :.Backref,
|
|
159
|
+
codepoint : u32(0),
|
|
160
|
+
class_idx : usize(0),
|
|
161
|
+
target_a : usize(0),
|
|
162
|
+
target_b : usize(0),
|
|
163
|
+
target : usize(0),
|
|
164
|
+
slot : group_idx
|
|
165
|
+
)
|
|
85
166
|
),
|
|
86
167
|
// Lookahead: target_a=sub_start, target_b=sub_end, slot=1 for positive/0 for negative
|
|
87
168
|
lookahead_instr : (fn(sub_start : usize, sub_end : usize, positive : bool) -> Self)(
|
|
88
|
-
Self(
|
|
89
|
-
|
|
90
|
-
|
|
169
|
+
Self(
|
|
170
|
+
kind :.Lookahead,
|
|
171
|
+
codepoint : u32(0),
|
|
172
|
+
class_idx : usize(0),
|
|
173
|
+
target_a : sub_start,
|
|
174
|
+
target_b : sub_end,
|
|
175
|
+
target : usize(0),
|
|
176
|
+
slot : cond(positive => usize(1), true => usize(0))
|
|
177
|
+
)
|
|
91
178
|
),
|
|
92
179
|
// Lookbehind: target_a=sub_start, target_b=sub_end, slot=1 for positive/0 for negative
|
|
93
180
|
lookbehind_instr : (fn(sub_start : usize, sub_end : usize, positive : bool) -> Self)(
|
|
94
|
-
Self(
|
|
95
|
-
|
|
96
|
-
|
|
181
|
+
Self(
|
|
182
|
+
kind :.Lookbehind,
|
|
183
|
+
codepoint : u32(0),
|
|
184
|
+
class_idx : usize(0),
|
|
185
|
+
target_a : sub_start,
|
|
186
|
+
target_b : sub_end,
|
|
187
|
+
target : usize(0),
|
|
188
|
+
slot : cond(positive => usize(1), true => usize(0))
|
|
189
|
+
)
|
|
97
190
|
)
|
|
98
191
|
);
|
|
99
|
-
|
|
100
192
|
// Character class table entry
|
|
101
193
|
ClassEntry :: struct(
|
|
102
|
-
ranges
|
|
194
|
+
ranges : ArrayList(CharRange),
|
|
103
195
|
negated : bool
|
|
104
196
|
);
|
|
105
|
-
|
|
106
197
|
// The compiled NFA program
|
|
107
198
|
NfaProgram :: object(
|
|
108
|
-
instructions
|
|
109
|
-
classes
|
|
110
|
-
n_groups
|
|
111
|
-
group_names
|
|
199
|
+
instructions : ArrayList(Instr),
|
|
200
|
+
classes : ArrayList(ClassEntry),
|
|
201
|
+
n_groups : usize,
|
|
202
|
+
group_names : ArrayList(GroupNameEntry),
|
|
112
203
|
literal_prefix : ArrayList(u8)
|
|
113
204
|
);
|
|
114
|
-
|
|
115
205
|
// The NFA compiler
|
|
116
206
|
NfaCompiler :: object(
|
|
117
207
|
_program : NfaProgram
|
|
118
208
|
);
|
|
119
|
-
|
|
120
209
|
// Utilities defined first (bottom-up ordering required)
|
|
121
|
-
impl(
|
|
210
|
+
impl(
|
|
211
|
+
NfaCompiler,
|
|
122
212
|
new : (fn() -> Self)(
|
|
123
213
|
Self(
|
|
124
|
-
_program: NfaProgram(
|
|
125
|
-
instructions: ArrayList(Instr).new(),
|
|
126
|
-
classes: ArrayList(ClassEntry).new(),
|
|
127
|
-
n_groups: usize(0),
|
|
128
|
-
group_names: ArrayList(GroupNameEntry).new(),
|
|
129
|
-
literal_prefix: ArrayList(u8).new()
|
|
214
|
+
_program : NfaProgram(
|
|
215
|
+
instructions : ArrayList(Instr).new(),
|
|
216
|
+
classes : ArrayList(ClassEntry).new(),
|
|
217
|
+
n_groups : usize(0),
|
|
218
|
+
group_names : ArrayList(GroupNameEntry).new(),
|
|
219
|
+
literal_prefix : ArrayList(u8).new()
|
|
130
220
|
)
|
|
131
221
|
)
|
|
132
222
|
),
|
|
133
|
-
|
|
134
223
|
_emit : (fn(self : Self, instr : Instr) -> usize)({
|
|
135
224
|
idx := self._program.instructions.len();
|
|
136
225
|
self._program.instructions.push(instr);
|
|
137
226
|
idx
|
|
138
227
|
}),
|
|
139
|
-
|
|
140
228
|
_current_pc : (fn(self : Self) -> usize)(
|
|
141
229
|
self._program.instructions.len()
|
|
142
230
|
),
|
|
143
|
-
|
|
144
231
|
_add_class : (fn(self : Self, ranges : ArrayList(CharRange), negated : bool) -> usize)({
|
|
145
232
|
idx := self._program.classes.len();
|
|
146
|
-
self._program.classes.push(ClassEntry(ranges: ranges, negated: negated));
|
|
233
|
+
self._program.classes.push(ClassEntry(ranges : ranges, negated : negated));
|
|
147
234
|
idx
|
|
148
235
|
})
|
|
149
236
|
);
|
|
150
|
-
|
|
151
237
|
// Compile methods: _compile_node is self-recursive, quantifier logic inlined
|
|
152
|
-
impl(
|
|
238
|
+
impl(
|
|
239
|
+
NfaCompiler,
|
|
153
240
|
_compile_node : (fn(self : Self, node : RegexNode) -> unit)({
|
|
154
|
-
match(
|
|
241
|
+
match(
|
|
242
|
+
node.kind,
|
|
155
243
|
.Literal => {
|
|
156
244
|
self._emit(Instr.char_instr(node.codepoint));
|
|
157
245
|
},
|
|
@@ -163,18 +251,27 @@ impl(NfaCompiler,
|
|
|
163
251
|
self._emit(Instr.char_class_instr(idx));
|
|
164
252
|
},
|
|
165
253
|
.Anchor =>
|
|
166
|
-
match(
|
|
167
|
-
.
|
|
168
|
-
.
|
|
169
|
-
|
|
170
|
-
|
|
254
|
+
match(
|
|
255
|
+
node.anchor,
|
|
256
|
+
.Start => {
|
|
257
|
+
self._emit(Instr.assert_start_instr());
|
|
258
|
+
},
|
|
259
|
+
.End => {
|
|
260
|
+
self._emit(Instr.assert_end_instr());
|
|
261
|
+
},
|
|
262
|
+
.WordBoundary => {
|
|
263
|
+
self._emit(Instr.assert_word_boundary_instr());
|
|
264
|
+
},
|
|
265
|
+
.NonWordBoundary => {
|
|
266
|
+
self._emit(Instr.assert_non_word_boundary_instr());
|
|
267
|
+
}
|
|
171
268
|
),
|
|
172
269
|
.Sequence => {
|
|
173
270
|
i := usize(0);
|
|
174
|
-
while
|
|
271
|
+
while(i < node.children.len(), i = (i + usize(1)), {
|
|
175
272
|
child := node.children.get(i).unwrap();
|
|
176
273
|
recur(self, child);
|
|
177
|
-
};
|
|
274
|
+
});
|
|
178
275
|
},
|
|
179
276
|
.Alternation => {
|
|
180
277
|
left := node.children.get(usize(0)).unwrap();
|
|
@@ -195,15 +292,13 @@ impl(NfaCompiler,
|
|
|
195
292
|
min_val := node.q_min;
|
|
196
293
|
max_val := node.q_max;
|
|
197
294
|
greedy := node.q_greedy;
|
|
198
|
-
|
|
199
295
|
// Emit min required copies, tracking start of last copy for loop-back
|
|
200
296
|
(last_body_start : usize) = self._current_pc();
|
|
201
297
|
qi := usize(0);
|
|
202
|
-
while
|
|
298
|
+
while(qi < min_val, qi = (qi + usize(1)), {
|
|
203
299
|
last_body_start = self._current_pc();
|
|
204
300
|
recur(self, child);
|
|
205
|
-
};
|
|
206
|
-
|
|
301
|
+
});
|
|
207
302
|
cond(
|
|
208
303
|
((max_val == usize(0)) && (min_val == usize(0))) => {
|
|
209
304
|
// * — zero or more (max=0 means unbounded when min=0)
|
|
@@ -239,7 +334,7 @@ impl(NfaCompiler,
|
|
|
239
334
|
// {min, max} — emit (max - min) optional copies
|
|
240
335
|
remaining := (max_val - min_val);
|
|
241
336
|
qj := usize(0);
|
|
242
|
-
while
|
|
337
|
+
while(qj < remaining, qj = (qj + usize(1)), {
|
|
243
338
|
split_pc := self._emit(Instr.split_instr(usize(0), usize(0)));
|
|
244
339
|
body_start := self._current_pc();
|
|
245
340
|
recur(self, child);
|
|
@@ -252,7 +347,7 @@ impl(NfaCompiler,
|
|
|
252
347
|
&(self._program.instructions(split_pc)).* = Instr.split_instr(after, body_start);
|
|
253
348
|
}
|
|
254
349
|
);
|
|
255
|
-
};
|
|
350
|
+
});
|
|
256
351
|
}
|
|
257
352
|
);
|
|
258
353
|
},
|
|
@@ -295,9 +390,9 @@ impl(NfaCompiler,
|
|
|
295
390
|
);
|
|
296
391
|
})
|
|
297
392
|
);
|
|
298
|
-
|
|
299
393
|
// Literal prefix extraction (must be before compile)
|
|
300
|
-
impl(
|
|
394
|
+
impl(
|
|
395
|
+
NfaCompiler,
|
|
301
396
|
// Extract literal bytes from the start of the pattern for fast scanning.
|
|
302
397
|
// Walks past Save/AssertStart instructions, then collects Char instructions.
|
|
303
398
|
_extract_literal_prefix : (fn(self : Self) -> unit)({
|
|
@@ -305,12 +400,16 @@ impl(NfaCompiler,
|
|
|
305
400
|
pc := usize(0);
|
|
306
401
|
instrs := self._program.instructions;
|
|
307
402
|
(done : bool) = false;
|
|
308
|
-
|
|
309
|
-
while ((pc < instrs.len()) && (!(done))), {
|
|
403
|
+
while((pc < instrs.len()) && (!(done)), {
|
|
310
404
|
instr := instrs.get(pc).unwrap();
|
|
311
|
-
match(
|
|
312
|
-
.
|
|
313
|
-
.
|
|
405
|
+
match(
|
|
406
|
+
instr.kind,
|
|
407
|
+
.Save => {
|
|
408
|
+
pc = (pc + usize(1));
|
|
409
|
+
},
|
|
410
|
+
.AssertStart => {
|
|
411
|
+
pc = (pc + usize(1));
|
|
412
|
+
},
|
|
314
413
|
.Char => {
|
|
315
414
|
cp := instr.codepoint;
|
|
316
415
|
cond(
|
|
@@ -318,19 +417,22 @@ impl(NfaCompiler,
|
|
|
318
417
|
prefix.push(u8(cp));
|
|
319
418
|
pc = (pc + usize(1));
|
|
320
419
|
},
|
|
321
|
-
true => {
|
|
420
|
+
true => {
|
|
421
|
+
done = true;
|
|
422
|
+
}
|
|
322
423
|
);
|
|
323
424
|
},
|
|
324
|
-
_ => {
|
|
425
|
+
_ => {
|
|
426
|
+
done = true;
|
|
427
|
+
}
|
|
325
428
|
);
|
|
326
|
-
};
|
|
327
|
-
|
|
429
|
+
});
|
|
328
430
|
self._program.literal_prefix = prefix;
|
|
329
431
|
})
|
|
330
432
|
);
|
|
331
|
-
|
|
332
433
|
// Top-level compile method
|
|
333
|
-
impl(
|
|
434
|
+
impl(
|
|
435
|
+
NfaCompiler,
|
|
334
436
|
compile : (fn(self : Self, root : RegexNode, n_groups : usize, group_names : ArrayList(GroupNameEntry)) -> NfaProgram)({
|
|
335
437
|
self._program.n_groups = n_groups;
|
|
336
438
|
self._program.group_names = group_names;
|
|
@@ -342,12 +444,11 @@ impl(NfaCompiler,
|
|
|
342
444
|
self._program
|
|
343
445
|
})
|
|
344
446
|
);
|
|
345
|
-
|
|
346
|
-
export
|
|
447
|
+
export(
|
|
347
448
|
NfaCompiler,
|
|
348
449
|
NfaProgram,
|
|
349
450
|
Instr,
|
|
350
451
|
InstrKind,
|
|
351
452
|
ClassEntry,
|
|
352
453
|
GroupNameEntry
|
|
353
|
-
;
|
|
454
|
+
);
|
package/std/regex/flags.yo
CHANGED
|
@@ -9,96 +9,115 @@
|
|
|
9
9
|
//! - `s` — dotAll: `.` matches newline characters
|
|
10
10
|
//! - `u` — unicode: full Unicode matching
|
|
11
11
|
//! - `y` — sticky: match from lastIndex only
|
|
12
|
-
|
|
13
|
-
open
|
|
14
|
-
open import "std/collections/array_list";
|
|
15
|
-
|
|
12
|
+
open(import("std/string"));
|
|
13
|
+
open(import("std/collections/array_list"));
|
|
16
14
|
RegexFlags :: struct(
|
|
17
|
-
global
|
|
15
|
+
global : bool,
|
|
18
16
|
ignore_case : bool,
|
|
19
|
-
multiline
|
|
20
|
-
dot_all
|
|
21
|
-
unicode
|
|
22
|
-
sticky
|
|
17
|
+
multiline : bool,
|
|
18
|
+
dot_all : bool,
|
|
19
|
+
unicode : bool,
|
|
20
|
+
sticky : bool
|
|
23
21
|
);
|
|
24
|
-
|
|
25
|
-
|
|
22
|
+
impl(
|
|
23
|
+
RegexFlags,
|
|
26
24
|
// Create default flags (all false)
|
|
27
25
|
default : (fn() -> Self)(
|
|
28
26
|
Self(
|
|
29
|
-
global: false,
|
|
30
|
-
ignore_case: false,
|
|
31
|
-
multiline: false,
|
|
32
|
-
dot_all: false,
|
|
33
|
-
unicode: false,
|
|
34
|
-
sticky: false
|
|
27
|
+
global : false,
|
|
28
|
+
ignore_case : false,
|
|
29
|
+
multiline : false,
|
|
30
|
+
dot_all : false,
|
|
31
|
+
unicode : false,
|
|
32
|
+
sticky : false
|
|
35
33
|
)
|
|
36
34
|
),
|
|
37
|
-
|
|
38
35
|
// Parse flags from a string like `gi`, `ms`, etc.
|
|
39
|
-
parse : (fn(flags_str: String) -> Result(Self, String))({
|
|
36
|
+
parse : (fn(flags_str : String) -> Result(Self, String))({
|
|
40
37
|
result := Self.default();
|
|
41
38
|
bytes := flags_str.as_bytes();
|
|
42
39
|
i := usize(0);
|
|
43
|
-
while
|
|
40
|
+
while(i < bytes.len(), i = (i + usize(1)), {
|
|
44
41
|
byte_opt := bytes.get(i);
|
|
45
|
-
match(
|
|
42
|
+
match(
|
|
43
|
+
byte_opt,
|
|
46
44
|
.Some(b) => {
|
|
47
45
|
cond(
|
|
48
46
|
(b == u8(103)) => {
|
|
49
47
|
// 'g'
|
|
50
48
|
cond(
|
|
51
|
-
result.global => {
|
|
52
|
-
|
|
49
|
+
result.global => {
|
|
50
|
+
return(.Err(`Duplicate flag: g`));
|
|
51
|
+
},
|
|
52
|
+
true => {
|
|
53
|
+
result.global = true;
|
|
54
|
+
}
|
|
53
55
|
);
|
|
54
56
|
},
|
|
55
57
|
(b == u8(105)) => {
|
|
56
58
|
// 'i'
|
|
57
59
|
cond(
|
|
58
|
-
result.ignore_case => {
|
|
59
|
-
|
|
60
|
+
result.ignore_case => {
|
|
61
|
+
return(.Err(`Duplicate flag: i`));
|
|
62
|
+
},
|
|
63
|
+
true => {
|
|
64
|
+
result.ignore_case = true;
|
|
65
|
+
}
|
|
60
66
|
);
|
|
61
67
|
},
|
|
62
68
|
(b == u8(109)) => {
|
|
63
69
|
// 'm'
|
|
64
70
|
cond(
|
|
65
|
-
result.multiline => {
|
|
66
|
-
|
|
71
|
+
result.multiline => {
|
|
72
|
+
return(.Err(`Duplicate flag: m`));
|
|
73
|
+
},
|
|
74
|
+
true => {
|
|
75
|
+
result.multiline = true;
|
|
76
|
+
}
|
|
67
77
|
);
|
|
68
78
|
},
|
|
69
79
|
(b == u8(115)) => {
|
|
70
80
|
// 's'
|
|
71
81
|
cond(
|
|
72
|
-
result.dot_all => {
|
|
73
|
-
|
|
82
|
+
result.dot_all => {
|
|
83
|
+
return(.Err(`Duplicate flag: s`));
|
|
84
|
+
},
|
|
85
|
+
true => {
|
|
86
|
+
result.dot_all = true;
|
|
87
|
+
}
|
|
74
88
|
);
|
|
75
89
|
},
|
|
76
90
|
(b == u8(117)) => {
|
|
77
91
|
// 'u'
|
|
78
92
|
cond(
|
|
79
|
-
result.unicode => {
|
|
80
|
-
|
|
93
|
+
result.unicode => {
|
|
94
|
+
return(.Err(`Duplicate flag: u`));
|
|
95
|
+
},
|
|
96
|
+
true => {
|
|
97
|
+
result.unicode = true;
|
|
98
|
+
}
|
|
81
99
|
);
|
|
82
100
|
},
|
|
83
101
|
(b == u8(121)) => {
|
|
84
102
|
// 'y'
|
|
85
103
|
cond(
|
|
86
|
-
result.sticky => {
|
|
87
|
-
|
|
104
|
+
result.sticky => {
|
|
105
|
+
return(.Err(`Duplicate flag: y`));
|
|
106
|
+
},
|
|
107
|
+
true => {
|
|
108
|
+
result.sticky = true;
|
|
109
|
+
}
|
|
88
110
|
);
|
|
89
111
|
},
|
|
90
112
|
true => {
|
|
91
|
-
return
|
|
113
|
+
return(.Err(`Invalid flag character`));
|
|
92
114
|
}
|
|
93
115
|
);
|
|
94
116
|
},
|
|
95
117
|
.None => ()
|
|
96
118
|
);
|
|
97
|
-
};
|
|
119
|
+
});
|
|
98
120
|
.Ok(result)
|
|
99
121
|
})
|
|
100
122
|
);
|
|
101
|
-
|
|
102
|
-
export
|
|
103
|
-
RegexFlags
|
|
104
|
-
;
|
|
123
|
+
export(RegexFlags);
|