@shd101wyy/yo 0.0.28 → 0.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/out/cjs/index.cjs +57 -57
- package/out/cjs/yo-cli.cjs +72 -72
- package/out/esm/index.mjs +62 -62
- package/out/types/tsconfig.tsbuildinfo +1 -1
- package/package.json +1 -1
- package/std/regex/compiler.yo +355 -0
- package/std/regex/flags.yo +104 -0
- package/std/regex/match.yo +83 -0
- package/std/regex/node.yo +283 -0
- package/std/regex/parser.yo +847 -0
- package/std/regex/regex.yo +714 -0
- package/std/regex/unicode.yo +365 -0
- package/std/regex/vm.yo +737 -0
- package/std/time/sleep.yo +18 -0
- package/std/time.yo +0 -13
package/package.json
CHANGED
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
// std/regex/compiler.yo - NFA compiler
|
|
2
|
+
//
|
|
3
|
+
// Compiles a RegexNode AST into a flat list of NFA instructions
|
|
4
|
+
// using Thompson's construction algorithm.
|
|
5
|
+
|
|
6
|
+
open import "std/collections/array_list";
|
|
7
|
+
open import "std/string";
|
|
8
|
+
{ RegexNode, NodeKind, CharRange, AnchorKind, GroupNameEntry } :: import "./node.yo";
|
|
9
|
+
|
|
10
|
+
// NFA instruction opcodes.
//
// Every `Instr` carries exactly one of these tags; which operand fields of the
// instruction are meaningful depends on the tag (see the `Instr` constructors).
// The notes below describe how the compiler in this file emits each opcode;
// how each one is executed is defined by the VM, not here.
InstrKind :: enum(
  // Emitted for a Literal node; operand: `codepoint`.
  Char,
  // Emitted for a CharClass node; operand: `class_idx` into the class table.
  CharClass,
  // Emitted for a Dot node; no operands.
  AnyChar,
  // Two-way branch; operands: `target_a`, `target_b`.
  // Greedy quantifiers put the loop/body branch in `target_a`, lazy ones put
  // it in `target_b` — presumably `target_a` is tried first (confirm in VM).
  Split,
  // Unconditional branch; operand: `target`.
  Jump,
  // Capture-position marker; operand: `slot`.
  Save,
  // End of a (sub-)program; no operands.
  Match,
  // Emitted for the Start anchor.
  AssertStart,
  // Emitted for the End anchor.
  AssertEnd,
  // Emitted for the WordBoundary anchor.
  AssertWordBoundary,
  // Emitted for the NonWordBoundary anchor.
  AssertNonWordBoundary,
  // Emitted for a Backreference node; operand: `slot` holds the group index.
  Backref,
  // Emitted for a Lookahead node; operands: `target_a`/`target_b` delimit the
  // sub-program, `slot` is 1 for positive and 0 for negative.
  Lookahead,
  // Emitted for a Lookbehind node; same operand layout as Lookahead.
  Lookbehind
);
|
|
27
|
+
|
|
28
|
+
// One instruction of the compiled NFA program.
//
// A single flat record is shared by all opcodes. Operands an opcode does not
// use are left at zero by the constructors below.
Instr :: struct(
  kind      : InstrKind,
  codepoint : u32,    // Char: the code point carried by the instruction
  class_idx : usize,  // CharClass: index into the program's class table
  target_a  : usize,  // Split: first branch / Lookahead, Lookbehind: sub-program start pc
  target_b  : usize,  // Split: second branch / Lookahead, Lookbehind: pc just past the sub-program
  target    : usize,  // Jump: destination pc
  slot      : usize   // Save: slot number / Backref: group index / Look*: 1 = positive, 0 = negative
);

impl(Instr,
  // Internal helper: an instruction of `kind` with every operand set to zero.
  // Defined first so the public constructors below can build on it.
  _zeroed : (fn(kind : InstrKind) -> Self)(
    Self(
      kind: kind,
      codepoint: u32(0),
      class_idx: usize(0),
      target_a: usize(0),
      target_b: usize(0),
      target: usize(0),
      slot: usize(0)
    )
  ),

  // Char instruction carrying code point `cp`.
  char_instr : (fn(cp : u32) -> Self)({
    instr := Self._zeroed(.Char);
    instr.codepoint = cp;
    instr
  }),

  // AnyChar instruction (no operands).
  any_char_instr : (fn() -> Self)(
    Self._zeroed(.AnyChar)
  ),

  // CharClass instruction referring to class-table entry `idx`.
  char_class_instr : (fn(idx : usize) -> Self)({
    instr := Self._zeroed(.CharClass);
    instr.class_idx = idx;
    instr
  }),

  // Split instruction with branch targets `a` (target_a) and `b` (target_b).
  split_instr : (fn(a : usize, b : usize) -> Self)({
    instr := Self._zeroed(.Split);
    instr.target_a = a;
    instr.target_b = b;
    instr
  }),

  // Jump instruction to pc `t`.
  jump_instr : (fn(t : usize) -> Self)({
    instr := Self._zeroed(.Jump);
    instr.target = t;
    instr
  }),

  // Save instruction for slot `s`.
  save_instr : (fn(s : usize) -> Self)({
    instr := Self._zeroed(.Save);
    instr.slot = s;
    instr
  }),

  // Match instruction (no operands).
  match_instr : (fn() -> Self)(
    Self._zeroed(.Match)
  ),

  // AssertStart instruction (no operands).
  assert_start_instr : (fn() -> Self)(
    Self._zeroed(.AssertStart)
  ),

  // AssertEnd instruction (no operands).
  assert_end_instr : (fn() -> Self)(
    Self._zeroed(.AssertEnd)
  ),

  // AssertWordBoundary instruction (no operands).
  assert_word_boundary_instr : (fn() -> Self)(
    Self._zeroed(.AssertWordBoundary)
  ),

  // AssertNonWordBoundary instruction (no operands).
  assert_non_word_boundary_instr : (fn() -> Self)(
    Self._zeroed(.AssertNonWordBoundary)
  ),

  // Backref instruction; the group index is stored in `slot`.
  backref_instr : (fn(group_idx : usize) -> Self)({
    instr := Self._zeroed(.Backref);
    instr.slot = group_idx;
    instr
  }),

  // Lookahead: target_a = sub_start, target_b = sub_end,
  // slot = 1 when `positive`, 0 otherwise.
  lookahead_instr : (fn(sub_start : usize, sub_end : usize, positive : bool) -> Self)({
    instr := Self._zeroed(.Lookahead);
    instr.target_a = sub_start;
    instr.target_b = sub_end;
    instr.slot = cond(positive => usize(1), true => usize(0));
    instr
  }),

  // Lookbehind: same operand layout as `lookahead_instr`.
  lookbehind_instr : (fn(sub_start : usize, sub_end : usize, positive : bool) -> Self)({
    instr := Self._zeroed(.Lookbehind);
    instr.target_a = sub_start;
    instr.target_b = sub_end;
    instr.slot = cond(positive => usize(1), true => usize(0));
    instr
  })
);
|
|
101
|
+
|
|
102
|
+
// One entry of the program's character-class table.
// A CharClass instruction refers to an entry by its index (`class_idx`).
ClassEntry :: struct(
  // Code-point ranges copied from the CharClass AST node.
  ranges  : ArrayList(CharRange),
  // The AST node's `negated` flag, stored unchanged.
  negated : bool
);
|
|
107
|
+
|
|
108
|
+
// Output of the compiler: the instruction list plus the side tables and
// metadata that accompany it.
NfaProgram :: object(
  // Flat instruction list; a pc is an index into this list.
  instructions   : ArrayList(Instr),
  // Character-class table addressed by `Instr.class_idx`.
  classes        : ArrayList(ClassEntry),
  // Group count as passed to `NfaCompiler.compile`.
  n_groups       : usize,
  // Group-name table as passed to `NfaCompiler.compile`.
  group_names    : ArrayList(GroupNameEntry),
  // ASCII bytes of the leading literal run, filled in after compilation
  // (empty when the pattern does not start with literal characters).
  literal_prefix : ArrayList(u8)
);
|
|
116
|
+
|
|
117
|
+
// Compiler state: just the program under construction.
// Methods are attached in the `impl(NfaCompiler, ...)` blocks that follow.
NfaCompiler :: object(
  // Program being built; returned by `compile`.
  _program : NfaProgram
);
|
|
121
|
+
|
|
122
|
+
// Construction and low-level emit helpers.
// They live in their own impl block, ahead of the compile methods, because
// definitions have to precede their uses (bottom-up ordering).
impl(NfaCompiler,
  // Create a compiler whose program is empty: no instructions, no classes,
  // zero groups, no group names and an empty literal prefix.
  new : (fn() -> Self)({
    empty_program := NfaProgram(
      instructions: ArrayList(Instr).new(),
      classes: ArrayList(ClassEntry).new(),
      n_groups: usize(0),
      group_names: ArrayList(GroupNameEntry).new(),
      literal_prefix: ArrayList(u8).new()
    );
    Self(_program: empty_program)
  }),

  // pc the next emitted instruction will receive (= current instruction count).
  _current_pc : (fn(self : Self) -> usize)(
    self._program.instructions.len()
  ),

  // Append `instr` to the program and return the pc it was stored at.
  _emit : (fn(self : Self, instr : Instr) -> usize)({
    pc := self._current_pc();
    self._program.instructions.push(instr);
    pc
  }),

  // Append a class-table entry built from `ranges`/`negated` and return its
  // index, suitable for `Instr.char_class_instr`.
  _add_class : (fn(self : Self, ranges : ArrayList(CharRange), negated : bool) -> usize)({
    entry := ClassEntry(ranges: ranges, negated: negated);
    class_idx := self._program.classes.len();
    self._program.classes.push(entry);
    class_idx
  })
);
|
|
152
|
+
|
|
153
|
+
// Compile methods. `_compile_node` recurses on itself via `recur`, and the
// quantifier lowering is inlined rather than split into helper methods.
impl(NfaCompiler,
  // Lower `node` (and, recursively, its children) into instructions appended
  // to the program.
  //
  // Forward targets are unknown at the moment a Split, Jump, Lookahead or
  // Lookbehind is emitted, so each of those is emitted with placeholder
  // targets and overwritten in place (`instructions.set`) once the target pc
  // is known.
  _compile_node : (fn(self : Self, node : RegexNode) -> unit)({
    match(node.kind,
      .Literal => {
        self._emit(Instr.char_instr(node.codepoint));
      },
      .Dot => {
        self._emit(Instr.any_char_instr());
      },
      .CharClass => {
        idx := self._add_class(node.ranges, node.negated);
        self._emit(Instr.char_class_instr(idx));
      },
      .Anchor =>
        match(node.anchor,
          .Start => { self._emit(Instr.assert_start_instr()); },
          .End => { self._emit(Instr.assert_end_instr()); },
          .WordBoundary => { self._emit(Instr.assert_word_boundary_instr()); },
          .NonWordBoundary => { self._emit(Instr.assert_non_word_boundary_instr()); }
        ),
      .Sequence => {
        // Children are compiled back to back, in order.
        i := usize(0);
        while (i < node.children.len()), (i = (i + usize(1))), {
          child := node.children.get(i).unwrap();
          recur(self, child);
        };
      },
      .Alternation => {
        // Layout (exactly two children are read):
        //   split_pc:    split left_start, right_start
        //   left_start:  <left>
        //   jump_pc:     jump end_pc
        //   right_start: <right>
        //   end_pc:
        left := node.children.get(usize(0)).unwrap();
        right := node.children.get(usize(1)).unwrap();
        split_pc := self._emit(Instr.split_instr(usize(0), usize(0)));
        left_start := self._current_pc();
        recur(self, left);
        jump_pc := self._emit(Instr.jump_instr(usize(0)));
        right_start := self._current_pc();
        // Both split targets are known only now, so the split is patched once.
        self._program.instructions.set(split_pc, Instr.split_instr(left_start, right_start));
        recur(self, right);
        end_pc := self._current_pc();
        self._program.instructions.set(jump_pc, Instr.jump_instr(end_pc));
      },
      .Quantifier => {
        child := node.children.get(usize(0)).unwrap();
        min_val := node.q_min;
        max_val := node.q_max;
        greedy := node.q_greedy;

        // Emit the `min` mandatory copies, remembering where the last copy
        // starts so an unbounded tail can loop back to it.
        (last_body_start : usize) = self._current_pc();
        qi := usize(0);
        while (qi < min_val), (qi = (qi + usize(1))), {
          last_body_start = self._current_pc();
          recur(self, child);
        };

        // NOTE(review): `max_val == 0` is used as the "unbounded" marker, so a
        // node with min = 0 and max = 0 is compiled as `*`; a literal `{0}`
        // cannot be expressed with this encoding — confirm the parser never
        // produces it.
        cond(
          ((max_val == usize(0)) && (min_val == usize(0))) => {
            // * — zero or more:
            //   l1: split (greedy: l2, l3 / lazy: l3, l2)
            //   l2: <child>
            //       jump l1
            //   l3:
            l1 := self._current_pc();
            split_pc := self._emit(Instr.split_instr(usize(0), usize(0)));
            l2 := self._current_pc();
            recur(self, child);
            self._emit(Instr.jump_instr(l1));
            l3 := self._current_pc();
            cond(
              greedy => {
                self._program.instructions.set(split_pc, Instr.split_instr(l2, l3));
              },
              true => {
                self._program.instructions.set(split_pc, Instr.split_instr(l3, l2));
              }
            );
          },
          ((max_val == usize(0)) && (min_val > usize(0))) => {
            // {min,} — unbounded tail: instead of emitting another body, a
            // split after the mandatory copies loops back to the last copy.
            split_pc := self._emit(Instr.split_instr(usize(0), usize(0)));
            l2 := self._current_pc();
            cond(
              greedy => {
                self._program.instructions.set(split_pc, Instr.split_instr(last_body_start, l2));
              },
              true => {
                self._program.instructions.set(split_pc, Instr.split_instr(l2, last_body_start));
              }
            );
          },
          true => {
            // {min, max} — emit (max - min) optional copies, each guarded by
            // its own split that can skip just that copy.
            // The subtraction is unsigned: clamp to zero when max <= min so a
            // malformed node (max < min) cannot underflow into an enormous
            // copy count.
            remaining := cond(
              (max_val > min_val) => (max_val - min_val),
              true => usize(0)
            );
            qj := usize(0);
            while (qj < remaining), (qj = (qj + usize(1))), {
              split_pc := self._emit(Instr.split_instr(usize(0), usize(0)));
              body_start := self._current_pc();
              recur(self, child);
              after := self._current_pc();
              cond(
                greedy => {
                  self._program.instructions.set(split_pc, Instr.split_instr(body_start, after));
                },
                true => {
                  self._program.instructions.set(split_pc, Instr.split_instr(after, body_start));
                }
              );
            };
          }
        );
      },
      .Group => {
        // Capturing group: bracket the child with Save instructions for
        // slots 2*group_index (start) and 2*group_index + 1 (end).
        child := node.children.get(usize(0)).unwrap();
        start_slot := (node.group_index * usize(2));
        end_slot := ((node.group_index * usize(2)) + usize(1));
        self._emit(Instr.save_instr(start_slot));
        recur(self, child);
        self._emit(Instr.save_instr(end_slot));
      },
      .NonCapturingGroup => {
        // No instructions of its own; only the child is compiled.
        child := node.children.get(usize(0)).unwrap();
        recur(self, child);
      },
      .Backreference => {
        self._emit(Instr.backref_instr(node.group_index));
      },
      .Lookahead => {
        // Layout:
        //   la_pc:     lookahead sub_start, sub_end, positive
        //   sub_start: <child>
        //              match
        //   sub_end:
        child := node.children.get(usize(0)).unwrap();
        positive := (!(node.negated));
        // Emitted with placeholder bounds; patched below.
        la_pc := self._emit(Instr.lookahead_instr(usize(0), usize(0), positive));
        sub_start := self._current_pc();
        recur(self, child);
        self._emit(Instr.match_instr());
        sub_end := self._current_pc();
        self._program.instructions.set(la_pc, Instr.lookahead_instr(sub_start, sub_end, positive));
      },
      .Lookbehind => {
        // Same layout as Lookahead, with a Lookbehind header instruction.
        child := node.children.get(usize(0)).unwrap();
        positive := (!(node.negated));
        lb_pc := self._emit(Instr.lookbehind_instr(usize(0), usize(0), positive));
        sub_start := self._current_pc();
        recur(self, child);
        self._emit(Instr.match_instr());
        sub_end := self._current_pc();
        self._program.instructions.set(lb_pc, Instr.lookbehind_instr(sub_start, sub_end, positive));
      }
    );
  })
);
|
|
300
|
+
|
|
301
|
+
// Literal prefix extraction (defined ahead of `compile`, which calls it).
impl(NfaCompiler,
  // Record the run of ASCII literal characters the program starts with, for
  // use as a fast-scan prefix.
  //
  // Scans instructions from pc 0. Save and AssertStart instructions are
  // stepped over wherever they occur in the run; each Char whose code point
  // is below 0x80 contributes one byte. The scan stops at the first
  // non-ASCII Char or at any other instruction kind. The result (possibly
  // empty) replaces `self._program.literal_prefix`.
  _extract_literal_prefix : (fn(self : Self) -> unit)({
    code := self._program.instructions;
    collected := ArrayList(u8).new();
    pc := usize(0);
    (scanning : bool) = true;

    while (scanning && (pc < code.len())), {
      cur := code.get(pc).unwrap();
      match(cur.kind,
        .Char => {
          cond(
            // Only single-byte (ASCII) code points can be stored as one u8.
            (cur.codepoint > u32(0x7F)) => { scanning = false; },
            true => {
              collected.push(u8(cur.codepoint));
              pc = (pc + usize(1));
            }
          );
        },
        .Save => { pc = (pc + usize(1)); },
        .AssertStart => { pc = (pc + usize(1)); },
        _ => { scanning = false; }
      );
    };

    self._program.literal_prefix = collected;
  })
);
|
|
333
|
+
|
|
334
|
+
// Public entry point of the compiler.
impl(NfaCompiler,
  // Compile the AST rooted at `root` and return the finished program.
  //
  // Emitted layout:
  //   save 0
  //   <root>
  //   save 1
  //   match
  // `n_groups` and `group_names` are stored on the program unchanged, and the
  // literal prefix is derived from the emitted instructions.
  compile : (fn(self : Self, root : RegexNode, n_groups : usize, group_names : ArrayList(GroupNameEntry)) -> NfaProgram)({
    // Instruction stream: the whole pattern is bracketed by slots 0 and 1.
    self._emit(Instr.save_instr(usize(0)));
    self._compile_node(root);
    self._emit(Instr.save_instr(usize(1)));
    self._emit(Instr.match_instr());

    // Derived data and caller-supplied metadata.
    self._extract_literal_prefix();
    self._program.group_names = group_names;
    self._program.n_groups = n_groups;

    self._program
  })
);
|
|
347
|
+
|
|
348
|
+
export
|
|
349
|
+
NfaCompiler,
|
|
350
|
+
NfaProgram,
|
|
351
|
+
Instr,
|
|
352
|
+
InstrKind,
|
|
353
|
+
ClassEntry,
|
|
354
|
+
GroupNameEntry
|
|
355
|
+
;
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
// std/regex/flags.yo - RegexFlags parsing and representation
|
|
2
|
+
//
|
|
3
|
+
// Regex flags follow JavaScript syntax: "gi", "ms", "iu", etc.
|
|
4
|
+
//
|
|
5
|
+
// Supported flags:
|
|
6
|
+
// g - global: match all occurrences
|
|
7
|
+
// i - ignoreCase: case-insensitive matching
|
|
8
|
+
// m - multiline: ^ and $ match line boundaries
|
|
9
|
+
// s - dotAll: . matches newline characters
|
|
10
|
+
// u - unicode: full Unicode matching
|
|
11
|
+
// y - sticky: match from lastIndex only
|
|
12
|
+
|
|
13
|
+
open import "std/string";
|
|
14
|
+
open import "std/collections/array_list";
|
|
15
|
+
|
|
16
|
+
// Parsed set of regex flags; one boolean per supported flag letter.
RegexFlags :: struct(
  global      : bool,  // g
  ignore_case : bool,  // i
  multiline   : bool,  // m
  dot_all     : bool,  // s
  unicode     : bool,  // u
  sticky      : bool   // y
);

impl(RegexFlags,
  // Flags value with every flag turned off.
  default : (fn() -> Self)(
    Self(
      global: false,
      ignore_case: false,
      multiline: false,
      dot_all: false,
      unicode: false,
      sticky: false
    )
  ),

  // Parse a flag string such as `gi` or `ms`.
  //
  // Each byte must be one of g, i, m, s, u, y and may appear at most once.
  // Returns .Ok with the corresponding fields set, or .Err with a message on
  // the first duplicate or unrecognised byte. An empty string yields the
  // default (all-false) flags.
  parse : (fn(flags_str: String) -> Result(Self, String))({
    flags := Self.default();
    raw := flags_str.as_bytes();
    pos := usize(0);
    while (pos < raw.len()), (pos = (pos + usize(1))), {
      match(raw.get(pos),
        // `pos` is always in range here, so this arm is only a formality.
        .None => (),
        .Some(ch) => {
          cond(
            (ch == u8(103)) => {
              // 'g'
              cond(
                (!(flags.global)) => { flags.global = true; },
                true => { return .Err(`Duplicate flag: g`); }
              );
            },
            (ch == u8(105)) => {
              // 'i'
              cond(
                (!(flags.ignore_case)) => { flags.ignore_case = true; },
                true => { return .Err(`Duplicate flag: i`); }
              );
            },
            (ch == u8(109)) => {
              // 'm'
              cond(
                (!(flags.multiline)) => { flags.multiline = true; },
                true => { return .Err(`Duplicate flag: m`); }
              );
            },
            (ch == u8(115)) => {
              // 's'
              cond(
                (!(flags.dot_all)) => { flags.dot_all = true; },
                true => { return .Err(`Duplicate flag: s`); }
              );
            },
            (ch == u8(117)) => {
              // 'u'
              cond(
                (!(flags.unicode)) => { flags.unicode = true; },
                true => { return .Err(`Duplicate flag: u`); }
              );
            },
            (ch == u8(121)) => {
              // 'y'
              cond(
                (!(flags.sticky)) => { flags.sticky = true; },
                true => { return .Err(`Duplicate flag: y`); }
              );
            },
            true => {
              return .Err(`Invalid flag character`);
            }
          );
        }
      );
    };
    .Ok(flags)
  })
);
|
|
101
|
+
|
|
102
|
+
export
|
|
103
|
+
RegexFlags
|
|
104
|
+
;
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
// std/regex/match.yo - Match result type
|
|
2
|
+
//
|
|
3
|
+
// Represents the result of a regex match, including the matched text,
|
|
4
|
+
// position, and captured groups.
|
|
5
|
+
|
|
6
|
+
open import "std/collections/array_list";
|
|
7
|
+
open import "std/string";
|
|
8
|
+
{ GroupNameEntry } :: import "./node.yo";
|
|
9
|
+
|
|
10
|
+
// Result of one successful regex match.
RegexMatch :: object(
  // Text of the whole match.
  _value       : String,
  // Start position of the match as supplied to `new`.
  // NOTE(review): described elsewhere as a character index — confirm the unit
  // (byte vs. character) against the code that constructs matches.
  _index       : usize,
  // The input string the match was found in.
  _input       : String,
  // Capture groups 1..n, stored at positions 0..n-1.
  _groups      : ArrayList(Option(String)),
  // Name table used by `named_group`.
  _group_names : ArrayList(GroupNameEntry)
);

impl(RegexMatch,
  // Bundle the given pieces into a match result; nothing is computed here.
  new : (fn(value : String, index : usize, input : String, groups : ArrayList(Option(String)), group_names : ArrayList(GroupNameEntry)) -> Self)(
    Self(
      _value: value,
      _index: index,
      _input: input,
      _groups: groups,
      _group_names: group_names
    )
  ),

  // Text of the whole match.
  value : (fn(self : Self) -> String)(
    self._value
  ),

  // Start position of the match.
  index : (fn(self : Self) -> usize)(
    self._index
  ),

  // The original input string.
  input : (fn(self : Self) -> String)(
    self._input
  ),

  // Capture group by number.
  // 0 is the whole match; 1..group_count() are the capture groups. Returns
  // .None for a number past the last group, and otherwise whatever the stored
  // entry is.
  group : (fn(self : Self, idx : usize) -> Option(String))({
    cond(
      (idx == usize(0)) => { return .Some(self._value); },
      true => ()
    );
    // Group n lives at position n - 1 of the stored list.
    stored_at := (idx - usize(1));
    cond(
      (stored_at < self._groups.len()) => self._groups.get(stored_at).unwrap(),
      true => .None
    )
  }),

  // Capture group by name.
  // Looks `name` up in the name table and delegates to `group` with the
  // index of the first matching entry; .None when the name is not present.
  named_group : (fn(self : Self, name : String) -> Option(String))({
    total := self._group_names.len();
    cursor := usize(0);
    while (cursor < total), (cursor = (cursor + usize(1))), {
      candidate := self._group_names.get(cursor).unwrap();
      cond(
        (candidate.name == name) => {
          return self.group(candidate.index);
        },
        true => ()
      );
    };
    .None
  }),

  // Number of capture groups, not counting group 0 (the whole match).
  group_count : (fn(self : Self) -> usize)(
    self._groups.len()
  )
);
|
|
80
|
+
|
|
81
|
+
export
|
|
82
|
+
RegexMatch
|
|
83
|
+
;
|