@shd101wyy/yo 0.1.26 → 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/skills/yo-async-effects/SKILL.md +4 -4
- package/.github/skills/yo-async-effects/async-effects-recipes.md +34 -34
- package/.github/skills/yo-core-patterns/SKILL.md +1 -1
- package/.github/skills/yo-core-patterns/core-patterns-cheatsheet.md +26 -26
- package/.github/skills/yo-project-workflow/SKILL.md +6 -3
- package/.github/skills/yo-project-workflow/workflow-cheatsheet.md +34 -11
- package/.github/skills/yo-syntax/SKILL.md +7 -6
- package/.github/skills/yo-syntax/syntax-cheatsheet.md +73 -60
- package/.github/skills/yo-wasm-integration/wasm-integration-cheatsheet.md +3 -3
- package/README.md +10 -8
- package/out/cjs/index.cjs +456 -438
- package/out/cjs/yo-cli.cjs +576 -543
- package/out/cjs/yo-lsp.cjs +559 -532
- package/out/esm/index.mjs +281 -263
- package/out/types/src/formatter.d.ts +11 -0
- package/out/types/src/lsp/formatting.d.ts +2 -0
- package/out/types/src/tests/formatter.test.d.ts +1 -0
- package/out/types/tsconfig.tsbuildinfo +1 -1
- package/package.json +1 -1
- package/std/alg/hash.yo +13 -21
- package/std/allocator.yo +25 -40
- package/std/async.yo +3 -7
- package/std/build.yo +105 -151
- package/std/cli/arg_parser.yo +184 -169
- package/std/collections/array_list.yo +350 -314
- package/std/collections/btree_map.yo +142 -131
- package/std/collections/deque.yo +132 -128
- package/std/collections/hash_map.yo +542 -566
- package/std/collections/hash_set.yo +623 -687
- package/std/collections/linked_list.yo +275 -293
- package/std/collections/ordered_map.yo +113 -85
- package/std/collections/priority_queue.yo +73 -73
- package/std/crypto/md5.yo +191 -95
- package/std/crypto/random.yo +56 -64
- package/std/crypto/sha256.yo +151 -107
- package/std/encoding/base64.yo +87 -81
- package/std/encoding/hex.yo +43 -50
- package/std/encoding/html.yo +56 -81
- package/std/encoding/html_char_utils.yo +7 -13
- package/std/encoding/html_entities.yo +2248 -2253
- package/std/encoding/json.yo +316 -224
- package/std/encoding/punycode.yo +86 -116
- package/std/encoding/toml.yo +67 -66
- package/std/encoding/utf16.yo +37 -44
- package/std/env.yo +62 -91
- package/std/error.yo +7 -15
- package/std/fmt/display.yo +5 -9
- package/std/fmt/index.yo +8 -14
- package/std/fmt/to_string.yo +330 -315
- package/std/fmt/writer.yo +58 -87
- package/std/fs/dir.yo +83 -102
- package/std/fs/file.yo +147 -180
- package/std/fs/metadata.yo +45 -78
- package/std/fs/temp.yo +55 -65
- package/std/fs/types.yo +27 -40
- package/std/fs/walker.yo +53 -68
- package/std/gc.yo +5 -8
- package/std/glob.yo +30 -43
- package/std/http/client.yo +107 -120
- package/std/http/http.yo +106 -96
- package/std/http/index.yo +4 -6
- package/std/imm/list.yo +88 -93
- package/std/imm/map.yo +528 -464
- package/std/imm/set.yo +52 -57
- package/std/imm/sorted_map.yo +340 -286
- package/std/imm/sorted_set.yo +57 -63
- package/std/imm/string.yo +404 -345
- package/std/imm/vec.yo +173 -181
- package/std/io/reader.yo +3 -6
- package/std/io/writer.yo +4 -8
- package/std/libc/assert.yo +5 -9
- package/std/libc/ctype.yo +32 -22
- package/std/libc/dirent.yo +26 -25
- package/std/libc/errno.yo +164 -90
- package/std/libc/fcntl.yo +52 -45
- package/std/libc/float.yo +66 -44
- package/std/libc/limits.yo +42 -33
- package/std/libc/math.yo +53 -82
- package/std/libc/signal.yo +72 -47
- package/std/libc/stdatomic.yo +217 -188
- package/std/libc/stdint.yo +5 -29
- package/std/libc/stdio.yo +5 -29
- package/std/libc/stdlib.yo +32 -39
- package/std/libc/string.yo +5 -23
- package/std/libc/sys/stat.yo +58 -56
- package/std/libc/time.yo +5 -19
- package/std/libc/unistd.yo +5 -20
- package/std/libc/wctype.yo +6 -9
- package/std/libc/windows.yo +26 -30
- package/std/log.yo +41 -55
- package/std/net/addr.yo +102 -97
- package/std/net/dns.yo +27 -28
- package/std/net/errors.yo +50 -49
- package/std/net/tcp.yo +113 -124
- package/std/net/udp.yo +55 -66
- package/std/os/env.yo +35 -33
- package/std/os/signal.yo +15 -25
- package/std/path.yo +276 -311
- package/std/prelude.yo +6304 -4315
- package/std/process/command.yo +87 -103
- package/std/process/index.yo +12 -31
- package/std/regex/compiler.yo +196 -95
- package/std/regex/flags.yo +58 -39
- package/std/regex/index.yo +157 -173
- package/std/regex/match.yo +20 -31
- package/std/regex/node.yo +134 -152
- package/std/regex/parser.yo +283 -259
- package/std/regex/unicode.yo +172 -202
- package/std/regex/vm.yo +155 -171
- package/std/string/index.yo +5 -7
- package/std/string/rune.yo +45 -55
- package/std/string/string.yo +937 -964
- package/std/string/string_builder.yo +94 -104
- package/std/string/unicode.yo +46 -64
- package/std/sync/channel.yo +72 -73
- package/std/sync/cond.yo +31 -36
- package/std/sync/mutex.yo +30 -32
- package/std/sync/once.yo +13 -16
- package/std/sync/rwlock.yo +26 -31
- package/std/sync/waitgroup.yo +20 -25
- package/std/sys/advise.yo +16 -24
- package/std/sys/bufio/buf_reader.yo +77 -93
- package/std/sys/bufio/buf_writer.yo +52 -65
- package/std/sys/clock.yo +4 -9
- package/std/sys/constants.yo +77 -61
- package/std/sys/copy.yo +4 -10
- package/std/sys/dir.yo +26 -43
- package/std/sys/dns.yo +41 -61
- package/std/sys/errors.yo +95 -103
- package/std/sys/events.yo +45 -57
- package/std/sys/externs.yo +319 -267
- package/std/sys/fallocate.yo +7 -11
- package/std/sys/fcntl.yo +14 -22
- package/std/sys/file.yo +26 -40
- package/std/sys/future.yo +5 -8
- package/std/sys/iov.yo +12 -25
- package/std/sys/lock.yo +12 -13
- package/std/sys/mmap.yo +38 -43
- package/std/sys/path.yo +3 -8
- package/std/sys/perm.yo +7 -21
- package/std/sys/pipe.yo +5 -12
- package/std/sys/process.yo +23 -29
- package/std/sys/seek.yo +10 -12
- package/std/sys/signal.yo +7 -13
- package/std/sys/signals.yo +52 -35
- package/std/sys/socket.yo +63 -58
- package/std/sys/socketpair.yo +3 -6
- package/std/sys/sockinfo.yo +11 -20
- package/std/sys/statfs.yo +11 -34
- package/std/sys/statx.yo +25 -52
- package/std/sys/sysinfo.yo +15 -20
- package/std/sys/tcp.yo +62 -92
- package/std/sys/temp.yo +5 -9
- package/std/sys/time.yo +5 -15
- package/std/sys/timer.yo +6 -11
- package/std/sys/tty.yo +10 -18
- package/std/sys/udp.yo +22 -39
- package/std/sys/umask.yo +3 -6
- package/std/sys/unix.yo +33 -52
- package/std/testing/bench.yo +49 -52
- package/std/thread.yo +10 -15
- package/std/time/datetime.yo +105 -89
- package/std/time/duration.yo +43 -56
- package/std/time/instant.yo +13 -18
- package/std/time/sleep.yo +5 -9
- package/std/url/index.yo +184 -209
- package/std/worker.yo +6 -10
package/std/regex/parser.yo
CHANGED
|
@@ -1,54 +1,49 @@
|
|
|
1
1
|
//! Regex pattern parser — parses a regex pattern string into an AST
|
|
2
2
|
//! of `RegexNode` objects. Uses an iterative stack-based approach.
|
|
3
|
-
|
|
4
|
-
open
|
|
5
|
-
|
|
6
|
-
{
|
|
7
|
-
{ unicode_property_ranges } :: import "./unicode.yo";
|
|
8
|
-
|
|
3
|
+
open(import("std/collections/array_list"));
|
|
4
|
+
open(import("std/string"));
|
|
5
|
+
{ RegexNode, NodeKind, CharRange, AnchorKind, GroupNameEntry } :: import("./node.yo");
|
|
6
|
+
{ unicode_property_ranges } :: import("./unicode.yo");
|
|
9
7
|
// A parse frame for tracking alternation/sequence state
|
|
10
8
|
ParseFrame :: struct(
|
|
11
9
|
alternatives : ArrayList(ArrayList(RegexNode)),
|
|
12
|
-
current
|
|
10
|
+
current : ArrayList(RegexNode),
|
|
13
11
|
is_non_capturing : bool,
|
|
14
|
-
group_index
|
|
15
|
-
is_lookahead
|
|
12
|
+
group_index : usize,
|
|
13
|
+
is_lookahead : bool,
|
|
16
14
|
is_lookbehind : bool,
|
|
17
|
-
is_positive
|
|
15
|
+
is_positive : bool
|
|
18
16
|
);
|
|
19
|
-
|
|
20
17
|
// Parser state object
|
|
21
18
|
RegexParser :: object(
|
|
22
|
-
_source
|
|
23
|
-
_bytes
|
|
24
|
-
_pos
|
|
19
|
+
_source : String,
|
|
20
|
+
_bytes : ArrayList(u8),
|
|
21
|
+
_pos : usize,
|
|
25
22
|
_group_count : usize,
|
|
26
23
|
_group_names : ArrayList(GroupNameEntry)
|
|
27
24
|
);
|
|
28
|
-
|
|
29
25
|
// First impl block: utility + leaf parsers (defined bottom-up)
|
|
30
|
-
impl(
|
|
26
|
+
impl(
|
|
27
|
+
RegexParser,
|
|
31
28
|
new : (fn(pattern : String) -> Self)(
|
|
32
29
|
Self(
|
|
33
|
-
_source: pattern,
|
|
34
|
-
_bytes: pattern.as_bytes(),
|
|
35
|
-
_pos: usize(0),
|
|
36
|
-
_group_count: usize(0),
|
|
37
|
-
_group_names: ArrayList(GroupNameEntry).new()
|
|
30
|
+
_source : pattern,
|
|
31
|
+
_bytes : pattern.as_bytes(),
|
|
32
|
+
_pos : usize(0),
|
|
33
|
+
_group_count : usize(0),
|
|
34
|
+
_group_names : ArrayList(GroupNameEntry).new()
|
|
38
35
|
)
|
|
39
36
|
),
|
|
40
|
-
|
|
41
37
|
_peek : (fn(self : Self) -> Option(u8))(
|
|
42
38
|
cond(
|
|
43
39
|
(self._pos < self._bytes.len()) => self._bytes.get(self._pos),
|
|
44
|
-
true
|
|
40
|
+
true =>.None
|
|
45
41
|
)
|
|
46
42
|
),
|
|
47
|
-
|
|
48
43
|
_advance : (fn(self : Self) -> Option(u8))({
|
|
49
44
|
cond(
|
|
50
45
|
(self._pos >= self._bytes.len()) => {
|
|
51
|
-
return
|
|
46
|
+
return(.None);
|
|
52
47
|
},
|
|
53
48
|
true => ()
|
|
54
49
|
);
|
|
@@ -56,7 +51,6 @@ impl(RegexParser,
|
|
|
56
51
|
self._pos = (self._pos + usize(1));
|
|
57
52
|
b
|
|
58
53
|
}),
|
|
59
|
-
|
|
60
54
|
// Decode a full UTF-8 codepoint given the first byte (already consumed by _advance).
|
|
61
55
|
// Reads 0-3 continuation bytes from self._pos and advances past them.
|
|
62
56
|
_read_codepoint : (fn(self : Self, first : u8) -> u32)(
|
|
@@ -65,48 +59,45 @@ impl(RegexParser,
|
|
|
65
59
|
((first >= u8(0xC0)) && (first < u8(0xE0))) => {
|
|
66
60
|
second := self._bytes.get(self._pos).unwrap();
|
|
67
61
|
self._pos = (self._pos + usize(1));
|
|
68
|
-
((
|
|
62
|
+
((u32(first) & u32(0x1F)) << u32(6)) | (u32(second) & u32(0x3F))
|
|
69
63
|
},
|
|
70
64
|
((first >= u8(0xE0)) && (first < u8(0xF0))) => {
|
|
71
65
|
second := self._bytes.get(self._pos).unwrap();
|
|
72
|
-
third := self._bytes.get(
|
|
66
|
+
third := self._bytes.get(self._pos + usize(1)).unwrap();
|
|
73
67
|
self._pos = (self._pos + usize(2));
|
|
74
|
-
(((
|
|
68
|
+
(((u32(first) & u32(0x0F)) << u32(12)) | ((u32(second) & u32(0x3F)) << u32(6))) | (u32(third) & u32(0x3F))
|
|
75
69
|
},
|
|
76
70
|
true => {
|
|
77
71
|
second := self._bytes.get(self._pos).unwrap();
|
|
78
|
-
third := self._bytes.get(
|
|
79
|
-
fourth := self._bytes.get(
|
|
72
|
+
third := self._bytes.get(self._pos + usize(1)).unwrap();
|
|
73
|
+
fourth := self._bytes.get(self._pos + usize(2)).unwrap();
|
|
80
74
|
self._pos = (self._pos + usize(3));
|
|
81
|
-
((((
|
|
75
|
+
((((u32(first) & u32(0x07)) << u32(18)) | ((u32(second) & u32(0x3F)) << u32(12))) | ((u32(third) & u32(0x3F)) << u32(6))) | (u32(fourth) & u32(0x3F))
|
|
82
76
|
}
|
|
83
77
|
)
|
|
84
78
|
),
|
|
85
|
-
|
|
86
79
|
_at_end : (fn(self : Self) -> bool)(
|
|
87
|
-
|
|
80
|
+
self._pos >= self._bytes.len()
|
|
88
81
|
),
|
|
89
|
-
|
|
90
82
|
group_count : (fn(self : Self) -> usize)(
|
|
91
83
|
self._group_count
|
|
92
84
|
),
|
|
93
|
-
|
|
94
85
|
group_names : (fn(self : Self) -> ArrayList(GroupNameEntry))(
|
|
95
86
|
self._group_names
|
|
96
87
|
),
|
|
97
|
-
|
|
98
88
|
_lookup_group_name : (fn(self : Self, name : String) -> Option(usize))({
|
|
99
89
|
i := usize(0);
|
|
100
|
-
while
|
|
90
|
+
while(i < self._group_names.len(), i = (i + usize(1)), {
|
|
101
91
|
entry := self._group_names.get(i).unwrap();
|
|
102
92
|
cond(
|
|
103
|
-
(entry.name == name) => {
|
|
93
|
+
(entry.name == name) => {
|
|
94
|
+
return(.Some(entry.index));
|
|
95
|
+
},
|
|
104
96
|
true => ()
|
|
105
97
|
);
|
|
106
|
-
};
|
|
98
|
+
});
|
|
107
99
|
.None
|
|
108
100
|
}),
|
|
109
|
-
|
|
110
101
|
_escape_char_codepoint : (fn(self : Self, ch : u8) -> u32)(
|
|
111
102
|
cond(
|
|
112
103
|
(ch == u8(110)) => u32(10),
|
|
@@ -114,16 +105,16 @@ impl(RegexParser,
|
|
|
114
105
|
(ch == u8(114)) => u32(13),
|
|
115
106
|
(ch == u8(102)) => u32(12),
|
|
116
107
|
(ch == u8(118)) => u32(11),
|
|
117
|
-
(ch == u8(48))
|
|
108
|
+
(ch == u8(48)) => u32(0),
|
|
118
109
|
true => u32(ch)
|
|
119
110
|
)
|
|
120
111
|
),
|
|
121
|
-
|
|
122
112
|
_parse_number : (fn(self : Self) -> Option(usize))({
|
|
123
113
|
start := self._pos;
|
|
124
|
-
while
|
|
114
|
+
while(!(self._at_end()), {
|
|
125
115
|
pk := self._peek();
|
|
126
|
-
is_digit := match(
|
|
116
|
+
is_digit := match(
|
|
117
|
+
pk,
|
|
127
118
|
.Some(b) => ((b >= u8(48)) && (b <= u8(57))),
|
|
128
119
|
.None => false
|
|
129
120
|
);
|
|
@@ -131,135 +122,140 @@ impl(RegexParser,
|
|
|
131
122
|
is_digit => {
|
|
132
123
|
self._pos = (self._pos + usize(1));
|
|
133
124
|
},
|
|
134
|
-
true => {
|
|
125
|
+
true => {
|
|
126
|
+
break;
|
|
127
|
+
}
|
|
135
128
|
);
|
|
136
|
-
};
|
|
129
|
+
});
|
|
137
130
|
cond(
|
|
138
131
|
(self._pos == start) => {
|
|
139
|
-
return
|
|
132
|
+
return(.None);
|
|
140
133
|
},
|
|
141
134
|
true => ()
|
|
142
135
|
);
|
|
143
136
|
result := usize(0);
|
|
144
137
|
i := start;
|
|
145
|
-
while
|
|
138
|
+
while(i < self._pos, i = (i + usize(1)), {
|
|
146
139
|
d := self._bytes.get(i).unwrap();
|
|
147
|
-
result = ((result * usize(10)) + usize(
|
|
148
|
-
};
|
|
140
|
+
result = ((result * usize(10)) + usize(d - u8(48)));
|
|
141
|
+
});
|
|
149
142
|
.Some(result)
|
|
150
143
|
}),
|
|
151
|
-
|
|
152
144
|
_parse_greedy_modifier : (fn(self : Self) -> bool)({
|
|
153
145
|
pk := self._peek();
|
|
154
|
-
is_question := match(
|
|
146
|
+
is_question := match(
|
|
147
|
+
pk,
|
|
155
148
|
.Some(b) => (b == u8(63)),
|
|
156
149
|
.None => false
|
|
157
150
|
);
|
|
158
151
|
cond(
|
|
159
152
|
is_question => {
|
|
160
153
|
self._pos = (self._pos + usize(1));
|
|
161
|
-
return
|
|
154
|
+
return(false);
|
|
162
155
|
},
|
|
163
156
|
true => ()
|
|
164
157
|
);
|
|
165
158
|
true
|
|
166
159
|
}),
|
|
167
|
-
|
|
168
160
|
_make_digit_ranges : (fn(self : Self) -> ArrayList(CharRange))({
|
|
169
161
|
r := ArrayList(CharRange).new();
|
|
170
|
-
r.push(CharRange(low: u32(48), high: u32(57)));
|
|
162
|
+
r.push(CharRange(low : u32(48), high : u32(57)));
|
|
171
163
|
r
|
|
172
164
|
}),
|
|
173
|
-
|
|
174
165
|
_make_word_ranges : (fn(self : Self) -> ArrayList(CharRange))({
|
|
175
166
|
r := ArrayList(CharRange).new();
|
|
176
|
-
r.push(CharRange(low: u32(48), high: u32(57)));
|
|
177
|
-
r.push(CharRange(low: u32(65), high: u32(90)));
|
|
178
|
-
r.push(CharRange(low: u32(95), high: u32(95)));
|
|
179
|
-
r.push(CharRange(low: u32(97), high: u32(122)));
|
|
167
|
+
r.push(CharRange(low : u32(48), high : u32(57)));
|
|
168
|
+
r.push(CharRange(low : u32(65), high : u32(90)));
|
|
169
|
+
r.push(CharRange(low : u32(95), high : u32(95)));
|
|
170
|
+
r.push(CharRange(low : u32(97), high : u32(122)));
|
|
180
171
|
r
|
|
181
172
|
}),
|
|
182
|
-
|
|
183
173
|
_make_space_ranges : (fn(self : Self) -> ArrayList(CharRange))({
|
|
184
174
|
r := ArrayList(CharRange).new();
|
|
185
|
-
r.push(CharRange(low: u32(9), high: u32(13)));
|
|
186
|
-
r.push(CharRange(low: u32(32), high: u32(32)));
|
|
175
|
+
r.push(CharRange(low : u32(9), high : u32(13)));
|
|
176
|
+
r.push(CharRange(low : u32(32), high : u32(32)));
|
|
187
177
|
r
|
|
188
178
|
}),
|
|
189
|
-
|
|
190
179
|
// Parse \xHH hex escape — reads exactly 2 hex digits and returns the codepoint.
|
|
191
180
|
_parse_hex_byte : (fn(self : Self) -> Option(u32))({
|
|
192
|
-
if((
|
|
181
|
+
if((self._pos + usize(2)) > self._bytes.len(), {
|
|
182
|
+
return(.None);
|
|
183
|
+
});
|
|
193
184
|
(h1 : u8) = self._bytes.get(self._pos).unwrap();
|
|
194
|
-
(h2 : u8) = self._bytes.get(
|
|
185
|
+
(h2 : u8) = self._bytes.get(self._pos + usize(1)).unwrap();
|
|
195
186
|
(v1 : i32) = cond(
|
|
196
187
|
((h1 >= u8(48)) && (h1 <= u8(57))) => (i32(h1) - i32(48)),
|
|
197
188
|
((h1 >= u8(65)) && (h1 <= u8(70))) => ((i32(h1) - i32(65)) + i32(10)),
|
|
198
189
|
((h1 >= u8(97)) && (h1 <= u8(102))) => ((i32(h1) - i32(97)) + i32(10)),
|
|
199
|
-
true => {
|
|
190
|
+
true => {
|
|
191
|
+
return(.None);
|
|
192
|
+
}
|
|
200
193
|
);
|
|
201
194
|
(v2 : i32) = cond(
|
|
202
195
|
((h2 >= u8(48)) && (h2 <= u8(57))) => (i32(h2) - i32(48)),
|
|
203
196
|
((h2 >= u8(65)) && (h2 <= u8(70))) => ((i32(h2) - i32(65)) + i32(10)),
|
|
204
197
|
((h2 >= u8(97)) && (h2 <= u8(102))) => ((i32(h2) - i32(97)) + i32(10)),
|
|
205
|
-
true => {
|
|
198
|
+
true => {
|
|
199
|
+
return(.None);
|
|
200
|
+
}
|
|
206
201
|
);
|
|
207
202
|
self._pos = (self._pos + usize(2));
|
|
208
|
-
.Some(u32((
|
|
203
|
+
.Some(u32((v1 << i32(4)) | v2))
|
|
209
204
|
}),
|
|
210
|
-
|
|
211
205
|
_parse_class_escape : (fn(self : Self) -> Result(ArrayList(CharRange), String))({
|
|
212
206
|
b := self._advance();
|
|
213
|
-
match(
|
|
207
|
+
match(
|
|
208
|
+
b,
|
|
214
209
|
.Some(ch) =>
|
|
215
210
|
cond(
|
|
216
|
-
(ch == u8(100))
|
|
211
|
+
(ch == u8(100)) =>.Ok(self._make_digit_ranges()),
|
|
217
212
|
(ch == u8(68)) => {
|
|
218
213
|
r := ArrayList(CharRange).new();
|
|
219
|
-
r.push(CharRange(low: u32(0), high: u32(47)));
|
|
220
|
-
r.push(CharRange(low: u32(58), high: u32(0x10FFFF)));
|
|
214
|
+
r.push(CharRange(low : u32(0), high : u32(47)));
|
|
215
|
+
r.push(CharRange(low : u32(58), high : u32(0x10FFFF)));
|
|
221
216
|
.Ok(r)
|
|
222
217
|
},
|
|
223
|
-
(ch == u8(119))
|
|
218
|
+
(ch == u8(119)) =>.Ok(self._make_word_ranges()),
|
|
224
219
|
(ch == u8(87)) => {
|
|
225
220
|
r := ArrayList(CharRange).new();
|
|
226
|
-
r.push(CharRange(low: u32(0), high: u32(47)));
|
|
227
|
-
r.push(CharRange(low: u32(58), high: u32(64)));
|
|
228
|
-
r.push(CharRange(low: u32(91), high: u32(94)));
|
|
229
|
-
r.push(CharRange(low: u32(96), high: u32(96)));
|
|
230
|
-
r.push(CharRange(low: u32(123), high: u32(0x10FFFF)));
|
|
221
|
+
r.push(CharRange(low : u32(0), high : u32(47)));
|
|
222
|
+
r.push(CharRange(low : u32(58), high : u32(64)));
|
|
223
|
+
r.push(CharRange(low : u32(91), high : u32(94)));
|
|
224
|
+
r.push(CharRange(low : u32(96), high : u32(96)));
|
|
225
|
+
r.push(CharRange(low : u32(123), high : u32(0x10FFFF)));
|
|
231
226
|
.Ok(r)
|
|
232
227
|
},
|
|
233
|
-
(ch == u8(115))
|
|
228
|
+
(ch == u8(115)) =>.Ok(self._make_space_ranges()),
|
|
234
229
|
(ch == u8(83)) => {
|
|
235
230
|
r := ArrayList(CharRange).new();
|
|
236
|
-
r.push(CharRange(low: u32(0), high: u32(8)));
|
|
237
|
-
r.push(CharRange(low: u32(14), high: u32(31)));
|
|
238
|
-
r.push(CharRange(low: u32(33), high: u32(0x10FFFF)));
|
|
231
|
+
r.push(CharRange(low : u32(0), high : u32(8)));
|
|
232
|
+
r.push(CharRange(low : u32(14), high : u32(31)));
|
|
233
|
+
r.push(CharRange(low : u32(33), high : u32(0x10FFFF)));
|
|
239
234
|
.Ok(r)
|
|
240
235
|
},
|
|
241
236
|
(ch == u8(120)) => {
|
|
242
237
|
r := ArrayList(CharRange).new();
|
|
243
|
-
match(
|
|
244
|
-
.
|
|
245
|
-
.
|
|
238
|
+
match(
|
|
239
|
+
self._parse_hex_byte(),
|
|
240
|
+
.Some(v) => r.push(CharRange(low : v, high : v)),
|
|
241
|
+
.None => r.push(CharRange(low : u32(ch), high : u32(ch)))
|
|
246
242
|
);
|
|
247
243
|
.Ok(r)
|
|
248
244
|
},
|
|
249
245
|
true => {
|
|
250
246
|
r := ArrayList(CharRange).new();
|
|
251
247
|
codepoint := self._escape_char_codepoint(ch);
|
|
252
|
-
r.push(CharRange(low: codepoint, high: codepoint));
|
|
248
|
+
r.push(CharRange(low : codepoint, high : codepoint));
|
|
253
249
|
.Ok(r)
|
|
254
250
|
}
|
|
255
251
|
),
|
|
256
|
-
.None
|
|
252
|
+
.None =>.Err(`Unexpected end of pattern after backslash`)
|
|
257
253
|
)
|
|
258
254
|
}),
|
|
259
|
-
|
|
260
255
|
_try_parse_char_range : (fn(self : Self, ranges : ArrayList(CharRange), low : u32) -> unit)({
|
|
261
256
|
pk3 := self._peek();
|
|
262
|
-
is_dash := match(
|
|
257
|
+
is_dash := match(
|
|
258
|
+
pk3,
|
|
263
259
|
.Some(b2) => (b2 == u8(45)),
|
|
264
260
|
.None => false
|
|
265
261
|
);
|
|
@@ -268,33 +264,34 @@ impl(RegexParser,
|
|
|
268
264
|
has_end := ((self._pos + usize(1)) < self._bytes.len());
|
|
269
265
|
cond(
|
|
270
266
|
has_end => {
|
|
271
|
-
end_first := self._bytes.get(
|
|
267
|
+
end_first := self._bytes.get(self._pos + usize(1)).unwrap();
|
|
272
268
|
cond(
|
|
273
269
|
(end_first == u8(93)) => {
|
|
274
|
-
ranges.push(CharRange(low: low, high: low));
|
|
270
|
+
ranges.push(CharRange(low : low, high : low));
|
|
275
271
|
},
|
|
276
272
|
(end_first == u8(92)) => {
|
|
277
273
|
// High end is an escape sequence (e.g. \x20, \0, \n)
|
|
278
274
|
self._pos = (self._pos + usize(1));
|
|
279
275
|
self._pos = (self._pos + usize(1));
|
|
280
276
|
esc := self._parse_class_escape();
|
|
281
|
-
match(
|
|
277
|
+
match(
|
|
278
|
+
esc,
|
|
282
279
|
.Ok(esc_ranges) => {
|
|
283
|
-
if((
|
|
280
|
+
if((esc_ranges.len() == usize(1)) && (esc_ranges.get(usize(0)).unwrap().low == esc_ranges.get(usize(0)).unwrap().high), {
|
|
284
281
|
(high : u32) = esc_ranges.get(usize(0)).unwrap().low;
|
|
285
|
-
ranges.push(CharRange(low: low, high: high));
|
|
282
|
+
ranges.push(CharRange(low : low, high : high));
|
|
286
283
|
}, {
|
|
287
284
|
// Multi-range escape like \d can't be range endpoint; treat dash as literal
|
|
288
|
-
ranges.push(CharRange(low: low, high: low));
|
|
289
|
-
ranges.push(CharRange(low: u32(45), high: u32(45)));
|
|
285
|
+
ranges.push(CharRange(low : low, high : low));
|
|
286
|
+
ranges.push(CharRange(low : u32(45), high : u32(45)));
|
|
290
287
|
j := usize(0);
|
|
291
|
-
while
|
|
288
|
+
while(j < esc_ranges.len(), j = (j + usize(1)), {
|
|
292
289
|
ranges.push(esc_ranges.get(j).unwrap());
|
|
293
|
-
};
|
|
290
|
+
});
|
|
294
291
|
});
|
|
295
292
|
},
|
|
296
293
|
.Err(_e) => {
|
|
297
|
-
ranges.push(CharRange(low: low, high: low));
|
|
294
|
+
ranges.push(CharRange(low : low, high : low));
|
|
298
295
|
}
|
|
299
296
|
);
|
|
300
297
|
},
|
|
@@ -305,49 +302,53 @@ impl(RegexParser,
|
|
|
305
302
|
self._pos = (self._pos + usize(1));
|
|
306
303
|
// Decode full codepoint (reads continuation bytes)
|
|
307
304
|
end_cp := self._read_codepoint(end_first);
|
|
308
|
-
ranges.push(CharRange(low: low, high: end_cp));
|
|
305
|
+
ranges.push(CharRange(low : low, high : end_cp));
|
|
309
306
|
}
|
|
310
307
|
);
|
|
311
308
|
},
|
|
312
309
|
true => {
|
|
313
|
-
ranges.push(CharRange(low: low, high: low));
|
|
310
|
+
ranges.push(CharRange(low : low, high : low));
|
|
314
311
|
}
|
|
315
312
|
);
|
|
316
313
|
},
|
|
317
314
|
true => {
|
|
318
|
-
ranges.push(CharRange(low: low, high: low));
|
|
315
|
+
ranges.push(CharRange(low : low, high : low));
|
|
319
316
|
}
|
|
320
317
|
);
|
|
321
318
|
}),
|
|
322
|
-
|
|
323
319
|
_parse_char_class_content : (fn(self : Self, ranges : ArrayList(CharRange)) -> Result(unit, String))({
|
|
324
320
|
pk2 := self._peek();
|
|
325
|
-
b := match(
|
|
321
|
+
b := match(
|
|
322
|
+
pk2,
|
|
326
323
|
.Some(v) => v,
|
|
327
|
-
.None => {
|
|
324
|
+
.None => {
|
|
325
|
+
return(.Err(`Unterminated character class`));
|
|
326
|
+
}
|
|
328
327
|
);
|
|
329
|
-
|
|
330
328
|
cond(
|
|
331
329
|
(b == u8(93)) => {
|
|
332
|
-
return
|
|
330
|
+
return(.Ok(()));
|
|
333
331
|
},
|
|
334
332
|
(b == u8(92)) => {
|
|
335
333
|
self._pos = (self._pos + usize(1));
|
|
336
334
|
esc := self._parse_class_escape();
|
|
337
|
-
match(
|
|
335
|
+
match(
|
|
336
|
+
esc,
|
|
338
337
|
.Ok(esc_ranges) => {
|
|
339
338
|
// If escape produced a single codepoint, check for range (e.g. \0-\x20)
|
|
340
|
-
if((
|
|
339
|
+
if((esc_ranges.len() == usize(1)) && (esc_ranges.get(usize(0)).unwrap().low == esc_ranges.get(usize(0)).unwrap().high), {
|
|
341
340
|
(low : u32) = esc_ranges.get(usize(0)).unwrap().low;
|
|
342
341
|
self._try_parse_char_range(ranges, low);
|
|
343
342
|
}, {
|
|
344
343
|
j := usize(0);
|
|
345
|
-
while
|
|
344
|
+
while(j < esc_ranges.len(), j = (j + usize(1)), {
|
|
346
345
|
ranges.push(esc_ranges.get(j).unwrap());
|
|
347
|
-
};
|
|
346
|
+
});
|
|
348
347
|
});
|
|
349
348
|
},
|
|
350
|
-
.Err(e) => {
|
|
349
|
+
.Err(e) => {
|
|
350
|
+
return(.Err(e));
|
|
351
|
+
}
|
|
351
352
|
);
|
|
352
353
|
},
|
|
353
354
|
true => {
|
|
@@ -358,14 +359,13 @@ impl(RegexParser,
|
|
|
358
359
|
);
|
|
359
360
|
.Ok(())
|
|
360
361
|
}),
|
|
361
|
-
|
|
362
362
|
_parse_char_class : (fn(self : Self) -> Result(RegexNode, String))({
|
|
363
363
|
ranges := ArrayList(CharRange).new();
|
|
364
364
|
negated := false;
|
|
365
|
-
|
|
366
365
|
// Check for negation '^'
|
|
367
366
|
pk := self._peek();
|
|
368
|
-
is_neg := match(
|
|
367
|
+
is_neg := match(
|
|
368
|
+
pk,
|
|
369
369
|
.Some(b) => (b == u8(94)),
|
|
370
370
|
.None => false
|
|
371
371
|
);
|
|
@@ -376,64 +376,71 @@ impl(RegexParser,
|
|
|
376
376
|
},
|
|
377
377
|
true => ()
|
|
378
378
|
);
|
|
379
|
-
|
|
380
379
|
// Parse class contents until ']'
|
|
381
|
-
while
|
|
380
|
+
while(!(self._at_end()), {
|
|
382
381
|
pk2 := self._peek();
|
|
383
|
-
found_end := match(
|
|
382
|
+
found_end := match(
|
|
383
|
+
pk2,
|
|
384
384
|
.Some(b) => (b == u8(93)),
|
|
385
385
|
.None => false
|
|
386
386
|
);
|
|
387
387
|
cond(
|
|
388
388
|
found_end => {
|
|
389
389
|
self._pos = (self._pos + usize(1));
|
|
390
|
-
return
|
|
390
|
+
return(.Ok(RegexNode.char_class(ranges, negated)));
|
|
391
391
|
},
|
|
392
392
|
true => ()
|
|
393
393
|
);
|
|
394
|
-
|
|
395
394
|
result := self._parse_char_class_content(ranges);
|
|
396
|
-
match(
|
|
395
|
+
match(
|
|
396
|
+
result,
|
|
397
397
|
.Ok(_) => (),
|
|
398
|
-
.Err(e) => {
|
|
398
|
+
.Err(e) => {
|
|
399
|
+
return(.Err(e));
|
|
400
|
+
}
|
|
399
401
|
);
|
|
400
|
-
};
|
|
402
|
+
});
|
|
401
403
|
.Err(`Unterminated character class`)
|
|
402
404
|
}),
|
|
403
|
-
|
|
404
405
|
// Parse \p{PropertyName} or \P{PropertyName} unicode property escape.
|
|
405
406
|
// negated=true for \P (inverted match).
|
|
406
407
|
_parse_unicode_property : (fn(self : Self, negated : bool) -> Result(RegexNode, String))({
|
|
407
408
|
// Expect opening '{'
|
|
408
409
|
pk := self._peek();
|
|
409
|
-
is_brace := match(
|
|
410
|
+
is_brace := match(
|
|
411
|
+
pk,
|
|
410
412
|
.Some(v) => (v == u8(123)),
|
|
411
413
|
.None => false
|
|
412
414
|
);
|
|
413
415
|
cond(
|
|
414
416
|
(!(is_brace)) => {
|
|
415
|
-
return
|
|
417
|
+
return(.Err(`Expected '{' after \\p or \\P`));
|
|
416
418
|
},
|
|
417
419
|
true => ()
|
|
418
420
|
);
|
|
419
421
|
self._pos = (self._pos + usize(1));
|
|
420
|
-
|
|
421
422
|
// Read property name until '}'
|
|
422
423
|
name_bytes := ArrayList(u8).new();
|
|
423
|
-
while
|
|
424
|
+
while(!(self._at_end()), {
|
|
424
425
|
nb := self._peek();
|
|
425
|
-
nb_val := match(
|
|
426
|
+
nb_val := match(
|
|
427
|
+
nb,
|
|
426
428
|
.Some(v) => v,
|
|
427
|
-
.None => {
|
|
429
|
+
.None => {
|
|
430
|
+
break;
|
|
431
|
+
}
|
|
428
432
|
);
|
|
429
433
|
cond(
|
|
430
434
|
(nb_val == u8(125)) => {
|
|
431
435
|
self._pos = (self._pos + usize(1));
|
|
432
436
|
name_str := String.from_bytes(name_bytes);
|
|
433
437
|
lookup := unicode_property_ranges(name_str);
|
|
434
|
-
return
|
|
435
|
-
|
|
436
|
-
|
|
438
|
+
return(
|
|
439
|
+
match(
|
|
440
|
+
lookup,
|
|
441
|
+
.Some(ranges) =>.Ok(RegexNode.char_class(ranges, negated)),
|
|
442
|
+
.None =>.Err(`Unknown Unicode property: \\p{${name_str}}`)
|
|
443
|
+
)
|
|
437
444
|
);
|
|
438
445
|
},
|
|
439
446
|
true => {
|
|
@@ -441,61 +448,68 @@ impl(RegexParser,
|
|
|
441
448
|
self._pos = (self._pos + usize(1));
|
|
442
449
|
}
|
|
443
450
|
);
|
|
444
|
-
};
|
|
451
|
+
});
|
|
445
452
|
.Err(`Unterminated Unicode property \\p{...}`)
|
|
446
453
|
}),
|
|
447
|
-
|
|
448
454
|
_parse_escape : (fn(self : Self) -> Result(RegexNode, String))({
|
|
449
455
|
b := self._advance();
|
|
450
|
-
match(
|
|
456
|
+
match(
|
|
457
|
+
b,
|
|
451
458
|
.Some(ch) =>
|
|
452
459
|
cond(
|
|
453
|
-
(ch == u8(100))
|
|
454
|
-
(ch == u8(68))
|
|
455
|
-
(ch == u8(119))
|
|
456
|
-
(ch == u8(87))
|
|
457
|
-
(ch == u8(115))
|
|
458
|
-
(ch == u8(83))
|
|
459
|
-
(ch == u8(98))
|
|
460
|
-
(ch == u8(66))
|
|
460
|
+
(ch == u8(100)) =>.Ok(RegexNode.char_class(self._make_digit_ranges(), false)),
|
|
461
|
+
(ch == u8(68)) =>.Ok(RegexNode.char_class(self._make_digit_ranges(), true)),
|
|
462
|
+
(ch == u8(119)) =>.Ok(RegexNode.char_class(self._make_word_ranges(), false)),
|
|
463
|
+
(ch == u8(87)) =>.Ok(RegexNode.char_class(self._make_word_ranges(), true)),
|
|
464
|
+
(ch == u8(115)) =>.Ok(RegexNode.char_class(self._make_space_ranges(), false)),
|
|
465
|
+
(ch == u8(83)) =>.Ok(RegexNode.char_class(self._make_space_ranges(), true)),
|
|
466
|
+
(ch == u8(98)) =>.Ok(RegexNode.anchor_node(.WordBoundary)),
|
|
467
|
+
(ch == u8(66)) =>.Ok(RegexNode.anchor_node(.NonWordBoundary)),
|
|
461
468
|
// Numeric backreference \1 through \9
|
|
462
469
|
((ch >= u8(49)) && (ch <= u8(57))) => {
|
|
463
|
-
group_idx := usize(
|
|
470
|
+
group_idx := usize(ch - u8(48));
|
|
464
471
|
cond(
|
|
465
472
|
(group_idx > self._group_count) =>
|
|
466
473
|
.Err(`Backreference \\${group_idx} exceeds number of groups`),
|
|
467
|
-
true
|
|
474
|
+
true =>.Ok(RegexNode.backreference(group_idx))
|
|
468
475
|
)
|
|
469
476
|
},
|
|
470
477
|
// Named backreference \k<name>
|
|
471
478
|
(ch == u8(107)) => {
|
|
472
479
|
pk := self._peek();
|
|
473
|
-
is_lt := match(
|
|
480
|
+
is_lt := match(
|
|
481
|
+
pk,
|
|
474
482
|
.Some(v) => (v == u8(60)),
|
|
475
483
|
.None => false
|
|
476
484
|
);
|
|
477
485
|
cond(
|
|
478
486
|
(!(is_lt)) => {
|
|
479
|
-
return
|
|
487
|
+
return(.Err(`Expected '<' after \\k`));
|
|
480
488
|
},
|
|
481
489
|
true => ()
|
|
482
490
|
);
|
|
483
491
|
self._pos = (self._pos + usize(1));
|
|
484
492
|
name_bytes := ArrayList(u8).new();
|
|
485
|
-
while
|
|
493
|
+
while(!(self._at_end()), {
|
|
486
494
|
nb := self._peek();
|
|
487
|
-
nb_val := match(
|
|
495
|
+
nb_val := match(
|
|
496
|
+
nb,
|
|
488
497
|
.Some(v) => v,
|
|
489
|
-
.None => {
|
|
498
|
+
.None => {
|
|
499
|
+
break;
|
|
500
|
+
}
|
|
490
501
|
);
|
|
491
502
|
cond(
|
|
492
503
|
(nb_val == u8(62)) => {
|
|
493
504
|
self._pos = (self._pos + usize(1));
|
|
494
505
|
name_str := String.from_bytes(name_bytes);
|
|
495
506
|
lookup := self._lookup_group_name(name_str);
|
|
496
|
-
return
|
|
497
|
-
|
|
498
|
-
|
|
507
|
+
return(
|
|
508
|
+
match(
|
|
509
|
+
lookup,
|
|
510
|
+
.Some(idx) =>.Ok(RegexNode.backreference(idx)),
|
|
511
|
+
.None =>.Err(`Unknown named group in backreference`)
|
|
512
|
+
)
|
|
499
513
|
);
|
|
500
514
|
},
|
|
501
515
|
true => {
|
|
@@ -503,7 +517,7 @@ impl(RegexParser,
|
|
|
503
517
|
self._pos = (self._pos + usize(1));
|
|
504
518
|
}
|
|
505
519
|
);
|
|
506
|
-
};
|
|
520
|
+
});
|
|
507
521
|
.Err(`Unterminated named backreference \\k<...>`)
|
|
508
522
|
},
|
|
509
523
|
// Unicode property \p{Name}
|
|
@@ -511,117 +525,123 @@ impl(RegexParser,
|
|
|
511
525
|
// Negated unicode property \P{Name}
|
|
512
526
|
(ch == u8(80)) => self._parse_unicode_property(true),
|
|
513
527
|
// Hex escape \xHH
|
|
514
|
-
(ch == u8(120)) => match(
|
|
515
|
-
.
|
|
516
|
-
.
|
|
528
|
+
(ch == u8(120)) => match(
|
|
529
|
+
self._parse_hex_byte(),
|
|
530
|
+
.Some(v) =>.Ok(RegexNode.literal(v)),
|
|
531
|
+
.None =>.Ok(RegexNode.literal(u32(ch)))
|
|
517
532
|
),
|
|
518
|
-
true
|
|
533
|
+
true =>.Ok(RegexNode.literal(self._escape_char_codepoint(ch)))
|
|
519
534
|
),
|
|
520
|
-
.None
|
|
535
|
+
.None =>.Err(`Unexpected end of pattern after backslash`)
|
|
521
536
|
)
|
|
522
537
|
}),
|
|
523
|
-
|
|
524
538
|
_parse_counted_quantifier : (fn(self : Self, atom : RegexNode) -> Result(RegexNode, String))({
|
|
525
539
|
self._pos = (self._pos + usize(1));
|
|
526
540
|
min_opt := self._parse_number();
|
|
527
|
-
mn := match(
|
|
541
|
+
mn := match(
|
|
542
|
+
min_opt,
|
|
528
543
|
.Some(v) => v,
|
|
529
|
-
.None => {
|
|
544
|
+
.None => {
|
|
545
|
+
return(.Err(`Expected number after '{'`));
|
|
546
|
+
}
|
|
530
547
|
);
|
|
531
|
-
|
|
532
548
|
pk := self._peek();
|
|
533
|
-
b := match(
|
|
549
|
+
b := match(
|
|
550
|
+
pk,
|
|
534
551
|
.Some(v) => v,
|
|
535
|
-
.None => {
|
|
552
|
+
.None => {
|
|
553
|
+
return(.Err(`Unexpected end of pattern in quantifier`));
|
|
554
|
+
}
|
|
536
555
|
);
|
|
537
|
-
|
|
538
556
|
cond(
|
|
539
557
|
(b == u8(125)) => {
|
|
540
558
|
// {n}
|
|
541
559
|
self._pos = (self._pos + usize(1));
|
|
542
560
|
greedy := self._parse_greedy_modifier();
|
|
543
|
-
return
|
|
561
|
+
return(.Ok(RegexNode.quantifier(atom, mn, mn, greedy)));
|
|
544
562
|
},
|
|
545
563
|
(b != u8(44)) => {
|
|
546
|
-
return
|
|
564
|
+
return(.Err(`Expected ',' or '}' in quantifier`));
|
|
547
565
|
},
|
|
548
566
|
true => ()
|
|
549
567
|
);
|
|
550
|
-
|
|
551
568
|
// Consume ','
|
|
552
569
|
self._pos = (self._pos + usize(1));
|
|
553
|
-
|
|
554
570
|
pk2 := self._peek();
|
|
555
|
-
b2 := match(
|
|
571
|
+
b2 := match(
|
|
572
|
+
pk2,
|
|
556
573
|
.Some(v) => v,
|
|
557
|
-
.None => {
|
|
574
|
+
.None => {
|
|
575
|
+
return(.Err(`Unexpected end of pattern in quantifier`));
|
|
576
|
+
}
|
|
558
577
|
);
|
|
559
|
-
|
|
560
578
|
cond(
|
|
561
579
|
(b2 == u8(125)) => {
|
|
562
580
|
// {n,} — unbounded
|
|
563
581
|
self._pos = (self._pos + usize(1));
|
|
564
582
|
greedy := self._parse_greedy_modifier();
|
|
565
|
-
return
|
|
583
|
+
return(.Ok(RegexNode.quantifier(atom, mn, usize(0), greedy)));
|
|
566
584
|
},
|
|
567
585
|
true => ()
|
|
568
586
|
);
|
|
569
|
-
|
|
570
587
|
// {n,m}
|
|
571
588
|
max_opt := self._parse_number();
|
|
572
|
-
mx := match(
|
|
589
|
+
mx := match(
|
|
590
|
+
max_opt,
|
|
573
591
|
.Some(v) => v,
|
|
574
|
-
.None => {
|
|
592
|
+
.None => {
|
|
593
|
+
return(.Err(`Expected number after ',' in quantifier`));
|
|
594
|
+
}
|
|
575
595
|
);
|
|
576
|
-
|
|
577
596
|
pk3 := self._peek();
|
|
578
|
-
b3 := match(
|
|
597
|
+
b3 := match(
|
|
598
|
+
pk3,
|
|
579
599
|
.Some(v) => v,
|
|
580
|
-
.None => {
|
|
600
|
+
.None => {
|
|
601
|
+
return(.Err(`Unexpected end of pattern in quantifier`));
|
|
602
|
+
}
|
|
581
603
|
);
|
|
582
|
-
|
|
583
604
|
cond(
|
|
584
605
|
(b3 != u8(125)) => {
|
|
585
|
-
return
|
|
606
|
+
return(.Err(`Expected '}' in quantifier`));
|
|
586
607
|
},
|
|
587
608
|
true => ()
|
|
588
609
|
);
|
|
589
610
|
self._pos = (self._pos + usize(1));
|
|
590
|
-
|
|
591
611
|
cond(
|
|
592
612
|
(mx < mn) => {
|
|
593
|
-
return
|
|
613
|
+
return(.Err(`Invalid quantifier: max less than min`));
|
|
594
614
|
},
|
|
595
615
|
true => ()
|
|
596
616
|
);
|
|
597
|
-
|
|
598
617
|
greedy := self._parse_greedy_modifier();
|
|
599
618
|
.Ok(RegexNode.quantifier(atom, mn, mx, greedy))
|
|
600
619
|
}),
|
|
601
|
-
|
|
602
620
|
_parse_atom : (fn(self : Self) -> Result(RegexNode, String))({
|
|
603
621
|
b := self._advance();
|
|
604
|
-
match(
|
|
622
|
+
match(
|
|
623
|
+
b,
|
|
605
624
|
.Some(ch) =>
|
|
606
625
|
cond(
|
|
607
|
-
(ch == u8(46))
|
|
608
|
-
(ch == u8(94))
|
|
609
|
-
(ch == u8(36))
|
|
626
|
+
(ch == u8(46)) =>.Ok(RegexNode.dot()),
|
|
627
|
+
(ch == u8(94)) =>.Ok(RegexNode.anchor_node(.Start)),
|
|
628
|
+
(ch == u8(36)) =>.Ok(RegexNode.anchor_node(.End)),
|
|
610
629
|
(ch == u8(91)) => self._parse_char_class(),
|
|
611
630
|
(ch == u8(92)) => self._parse_escape(),
|
|
612
|
-
true
|
|
631
|
+
true =>.Ok(RegexNode.literal(self._read_codepoint(ch)))
|
|
613
632
|
),
|
|
614
|
-
.None
|
|
633
|
+
.None =>.Err(`Unexpected end of pattern`)
|
|
615
634
|
)
|
|
616
635
|
}),
|
|
617
|
-
|
|
618
636
|
_maybe_quantify : (fn(self : Self, a : RegexNode) -> Result(RegexNode, String))({
|
|
619
637
|
pk := self._peek();
|
|
620
|
-
b := match(
|
|
638
|
+
b := match(
|
|
639
|
+
pk,
|
|
621
640
|
.Some(v) => v,
|
|
622
|
-
.None => {
|
|
641
|
+
.None => {
|
|
642
|
+
return(.Ok(a));
|
|
643
|
+
}
|
|
623
644
|
);
|
|
624
|
-
|
|
625
645
|
cond(
|
|
626
646
|
(b == u8(42)) => {
|
|
627
647
|
self._pos = (self._pos + usize(1));
|
|
@@ -639,21 +659,21 @@ impl(RegexParser,
|
|
|
639
659
|
.Ok(RegexNode.quantifier(a, usize(0), usize(1), greedy))
|
|
640
660
|
},
|
|
641
661
|
(b == u8(123)) => self._parse_counted_quantifier(a),
|
|
642
|
-
true
|
|
662
|
+
true =>.Ok(a)
|
|
643
663
|
)
|
|
644
664
|
}),
|
|
645
|
-
|
|
646
665
|
_parse_quantified : (fn(self : Self) -> Result(RegexNode, String))({
|
|
647
666
|
atom := self._parse_atom();
|
|
648
|
-
match(
|
|
667
|
+
match(
|
|
668
|
+
atom,
|
|
649
669
|
.Ok(a) => self._maybe_quantify(a),
|
|
650
|
-
.Err(e)
|
|
670
|
+
.Err(e) =>.Err(e)
|
|
651
671
|
)
|
|
652
672
|
})
|
|
653
673
|
);
|
|
654
|
-
|
|
655
674
|
// Second impl block: helper methods first, then main parse loop
|
|
656
|
-
impl(
|
|
675
|
+
impl(
|
|
676
|
+
RegexParser,
|
|
657
677
|
_make_sequence : (fn(self : Self, nodes : ArrayList(RegexNode)) -> RegexNode)(
|
|
658
678
|
cond(
|
|
659
679
|
(nodes.len() == usize(0)) => RegexNode.sequence(ArrayList(RegexNode).new()),
|
|
@@ -661,55 +681,55 @@ impl(RegexParser,
|
|
|
661
681
|
true => RegexNode.sequence(nodes)
|
|
662
682
|
)
|
|
663
683
|
),
|
|
664
|
-
|
|
665
684
|
_finalize_frame : (fn(self : Self, alts : ArrayList(ArrayList(RegexNode)), seq : ArrayList(RegexNode)) -> RegexNode)({
|
|
666
685
|
cond(
|
|
667
686
|
(alts.len() == usize(0)) => {
|
|
668
|
-
return
|
|
687
|
+
return(self._make_sequence(seq));
|
|
669
688
|
},
|
|
670
689
|
true => ()
|
|
671
690
|
);
|
|
672
691
|
alts.push(seq);
|
|
673
692
|
result := self._make_sequence(alts.get(usize(0)).unwrap());
|
|
674
693
|
i := usize(1);
|
|
675
|
-
while
|
|
694
|
+
while(i < alts.len(), i = (i + usize(1)), {
|
|
676
695
|
right := self._make_sequence(alts.get(i).unwrap());
|
|
677
696
|
result = RegexNode.alternation(result, right);
|
|
678
|
-
};
|
|
697
|
+
});
|
|
679
698
|
result
|
|
680
699
|
}),
|
|
681
|
-
|
|
682
700
|
parse : (fn(self : Self) -> Result(RegexNode, String))({
|
|
683
701
|
stack := ArrayList(ParseFrame).new();
|
|
684
702
|
cur_alts := ArrayList(ArrayList(RegexNode)).new();
|
|
685
|
-
cur_seq
|
|
703
|
+
cur_seq := ArrayList(RegexNode).new();
|
|
686
704
|
cur_non_cap := false;
|
|
687
705
|
cur_group_idx := usize(0);
|
|
688
706
|
cur_is_la := false;
|
|
689
707
|
cur_is_lb := false;
|
|
690
708
|
cur_is_pos := true;
|
|
691
|
-
|
|
692
|
-
while (!(self._at_end())), {
|
|
709
|
+
while(!(self._at_end()), {
|
|
693
710
|
pk := self._peek();
|
|
694
|
-
b := match(
|
|
711
|
+
b := match(
|
|
712
|
+
pk,
|
|
695
713
|
.Some(v) => v,
|
|
696
|
-
.None => {
|
|
714
|
+
.None => {
|
|
715
|
+
break;
|
|
716
|
+
}
|
|
697
717
|
);
|
|
698
|
-
|
|
699
718
|
cond(
|
|
700
719
|
(b == u8(40)) => {
|
|
701
720
|
// '(' — open group
|
|
702
721
|
self._pos = (self._pos + usize(1));
|
|
703
|
-
stack.push(
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
722
|
+
stack.push(
|
|
723
|
+
ParseFrame(
|
|
724
|
+
alternatives : cur_alts,
|
|
725
|
+
current : cur_seq,
|
|
726
|
+
is_non_capturing : cur_non_cap,
|
|
727
|
+
group_index : cur_group_idx,
|
|
728
|
+
is_lookahead : cur_is_la,
|
|
729
|
+
is_lookbehind : cur_is_lb,
|
|
730
|
+
is_positive : cur_is_pos
|
|
731
|
+
)
|
|
732
|
+
);
|
|
713
733
|
// Detect group type: (?: (?<name> (?= (?! (?<= (?<!
|
|
714
734
|
is_nc := false;
|
|
715
735
|
g_idx := usize(0);
|
|
@@ -717,7 +737,8 @@ impl(RegexParser,
|
|
|
717
737
|
is_lb := false;
|
|
718
738
|
is_pos := true;
|
|
719
739
|
pk2 := self._peek();
|
|
720
|
-
is_question := match(
|
|
740
|
+
is_question := match(
|
|
741
|
+
pk2,
|
|
721
742
|
.Some(v) => (v == u8(63)),
|
|
722
743
|
.None => false
|
|
723
744
|
);
|
|
@@ -726,8 +747,9 @@ impl(RegexParser,
|
|
|
726
747
|
has_next := ((self._pos + usize(1)) < self._bytes.len());
|
|
727
748
|
cond(
|
|
728
749
|
has_next => {
|
|
729
|
-
nb := self._bytes.get(
|
|
730
|
-
next_ch := match(
|
|
750
|
+
nb := self._bytes.get(self._pos + usize(1));
|
|
751
|
+
next_ch := match(
|
|
752
|
+
nb,
|
|
731
753
|
.Some(v) => v,
|
|
732
754
|
.None => u8(0)
|
|
733
755
|
);
|
|
@@ -756,7 +778,8 @@ impl(RegexParser,
|
|
|
756
778
|
// Check char after '<'
|
|
757
779
|
has_third := ((self._pos + usize(2)) < self._bytes.len());
|
|
758
780
|
third_ch := cond(
|
|
759
|
-
has_third => match(
|
|
781
|
+
has_third => match(
|
|
782
|
+
self._bytes.get(self._pos + usize(2)),
|
|
760
783
|
.Some(v) => v,
|
|
761
784
|
.None => u8(0)
|
|
762
785
|
),
|
|
@@ -781,11 +804,14 @@ impl(RegexParser,
|
|
|
781
804
|
true => {
|
|
782
805
|
self._pos = (self._pos + usize(2));
|
|
783
806
|
name_bytes := ArrayList(u8).new();
|
|
784
|
-
while
|
|
807
|
+
while(!(self._at_end()), {
|
|
785
808
|
name_b := self._peek();
|
|
786
|
-
name_ch := match(
|
|
809
|
+
name_ch := match(
|
|
810
|
+
name_b,
|
|
787
811
|
.Some(v) => v,
|
|
788
|
-
.None => {
|
|
812
|
+
.None => {
|
|
813
|
+
break;
|
|
814
|
+
}
|
|
789
815
|
);
|
|
790
816
|
cond(
|
|
791
817
|
(name_ch == u8(62)) => {
|
|
@@ -797,11 +823,11 @@ impl(RegexParser,
|
|
|
797
823
|
self._pos = (self._pos + usize(1));
|
|
798
824
|
}
|
|
799
825
|
);
|
|
800
|
-
};
|
|
826
|
+
});
|
|
801
827
|
self._group_count = (self._group_count + usize(1));
|
|
802
828
|
g_idx = self._group_count;
|
|
803
829
|
group_name := String.from_bytes(name_bytes);
|
|
804
|
-
self._group_names.push(GroupNameEntry(name: group_name, index: g_idx));
|
|
830
|
+
self._group_names.push(GroupNameEntry(name : group_name, index : g_idx));
|
|
805
831
|
}
|
|
806
832
|
);
|
|
807
833
|
},
|
|
@@ -813,7 +839,6 @@ impl(RegexParser,
|
|
|
813
839
|
},
|
|
814
840
|
true => ()
|
|
815
841
|
);
|
|
816
|
-
|
|
817
842
|
cond(
|
|
818
843
|
(((!(is_nc)) && (!(is_la))) && ((!(is_lb)) && (g_idx == usize(0)))) => {
|
|
819
844
|
self._group_count = (self._group_count + usize(1));
|
|
@@ -821,7 +846,6 @@ impl(RegexParser,
|
|
|
821
846
|
},
|
|
822
847
|
true => ()
|
|
823
848
|
);
|
|
824
|
-
|
|
825
849
|
cur_alts = ArrayList(ArrayList(RegexNode)).new();
|
|
826
850
|
cur_seq = ArrayList(RegexNode).new();
|
|
827
851
|
cur_non_cap = is_nc;
|
|
@@ -835,25 +859,21 @@ impl(RegexParser,
|
|
|
835
859
|
self._pos = (self._pos + usize(1));
|
|
836
860
|
cond(
|
|
837
861
|
(stack.len() == usize(0)) => {
|
|
838
|
-
return
|
|
862
|
+
return(.Err(`Unexpected ')' without matching '('`));
|
|
839
863
|
},
|
|
840
864
|
true => ()
|
|
841
865
|
);
|
|
842
|
-
|
|
843
866
|
inner := self._finalize_frame(cur_alts, cur_seq);
|
|
844
|
-
|
|
845
867
|
// Save this group's type before restoring parent frame
|
|
846
868
|
this_is_la := cur_is_la;
|
|
847
869
|
this_is_lb := cur_is_lb;
|
|
848
|
-
|
|
849
870
|
group_node := cond(
|
|
850
871
|
cur_is_la => RegexNode.lookahead(inner, cur_is_pos),
|
|
851
872
|
cur_is_lb => RegexNode.lookbehind(inner, cur_is_pos),
|
|
852
873
|
cur_non_cap => RegexNode.non_capturing_group(inner),
|
|
853
874
|
true => RegexNode.group(inner, cur_group_idx)
|
|
854
875
|
);
|
|
855
|
-
|
|
856
|
-
parent := stack.get((stack.len() - usize(1))).unwrap();
|
|
876
|
+
parent := stack.get(stack.len() - usize(1)).unwrap();
|
|
857
877
|
stack.pop();
|
|
858
878
|
cur_alts = parent.alternatives;
|
|
859
879
|
cur_seq = parent.current;
|
|
@@ -862,7 +882,6 @@ impl(RegexParser,
|
|
|
862
882
|
cur_is_la = parent.is_lookahead;
|
|
863
883
|
cur_is_lb = parent.is_lookbehind;
|
|
864
884
|
cur_is_pos = parent.is_positive;
|
|
865
|
-
|
|
866
885
|
// Lookahead/lookbehind should not be quantified
|
|
867
886
|
cond(
|
|
868
887
|
(this_is_la || this_is_lb) => {
|
|
@@ -870,9 +889,14 @@ impl(RegexParser,
|
|
|
870
889
|
},
|
|
871
890
|
true => {
|
|
872
891
|
quantified := self._maybe_quantify(group_node);
|
|
873
|
-
match(
|
|
874
|
-
|
|
875
|
-
.
|
|
892
|
+
match(
|
|
893
|
+
quantified,
|
|
894
|
+
.Ok(q) => {
|
|
895
|
+
cur_seq.push(q);
|
|
896
|
+
},
|
|
897
|
+
.Err(e) => {
|
|
898
|
+
return(.Err(e));
|
|
899
|
+
}
|
|
876
900
|
);
|
|
877
901
|
}
|
|
878
902
|
);
|
|
@@ -885,26 +909,26 @@ impl(RegexParser,
|
|
|
885
909
|
},
|
|
886
910
|
true => {
|
|
887
911
|
atom := self._parse_quantified();
|
|
888
|
-
match(
|
|
889
|
-
|
|
890
|
-
.
|
|
912
|
+
match(
|
|
913
|
+
atom,
|
|
914
|
+
.Ok(a) => {
|
|
915
|
+
cur_seq.push(a);
|
|
916
|
+
},
|
|
917
|
+
.Err(e) => {
|
|
918
|
+
return(.Err(e));
|
|
919
|
+
}
|
|
891
920
|
);
|
|
892
921
|
}
|
|
893
922
|
);
|
|
894
|
-
};
|
|
895
|
-
|
|
923
|
+
});
|
|
896
924
|
cond(
|
|
897
925
|
(stack.len() > usize(0)) => {
|
|
898
|
-
return
|
|
926
|
+
return(.Err(`Unterminated group — expected ')'`));
|
|
899
927
|
},
|
|
900
928
|
true => ()
|
|
901
929
|
);
|
|
902
|
-
|
|
903
930
|
result := self._finalize_frame(cur_alts, cur_seq);
|
|
904
931
|
.Ok(result)
|
|
905
932
|
})
|
|
906
933
|
);
|
|
907
|
-
|
|
908
|
-
export
|
|
909
|
-
RegexParser
|
|
910
|
-
;
|
|
934
|
+
export(RegexParser);
|