@silverbulletmd/silverbullet 2.4.2 → 2.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -4
- package/client/markdown_parser/constants.ts +2 -2
- package/client/plugos/hooks/code_widget.ts +0 -3
- package/client/plugos/hooks/document_editor.ts +0 -3
- package/client/plugos/hooks/event.ts +1 -1
- package/client/plugos/hooks/mq.ts +1 -1
- package/client/plugos/hooks/plug_namespace.ts +0 -3
- package/client/plugos/hooks/slash_command.ts +2 -2
- package/client/plugos/plug.ts +0 -1
- package/client/plugos/plug_compile.ts +28 -29
- package/client/plugos/proxy_fetch.ts +1 -1
- package/client/plugos/sandboxes/web_worker_sandbox.ts +1 -1
- package/client/plugos/sandboxes/worker_sandbox.ts +2 -3
- package/client/plugos/syscalls/editor.ts +12 -12
- package/client/plugos/syscalls/fetch.ts +1 -1
- package/client/plugos/syscalls/jsonschema.ts +1 -1
- package/client/plugos/syscalls/mq.ts +1 -1
- package/client/plugos/syscalls/space.ts +1 -1
- package/client/plugos/system.ts +2 -2
- package/client/plugos/worker_runtime.ts +8 -30
- package/client/space_lua/aggregates.ts +209 -0
- package/client/space_lua/ast.ts +24 -2
- package/client/space_lua/eval.ts +58 -53
- package/client/space_lua/labels.ts +1 -1
- package/client/space_lua/parse.ts +117 -12
- package/client/space_lua/query_collection.ts +850 -70
- package/client/space_lua/query_env.ts +26 -0
- package/client/space_lua/runtime.ts +47 -17
- package/client/space_lua/stdlib/format.ts +19 -19
- package/client/space_lua/stdlib/math.ts +73 -48
- package/client/space_lua/stdlib/net.ts +2 -2
- package/client/space_lua/stdlib/os.ts +5 -0
- package/client/space_lua/stdlib/pattern.ts +702 -0
- package/client/space_lua/stdlib/prng.ts +145 -0
- package/client/space_lua/stdlib/space_lua.ts +3 -8
- package/client/space_lua/stdlib/string.ts +103 -181
- package/client/space_lua/stdlib/string_pack.ts +486 -0
- package/client/space_lua/stdlib/table.ts +73 -9
- package/client/space_lua/stdlib.ts +38 -14
- package/client/space_lua/tonumber.ts +3 -2
- package/client/space_lua/util.ts +43 -9
- package/dist/plug-compile.js +23 -69
- package/dist/worker_runtime_bundle.js +233 -0
- package/package.json +10 -5
- package/plug-api/constants.ts +0 -32
- package/plug-api/lib/async.ts +2 -2
- package/plug-api/lib/crypto.ts +11 -11
- package/plug-api/lib/json.ts +1 -1
- package/plug-api/lib/limited_map.ts +1 -1
- package/plug-api/lib/native_fetch.ts +2 -0
- package/plug-api/lib/ref.ts +5 -5
- package/plug-api/lib/transclusion.ts +5 -5
- package/plug-api/lib/tree.ts +50 -2
- package/plug-api/lib/yaml.ts +10 -10
- package/plug-api/syscalls/editor.ts +1 -1
- package/plug-api/system_mock.ts +0 -1
- package/client/plugos/sandboxes/deno_worker_sandbox.ts +0 -6
|
@@ -0,0 +1,702 @@
|
|
|
1
|
+
// Lua Optimized Pattern Matching Engine Implementation
|
|
2
|
+
|
|
3
|
+
// Character codes of the bytes that have special meaning in a pattern.

// Escape and grouping
const CH_ESC = 0x25; // '%'
const CH_OPEN = 0x28; // '('
const CH_CLOSE = 0x29; // ')'

// Single-character classes, anchors and sets
const CH_DOT = 0x2e; // '.'
const CH_DOLLAR = 0x24; // '$'
const CH_LBRACKET = 0x5b; // '['
const CH_RBRACKET = 0x5d; // ']'
const CH_CARET = 0x5e; // '^'

// Repetition suffixes ('-' doubles as the range separator inside sets)
const CH_DASH = 0x2d; // '-'
const CH_STAR = 0x2a; // '*'
const CH_PLUS = 0x2b; // '+'
const CH_QUESTION = 0x3f; // '?'

// Digit bounds, used for back-references such as %1
const CH_0 = 0x30; // '0'
const CH_9 = 0x39; // '9'
|
|
17
|
+
|
|
18
|
+
// Character codes that force the full matcher: `noSpecials` reports a
// pattern as plain text only when none of these occur in it.
const SPECIALS_SET = new Set<number>([
  // anchors
  CH_CARET, CH_DOLLAR,
  // repetition suffixes
  CH_STAR, CH_PLUS, CH_QUESTION, CH_DASH,
  // classes, captures, sets and escapes
  CH_DOT, CH_OPEN, CH_LBRACKET, CH_ESC,
]);
|
|
30
|
+
|
|
31
|
+
// Sentinel values stored in `Capture.len` instead of a real length.
const CAP_UNFINISHED = -1; // capture opened with '(' but not yet closed
const CAP_POSITION = -2; // position capture '()'

// Number of capture slots pre-allocated per match state; opening more
// captures than this throws "too many captures".
const MAX_CAPTURES = 32;

// Recursion budget given to every match attempt; `match` throws
// "pattern too complex" once it is used up.
const MAX_MATCH_DEPTH = 200;
|
|
36
|
+
|
|
37
|
+
/** One capture slot of a match state. */
interface Capture {
  /** 0-based index in the subject where the capture starts. */
  init: number;
  /**
   * Length of the captured text, or one of the sentinels
   * `CAP_UNFINISHED` / `CAP_POSITION`.
   */
  len: number;
}
|
|
41
|
+
|
|
42
|
+
/** Mutable state shared by all matcher functions during one pattern run. */
export interface MatchState {
  /** Original subject string, used to cut out result substrings. */
  src: string;
  /** Subject as bytes (one entry per UTF-16 code unit of `src`). */
  s: Uint8Array;
  /** Number of entries in `s`. */
  slen: number;
  /** Pattern as bytes. */
  p: Uint8Array;
  /** Number of entries in `p`. */
  plen: number;
  /** Number of capture slots currently in use. */
  level: number;
  /** Capture slots; pre-allocated with `MAX_CAPTURES` entries. */
  capture: Capture[];
  /** Remaining recursion budget for `match`. */
  matchdepth: number;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Converts a string into a byte array with one entry per UTF-16 code unit,
 * so indices in the result line up with indices in `s`.
 *
 * Only the low 8 bits of each code unit are kept: code units above 0xFF are
 * truncated and become indistinguishable from the Latin-1 character that
 * shares their low byte.
 */
function toBytes(s: string): Uint8Array {
  const bytes = new Uint8Array(s.length);
  let idx = bytes.length;
  while (idx-- > 0) {
    bytes[idx] = s.charCodeAt(idx) & 0xff;
  }
  return bytes;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Tests byte `c` against the class letter `cl` that followed a '%'.
 *
 * Lower-case letters (a, c, d, g, l, p, s, u, w, x, z) select an ASCII
 * class; the upper-case form selects its complement. Any other `cl` is a
 * literal and matches only when it equals `c`.
 */
function matchClass(c: number, cl: number): boolean {
  const upper = c >= 65 && c <= 90;
  const lower = c >= 97 && c <= 122;
  const digit = c >= 48 && c <= 57;
  const printable = c > 32 && c < 127;

  let member: boolean;
  // `| 32` folds an ASCII upper-case class letter onto its lower-case form.
  switch (cl | 32) {
    case 97: // 'a': letters
      member = upper || lower;
      break;
    case 99: // 'c': control characters
      member = c < 32 || c === 127;
      break;
    case 100: // 'd': digits
      member = digit;
      break;
    case 103: // 'g': printable except space
      member = printable;
      break;
    case 108: // 'l': lower-case letters
      member = lower;
      break;
    case 112: // 'p': punctuation = printable and not alphanumeric
      member = printable && !(upper || lower || digit);
      break;
    case 115: // 's': whitespace
      member = c === 32 || (c >= 9 && c <= 13);
      break;
    case 117: // 'u': upper-case letters
      member = upper;
      break;
    case 119: // 'w': alphanumerics
      member = upper || lower || digit;
      break;
    case 120: // 'x': hexadecimal digits
      member = digit || (c >= 65 && c <= 70) || (c >= 97 && c <= 102);
      break;
    case 122: // 'z': the zero byte
      member = c === 0;
      break;
    default:
      // Not a class letter: escaped literal.
      return cl === c;
  }
  // An upper-case class letter negates the class.
  const negated = cl < 97 || cl > 122;
  return negated ? !member : member;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Returns the index just past the single-character class that starts at
 * `p[pi]`: an escape pair (`%x`), a bracket set (`[...]`), or one plain
 * character.
 *
 * @throws Error if the pattern ends right after '%' or a set has no
 *   closing ']'.
 */
function classEnd(p: Uint8Array, plen: number, pi: number): number {
  const first = p[pi];
  let next = pi + 1;
  switch (first) {
    case CH_ESC:
      if (next >= plen) {
        throw new Error("malformed pattern (ends with '%')");
      }
      return next + 1;
    case CH_LBRACKET: {
      if (next < plen && p[next] === CH_CARET) next++;
      // The first set member is consumed unconditionally, so a ']' directly
      // after '[' or '[^' is taken as a literal.
      for (;;) {
        if (next >= plen) {
          throw new Error("malformed pattern (missing ']')");
        }
        // An escape inside the set (e.g. '%]') consumes two bytes.
        next += p[next] === CH_ESC && next + 1 < plen ? 2 : 1;
        if (next >= plen) {
          throw new Error("malformed pattern (missing ']')");
        }
        if (p[next] === CH_RBRACKET) return next + 1;
      }
    }
    default:
      return next;
  }
}
|
|
135
|
+
|
|
136
|
+
/**
 * Tests byte `c` against the bracket set whose '[' is at `p[pi]` and whose
 * closing ']' is at `p[ec]`.
 *
 * Handles a leading '^' (complemented set), `%x` classes, `a-z` ranges and
 * plain characters.
 */
function matchBracketClass(
  c: number,
  p: Uint8Array,
  pi: number,
  ec: number,
): boolean {
  const negated = p[pi + 1] === CH_CARET;
  // Result to report when some member of the set matches `c`.
  const onHit = !negated;
  for (let at = pi + (negated ? 2 : 1); at < ec; at++) {
    const member = p[at];
    if (member === CH_ESC) {
      at++;
      if (matchClass(c, p[at])) return onHit;
      continue;
    }
    // A '-' is a range separator only when an upper bound follows it
    // before the closing bracket.
    if (at + 2 < ec && p[at + 1] === CH_DASH) {
      at += 2;
      if (member <= c && c <= p[at]) return onHit;
      continue;
    }
    if (member === c) return onHit;
  }
  return negated;
}
|
|
165
|
+
|
|
166
|
+
/**
 * Tests whether the subject byte at `si` matches the single-character class
 * that starts at pattern index `pi` and ends just before `ep`.
 * Always false at or past the end of the subject.
 */
function singleMatch(
  ms: MatchState,
  si: number,
  pi: number,
  ep: number,
): boolean {
  if (si >= ms.slen) return false;
  const subject = ms.s[si];
  const head = ms.p[pi];
  switch (head) {
    case CH_DOT:
      return true;
    case CH_ESC:
      return matchClass(subject, ms.p[pi + 1]);
    case CH_LBRACKET:
      // `ep - 1` is the index of the closing ']'.
      return matchBracketClass(subject, ms.p, pi, ep - 1);
    default:
      return head === subject;
  }
}
|
|
184
|
+
|
|
185
|
+
/**
 * Matches a balanced run for `%bxy`, where `pi` is the pattern index of the
 * opening delimiter `x` and `pi + 1` that of the closing delimiter `y`.
 *
 * @returns the subject index just past the balancing closer, or -1 if the
 *   subject does not start with the opener at `si` or never balances.
 * @throws Error if the pattern does not supply both delimiters.
 */
function matchBalance(
  ms: MatchState,
  si: number,
  pi: number,
): number {
  if (pi >= ms.plen - 1) {
    throw new Error("malformed pattern (missing arguments to '%b')");
  }
  const opener = ms.p[pi];
  if (si >= ms.slen || ms.s[si] !== opener) {
    return -1;
  }
  const closer = ms.p[pi + 1];
  let depth = 1;
  for (let at = si + 1; at < ms.slen; at++) {
    const ch = ms.s[at];
    // The closer is tested first, so it wins when both delimiters are equal.
    if (ch === closer) {
      depth--;
      if (depth === 0) return at + 1;
    } else if (ch === opener) {
      depth++;
    }
  }
  return -1;
}
|
|
211
|
+
|
|
212
|
+
/**
 * Greedy repetition: consumes as many bytes matching the class at
 * `pi..ep` as possible starting at `si`, then backs off one byte at a time
 * until the rest of the pattern (from `ep + 1`) matches.
 *
 * @returns the end index of the overall match, or -1.
 */
function maxExpand(
  ms: MatchState,
  si: number,
  pi: number,
  ep: number,
): number {
  let longest = 0;
  while (singleMatch(ms, si + longest, pi, ep)) longest++;
  for (let taken = longest; taken >= 0; taken--) {
    const end = match(ms, si + taken, ep + 1);
    if (end >= 0) return end;
  }
  return -1;
}
|
|
227
|
+
|
|
228
|
+
/**
 * Lazy repetition: tries the rest of the pattern (from `ep + 1`) first and
 * consumes one more byte matching the class at `pi..ep` only when that
 * fails.
 *
 * @returns the end index of the overall match, or -1.
 */
function minExpand(
  ms: MatchState,
  si: number,
  pi: number,
  ep: number,
): number {
  let at = si;
  while (true) {
    const end = match(ms, at, ep + 1);
    if (end >= 0) return end;
    if (!singleMatch(ms, at, pi, ep)) return -1;
    at++;
  }
}
|
|
244
|
+
|
|
245
|
+
/**
 * Converts the digit byte of a back-reference (`'1'`..`'9'`) into a
 * 0-based capture index and checks that it names a closed capture.
 *
 * @throws Error if the index is out of range or the capture is still open.
 */
function checkCapture(ms: MatchState, l: number): number {
  const index = l - (CH_0 + 1);
  const usable = index >= 0 && index < ms.level &&
    ms.capture[index].len !== CAP_UNFINISHED;
  if (!usable) {
    throw new Error(`invalid capture index %${index + 1}`);
  }
  return index;
}
|
|
252
|
+
|
|
253
|
+
/**
 * Finds the most recently opened capture that has not been closed yet.
 *
 * @returns its 0-based slot index.
 * @throws Error if no capture is open.
 */
function captureToClose(ms: MatchState): number {
  let slot = ms.level;
  while (slot-- > 0) {
    if (ms.capture[slot].len === CAP_UNFINISHED) return slot;
  }
  throw new Error("invalid pattern capture");
}
|
|
259
|
+
|
|
260
|
+
/**
 * Opens a new capture at subject index `si` and continues matching the
 * pattern from `pi`.
 *
 * `what` is the initial `len` of the slot: `CAP_UNFINISHED` for a normal
 * capture or `CAP_POSITION` for a position capture. The slot is released
 * again if the rest of the pattern fails to match.
 *
 * @throws Error when all `MAX_CAPTURES` slots are in use.
 */
function startCapture(
  ms: MatchState,
  si: number,
  pi: number,
  what: number,
): number {
  const slot = ms.level;
  if (slot >= MAX_CAPTURES) throw new Error("too many captures");
  const cap = ms.capture[slot];
  cap.init = si;
  cap.len = what;
  ms.level = slot + 1;
  const end = match(ms, si, pi);
  if (end < 0) {
    // Undo the capture on failure.
    ms.level--;
  }
  return end;
}
|
|
275
|
+
|
|
276
|
+
/**
 * Closes the innermost open capture at subject index `si` and continues
 * matching the pattern from `pi`. If the rest of the pattern fails, the
 * capture's previous `len` is restored.
 */
function endCapture(ms: MatchState, si: number, pi: number): number {
  const cap = ms.capture[captureToClose(ms)];
  const previousLen = cap.len;
  cap.len = si - cap.init;
  const end = match(ms, si, pi);
  if (end < 0) {
    cap.len = previousLen;
  }
  return end;
}
|
|
284
|
+
|
|
285
|
+
/**
 * Matches a back-reference (`%1`..`%9`): the subject at `si` must repeat
 * the text of the referenced capture. `l` is the digit's character code.
 *
 * @returns the subject index just past the repeated text, or -1.
 * @throws Error if the reference is invalid or names a position capture.
 */
function matchCapture(ms: MatchState, si: number, l: number): number {
  const idx = checkCapture(ms, l);
  const { init, len } = ms.capture[idx];
  if (len === CAP_POSITION) {
    throw new Error(`invalid capture index %${idx + 1}`);
  }
  // Not enough subject left to hold the captured text.
  if (ms.slen - si < len) return -1;
  let offset = 0;
  while (offset < len) {
    if (ms.s[init + offset] !== ms.s[si + offset]) return -1;
    offset++;
  }
  return si + len;
}
|
|
298
|
+
|
|
299
|
+
/**
 * Matches the pattern from index `pi` against the subject from index `si`.
 *
 * Each call spends one unit of `ms.matchdepth` while it runs and gives it
 * back on every normal return; the budget is not restored when an error is
 * thrown.
 *
 * @returns the subject index just past the match, or -1 if there is none.
 * @throws Error "pattern too complex" when the depth budget is exhausted,
 *   plus any malformed-pattern error raised by the helpers.
 */
function match(ms: MatchState, si: number, pi: number): number {
  if (ms.matchdepth-- <= 0) {
    throw new Error("pattern too complex");
  }
  const end = matchBody(ms, si, pi);
  ms.matchdepth++;
  return end;
}

// Matching loop behind `match`; depth bookkeeping is left to the caller.
function matchBody(ms: MatchState, si: number, pi: number): number {
  for (;;) {
    // End of pattern: everything matched.
    if (pi >= ms.plen) return si;

    const pch = ms.p[pi];

    if (pch === CH_OPEN) {
      // '()' is a position capture, anything else opens a normal capture.
      const isPosition = pi + 1 < ms.plen && ms.p[pi + 1] === CH_CLOSE;
      return isPosition
        ? startCapture(ms, si, pi + 2, CAP_POSITION)
        : startCapture(ms, si, pi + 1, CAP_UNFINISHED);
    }

    if (pch === CH_CLOSE) {
      return endCapture(ms, si, pi + 1);
    }

    // '$' is an anchor only as the very last pattern character.
    if (pch === CH_DOLLAR && pi + 1 === ms.plen) {
      return si === ms.slen ? si : -1;
    }

    if (pch === CH_ESC && pi + 1 < ms.plen) {
      const next = ms.p[pi + 1];

      if (next === 98) { // 'b': balanced delimiters
        si = matchBalance(ms, si, pi + 2);
        if (si < 0) return -1;
        pi += 4;
        continue;
      }

      if (next === 102) { // 'f': frontier
        pi += 2;
        if (pi >= ms.plen || ms.p[pi] !== CH_LBRACKET) {
          throw new Error("missing '[' after '%f' in pattern");
        }
        const setEnd = classEnd(ms.p, ms.plen, pi);
        // Outside the subject a zero byte stands in on either side.
        const previous = si === 0 ? 0 : ms.s[si - 1];
        const current = si < ms.slen ? ms.s[si] : 0;
        const atFrontier =
          !matchBracketClass(previous, ms.p, pi, setEnd - 1) &&
          matchBracketClass(current, ms.p, pi, setEnd - 1);
        if (!atFrontier) return -1;
        pi = setEnd;
        continue;
      }

      if (next >= CH_0 && next <= CH_9) { // back-reference
        si = matchCapture(ms, si, next);
        if (si < 0) return -1;
        pi += 2;
        continue;
      }
      // Any other escape is handled as a single-character class below.
    }

    // default: class[*+?-]?
    const ep = classEnd(ms.p, ms.plen, pi);
    // -1 means "no suffix"; pattern bytes are never negative.
    const suffix = ep < ms.plen ? ms.p[ep] : -1;

    if (!singleMatch(ms, si, pi, ep)) {
      // Suffixes that accept zero repetitions let the pattern continue.
      if (
        suffix === CH_STAR || suffix === CH_QUESTION || suffix === CH_DASH
      ) {
        pi = ep + 1;
        continue;
      }
      return -1;
    }

    switch (suffix) {
      case CH_QUESTION: {
        // Prefer consuming the byte; fall back to skipping the item.
        const end = match(ms, si + 1, ep + 1);
        if (end >= 0) return end;
        pi = ep + 1;
        continue;
      }
      case CH_PLUS:
        // One repetition is already matched; expand from the next byte.
        return maxExpand(ms, si + 1, pi, ep);
      case CH_STAR:
        return maxExpand(ms, si, pi, ep);
      case CH_DASH:
        return minExpand(ms, si, pi, ep);
      default:
        // Plain single match: advance both cursors and keep looping.
        si++;
        pi = ep;
    }
  }
}
|
|
414
|
+
|
|
415
|
+
/**
 * Builds a fresh match state for subject `s` and pattern `p`: both are
 * converted to bytes, all `MAX_CAPTURES` capture slots are pre-allocated
 * and the depth budget is set to `MAX_MATCH_DEPTH`.
 */
function createMatchState(s: string, p: string): MatchState {
  const subject = toBytes(s);
  const patternBytes = toBytes(p);
  const slots = Array.from(
    { length: MAX_CAPTURES },
    (): Capture => ({ init: 0, len: 0 }),
  );
  return {
    src: s,
    s: subject,
    slen: subject.length,
    p: patternBytes,
    plen: patternBytes.length,
    level: 0,
    capture: slots,
    matchdepth: MAX_MATCH_DEPTH,
  };
}
|
|
433
|
+
|
|
434
|
+
/**
 * Prepares `ms` for another match attempt: restores the full depth budget
 * and marks all capture slots as unused. Slot contents are not cleared.
 */
function resetMatchState(ms: MatchState): void {
  ms.matchdepth = MAX_MATCH_DEPTH;
  ms.level = 0;
}
|
|
438
|
+
|
|
439
|
+
/**
 * Returns true when `p` contains none of the characters in `SPECIALS_SET`,
 * i.e. when it can be searched for as plain text.
 */
function noSpecials(p: string): boolean {
  let at = p.length;
  while (at-- > 0) {
    if (SPECIALS_SET.has(p.charCodeAt(at))) return false;
  }
  return true;
}
|
|
445
|
+
|
|
446
|
+
// Public API
|
|
447
|
+
|
|
448
|
+
/**
 * One capture as returned by the public pattern functions: `{ s }` carries
 * captured text, `{ position }` carries the 1-based subject position
 * recorded by a position capture.
 */
export type CaptureResult = { s: string } | { position: number };
|
|
449
|
+
|
|
450
|
+
/**
 * Allocation-light description of a capture before it is turned into a
 * string or a `CaptureResult`.
 */
interface RawCapture {
  /** 0 = string slice, 1 = position capture. */
  kind: 0 | 1;
  /**
   * kind = 0: 0-based start index of the slice in the subject.
   * kind = 1: the captured position itself, 1-based.
   */
  start: number;
  /** kind = 0: length of the slice; kind = 1: unused, always 0. */
  len: number;
}
|
|
455
|
+
|
|
456
|
+
/**
 * Describes capture number `i` (0-based) of the match spanning
 * `matchStart..matchEnd`.
 *
 * When the pattern recorded no capture with that number, index 0 stands for
 * the whole match and any other index is an error.
 *
 * @throws Error for an out-of-range index or a capture that was never
 *   closed.
 */
function getOneRawCapture(
  ms: MatchState,
  i: number,
  matchStart: number,
  matchEnd: number,
): RawCapture {
  if (i < ms.level) {
    const { init, len } = ms.capture[i];
    switch (len) {
      case CAP_UNFINISHED:
        throw new Error("unfinished capture");
      case CAP_POSITION:
        // Positions are reported 1-based.
        return { kind: 1, start: init + 1, len: 0 };
      default:
        return { kind: 0, start: init, len };
    }
  }
  if (i !== 0) {
    throw new Error(`invalid capture index %${i + 1}`);
  }
  return { kind: 0, start: matchStart, len: matchEnd - matchStart };
}
|
|
477
|
+
|
|
478
|
+
/**
 * Converts a raw capture into its public form: a `{ position }` object for
 * position captures, otherwise `{ s }` with the text cut from `ms.src`.
 */
function rawToResult(ms: MatchState, raw: RawCapture): CaptureResult {
  return raw.kind === 1
    ? { position: raw.start }
    : { s: ms.src.substring(raw.start, raw.start + raw.len) };
}
|
|
484
|
+
|
|
485
|
+
/**
 * Collects every capture of the match spanning `matchStart..matchEnd`.
 * A pattern without captures yields a single entry holding the whole match.
 */
function getCaptures(
  ms: MatchState,
  matchStart: number,
  matchEnd: number,
): CaptureResult[] {
  const count = ms.level === 0 ? 1 : ms.level;
  const collected: CaptureResult[] = [];
  let index = 0;
  while (index < count) {
    const raw = getOneRawCapture(ms, index, matchStart, matchEnd);
    collected.push(rawToResult(ms, raw));
    index++;
  }
  return collected;
}
|
|
497
|
+
|
|
498
|
+
/**
 * Renders a raw capture as a string: the decimal position for a position
 * capture, otherwise the captured text cut from `ms.src`.
 */
function getRawCaptureString(ms: MatchState, raw: RawCapture): string {
  return raw.kind === 1
    ? String(raw.start)
    : ms.src.substring(raw.start, raw.start + raw.len);
}
|
|
504
|
+
|
|
505
|
+
/**
 * Finds the first occurrence of `pattern` in `s`.
 *
 * @param s subject string
 * @param pattern Lua-style pattern, or plain text when `plain` is true
 * @param init 1-based start position; values below 1 are treated as 1
 * @param plain when true, `pattern` is searched for literally
 * @returns 1-based inclusive `start`/`end` of the match plus the pattern's
 *   captures (empty when it defines none), or null when nothing matches or
 *   `init` lies beyond `s.length + 1`
 * @throws Error for malformed or too complex patterns
 */
export function patternFind(
  s: string,
  pattern: string,
  init: number = 1,
  plain: boolean = false,
): { start: number; end: number; captures: CaptureResult[] } | null {
  const first = init < 1 ? 1 : init;
  if (first > s.length + 1) return null;
  const begin = first - 1;

  // Patterns without special characters take the plain substring search.
  if (plain || noSpecials(pattern)) {
    const at = s.indexOf(pattern, begin);
    return at < 0
      ? null
      : { start: at + 1, end: at + pattern.length, captures: [] };
  }

  // A leading '^' pins the match to the start position.
  const anchored = pattern.charCodeAt(0) === CH_CARET;
  const ms = createMatchState(s, anchored ? pattern.substring(1) : pattern);
  for (let at = begin; at <= ms.slen; at++) {
    resetMatchState(ms);
    const end = match(ms, at, 0);
    if (end >= 0) {
      const captures = ms.level === 0 ? [] : getCaptures(ms, at, end);
      return { start: at + 1, end, captures };
    }
    if (anchored) break;
  }
  return null;
}
|
|
537
|
+
|
|
538
|
+
/**
 * Matches `pattern` against `s` and returns the captures of the first
 * match.
 *
 * @param s subject string
 * @param pattern Lua-style pattern
 * @param init 1-based start position; values below 1 are treated as 1
 * @returns the captures (the whole match when the pattern defines none),
 *   or null when nothing matches or `init` lies beyond `s.length + 1`
 * @throws Error for malformed or too complex patterns
 */
export function patternMatch(
  s: string,
  pattern: string,
  init: number = 1,
): CaptureResult[] | null {
  const first = init < 1 ? 1 : init;
  if (first > s.length + 1) return null;

  // A leading '^' pins the match to the start position.
  const anchored = pattern.charCodeAt(0) === CH_CARET;
  const ms = createMatchState(s, anchored ? pattern.substring(1) : pattern);
  for (let at = first - 1; at <= ms.slen; at++) {
    resetMatchState(ms);
    const end = match(ms, at, 0);
    if (end >= 0) return getCaptures(ms, at, end);
    if (anchored) break;
  }
  return null;
}
|
|
563
|
+
|
|
564
|
+
/**
 * Creates an iterator over successive matches of `pattern` in `s`.
 *
 * Each call of the returned function yields the captures of the next match
 * (the whole match when the pattern defines none) or null when no further
 * match is found. A match that ends where the previous one ended is
 * skipped. With a leading '^', a call gives up after a single failed
 * attempt.
 *
 * @param init 1-based start position; values below 1 are treated as 1
 */
export function patternGmatch(
  s: string,
  pattern: string,
  init: number = 1,
): () => CaptureResult[] | null {
  const anchored = pattern.charCodeAt(0) === CH_CARET;
  const ms = createMatchState(s, anchored ? pattern.substring(1) : pattern);
  // Next subject index to try, and the end of the last reported match.
  let cursor = (init < 1 ? 1 : init) - 1;
  let lastEnd: number | null = null;

  return () => {
    while (cursor <= ms.slen) {
      resetMatchState(ms);
      const end = match(ms, cursor, 0);
      if (end >= 0 && end !== lastEnd) {
        const found = getCaptures(ms, cursor, end);
        cursor = end;
        lastEnd = end;
        return found;
      }
      cursor++;
      if (anchored) break;
    }
    return null;
  };
}
|
|
595
|
+
|
|
596
|
+
/**
 * Expands a gsub replacement template for the match spanning
 * `matchStart..matchEnd`: `%%` becomes '%', `%0` the whole match and
 * `%1`..`%9` the corresponding capture. All other text is copied verbatim.
 *
 * @throws Error when '%' is followed by anything else or ends the template.
 */
function expandReplacementString(
  repl: string,
  ms: MatchState,
  matchStart: number,
  matchEnd: number,
): string {
  let out = "";
  let pos = 0;
  while (pos < repl.length) {
    const esc = repl.indexOf("%", pos);
    if (esc < 0) {
      // No further escapes: the rest is literal.
      out += repl.substring(pos);
      break;
    }
    if (esc > pos) out += repl.substring(pos, esc);

    const at = esc + 1;
    if (at >= repl.length) {
      throw new Error("invalid use of '%' in replacement string");
    }
    const code = repl.charCodeAt(at);
    if (code === CH_ESC) {
      out += "%";
    } else if (code === CH_0) {
      out += ms.src.substring(matchStart, matchEnd);
    } else if (code >= 49 && code <= CH_9) {
      // 49 is '1', so `code - 49` is the 0-based capture index.
      const raw = getOneRawCapture(ms, code - 49, matchStart, matchEnd);
      out += getRawCaptureString(ms, raw);
    } else {
      throw new Error("invalid use of '%' in replacement string");
    }
    pos = at + 1;
  }
  return out;
}
|
|
636
|
+
|
|
637
|
+
/**
 * Replacement source for `patternGsub`. The fields are consulted in the
 * order `replString`, `replFunction`, `replTable`; the first one present is
 * used.
 */
export interface GsubCallbacks {
  /** Template in which `%0`..`%9` and `%%` are expanded per match. */
  replString?: string;
  /**
   * Called with the captures of each match (the whole match when the
   * pattern defines none). May be async. A null or undefined result keeps
   * the matched text.
   */
  replFunction?: (
    ...captures: CaptureResult[]
  ) => Promise<string | null | undefined> | string | null | undefined;
  /**
   * Lookup keyed by the first capture (the whole match when the pattern
   * defines none) rendered as a string. A null or undefined result keeps
   * the matched text.
   */
  replTable?: (key: string) => string | null | undefined;
}
|
|
644
|
+
|
|
645
|
+
/**
 * Replaces matches of `pattern` in `s`.
 *
 * @param s subject string
 * @param pattern Lua-style pattern; a leading '^' restricts the run to a
 *   single attempt at the start of `s`
 * @param callbacks where replacements come from; `replString` is consulted
 *   first, then `replFunction`, then `replTable`
 * @param maxN maximum number of matches to process; defaults to
 *   `s.length + 1`
 * @returns the resulting string and the number of matches processed
 *   (matches whose text was kept are counted too)
 * @throws Error for malformed or too complex patterns and for invalid
 *   replacement templates
 */
export async function patternGsub(
  s: string,
  pattern: string,
  callbacks: GsubCallbacks,
  maxN?: number,
): Promise<[string, number]> {
  const max_s = maxN !== undefined ? maxN : s.length + 1;
  let p = pattern;
  let anchor = false;
  if (p.length > 0 && p.charCodeAt(0) === CH_CARET) {
    anchor = true;
    p = p.substring(1);
  }
  const ms = createMatchState(s, p);
  let src = 0;
  // End of the previous match; a match ending at the same place is skipped.
  let lastMatch: number | null = null;
  let n = 0;
  const resultParts: string[] = [];

  while (n < max_s) {
    resetMatchState(ms);
    const e = match(ms, src, 0);
    if (e >= 0 && e !== lastMatch) {
      n++;
      let replStr: string | null | undefined;
      if (callbacks.replString !== undefined) {
        replStr = expandReplacementString(callbacks.replString, ms, src, e);
      } else if (callbacks.replFunction) {
        const caps = getCaptures(ms, src, e);
        replStr = await callbacks.replFunction(...caps);
      } else if (callbacks.replTable) {
        const raw = getOneRawCapture(ms, 0, src, e);
        replStr = callbacks.replTable(getRawCaptureString(ms, raw));
      }
      // No replacement available (null/undefined result, or no callback
      // supplied at all): keep the matched text instead of dropping it.
      if (replStr === null || replStr === undefined) {
        replStr = ms.src.substring(src, e);
      }
      resultParts.push(replStr);
      src = e;
      lastMatch = e;
    } else if (src < ms.slen) {
      // No match here: copy one code unit and move on.
      resultParts.push(s[src]);
      src++;
      lastMatch = null;
    } else {
      break;
    }
    if (anchor) break;
  }
  // Copy whatever was left unprocessed.
  if (src < s.length) {
    resultParts.push(s.substring(src));
  }
  return [resultParts.join(""), n];
}
|