@bcts/envelope-pattern 1.0.0-alpha.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +48 -0
- package/README.md +13 -0
- package/dist/index.cjs +6781 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +2628 -0
- package/dist/index.d.cts.map +1 -0
- package/dist/index.d.mts +2628 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.iife.js +6781 -0
- package/dist/index.iife.js.map +1 -0
- package/dist/index.mjs +6545 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +77 -0
- package/src/error.ts +262 -0
- package/src/format.ts +375 -0
- package/src/index.ts +27 -0
- package/src/parse/index.ts +923 -0
- package/src/parse/token.ts +906 -0
- package/src/parse/utils.ts +339 -0
- package/src/pattern/index.ts +719 -0
- package/src/pattern/leaf/array-pattern.ts +273 -0
- package/src/pattern/leaf/bool-pattern.ts +140 -0
- package/src/pattern/leaf/byte-string-pattern.ts +172 -0
- package/src/pattern/leaf/cbor-pattern.ts +355 -0
- package/src/pattern/leaf/date-pattern.ts +178 -0
- package/src/pattern/leaf/index.ts +280 -0
- package/src/pattern/leaf/known-value-pattern.ts +192 -0
- package/src/pattern/leaf/map-pattern.ts +152 -0
- package/src/pattern/leaf/null-pattern.ts +110 -0
- package/src/pattern/leaf/number-pattern.ts +248 -0
- package/src/pattern/leaf/tagged-pattern.ts +228 -0
- package/src/pattern/leaf/text-pattern.ts +165 -0
- package/src/pattern/matcher.ts +88 -0
- package/src/pattern/meta/and-pattern.ts +109 -0
- package/src/pattern/meta/any-pattern.ts +81 -0
- package/src/pattern/meta/capture-pattern.ts +111 -0
- package/src/pattern/meta/group-pattern.ts +110 -0
- package/src/pattern/meta/index.ts +269 -0
- package/src/pattern/meta/not-pattern.ts +91 -0
- package/src/pattern/meta/or-pattern.ts +146 -0
- package/src/pattern/meta/search-pattern.ts +201 -0
- package/src/pattern/meta/traverse-pattern.ts +146 -0
- package/src/pattern/structure/assertions-pattern.ts +244 -0
- package/src/pattern/structure/digest-pattern.ts +225 -0
- package/src/pattern/structure/index.ts +272 -0
- package/src/pattern/structure/leaf-structure-pattern.ts +85 -0
- package/src/pattern/structure/node-pattern.ts +188 -0
- package/src/pattern/structure/object-pattern.ts +149 -0
- package/src/pattern/structure/obscured-pattern.ts +159 -0
- package/src/pattern/structure/predicate-pattern.ts +151 -0
- package/src/pattern/structure/subject-pattern.ts +152 -0
- package/src/pattern/structure/wrapped-pattern.ts +195 -0
- package/src/pattern/vm.ts +1021 -0
|
@@ -0,0 +1,1021 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @bcts/envelope-pattern - VM instructions and executor
|
|
3
|
+
*
|
|
4
|
+
* This is a 1:1 TypeScript port of bc-envelope-pattern-rust vm.rs
|
|
5
|
+
* Tiny Thompson-style VM for walking Gordian Envelope trees.
|
|
6
|
+
*
|
|
7
|
+
* @module envelope-pattern/pattern/vm
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { Envelope } from "@bcts/envelope";
|
|
11
|
+
import type { Quantifier } from "@bcts/dcbor-pattern";
|
|
12
|
+
import { Reluctance } from "@bcts/dcbor-pattern";
|
|
13
|
+
import type { Path } from "../format";
|
|
14
|
+
|
|
15
|
+
/**
 * Build a placeholder for a pattern function that has not been registered yet.
 *
 * Invoking the placeholder throws a descriptive error instead of the opaque
 * "is not a function" TypeError that an uninitialised binding would produce.
 *
 * @param name - Human-readable name of the missing function, used in the message.
 * @returns A function that always throws.
 */
function unregisteredPatternFunction(name: string): () => never {
  return () => {
    throw new Error(
      `envelope-pattern VM: ${name} has not been registered; call registerVMPatternFunctions() first`,
    );
  };
}

// Forward declarations - will be set by pattern/index.ts to avoid circular deps.
// Until registerVMPatternFunctions() runs, each binding throws a clear error.
let _patternPathsWithCaptures: (
  pattern: Pattern,
  env: Envelope,
) => [Path[], Map<string, Path[]>] = unregisteredPatternFunction("pathsWithCaptures");
let _patternMatches: (pattern: Pattern, env: Envelope) => boolean =
  unregisteredPatternFunction("matches");
let _patternPaths: (pattern: Pattern, env: Envelope) => Path[] =
  unregisteredPatternFunction("paths");

/**
 * Register the pattern matching functions to resolve circular dependencies.
 *
 * The VM calls back into the pattern layer (for Search, NotMatch and Repeat
 * instructions) through these three functions.
 *
 * @param pathsWithCaptures - Returns the matching paths plus named captures.
 * @param matches - Returns whether the pattern matches the envelope.
 * @param paths - Returns the matching paths only.
 */
export function registerVMPatternFunctions(
  pathsWithCaptures: (pattern: Pattern, env: Envelope) => [Path[], Map<string, Path[]>],
  matches: (pattern: Pattern, env: Envelope) => boolean,
  paths: (pattern: Pattern, env: Envelope) => Path[],
): void {
  _patternPathsWithCaptures = pathsWithCaptures;
  _patternMatches = matches;
  _patternPaths = paths;
}
|
|
32
|
+
|
|
33
|
+
// Import Pattern type - this creates a circular dependency that we resolve via registration
|
|
34
|
+
import type { Pattern } from "./index";
|
|
35
|
+
import { leafPatternPathsWithCaptures } from "./leaf";
|
|
36
|
+
import { structurePatternPathsWithCaptures, structurePatternPaths } from "./structure";
|
|
37
|
+
|
|
38
|
+
/**
 * Direction in which the VM can step from an envelope to its children.
 *
 * Mirrors the Rust `Axis` enum in vm.rs.
 */
export type Axis =
  | "Subject"
  | "Assertion"
  | "Predicate"
  | "Object"
  | "Wrapped";
|
|
44
|
+
|
|
45
|
+
/**
 * Label attached to each child returned by `axisChildren`, naming the kind of
 * edge that was followed to reach it.
 */
export type EdgeType =
  | "Subject"
  | "Assertion"
  | "Predicate"
  | "Object"
  | "Content";
|
|
49
|
+
|
|
50
|
+
/**
 * List the envelopes reachable from `env` along `axis`.
 *
 * @param axis - Direction to follow.
 * @param env - Envelope to start from.
 * @returns `[child, edge]` pairs; empty when the envelope's case offers no
 *   child on that axis.
 */
export function axisChildren(axis: Axis, env: Envelope): [Envelope, EdgeType][] {
  const shape = env.case();

  switch (axis) {
    case "Subject":
      return shape.type === "node" ? [[shape.subject, "Subject"]] : [];

    case "Assertion": {
      if (shape.type !== "node") {
        return [];
      }
      const edges: [Envelope, EdgeType][] = [];
      for (const assertion of shape.assertions) {
        edges.push([assertion, "Assertion"]);
      }
      return edges;
    }

    case "Predicate":
      return shape.type === "assertion" ? [[shape.assertion.predicate(), "Predicate"]] : [];

    case "Object":
      return shape.type === "assertion" ? [[shape.assertion.object(), "Object"]] : [];

    case "Wrapped": {
      // A wrapped envelope exposes its content directly.
      if (shape.type === "wrapped") {
        return [[shape.envelope, "Content"]];
      }
      if (shape.type !== "node") {
        return [];
      }
      // A node only yields content when its subject is itself wrapped.
      const subject = shape.subject;
      if (!subject.isWrapped()) {
        return [];
      }
      const content = subject.unwrap();
      return content !== undefined ? [[content, "Content"]] : [];
    }
  }
}
|
|
97
|
+
|
|
98
|
+
/**
 * Instruction set of the pattern-matching VM.
 *
 * Mirrors the Rust `Instr` enum in vm.rs. Indexes (`literalIndex`,
 * `patternIndex`) refer to `Program.literals`; `captureIndex` refers to
 * `Program.captureNames`; `a`, `b` and `address` are positions in `Program.code`.
 */
export type Instr =
  // Match the atomic literal against the current envelope; the thread dies on no match.
  | { readonly type: "MatchPredicate"; readonly literalIndex: number }
  // Match a structure literal and move to the end of each matched path.
  | { readonly type: "MatchStructure"; readonly literalIndex: number }
  // Fork: one thread continues at `a`, the other at `b`.
  | { readonly type: "Split"; readonly a: number; readonly b: number }
  // Unconditional jump.
  | { readonly type: "Jump"; readonly address: number }
  // Fork one thread per child reachable along `axis`.
  | { readonly type: "PushAxis"; readonly axis: Axis }
  // Drop the last element of the current path.
  | { readonly type: "Pop" }
  // Emit the current path and keep running.
  | { readonly type: "Save" }
  // Emit the current path and halt the thread.
  | { readonly type: "Accept" }
  // Match the literal here and recursively at every descendant.
  | {
      readonly type: "Search";
      readonly patternIndex: number;
      readonly captureMap: [string, number][];
    }
  // Save the current path and restart it from its last element.
  | { readonly type: "ExtendTraversal" }
  // Re-attach the current path to the most recently saved one.
  | { readonly type: "CombineTraversal" }
  // Step into the subject when the current envelope is a node.
  | { readonly type: "NavigateSubject" }
  // Continue only when the literal does NOT match the current envelope.
  | { readonly type: "NotMatch"; readonly patternIndex: number }
  // Match the literal repeatedly as allowed by the quantifier.
  | { readonly type: "Repeat"; readonly patternIndex: number; readonly quantifier: Quantifier }
  // Open the capture with the given index.
  | { readonly type: "CaptureStart"; readonly captureIndex: number }
  // Close the capture with the given index and record the captured path.
  | { readonly type: "CaptureEnd"; readonly captureIndex: number };
|
|
124
|
+
|
|
125
|
+
/**
 * A compiled pattern, ready to be executed by `run`.
 */
export interface Program {
  /** Instruction stream; execution starts at index 0. */
  readonly code: Instr[];
  /** Patterns referenced by index from the instructions. */
  readonly literals: Pattern[];
  /** Capture names; a capture's index in this list is its slot in the VM. */
  readonly captureNames: string[];
}
|
|
133
|
+
|
|
134
|
+
/**
 * Mutable state of one back-tracking VM thread.
 */
interface Thread {
  /** Index of the next instruction in `Program.code`. */
  pc: number;
  /** Envelope the thread is currently positioned on. */
  env: Envelope;
  /** Path of envelopes walked so far. */
  path: Path;
  /** Stack of paths set aside by ExtendTraversal, restored by CombineTraversal. */
  savedPaths: Path[];
  /** Captured paths, one list per capture slot. */
  captures: Path[][];
  /** Per-capture stack of path indexes recorded by CaptureStart. */
  captureStack: number[][];
  /** Keys of paths already emitted by Search, used to suppress duplicates. */
  seen: Set<string>;
}
|
|
146
|
+
|
|
147
|
+
/**
 * Produce an independent copy of a thread so a fork can mutate its own state.
 *
 * Every container (paths, capture lists, capture stacks, seen-set) is copied;
 * the envelopes themselves are shared by reference.
 */
function cloneThread(th: Thread): Thread {
  const copyPath = (p: Path): Path => [...p];

  const savedPaths = th.savedPaths.map(copyPath);
  const captures = th.captures.map((slot) => slot.map(copyPath));
  const captureStack = th.captureStack.map((indexes) => [...indexes]);

  return {
    pc: th.pc,
    env: th.env,
    path: copyPath(th.path),
    savedPaths,
    captures,
    captureStack,
    seen: new Set(th.seen),
  };
}
|
|
161
|
+
|
|
162
|
+
/**
 * Build a string key for a path from the hex digests of its envelopes,
 * joined with commas in path order.
 */
function pathKey(path: Path): string {
  const digests: string[] = [];
  for (const element of path) {
    digests.push(element.digest().hex());
  }
  return digests.join(",");
}
|
|
168
|
+
|
|
169
|
+
/**
 * Evaluate an atomic pattern directly, without re-entering the VM.
 *
 * Only Leaf, Structure and the Any meta pattern are accepted here; these are
 * the literals a MatchPredicate instruction may reference.
 *
 * @throws Error when given a Search pattern or any other meta pattern.
 */
function atomicPathsWithCaptures(p: Pattern, env: Envelope): [Path[], Map<string, Path[]>] {
  switch (p.type) {
    case "Leaf":
      return leafPatternPathsWithCaptures(p.pattern, env);

    case "Structure":
      return structurePatternPathsWithCaptures(p.pattern, env);

    case "Meta": {
      const meta = p.pattern;
      switch (meta.type) {
        case "Any":
          return meta.pattern.pathsWithCaptures(env);
        case "Search":
          throw new Error(
            "SearchPattern should be compiled to Search instruction, not MatchPredicate",
          );
        default:
          throw new Error(`non-atomic meta pattern used in MatchPredicate: ${meta.type}`);
      }
    }
  }
}
|
|
193
|
+
|
|
194
|
+
/**
 * Apply `pat` repeatedly starting at `env`, as allowed by `quantifier`.
 *
 * @param pat - Pattern matched at each repetition.
 * @param env - Envelope the repetition starts from.
 * @param path - Path walked so far; it is copied, never mutated.
 * @param quantifier - Minimum / maximum count and reluctance.
 * @returns `[envelope, path]` pairs, one per way the repetition can end, in the
 *   order the caller should try them. Empty when the minimum cannot be met.
 */
function repeatPaths(
  pat: Pattern,
  env: Envelope,
  path: Path,
  quantifier: Quantifier,
): [Envelope, Path][] {
  const upperBound = quantifier.max() ?? Number.MAX_SAFE_INTEGER;

  // levels[k] holds every (envelope, path) reachable after exactly k repetitions.
  const levels: [Envelope, Path][][] = [[[env, [...path]]]];

  while (levels.length - 1 < upperBound) {
    const frontier = levels[levels.length - 1];
    const reached: [Envelope, Path][] = [];

    for (const [current, prefix] of frontier) {
      for (const found of _patternPaths(pat, current)) {
        const tail = found[found.length - 1];
        // A match that ends where it started makes no progress; skipping it
        // keeps the repetition from looping forever.
        const stalled = tail?.digest().hex() === current.digest().hex();
        if (stalled || tail === undefined) {
          continue;
        }
        // Drop the leading element when it merely repeats the current envelope.
        const startsHere = found[0]?.digest().hex() === current.digest().hex();
        const extended = prefix.concat(startsHere ? found.slice(1) : found);
        reached.push([tail, extended]);
      }
    }

    if (reached.length === 0) {
      break; // No deeper repetition exists
    }
    levels.push(reached);
  }

  const minimum = quantifier.min();
  const allowsZero = minimum === 0;
  const deepest = Math.min(upperBound, levels.length - 1);

  // The required minimum cannot be reached.
  if (minimum > 0 && deepest < minimum) {
    return [];
  }

  // Zero repetitions are handled separately, so counting starts at one.
  const lowest = allowsZero ? 1 : minimum;
  if (deepest < lowest) {
    return allowsZero ? [[env, [...path]]] : [];
  }

  // Order in which repetition counts are offered to the caller.
  const mode = quantifier.reluctance();
  let order: number[];
  switch (mode) {
    case Reluctance.Greedy: {
      order = [];
      let count = deepest;
      while (count >= lowest) {
        order.push(count);
        count -= 1;
      }
      break;
    }
    case Reluctance.Lazy: {
      order = [];
      let count = lowest;
      while (count <= deepest) {
        order.push(count);
        count += 1;
      }
      break;
    }
    case Reluctance.Possessive: {
      order = deepest >= lowest ? [deepest] : [];
      break;
    }
  }

  const matched: [Envelope, Path][] = [];
  for (const count of order) {
    const level = levels[count];
    if (level !== undefined) {
      matched.push(...level);
    }
  }

  if (mode === Reluctance.Greedy) {
    // Greedy falls back to zero repetitions only when nothing else matched.
    if (allowsZero && matched.length === 0) {
      matched.push([env, [...path]]);
    }
    return matched;
  }

  // Lazy / possessive offer the zero-repetition result first.
  return allowsZero ? [[env, [...path]], ...matched] : matched;
}
|
|
313
|
+
|
|
314
|
+
/**
 * Copy the named captures produced for one matched path into a thread's
 * capture slots.
 *
 * Names that are not listed in `prog.captureNames`, or whose slot does not
 * exist on the thread, are ignored.
 */
function applyNamedCaptures(
  prog: Program,
  th: Thread,
  named: Map<string, Path[]> | undefined,
): void {
  if (named === undefined) return;
  for (const [name, capPaths] of named) {
    const captureIdx = prog.captureNames.indexOf(name);
    if (captureIdx >= 0 && captureIdx < th.captures.length) {
      th.captures[captureIdx].push(...capPaths);
    }
  }
}

/**
 * List the direct children a Search instruction descends into
 * (same traversal as Envelope::walk).
 */
function searchChildren(env: Envelope): Envelope[] {
  const children: Envelope[] = [];
  const envCase = env.case();

  switch (envCase.type) {
    case "node": {
      children.push(envCase.subject);
      for (const assertion of envCase.assertions) {
        children.push(assertion);
      }
      break;
    }
    case "wrapped": {
      children.push(envCase.envelope);
      break;
    }
    case "assertion": {
      children.push(envCase.assertion.predicate());
      children.push(envCase.assertion.object());
      break;
    }
    case "elided":
    case "encrypted":
    case "compressed":
    case "leaf":
    case "knownValue":
      // These envelope types have no children to traverse
      break;
  }

  return children;
}

/**
 * Execute a thread, and every thread it forks, until all of them halt.
 *
 * Forked threads are kept on an explicit stack and run depth-first. Each time a
 * Save, Accept or Search instruction produces a result, a copy of the path and
 * of the capture slots is appended to `out`.
 *
 * @param prog - Compiled program to execute.
 * @param start - Initial thread; it is mutated while running.
 * @param out - Receives `[path, captures]` results in the order they are found.
 * @returns `true` if at least one result was produced.
 * @throws Error when a MatchStructure instruction references a non-structure literal.
 */
function runThread(prog: Program, start: Thread, out: [Path, Path[][]][]): boolean {
  let produced = false;
  const stack: Thread[] = [start];

  while (stack.length > 0) {
    const th = stack.pop();
    if (th === undefined) break;

    // Inside the switch, `continue` advances this thread to its next
    // instruction; `break` leaves the switch and then halts the thread.
    while (true) {
      const instr = prog.code[th.pc];

      switch (instr.type) {
        case "MatchPredicate": {
          const [paths, patternCaptures] = atomicPathsWithCaptures(
            prog.literals[instr.literalIndex],
            th.env,
          );

          if (paths.length === 0) {
            break; // Kill thread
          }

          th.pc += 1;

          // One capture map per matched path (paths is non-empty here).
          const distributedCaptures: Map<string, Path[]>[] = paths.map(
            () => new Map<string, Path[]>(),
          );

          for (const [name, capturePaths] of patternCaptures) {
            if (capturePaths.length === paths.length) {
              // Distribute 1:1
              for (let pathIdx = 0; pathIdx < capturePaths.length; pathIdx++) {
                const existing = distributedCaptures[pathIdx].get(name) ?? [];
                existing.push(capturePaths[pathIdx]);
                distributedCaptures[pathIdx].set(name, existing);
              }
            } else {
              // Fallback: give all captures to the first path
              const existing = distributedCaptures[0].get(name) ?? [];
              existing.push(...capturePaths);
              distributedCaptures[0].set(name, existing);
            }
          }

          // Use first path for current thread. A single-element path naming the
          // current envelope is a simple atomic match: keep path and env as is.
          const firstPath = paths[0];
          const isSelfMatch =
            firstPath.length === 1 && firstPath[0].digest().hex() === th.env.digest().hex();
          if (!isSelfMatch) {
            // Extended path - use the full extended path
            th.path = [...firstPath];
            const lastEnv = firstPath[firstPath.length - 1];
            if (lastEnv !== undefined) {
              th.env = lastEnv;
            }
          }

          applyNamedCaptures(prog, th, distributedCaptures[0]);

          // Spawn threads for remaining paths in reverse order
          for (let i = paths.length - 1; i >= 1; i--) {
            const fork = cloneThread(th);
            // Reset captures for the fork
            for (const captureVec of fork.captures) {
              captureVec.length = 0;
            }
            const pathI = paths[i];
            if (pathI === undefined) continue;
            fork.path = [...pathI];
            const lastEnv = pathI[pathI.length - 1];
            if (lastEnv !== undefined) {
              fork.env = lastEnv;
            }

            applyNamedCaptures(prog, fork, distributedCaptures[i]);

            stack.push(fork);
          }
          continue;
        }

        case "MatchStructure": {
          const literal = prog.literals[instr.literalIndex];
          if (literal.type !== "Structure") {
            throw new Error("MatchStructure used with non-structure pattern");
          }

          const structurePaths = structurePatternPaths(literal.pattern, th.env);

          if (structurePaths.length === 0) {
            break; // Kill thread
          }

          th.pc += 1;

          // Use first path for current thread
          const firstStructPath = structurePaths[0];
          if (firstStructPath !== undefined) {
            th.path = [...firstStructPath];
            const firstLast = firstStructPath[firstStructPath.length - 1];
            if (firstLast !== undefined) {
              th.env = firstLast;
            }
          }

          // Spawn threads for remaining paths
          for (let i = structurePaths.length - 1; i >= 1; i--) {
            const structPathI = structurePaths[i];
            if (structPathI === undefined) continue;
            const fork = cloneThread(th);
            fork.path = [...structPathI];
            const lastEnv = structPathI[structPathI.length - 1];
            if (lastEnv !== undefined) {
              fork.env = lastEnv;
            }
            stack.push(fork);
          }
          continue;
        }

        case "Split": {
          // The fork takes branch `a`; this thread carries on with branch `b`.
          const fork = cloneThread(th);
          fork.pc = instr.a;
          stack.push(fork);
          th.pc = instr.b;
          continue;
        }

        case "Jump": {
          th.pc = instr.address;
          continue;
        }

        case "PushAxis": {
          th.pc += 1;
          const children = axisChildren(instr.axis, th.env);
          for (const [child, _edge] of children) {
            const fork = cloneThread(th);
            fork.env = child;
            fork.path.push(child);
            stack.push(fork);
          }
          break; // Parent path stops here
        }

        case "Pop": {
          th.path.pop();
          th.pc += 1;
          continue;
        }

        case "Save": {
          out.push([[...th.path], th.captures.map((c) => c.map((p) => [...p]))]);
          produced = true;
          th.pc += 1;
          continue;
        }

        case "Accept": {
          out.push([[...th.path], th.captures.map((c) => c.map((p) => [...p]))]);
          produced = true;
          break; // Halt thread
        }

        case "Search": {
          const inner = prog.literals[instr.patternIndex];
          if (inner === undefined) break;
          const [foundPaths, caps] = _patternPathsWithCaptures(inner, th.env);

          if (foundPaths.length > 0) {
            produced = true;
            for (const foundPath of foundPaths) {
              // Join the found path onto the current one without repeating the
              // current envelope.
              const resultPath = [...th.path];
              if (foundPath[0]?.digest().hex() === th.env.digest().hex()) {
                resultPath.push(...foundPath.slice(1));
              } else {
                resultPath.push(...foundPath);
              }

              const resultCaps = th.captures.map((c) => c.map((p) => [...p]));
              for (const [name, idx] of instr.captureMap) {
                const pths = caps.get(name);
                // Ignore indexes without a slot, as MatchPredicate does.
                const slot = resultCaps[idx];
                if (pths !== undefined && slot !== undefined) {
                  slot.push(...pths);
                }
              }

              // Emit each distinct path only once along this thread's lineage.
              const key = pathKey(resultPath);
              if (!th.seen.has(key)) {
                th.seen.add(key);
                out.push([resultPath, resultCaps]);
              }
            }
          }

          // Always walk children; push child threads in reverse order so they
          // are popped in document order.
          const allChildren = searchChildren(th.env);
          for (let i = allChildren.length - 1; i >= 0; i--) {
            const child = allChildren[i];
            if (child === undefined) continue;
            const fork = cloneThread(th);
            fork.env = child;
            fork.path.push(child);
            stack.push(fork);
          }

          break; // This thread is done
        }

        case "ExtendTraversal": {
          const lastEnv = th.path[th.path.length - 1];
          if (lastEnv !== undefined) {
            th.savedPaths.push([...th.path]);
            th.env = lastEnv;
            th.path = [lastEnv]; // Start fresh path from the last envelope
          }
          th.pc += 1;
          continue;
        }

        case "CombineTraversal": {
          const savedPath = th.savedPaths.pop();
          if (savedPath !== undefined) {
            const combined = [...savedPath];
            const savedLast = savedPath[savedPath.length - 1];

            if (
              savedLast?.digest().hex() === th.path[0]?.digest().hex() &&
              savedLast !== undefined
            ) {
              // Skip first element to avoid duplication
              combined.push(...th.path.slice(1));
            } else {
              combined.push(...th.path);
            }

            th.path = combined;
          }
          th.pc += 1;
          continue;
        }

        case "NavigateSubject": {
          if (th.env.isNode()) {
            const subject = th.env.subject();
            th.env = subject;
            th.path.push(subject);
          }
          th.pc += 1;
          continue;
        }

        case "NotMatch": {
          const pattern = prog.literals[instr.patternIndex];
          if (_patternMatches(pattern, th.env)) {
            // Inner pattern matches, so NOT pattern fails - kill thread
            break;
          }
          // Inner pattern doesn't match, so NOT pattern succeeds
          th.pc += 1;
          continue;
        }

        case "Repeat": {
          const pat = prog.literals[instr.patternIndex];
          const results = repeatPaths(pat, th.env, th.path, instr.quantifier);

          if (results.length === 0) {
            break; // Kill thread
          }

          const nextPc = th.pc + 1;

          // Try each repetition outcome in order and stop at the first one that
          // lets the rest of the program produce a result.
          for (const [envAfter, pathAfter] of results) {
            const fork = cloneThread(th);
            fork.pc = nextPc;
            fork.env = envAfter;
            fork.path = pathAfter;

            if (runThread(prog, fork, out)) {
              produced = true;
              break;
            }
          }

          break; // The forks did the work; this thread is done
        }

        case "CaptureStart": {
          const id = instr.captureIndex;
          if (id < th.captureStack.length) {
            // Clamp at 0: a negative start would make slice() count from the end.
            th.captureStack[id].push(Math.max(0, th.path.length - 1));
          }
          th.pc += 1;
          continue;
        }

        case "CaptureEnd": {
          const id = instr.captureIndex;
          if (id < th.captureStack.length) {
            const startIdx = th.captureStack[id].pop();
            if (startIdx !== undefined && id < th.captures.length) {
              let end = th.path.length;
              // When ExtendTraversal follows, leave the last path element out
              // of the capture.
              const nextInstr = prog.code[th.pc + 1];
              if (nextInstr?.type === "ExtendTraversal") {
                end = Math.max(0, end - 1);
              }
              const cap = th.path.slice(startIdx, end);
              th.captures[id].push(cap);
            }
          }
          th.pc += 1;
          continue;
        }
      }

      // If we get here without continue, break out of the inner loop
      break;
    }
  }

  return produced;
}
|
|
702
|
+
|
|
703
|
+
/**
 * Run a compiled program against `root`.
 *
 * @param prog - Program produced by `compile`.
 * @param root - Envelope at which matching starts.
 * @returns One `[path, captures]` entry per result emitted by the VM; the
 *   capture map contains only the names that captured at least one path.
 */
export function run(prog: Program, root: Envelope): [Path, Map<string, Path[]>][] {
  const collected: [Path, Path[][]][] = [];

  const initial: Thread = {
    pc: 0,
    env: root,
    path: [root],
    savedPaths: [],
    captures: prog.captureNames.map(() => []),
    captureStack: prog.captureNames.map(() => []),
    seen: new Set(),
  };

  runThread(prog, initial, collected);

  // Turn the slot-indexed capture lists into name-keyed maps.
  const results: [Path, Map<string, Path[]>][] = [];
  for (const [path, caps] of collected) {
    const named = new Map<string, Path[]>();
    caps.forEach((paths, slot) => {
      if (paths.length > 0) {
        named.set(prog.captureNames[slot], paths);
      }
    });
    results.push([path, named]);
  }
  return results;
}
|
|
733
|
+
|
|
734
|
+
/**
 * Compile a pattern into a VM program.
 *
 * Capture names are gathered first so that capture indexes are known while the
 * instructions are generated; the instruction stream always ends with Accept.
 */
export function compile(pattern: Pattern): Program {
  const program: Program = { code: [], literals: [], captureNames: [] };

  collectCaptureNames(pattern, program.captureNames);
  compilePattern(pattern, program.code, program.literals, program.captureNames);
  program.code.push({ type: "Accept" });

  return program;
}
|
|
753
|
+
|
|
754
|
+
/**
 * Append every capture name used inside `pattern` to `out`.
 *
 * Leaf patterns carry no captures; structure and meta patterns are searched
 * recursively.
 */
function collectCaptureNames(pattern: Pattern, out: string[]): void {
  if (pattern.type === "Structure") {
    // Structure patterns may have nested patterns with captures
    collectStructureCaptureNames(pattern.pattern, out);
  } else if (pattern.type === "Meta") {
    collectMetaCaptureNames(pattern.pattern, out);
  }
}
|
|
771
|
+
|
|
772
|
+
import type { StructurePattern } from "./structure";
|
|
773
|
+
import type { MetaPattern } from "./meta";
|
|
774
|
+
|
|
775
|
+
/**
 * Append the capture names found in the patterns nested inside a structure
 * pattern to `out`.
 */
function collectStructureCaptureNames(pattern: StructurePattern, out: string[]): void {
  // Recurse into an optional nested pattern.
  const visit = (nested: Pattern | undefined): void => {
    if (nested !== undefined) {
      collectCaptureNames(nested, out);
    }
  };

  switch (pattern.type) {
    case "Subject":
      visit(pattern.pattern.innerPattern());
      break;
    case "Predicate":
      visit(pattern.pattern.innerPattern());
      break;
    case "Object":
      visit(pattern.pattern.innerPattern());
      break;
    case "Wrapped":
      visit(pattern.pattern.innerPattern());
      break;
    case "Assertions":
      visit(pattern.pattern.predicatePattern());
      visit(pattern.pattern.objectPattern());
      break;
    case "Node":
      visit(pattern.pattern.subjectPattern());
      for (const assertionPattern of pattern.pattern.assertionPatterns()) {
        collectCaptureNames(assertionPattern, out);
      }
      break;
    case "Digest":
    case "Obscured":
    case "Leaf":
      // These don't have nested patterns
      break;
  }
}
|
|
833
|
+
|
|
834
|
+
/**
 * Gather capture names from a meta pattern and everything it contains.
 *
 * A `Capture` contributes its own name (only if `out` does not already hold
 * it) before its inner pattern is visited; every other variant simply forwards
 * its child pattern(s) to `collectCaptureNames`.
 *
 * @param pattern - The meta pattern to inspect.
 * @param out - Accumulator that receives the capture names (mutated in place).
 */
function collectMetaCaptureNames(pattern: MetaPattern, out: string[]): void {
  switch (pattern.type) {
    // No captures.
    case "Any":
      return;

    // Variants that hold a list of child patterns.
    case "And":
    case "Or":
    case "Traverse":
      for (const child of pattern.pattern.patterns()) {
        collectCaptureNames(child, out);
      }
      return;

    case "Capture": {
      const captureName = pattern.pattern.name();
      // Keep `out` free of duplicates while preserving first-seen order.
      if (out.indexOf(captureName) === -1) {
        out.push(captureName);
      }
      collectCaptureNames(pattern.pattern.pattern(), out);
      return;
    }

    // Variants that wrap exactly one child pattern.
    case "Not":
    case "Search":
    case "Group":
      collectCaptureNames(pattern.pattern.pattern(), out);
      return;
  }
}
|
|
873
|
+
|
|
874
|
+
/**
 * Compile a pattern to bytecode.
 *
 * `Meta` patterns are delegated to `compileMetaPattern`. `Leaf` and
 * `Structure` patterns are treated as atomic: the pattern is appended to
 * `literals` and a `MatchPredicate` instruction referring to that slot is
 * appended to `code`.
 *
 * @param pattern - The pattern to compile.
 * @param code - Instruction list being built (mutated in place).
 * @param literals - Literal pattern table being built (mutated in place).
 * @param captureNames - Capture names, passed through to `compileMetaPattern`.
 */
function compilePattern(
  pattern: Pattern,
  code: Instr[],
  literals: Pattern[],
  captureNames: string[],
): void {
  if (pattern.type === "Meta") {
    compileMetaPattern(pattern.pattern, code, literals, captureNames);
    return;
  }

  if (pattern.type === "Leaf" || pattern.type === "Structure") {
    // Atomic patterns use MatchPredicate; `push` returns the new length,
    // so the slot just written is one less than that.
    const literalIndex = literals.push(pattern) - 1;
    code.push({ type: "MatchPredicate", literalIndex });
  }
}
|
|
895
|
+
|
|
896
|
+
/**
 * Compile a meta pattern, appending instructions to `code` and any patterns
 * that instructions refer to by index to `literals`.
 *
 * Emitted instructions per variant:
 * - `Any`: the pattern is stored as a literal and matched with `MatchPredicate`.
 * - `And`: each child is compiled back to back.
 * - `Or`: for every alternative except the last, `Split` / alternative / `Jump`;
 *   then the last alternative. All `Jump`s target the first address after it.
 * - `Not`: the inner pattern is stored as a literal and referenced by `NotMatch`.
 * - `Capture`: `CaptureStart`, the inner pattern, `CaptureEnd`.
 * - `Search`: the inner pattern is stored as a literal and referenced by
 *   `Search`, together with a name-to-index map of the captures it contains.
 * - `Traverse`: children separated by `ExtendTraversal`, followed by
 *   `CombineTraversal` when there is more than one child.
 * - `Group`: `Repeat` over a stored literal when a quantifier is present,
 *   otherwise the inner pattern is compiled inline.
 *
 * @param pattern - The meta pattern to compile.
 * @param code - Instruction list being built (mutated in place).
 * @param literals - Literal pattern table being built (mutated in place).
 * @param captureNames - Capture names; indices are looked up with `indexOf`.
 */
function compileMetaPattern(
  pattern: MetaPattern,
  code: Instr[],
  literals: Pattern[],
  captureNames: string[],
): void {
  switch (pattern.type) {
    case "Any": {
      // Any matches everything - add as atomic.
      const wrapped: Pattern = { type: "Meta", pattern };
      const literalIndex = literals.push(wrapped) - 1;
      code.push({ type: "MatchPredicate", literalIndex });
      return;
    }

    case "And": {
      // All patterns must match at the same position.
      for (const member of pattern.pattern.patterns()) {
        compilePattern(member, code, literals, captureNames);
      }
      return;
    }

    case "Or": {
      const alternatives = pattern.pattern.patterns();
      if (alternatives.length === 0) {
        return;
      }

      // With a single alternative the loop below runs zero times, so only
      // that alternative is compiled and no Split/Jump is emitted.
      const lastIndex = alternatives.length - 1;
      const pendingJumps: { type: "Jump"; address: number }[] = [];

      for (let i = 0; i < lastIndex; i++) {
        // Placeholder Split; its targets are filled in once they are known.
        const split = { type: "Split" as const, a: 0, b: 0 };
        code.push(split);

        // First branch: this alternative, then a jump past the whole Or.
        const branchStart = code.length;
        compilePattern(alternatives[i], code, literals, captureNames);
        const jump = { type: "Jump" as const, address: 0 };
        code.push(jump);
        pendingJumps.push(jump);

        // Second branch begins right after the jump (next Split or last alternative).
        split.a = branchStart;
        split.b = code.length;
      }

      // Last pattern (no split needed).
      compilePattern(alternatives[lastIndex], code, literals, captureNames);

      // Every pending jump lands on the first instruction after the Or.
      const endAddr = code.length;
      for (const jump of pendingJumps) {
        jump.address = endAddr;
      }
      return;
    }

    case "Not": {
      const negated = pattern.pattern.pattern();
      const patternIndex = literals.push(negated) - 1;
      code.push({ type: "NotMatch", patternIndex });
      return;
    }

    case "Capture": {
      // NOTE: `indexOf` yields -1 when the name is not in `captureNames`;
      // that value is emitted unchanged.
      const captureIndex = captureNames.indexOf(pattern.pattern.name());

      code.push({ type: "CaptureStart", captureIndex });
      compilePattern(pattern.pattern.pattern(), code, literals, captureNames);
      code.push({ type: "CaptureEnd", captureIndex });
      return;
    }

    case "Search": {
      const searched = pattern.pattern.pattern();

      // Names of the captures that occur inside the searched pattern.
      const innerCaptureNames: string[] = [];
      collectCaptureNames(searched, innerCaptureNames);

      // Pair each inner name with its index in `captureNames`;
      // a name that is not found there is mapped to index 0.
      const captureMap: [string, number][] = [];
      for (const captureName of innerCaptureNames) {
        const slot = captureNames.indexOf(captureName);
        captureMap.push([captureName, slot < 0 ? 0 : slot]);
      }

      const patternIndex = literals.push(searched) - 1;
      code.push({ type: "Search", patternIndex, captureMap });
      return;
    }

    case "Traverse": {
      const steps = pattern.pattern.patterns();
      const lastStep = steps.length - 1;

      for (let index = 0; index < steps.length; index++) {
        const step = steps[index];
        if (step !== undefined) {
          compilePattern(step, code, literals, captureNames);
          // Separator between consecutive steps; none after the final one.
          if (index < lastStep) {
            code.push({ type: "ExtendTraversal" });
          }
        }
      }

      if (steps.length > 1) {
        code.push({ type: "CombineTraversal" });
      }
      return;
    }

    case "Group": {
      const quantifier = pattern.pattern.quantifier();
      if (quantifier === undefined) {
        // Simple grouping: compile the inner pattern inline.
        compilePattern(pattern.pattern.pattern(), code, literals, captureNames);
        return;
      }

      // Repeat pattern: the inner pattern becomes a literal referenced by Repeat.
      const patternIndex = literals.push(pattern.pattern.pattern()) - 1;
      code.push({ type: "Repeat", patternIndex, quantifier });
      return;
    }
  }
}
|