@bcts/envelope-pattern 1.0.0-alpha.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/LICENSE +48 -0
  2. package/README.md +13 -0
  3. package/dist/index.cjs +6781 -0
  4. package/dist/index.cjs.map +1 -0
  5. package/dist/index.d.cts +2628 -0
  6. package/dist/index.d.cts.map +1 -0
  7. package/dist/index.d.mts +2628 -0
  8. package/dist/index.d.mts.map +1 -0
  9. package/dist/index.iife.js +6781 -0
  10. package/dist/index.iife.js.map +1 -0
  11. package/dist/index.mjs +6545 -0
  12. package/dist/index.mjs.map +1 -0
  13. package/package.json +77 -0
  14. package/src/error.ts +262 -0
  15. package/src/format.ts +375 -0
  16. package/src/index.ts +27 -0
  17. package/src/parse/index.ts +923 -0
  18. package/src/parse/token.ts +906 -0
  19. package/src/parse/utils.ts +339 -0
  20. package/src/pattern/index.ts +719 -0
  21. package/src/pattern/leaf/array-pattern.ts +273 -0
  22. package/src/pattern/leaf/bool-pattern.ts +140 -0
  23. package/src/pattern/leaf/byte-string-pattern.ts +172 -0
  24. package/src/pattern/leaf/cbor-pattern.ts +355 -0
  25. package/src/pattern/leaf/date-pattern.ts +178 -0
  26. package/src/pattern/leaf/index.ts +280 -0
  27. package/src/pattern/leaf/known-value-pattern.ts +192 -0
  28. package/src/pattern/leaf/map-pattern.ts +152 -0
  29. package/src/pattern/leaf/null-pattern.ts +110 -0
  30. package/src/pattern/leaf/number-pattern.ts +248 -0
  31. package/src/pattern/leaf/tagged-pattern.ts +228 -0
  32. package/src/pattern/leaf/text-pattern.ts +165 -0
  33. package/src/pattern/matcher.ts +88 -0
  34. package/src/pattern/meta/and-pattern.ts +109 -0
  35. package/src/pattern/meta/any-pattern.ts +81 -0
  36. package/src/pattern/meta/capture-pattern.ts +111 -0
  37. package/src/pattern/meta/group-pattern.ts +110 -0
  38. package/src/pattern/meta/index.ts +269 -0
  39. package/src/pattern/meta/not-pattern.ts +91 -0
  40. package/src/pattern/meta/or-pattern.ts +146 -0
  41. package/src/pattern/meta/search-pattern.ts +201 -0
  42. package/src/pattern/meta/traverse-pattern.ts +146 -0
  43. package/src/pattern/structure/assertions-pattern.ts +244 -0
  44. package/src/pattern/structure/digest-pattern.ts +225 -0
  45. package/src/pattern/structure/index.ts +272 -0
  46. package/src/pattern/structure/leaf-structure-pattern.ts +85 -0
  47. package/src/pattern/structure/node-pattern.ts +188 -0
  48. package/src/pattern/structure/object-pattern.ts +149 -0
  49. package/src/pattern/structure/obscured-pattern.ts +159 -0
  50. package/src/pattern/structure/predicate-pattern.ts +151 -0
  51. package/src/pattern/structure/subject-pattern.ts +152 -0
  52. package/src/pattern/structure/wrapped-pattern.ts +195 -0
  53. package/src/pattern/vm.ts +1021 -0
@@ -0,0 +1,1021 @@
1
+ /**
2
+ * @bcts/envelope-pattern - VM instructions and executor
3
+ *
4
+ * This is a 1:1 TypeScript port of bc-envelope-pattern-rust vm.rs
5
+ * Tiny Thompson-style VM for walking Gordian Envelope trees.
6
+ *
7
+ * @module envelope-pattern/pattern/vm
8
+ */
9
+
10
+ import type { Envelope } from "@bcts/envelope";
11
+ import type { Quantifier } from "@bcts/dcbor-pattern";
12
+ import { Reluctance } from "@bcts/dcbor-pattern";
13
+ import type { Path } from "../format";
14
+
/**
 * Build the initial value for a matcher hook that has not been registered yet.
 *
 * Calling the returned function always throws, so using the VM before
 * `registerVMPatternFunctions()` has run fails with a clear message instead of
 * an opaque "is not a function" TypeError.
 *
 * @param name - Human-readable hook name used in the error message.
 * @returns A function that throws when invoked.
 */
function unregisteredVMHook(name: string): () => never {
  return () => {
    throw new Error(
      `envelope-pattern VM: "${name}" hook used before registerVMPatternFunctions() was called`,
    );
  };
}

// Forward declarations - replaced through registerVMPatternFunctions() (called by
// pattern/index.ts) to avoid circular deps. Until then every hook throws.
let _patternPathsWithCaptures: (pattern: Pattern, env: Envelope) => [Path[], Map<string, Path[]>] =
  unregisteredVMHook("pathsWithCaptures");
let _patternMatches: (pattern: Pattern, env: Envelope) => boolean = unregisteredVMHook("matches");
let _patternPaths: (pattern: Pattern, env: Envelope) => Path[] = unregisteredVMHook("paths");
19
+
/**
 * Install the pattern-level matching functions the VM calls back into.
 *
 * The VM cannot import these directly without creating an import cycle, so
 * they are handed over at runtime and stored in the module-level hooks.
 *
 * @param pathsWithCaptures - Produces matching paths plus named captures for a pattern.
 * @param matches - Reports whether a pattern matches an envelope.
 * @param paths - Produces matching paths for a pattern.
 */
export function registerVMPatternFunctions(
  pathsWithCaptures: (pattern: Pattern, env: Envelope) => [Path[], Map<string, Path[]>],
  matches: (pattern: Pattern, env: Envelope) => boolean,
  paths: (pattern: Pattern, env: Envelope) => Path[],
): void {
  [_patternPathsWithCaptures, _patternMatches, _patternPaths] = [
    pathsWithCaptures,
    matches,
    paths,
  ];
}
32
+
33
+ // Import Pattern type - this creates a circular dependency that we resolve via registration
34
+ import type { Pattern } from "./index";
35
+ import { leafPatternPathsWithCaptures } from "./leaf";
36
+ import { structurePatternPathsWithCaptures, structurePatternPaths } from "./structure";
37
+
/**
 * Direction in which the VM can step from an envelope to its children.
 *
 * Mirrors the Rust `Axis` enum in vm.rs.
 */
export type Axis =
  | "Subject"
  | "Assertion"
  | "Predicate"
  | "Object"
  | "Wrapped";
44
+
/**
 * Label attached to each child returned by a traversal step, naming the kind
 * of edge that was followed to reach it.
 */
export type EdgeType =
  | "Subject"
  | "Assertion"
  | "Predicate"
  | "Object"
  | "Content";
49
+
/**
 * List the children of `env` that lie along `axis`, each paired with the edge
 * type that leads to it.
 *
 * - `Subject` / `Assertion` only yield children for node envelopes.
 * - `Predicate` / `Object` only yield children for assertion envelopes.
 * - `Wrapped` yields the content of a wrapped envelope, or the unwrapped
 *   subject of a node whose subject is wrapped.
 *
 * @returns An empty array when the envelope has nothing along the axis.
 */
export function axisChildren(axis: Axis, env: Envelope): [Envelope, EdgeType][] {
  const current = env.case();

  switch (axis) {
    case "Subject":
      return current.type === "node" ? [[current.subject, "Subject"]] : [];

    case "Assertion":
      return current.type === "node"
        ? current.assertions.map((assertion): [Envelope, EdgeType] => [assertion, "Assertion"])
        : [];

    case "Predicate":
      return current.type === "assertion" ? [[current.assertion.predicate(), "Predicate"]] : [];

    case "Object":
      return current.type === "assertion" ? [[current.assertion.object(), "Object"]] : [];

    case "Wrapped": {
      if (current.type === "wrapped") {
        return [[current.envelope, "Content"]];
      }
      if (current.type !== "node" || !current.subject.isWrapped()) {
        return [];
      }
      // Node with a wrapped subject: step straight through to the wrapped content.
      const content = current.subject.unwrap();
      return content === undefined ? [] : [[content, "Content"]];
    }
  }
}
97
+
/**
 * Instruction set of the pattern-matching VM.
 *
 * Mirrors the Rust `Instr` enum in vm.rs. Indices named `literalIndex` and
 * `patternIndex` refer to `Program.literals`; `captureIndex` refers to
 * `Program.captureNames`.
 */
export type Instr =
  // Match an atomic pattern against the current envelope; the thread dies when it yields no paths.
  | { readonly type: "MatchPredicate"; readonly literalIndex: number }
  // Match a structure pattern against the current envelope and adopt the paths it returns.
  | { readonly type: "MatchStructure"; readonly literalIndex: number }
  // Fork: one thread continues at `a`, the other at `b`.
  | { readonly type: "Split"; readonly a: number; readonly b: number }
  // Unconditional jump to `address`.
  | { readonly type: "Jump"; readonly address: number }
  // Fork one thread per child along `axis`; the parent thread halts.
  | { readonly type: "PushAxis"; readonly axis: Axis }
  // Drop the last element of the current path.
  | { readonly type: "Pop" }
  // Emit the current path and captures, then keep running.
  | { readonly type: "Save" }
  // Emit the current path and captures, then halt the thread.
  | { readonly type: "Accept" }
  // Try the pattern at the current envelope and at every descendant envelope.
  | {
      readonly type: "Search";
      readonly patternIndex: number;
      // Pairs of (capture name reported by the inner pattern, capture slot to append to).
      readonly captureMap: [string, number][];
    }
  // Save the current path and restart the path from its last envelope.
  | { readonly type: "ExtendTraversal" }
  // Pop the most recently saved path and prepend it to the current path.
  | { readonly type: "CombineTraversal" }
  // Step to the subject when the current envelope is a node.
  | { readonly type: "NavigateSubject" }
  // Continue only when the pattern does NOT match the current envelope.
  | { readonly type: "NotMatch"; readonly patternIndex: number }
  // Apply the pattern repeatedly as allowed by `quantifier`.
  | { readonly type: "Repeat"; readonly patternIndex: number; readonly quantifier: Quantifier }
  // Remember where in the current path a capture begins.
  | { readonly type: "CaptureStart"; readonly captureIndex: number }
  // Close the most recently opened capture and record the captured path slice.
  | { readonly type: "CaptureEnd"; readonly captureIndex: number };
124
+
/**
 * Output of `compile()`: everything the VM needs to run a pattern.
 */
export interface Program {
  /** Instruction sequence; execution starts at index 0. */
  readonly code: Instr[];
  /** Patterns referenced by index from instructions. */
  readonly literals: Pattern[];
  /** Capture names; an instruction's capture index is a position in this list. */
  readonly captureNames: string[];
}
133
+
/**
 * Mutable execution state of one VM thread (one back-tracking alternative).
 */
interface Thread {
  /** Index of the next instruction in `Program.code`. */
  pc: number;
  /** Envelope the thread is currently positioned on. */
  env: Envelope;
  /** Envelopes visited to reach the current position. */
  path: Path;
  /** Paths set aside by ExtendTraversal until CombineTraversal restores them. */
  savedPaths: Path[];
  /** Captured paths, one list per capture index. */
  captures: Path[][];
  /** Per capture index, the path positions at which still-open captures began. */
  captureStack: number[][];
  /** Keys of result paths already emitted by Search, used to suppress duplicates. */
  seen: Set<string>;
}
146
+
/**
 * Produce an independent copy of a thread so a fork can diverge from it.
 *
 * Every container (paths, captures, capture stacks, seen-set) is duplicated;
 * the envelopes inside them are shared by reference.
 */
function cloneThread(th: Thread): Thread {
  const copyPath = (source: Path): Path => Array.from(source);

  const { pc, env } = th;
  const path = copyPath(th.path);
  const savedPaths = th.savedPaths.map((saved) => copyPath(saved));
  const captures = th.captures.map((slot) => slot.map((captured) => copyPath(captured)));
  const captureStack = th.captureStack.map((starts) => Array.from(starts));
  const seen = new Set(th.seen);

  return { pc, env, path, savedPaths, captures, captureStack, seen };
}
161
+
/**
 * Build a string key for a path: the hex digests of its envelopes, in order,
 * separated by commas.
 */
function pathKey(path: Path): string {
  const digests: string[] = [];
  for (const element of path) {
    digests.push(element.digest().hex());
  }
  return digests.join(",");
}
168
+
/**
 * Evaluate a pattern that the VM treats as a single atomic step.
 *
 * Only Leaf patterns, Structure patterns and the `Any` meta pattern are
 * accepted; these are the patterns a MatchPredicate instruction may refer to.
 *
 * @returns The matching paths and the named captures reported by the matcher.
 * @throws Error when given any other meta pattern, since those must be
 *   compiled to their own instructions.
 */
function atomicPathsWithCaptures(p: Pattern, env: Envelope): [Path[], Map<string, Path[]>] {
  switch (p.type) {
    case "Leaf":
      return leafPatternPathsWithCaptures(p.pattern, env);

    case "Structure":
      return structurePatternPathsWithCaptures(p.pattern, env);

    case "Meta": {
      const meta = p.pattern;
      if (meta.type === "Any") {
        return meta.pattern.pathsWithCaptures(env);
      }
      const message =
        meta.type === "Search"
          ? "SearchPattern should be compiled to Search instruction, not MatchPredicate"
          : `non-atomic meta pattern used in MatchPredicate: ${meta.type}`;
      throw new Error(message);
    }
  }
}
193
+
/**
 * Apply `pat` repeatedly starting at `env` and list the positions the VM may
 * continue from, ordered according to the quantifier's reluctance.
 *
 * @param pat - Pattern to repeat.
 * @param env - Envelope the repetition starts from.
 * @param path - Path leading to `env`; every result path extends a copy of it.
 * @param quantifier - Supplies the minimum, optional maximum and reluctance.
 * @returns `(envelope, path)` pairs to resume from; empty when the minimum
 *   repetition count cannot be reached.
 */
function repeatPaths(
  pat: Pattern,
  env: Envelope,
  path: Path,
  quantifier: Quantifier,
): [Envelope, Path][] {
  type Position = [Envelope, Path];

  const upperBound = quantifier.max() ?? Number.MAX_SAFE_INTEGER;

  // levels[k] holds every position reachable after exactly k successful matches.
  const levels: Position[][] = [[[env, [...path]]]];

  for (let round = 0; round < upperBound; round++) {
    const frontier = levels[levels.length - 1];
    const advanced: Position[] = [];

    for (const [from, prefix] of frontier) {
      for (const sub of _patternPaths(pat, from)) {
        const tail = sub[sub.length - 1];
        // A match that ends where it started makes no progress; skipping it
        // prevents looping forever. Empty sub-paths are ignored as well.
        if (tail === undefined || tail.digest().hex() === from.digest().hex()) {
          continue;
        }
        // Do not repeat the starting envelope when the sub-path begins with it.
        const startsAtFrom = sub[0]?.digest().hex() === from.digest().hex();
        const extension = startsAtFrom ? sub.slice(1) : sub;
        advanced.push([tail, [...prefix, ...extension]]);
      }
    }

    if (advanced.length === 0) {
      break; // the pattern cannot be applied another time
    }
    levels.push(advanced);
  }

  const least = quantifier.min();
  const zeroAllowed = least === 0;
  const most = Math.min(upperBound, levels.length - 1);

  // The required minimum number of repetitions was never reached.
  if (least > 0 && most < least) {
    return [];
  }

  // Zero repetitions are handled separately, so counted repetitions start at 1.
  const lowest = zeroAllowed ? 1 : least;
  if (most < lowest) {
    return zeroAllowed ? [[env, [...path]]] : [];
  }

  const reluctance = quantifier.reluctance();
  const ascending = Array.from({ length: most - lowest + 1 }, (_, offset) => lowest + offset);

  let counts: number[];
  switch (reluctance) {
    case Reluctance.Greedy:
      counts = ascending.reverse(); // most repetitions first
      break;
    case Reluctance.Lazy:
      counts = ascending; // fewest repetitions first
      break;
    case Reluctance.Possessive:
      counts = most >= lowest ? [most] : []; // only the maximum
      break;
  }

  const matched: Position[] = counts.flatMap((count) => levels[count] ?? []);

  if (reluctance === Reluctance.Greedy) {
    // Greedy falls back to zero repetitions only when nothing else matched.
    return zeroAllowed && matched.length === 0 ? [[env, [...path]]] : matched;
  }

  // Lazy / possessive: offer zero repetitions first when allowed.
  return zeroAllowed ? [[env, [...path]], ...matched] : matched;
}
313
+
/**
 * Run `start`, and every thread forked from it, until all of them have halted.
 *
 * Threads are kept on an explicit stack and executed depth-first. Each thread
 * runs until it is killed (an instruction fails to match), halts (Accept), or
 * hands over to forks (PushAxis, Search, Repeat).
 *
 * @param prog - Compiled program to execute.
 * @param start - Initial thread; it is mutated while running.
 * @param out - Receives one `[path, captures]` entry per emitted result.
 * @returns `true` when Save, Accept or Search found at least one match.
 * @throws Error when a thread's program counter leaves the program, which
 *   means the program is malformed (e.g. it does not end in Accept).
 */
function runThread(prog: Program, start: Thread, out: [Path, Path[][]][]): boolean {
  let produced = false;
  const stack: Thread[] = [start];

  // Deep copy of a thread's captures, so emitted results are not affected by
  // later mutation of the thread.
  const snapshotCaptures = (th: Thread): Path[][] => th.captures.map((c) => c.map((p) => [...p]));

  // Append captures reported by name to the matching capture slots of
  // `target`; names that are not part of the program are ignored.
  const addNamedCaptures = (target: Thread, named: Map<string, Path[]> | undefined): void => {
    if (named === undefined) return;
    for (const [name, capPaths] of named) {
      const captureIdx = prog.captureNames.indexOf(name);
      if (captureIdx >= 0 && captureIdx < target.captures.length) {
        target.captures[captureIdx].push(...capPaths);
      }
    }
  };

  while (stack.length > 0) {
    const th = stack.pop();
    if (th === undefined) break;

    // Inside the switch, `continue` moves on to the thread's next instruction
    // and `break` ends the thread (killed or halted).
    while (true) {
      const instr = prog.code[th.pc];
      if (instr === undefined) {
        throw new Error(
          `VM program counter out of range: ${th.pc} (program length ${prog.code.length})`,
        );
      }

      switch (instr.type) {
        case "MatchPredicate": {
          const [paths, patternCaptures] = atomicPathsWithCaptures(
            prog.literals[instr.literalIndex],
            th.env,
          );

          if (paths.length === 0) {
            break; // Kill thread
          }

          th.pc += 1;

          // Decide which captures belong to which matched path.
          const distributedCaptures: Map<string, Path[]>[] = paths.map(
            () => new Map<string, Path[]>(),
          );

          for (const [name, capturePaths] of patternCaptures) {
            if (capturePaths.length === paths.length) {
              // One capture per path: distribute 1:1.
              for (let pathIdx = 0; pathIdx < capturePaths.length; pathIdx++) {
                const bucket = distributedCaptures[pathIdx];
                const existing = bucket.get(name) ?? [];
                existing.push(capturePaths[pathIdx]);
                bucket.set(name, existing);
              }
            } else {
              // Counts differ: give all captures to the first path.
              const bucket = distributedCaptures[0];
              const existing = bucket.get(name) ?? [];
              existing.push(...capturePaths);
              bucket.set(name, existing);
            }
          }

          // The current thread continues with the first path. A path that is
          // just the current envelope leaves path and environment untouched.
          const firstPath = paths[0];
          const isSelfMatch =
            firstPath.length === 1 && firstPath[0].digest().hex() === th.env.digest().hex();
          if (!isSelfMatch) {
            th.path = [...firstPath];
            const lastEnv = firstPath[firstPath.length - 1];
            if (lastEnv !== undefined) {
              th.env = lastEnv;
            }
          }
          addNamedCaptures(th, distributedCaptures[0]);

          // Spawn threads for the remaining paths in reverse order, so they
          // are popped from the stack in their original order.
          for (let i = paths.length - 1; i >= 1; i--) {
            const fork = cloneThread(th);
            // Forks start with empty captures and only get their own share.
            for (const captureVec of fork.captures) {
              captureVec.length = 0;
            }
            const pathI = paths[i];
            if (pathI === undefined) continue;
            fork.path = [...pathI];
            const lastEnv = pathI[pathI.length - 1];
            if (lastEnv !== undefined) {
              fork.env = lastEnv;
            }
            addNamedCaptures(fork, distributedCaptures[i]);
            stack.push(fork);
          }
          continue;
        }

        case "MatchStructure": {
          const literal = prog.literals[instr.literalIndex];
          if (literal.type !== "Structure") {
            throw new Error("MatchStructure used with non-structure pattern");
          }

          const structurePaths = structurePatternPaths(literal.pattern, th.env);

          if (structurePaths.length === 0) {
            break; // Kill thread
          }

          th.pc += 1;

          // Use first path for current thread
          const firstStructPath = structurePaths[0];
          if (firstStructPath !== undefined) {
            th.path = [...firstStructPath];
            const firstLast = firstStructPath[firstStructPath.length - 1];
            if (firstLast !== undefined) {
              th.env = firstLast;
            }
          }

          // Spawn threads for remaining paths
          for (let i = structurePaths.length - 1; i >= 1; i--) {
            const structPathI = structurePaths[i];
            if (structPathI === undefined) continue;
            const fork = cloneThread(th);
            fork.path = [...structPathI];
            const lastEnv = structPathI[structPathI.length - 1];
            if (lastEnv !== undefined) {
              fork.env = lastEnv;
            }
            stack.push(fork);
          }
          continue;
        }

        case "Split": {
          // The fork takes branch `a`; this thread carries on with branch `b`.
          const fork = cloneThread(th);
          fork.pc = instr.a;
          stack.push(fork);
          th.pc = instr.b;
          continue;
        }

        case "Jump": {
          th.pc = instr.address;
          continue;
        }

        case "PushAxis": {
          th.pc += 1;
          for (const [child] of axisChildren(instr.axis, th.env)) {
            const fork = cloneThread(th);
            fork.env = child;
            fork.path.push(child);
            stack.push(fork);
          }
          break; // Parent path stops here
        }

        case "Pop": {
          th.path.pop();
          th.pc += 1;
          continue;
        }

        case "Save": {
          out.push([[...th.path], snapshotCaptures(th)]);
          produced = true;
          th.pc += 1;
          continue;
        }

        case "Accept": {
          out.push([[...th.path], snapshotCaptures(th)]);
          produced = true;
          break; // Halt thread
        }

        case "Search": {
          const inner = prog.literals[instr.patternIndex];
          if (inner === undefined) break;
          const [foundPaths, caps] = _patternPathsWithCaptures(inner, th.env);

          if (foundPaths.length > 0) {
            produced = true;
            for (const foundPath of foundPaths) {
              // Append the found path to the thread's path without repeating
              // the current envelope.
              const resultPath = [...th.path];
              if (foundPath[0]?.digest().hex() === th.env.digest().hex()) {
                resultPath.push(...foundPath.slice(1));
              } else {
                resultPath.push(...foundPath);
              }

              const resultCaps = snapshotCaptures(th);
              for (const [name, idx] of instr.captureMap) {
                const pths = caps.get(name);
                if (pths !== undefined) {
                  resultCaps[idx].push(...pths);
                }
              }

              // Emit each distinct result path only once per thread lineage.
              const key = pathKey(resultPath);
              if (!th.seen.has(key)) {
                th.seen.add(key);
                out.push([resultPath, resultCaps]);
              }
            }
          }

          // Always walk children (same traversal as Envelope::walk)
          const allChildren: Envelope[] = [];
          const envCase = th.env.case();

          switch (envCase.type) {
            case "node": {
              allChildren.push(envCase.subject);
              for (const assertion of envCase.assertions) {
                allChildren.push(assertion);
              }
              break;
            }
            case "wrapped": {
              allChildren.push(envCase.envelope);
              break;
            }
            case "assertion": {
              allChildren.push(envCase.assertion.predicate());
              allChildren.push(envCase.assertion.object());
              break;
            }
            case "elided":
            case "encrypted":
            case "compressed":
            case "leaf":
            case "knownValue":
              // These envelope types have no children to traverse
              break;
          }

          // Push child threads in reverse order; each fork keeps the same pc
          // and therefore re-runs this Search instruction on the child.
          for (let i = allChildren.length - 1; i >= 0; i--) {
            const child = allChildren[i];
            if (child === undefined) continue;
            const fork = cloneThread(th);
            fork.env = child;
            fork.path.push(child);
            stack.push(fork);
          }

          break; // This thread is done
        }

        case "ExtendTraversal": {
          const lastEnv = th.path[th.path.length - 1];
          if (lastEnv !== undefined) {
            th.savedPaths.push([...th.path]);
            th.env = lastEnv;
            th.path = [lastEnv]; // Start fresh path from the last envelope
          }
          th.pc += 1;
          continue;
        }

        case "CombineTraversal": {
          const savedPath = th.savedPaths.pop();
          if (savedPath !== undefined) {
            const combined = [...savedPath];
            const savedLast = savedPath[savedPath.length - 1];

            if (
              savedLast !== undefined &&
              savedLast.digest().hex() === th.path[0]?.digest().hex()
            ) {
              // Skip first element to avoid duplication
              combined.push(...th.path.slice(1));
            } else {
              combined.push(...th.path);
            }

            th.path = combined;
          }
          th.pc += 1;
          continue;
        }

        case "NavigateSubject": {
          if (th.env.isNode()) {
            const subject = th.env.subject();
            th.env = subject;
            th.path.push(subject);
          }
          th.pc += 1;
          continue;
        }

        case "NotMatch": {
          const pattern = prog.literals[instr.patternIndex];
          if (_patternMatches(pattern, th.env)) {
            break; // Inner pattern matches, so NOT fails - kill thread
          }
          // Inner pattern doesn't match, so NOT succeeds
          th.pc += 1;
          continue;
        }

        case "Repeat": {
          const pat = prog.literals[instr.patternIndex];
          const results = repeatPaths(pat, th.env, th.path, instr.quantifier);
          const nextPc = th.pc + 1;

          // Try the candidates in the order given by the quantifier and stop
          // at the first one for which the rest of the program produces output.
          // With no candidates the loop does not run and the thread dies.
          for (const [envAfter, pathAfter] of results) {
            const fork = cloneThread(th);
            fork.pc = nextPc;
            fork.env = envAfter;
            fork.path = pathAfter;

            if (runThread(prog, fork, out)) {
              produced = true;
              break;
            }
          }
          break; // The continuation ran in the forks; this thread is done
        }

        case "CaptureStart": {
          const id = instr.captureIndex;
          // Ignore indices outside the capture table, including the -1 that
          // `indexOf` yields for an unknown capture name.
          if (id >= 0 && id < th.captureStack.length) {
            th.captureStack[id].push(th.path.length - 1);
          }
          th.pc += 1;
          continue;
        }

        case "CaptureEnd": {
          const id = instr.captureIndex;
          if (id >= 0 && id < th.captureStack.length) {
            const startIdx = th.captureStack[id].pop();
            if (startIdx !== undefined && id < th.captures.length) {
              let end = th.path.length;
              // When the next instruction is ExtendTraversal, leave the last
              // path element out of the capture.
              const nextInstr = prog.code[th.pc + 1];
              if (nextInstr?.type === "ExtendTraversal") {
                end = Math.max(0, end - 1);
              }
              th.captures[id].push(th.path.slice(startIdx, end));
            }
          }
          th.pc += 1;
          continue;
        }
      }

      // Reached only via `break` in the switch: the thread has ended.
      break;
    }
  }

  return produced;
}
702
+
/**
 * Run a compiled program against `root` and collect every result.
 *
 * Each Save or Accept executed by the VM contributes one entry.
 *
 * @returns Pairs of matched path and captures keyed by capture name; capture
 *   names without any captured path are left out of the map.
 */
export function run(prog: Program, root: Envelope): [Path, Map<string, Path[]>][] {
  const results: [Path, Path[][]][] = [];

  runThread(
    prog,
    {
      pc: 0,
      env: root,
      path: [root],
      savedPaths: [],
      captures: prog.captureNames.map(() => []),
      captureStack: prog.captureNames.map(() => []),
      seen: new Set(),
    },
    results,
  );

  // Turn index-addressed capture slots into a name-addressed map.
  return results.map(([path, slots]): [Path, Map<string, Path[]>] => {
    const named = new Map<string, Path[]>();
    slots.forEach((captured, slot) => {
      if (captured.length > 0) {
        named.set(prog.captureNames[slot], captured);
      }
    });
    return [path, named];
  });
}
733
+
/**
 * Translate a pattern into a VM program.
 *
 * Capture names are gathered up front so that capture indices are known while
 * instructions are emitted; the program always ends with an Accept.
 */
export function compile(pattern: Pattern): Program {
  const captureNames: string[] = [];
  collectCaptureNames(pattern, captureNames);

  const literals: Pattern[] = [];
  const code: Instr[] = [];
  compilePattern(pattern, code, literals, captureNames);

  // Reaching the end of the program means the whole pattern matched.
  code.push({ type: "Accept" });

  return { code, literals, captureNames };
}
753
+
/**
 * Append the capture names used anywhere inside `pattern` to `out`.
 *
 * Leaf patterns contribute nothing; structure and meta patterns are searched
 * recursively.
 */
function collectCaptureNames(pattern: Pattern, out: string[]): void {
  if (pattern.type === "Structure") {
    collectStructureCaptureNames(pattern.pattern, out);
  } else if (pattern.type === "Meta") {
    collectMetaCaptureNames(pattern.pattern, out);
  }
  // "Leaf": no captures to collect.
}
771
+
772
+ import type { StructurePattern } from "./structure";
773
+ import type { MetaPattern } from "./meta";
774
+
/**
 * Append the capture names found in the patterns nested inside a structure
 * pattern to `out`.
 */
function collectStructureCaptureNames(pattern: StructurePattern, out: string[]): void {
  // Descend into an optional nested pattern.
  const visit = (nested: Pattern | undefined): void => {
    if (nested !== undefined) {
      collectCaptureNames(nested, out);
    }
  };

  switch (pattern.type) {
    // Variants with a single optional inner pattern.
    case "Subject":
    case "Predicate":
    case "Object":
    case "Wrapped":
      visit(pattern.pattern.innerPattern());
      break;

    case "Assertions":
      visit(pattern.pattern.predicatePattern());
      visit(pattern.pattern.objectPattern());
      break;

    case "Node":
      visit(pattern.pattern.subjectPattern());
      for (const assertionPat of pattern.pattern.assertionPatterns()) {
        collectCaptureNames(assertionPat, out);
      }
      break;

    // Variants without nested patterns.
    case "Digest":
    case "Obscured":
    case "Leaf":
      break;
  }
}
833
+
/**
 * Append the capture names found inside a meta pattern to `out`.
 *
 * A Capture contributes its own name (once) before its inner pattern is
 * searched; every other variant only forwards to its sub-patterns.
 */
function collectMetaCaptureNames(pattern: MetaPattern, out: string[]): void {
  switch (pattern.type) {
    case "Any":
      return; // no sub-patterns, no captures

    // Variants holding a list of sub-patterns.
    case "And":
    case "Or":
    case "Traverse":
      for (const child of pattern.pattern.patterns()) {
        collectCaptureNames(child, out);
      }
      return;

    case "Capture": {
      const name = pattern.pattern.name();
      if (!out.includes(name)) {
        out.push(name);
      }
      collectCaptureNames(pattern.pattern.pattern(), out);
      return;
    }

    // Variants wrapping a single sub-pattern.
    case "Not":
    case "Search":
    case "Group":
      collectCaptureNames(pattern.pattern.pattern(), out);
      return;
  }
}
873
+
/**
 * Emit the instructions for `pattern` at the end of `code`.
 *
 * Leaf and structure patterns are atomic: they are stored as a literal and
 * matched by a single MatchPredicate. Meta patterns are expanded by
 * `compileMetaPattern`.
 *
 * @param pattern - Pattern to compile.
 * @param code - Instruction list to append to.
 * @param literals - Literal table to append referenced patterns to.
 * @param captureNames - Capture name table; a name's position is its capture index.
 */
function compilePattern(
  pattern: Pattern,
  code: Instr[],
  literals: Pattern[],
  captureNames: string[],
): void {
  switch (pattern.type) {
    case "Leaf":
    case "Structure":
      // Atomic patterns use MatchPredicate
      literals.push(pattern);
      code.push({ type: "MatchPredicate", literalIndex: literals.length - 1 });
      break;
    case "Meta":
      compileMetaPattern(pattern.pattern, code, literals, captureNames);
      break;
  }
}

/**
 * Look up the capture index of `name`, registering the name at the end of
 * `captureNames` when it is not present yet, so the result is never -1.
 */
function resolveCaptureIndex(name: string, captureNames: string[]): number {
  const existing = captureNames.indexOf(name);
  if (existing >= 0) {
    return existing;
  }
  captureNames.push(name);
  return captureNames.length - 1;
}

/**
 * Emit the instructions for a meta pattern at the end of `code`.
 *
 * @param pattern - Meta pattern to compile.
 * @param code - Instruction list to append to.
 * @param literals - Literal table to append referenced patterns to.
 * @param captureNames - Capture name table; may gain names that were not
 *   collected beforehand.
 */
function compileMetaPattern(
  pattern: MetaPattern,
  code: Instr[],
  literals: Pattern[],
  captureNames: string[],
): void {
  switch (pattern.type) {
    case "Any": {
      // Any matches everything - add as atomic
      const anyPattern: Pattern = { type: "Meta", pattern };
      literals.push(anyPattern);
      code.push({ type: "MatchPredicate", literalIndex: literals.length - 1 });
      break;
    }
    case "And": {
      // Sub-patterns are emitted back to back, so each one must match in turn.
      for (const p of pattern.pattern.patterns()) {
        compilePattern(p, code, literals, captureNames);
      }
      break;
    }
    case "Or": {
      // Layout: Split, alt 1, Jump end, Split, alt 2, Jump end, ..., last alt.
      const patterns = pattern.pattern.patterns();
      if (patterns.length === 0) return;

      const jumpAddresses: number[] = [];
      for (let i = 0; i < patterns.length - 1; i++) {
        const splitAddr = code.length;
        code.push({ type: "Split", a: 0, b: 0 }); // Placeholder

        const aStart = code.length;
        compilePattern(patterns[i], code, literals, captureNames);
        jumpAddresses.push(code.length);
        code.push({ type: "Jump", address: 0 }); // Placeholder

        // Both targets are known now; instructions are readonly, so the
        // placeholder is replaced rather than patched in place.
        code[splitAddr] = { type: "Split", a: aStart, b: code.length };
      }

      // Last alternative (no split needed)
      compilePattern(patterns[patterns.length - 1], code, literals, captureNames);

      // Point every alternative's jump just past the Or
      const endAddr = code.length;
      for (const jumpAddr of jumpAddresses) {
        code[jumpAddr] = { type: "Jump", address: endAddr };
      }
      break;
    }
    case "Not": {
      // Use NotMatch instruction
      literals.push(pattern.pattern.pattern());
      code.push({ type: "NotMatch", patternIndex: literals.length - 1 });
      break;
    }
    case "Capture": {
      const captureIndex = resolveCaptureIndex(pattern.pattern.name(), captureNames);

      code.push({ type: "CaptureStart", captureIndex });
      compilePattern(pattern.pattern.pattern(), code, literals, captureNames);
      code.push({ type: "CaptureEnd", captureIndex });
      break;
    }
    case "Search": {
      // Map every capture name used by the inner pattern to its capture index.
      const innerCaptureNames: string[] = [];
      collectCaptureNames(pattern.pattern.pattern(), innerCaptureNames);

      const captureMap: [string, number][] = innerCaptureNames.map(
        (name): [string, number] => [name, resolveCaptureIndex(name, captureNames)],
      );

      literals.push(pattern.pattern.pattern());
      code.push({
        type: "Search",
        patternIndex: literals.length - 1,
        captureMap,
      });
      break;
    }
    case "Traverse": {
      const patterns = pattern.pattern.patterns();
      let openExtensions = 0;
      for (let i = 0; i < patterns.length; i++) {
        const pat = patterns[i];
        if (pat === undefined) continue;
        compilePattern(pat, code, literals, captureNames);
        if (i < patterns.length - 1) {
          code.push({ type: "ExtendTraversal" });
          openExtensions += 1;
        }
      }
      // Every ExtendTraversal saves a path prefix, so each one needs its own
      // CombineTraversal to restore it; otherwise traversals of three or more
      // steps lose the leading part of the path.
      for (let k = 0; k < openExtensions; k++) {
        code.push({ type: "CombineTraversal" });
      }
      break;
    }
    case "Group": {
      const quantifier = pattern.pattern.quantifier();
      if (quantifier !== undefined) {
        // Repeat pattern
        literals.push(pattern.pattern.pattern());
        code.push({
          type: "Repeat",
          patternIndex: literals.length - 1,
          quantifier,
        });
      } else {
        // Simple grouping
        compilePattern(pattern.pattern.pattern(), code, literals, captureNames);
      }
      break;
    }
  }
}