@mhingston5/lasso 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. package/README.md +707 -0
  2. package/docs/agent-wrangling.png +0 -0
  3. package/package.json +26 -0
  4. package/src/capabilities/matcher.ts +25 -0
  5. package/src/capabilities/registry.ts +103 -0
  6. package/src/capabilities/types.ts +15 -0
  7. package/src/cir/lower.ts +253 -0
  8. package/src/cir/optimize.ts +251 -0
  9. package/src/cir/types.ts +131 -0
  10. package/src/cir/validate.ts +265 -0
  11. package/src/compiler/compile.ts +601 -0
  12. package/src/compiler/feedback.ts +471 -0
  13. package/src/compiler/runtime-helpers.ts +455 -0
  14. package/src/composition/chain.ts +58 -0
  15. package/src/composition/conditional.ts +76 -0
  16. package/src/composition/parallel.ts +75 -0
  17. package/src/composition/types.ts +105 -0
  18. package/src/environment/analyzer.ts +56 -0
  19. package/src/environment/discovery.ts +179 -0
  20. package/src/environment/types.ts +68 -0
  21. package/src/failures/classifiers.ts +134 -0
  22. package/src/failures/generator.ts +421 -0
  23. package/src/failures/map-reference-failures.ts +23 -0
  24. package/src/failures/ontology.ts +210 -0
  25. package/src/failures/recovery.ts +214 -0
  26. package/src/failures/types.ts +14 -0
  27. package/src/index.ts +67 -0
  28. package/src/memory/advisor.ts +132 -0
  29. package/src/memory/extractor.ts +166 -0
  30. package/src/memory/store.ts +107 -0
  31. package/src/memory/types.ts +53 -0
  32. package/src/metaharness/engine.ts +256 -0
  33. package/src/metaharness/predictor.ts +168 -0
  34. package/src/metaharness/types.ts +40 -0
  35. package/src/mutation/derive.ts +308 -0
  36. package/src/mutation/diff.ts +52 -0
  37. package/src/mutation/engine.ts +256 -0
  38. package/src/mutation/types.ts +84 -0
  39. package/src/pi/command-input.ts +209 -0
  40. package/src/pi/commands.ts +351 -0
  41. package/src/pi/extension.ts +16 -0
  42. package/src/planner/synthesize.ts +83 -0
  43. package/src/planner/template-rules.ts +183 -0
  44. package/src/planner/types.ts +42 -0
  45. package/src/reference/catalog.ts +128 -0
  46. package/src/reference/patch-validation-strategies.ts +170 -0
  47. package/src/reference/patch-validation.ts +174 -0
  48. package/src/reference/pr-review-merge.ts +155 -0
  49. package/src/reference/strategies.ts +126 -0
  50. package/src/reference/types.ts +33 -0
  51. package/src/replanner/risk-rules.ts +161 -0
  52. package/src/replanner/runtime.ts +308 -0
  53. package/src/replanner/synthesize.ts +619 -0
  54. package/src/replanner/types.ts +73 -0
  55. package/src/spec/schema.ts +254 -0
  56. package/src/spec/types.ts +319 -0
  57. package/src/spec/validate.ts +296 -0
  58. package/src/state/snapshots.ts +43 -0
  59. package/src/state/types.ts +12 -0
  60. package/src/synthesis/graph-builder.ts +267 -0
  61. package/src/synthesis/harness-builder.ts +113 -0
  62. package/src/synthesis/intent-ir.ts +63 -0
  63. package/src/synthesis/policy-builder.ts +320 -0
  64. package/src/synthesis/risk-analyzer.ts +182 -0
  65. package/src/synthesis/skill-parser.ts +441 -0
  66. package/src/verification/engine.ts +230 -0
  67. package/src/versioning/file-store.ts +103 -0
  68. package/src/versioning/history.ts +43 -0
  69. package/src/versioning/store.ts +16 -0
  70. package/src/versioning/types.ts +31 -0
  71. package/test/capabilities/matcher.test.ts +67 -0
  72. package/test/capabilities/registry.test.ts +136 -0
  73. package/test/capabilities/synthesis.test.ts +264 -0
  74. package/test/cir/lower.test.ts +417 -0
  75. package/test/cir/optimize.test.ts +266 -0
  76. package/test/cir/validate.test.ts +368 -0
  77. package/test/compiler/adaptive-runtime.test.ts +157 -0
  78. package/test/compiler/compile.test.ts +1198 -0
  79. package/test/compiler/feedback.test.ts +784 -0
  80. package/test/compiler/guardrails.test.ts +191 -0
  81. package/test/compiler/trace.test.ts +404 -0
  82. package/test/composition/chain.test.ts +328 -0
  83. package/test/composition/conditional.test.ts +241 -0
  84. package/test/composition/parallel.test.ts +215 -0
  85. package/test/environment/analyzer.test.ts +204 -0
  86. package/test/environment/discovery.test.ts +149 -0
  87. package/test/failures/classifiers.test.ts +287 -0
  88. package/test/failures/generator.test.ts +203 -0
  89. package/test/failures/ontology.test.ts +439 -0
  90. package/test/failures/recovery.test.ts +300 -0
  91. package/test/helpers/createFixtureRepo.ts +84 -0
  92. package/test/helpers/createPatchValidationFixture.ts +144 -0
  93. package/test/helpers/runCompiledWorkflow.ts +208 -0
  94. package/test/memory/advisor.test.ts +332 -0
  95. package/test/memory/extractor.test.ts +295 -0
  96. package/test/memory/store.test.ts +244 -0
  97. package/test/metaharness/engine.test.ts +575 -0
  98. package/test/metaharness/predictor.test.ts +436 -0
  99. package/test/mutation/derive-failure.test.ts +209 -0
  100. package/test/mutation/engine.test.ts +622 -0
  101. package/test/package-smoke.test.ts +29 -0
  102. package/test/pi/command-input.test.ts +153 -0
  103. package/test/pi/commands.test.ts +623 -0
  104. package/test/planner/classify-template.test.ts +32 -0
  105. package/test/planner/synthesize.test.ts +901 -0
  106. package/test/reference/PatchValidation.failures.test.ts +137 -0
  107. package/test/reference/PatchValidation.test.ts +326 -0
  108. package/test/reference/PrReviewMerge.failures.test.ts +121 -0
  109. package/test/reference/PrReviewMerge.test.ts +55 -0
  110. package/test/reference/catalog-open.test.ts +70 -0
  111. package/test/replanner/runtime.test.ts +207 -0
  112. package/test/replanner/synthesize.test.ts +303 -0
  113. package/test/spec/validate.test.ts +1056 -0
  114. package/test/state/snapshots.test.ts +264 -0
  115. package/test/synthesis/custom-workflow.test.ts +264 -0
  116. package/test/synthesis/graph-builder.test.ts +370 -0
  117. package/test/synthesis/harness-builder.test.ts +128 -0
  118. package/test/synthesis/policy-builder.test.ts +149 -0
  119. package/test/synthesis/risk-analyzer.test.ts +230 -0
  120. package/test/synthesis/skill-parser.test.ts +796 -0
  121. package/test/verification/engine.test.ts +509 -0
  122. package/test/versioning/history.test.ts +144 -0
  123. package/test/versioning/store.test.ts +254 -0
  124. package/vitest.config.ts +9 -0
@@ -0,0 +1,509 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import { runVerification } from "../../src/verification/engine.js";
3
+ import type { CirNode, CirVerificationHook, CirWorkflow } from "../../src/cir/types.js";
4
+ import type { ExecutionState } from "../../src/compiler/runtime-helpers.js";
5
+ import type { WorkflowContext, YieldItem } from "pi-duroxide";
6
+ import { createHarnessState } from "../../src/state/snapshots.js";
7
+
/**
 * Builds a stand-in `WorkflowContext` together with a log of what was asked of it.
 *
 * `pi.tool` and `pi.llm` append their arguments to the log and hand back plain
 * descriptor objects (`kind: "tool-call"` / `kind: "llm-call"`), and
 * `setCustomStatus` appends each status it receives. `utcNow` and `newGuid`
 * return fixed values; every remaining member is a bare `vi.fn()` stub.
 *
 * @returns `calls` (the recorded tool / llm / status invocations) and the mock `context`.
 */
function createMockContext() {
  type RecordedCalls = {
    tools: Array<{ name: string; args: unknown }>;
    llm: Array<{ messages: unknown[]; options?: unknown }>;
    statuses: unknown[];
  };

  const calls: RecordedCalls = { tools: [], llm: [], statuses: [] };

  // Kept as one literal under `satisfies` so it is still checked against WorkflowContext.
  const context = {
    scheduleTimer: vi.fn(),
    waitForEvent: vi.fn(),
    scheduleSubOrchestration: vi.fn(),
    all: vi.fn(),
    race: vi.fn(),
    utcNow: () => 0,
    newGuid: () => "guid-1",
    continueAsNew: vi.fn(),
    setCustomStatus: (status: unknown) => {
      calls.statuses.push(status);
    },
    traceInfo: vi.fn(),
    traceWarn: vi.fn(),
    traceError: vi.fn(),
    traceDebug: vi.fn(),
    kv: { get: vi.fn(), set: vi.fn(), clear: vi.fn() },
    pi: {
      tool: (name: string, args: unknown) => {
        calls.tools.push({ name, args });
        return { kind: "tool-call", name, args };
      },
      llm: (messages: unknown[], options?: unknown) => {
        calls.llm.push({ messages, options });
        return { kind: "llm-call", messages, options };
      },
      skill: vi.fn(),
      sendMessage: vi.fn(),
      prompt: vi.fn(),
    },
  } satisfies WorkflowContext;

  return { calls, context };
}
50
+
/**
 * Creates the `tool` node that the tests treat as the action under verification.
 *
 * The node runs `echo hello` and reports `graph.nodes[0]` as its spec path.
 *
 * @param id - Node id; also used as `source.specNodeId`.
 * @param verification - Hooks to attach to the node. The `verification` key is
 *   always present on the result, holding `undefined` when the argument is omitted.
 * @returns the node, asserted to the `tool` member of `CirNode`.
 */
function createToolNode(id: string, verification?: CirVerificationHook[]): CirNode {
  const toolNode = {
    id,
    kind: "tool",
    source: {
      specNodeId: id,
      specNodeKind: "tool",
      specPath: "graph.nodes[0]",
    },
    verification,
    action: {
      tool: "echo",
      args: ["hello"],
    },
  } as Extract<CirNode, { kind: "tool" }>;

  return toolNode;
}
60
+
/**
 * Creates an `llm` node for use as a verification check target.
 *
 * The action is fixed to provider `anthropic`, model `claude-sonnet`, and the
 * prompt `Verify?`; the spec path is `graph.nodes[1]`.
 *
 * @param id - Node id; also used as `source.specNodeId`.
 * @returns the node, asserted to the `llm` member of `CirNode`.
 */
function createLlmVerifierNode(id: string): CirNode {
  const llmNode = {
    id,
    kind: "llm",
    source: {
      specNodeId: id,
      specNodeKind: "llm",
      specPath: "graph.nodes[1]",
    },
    action: {
      provider: "anthropic",
      model: "claude-sonnet",
      prompt: "Verify?",
    },
  } as Extract<CirNode, { kind: "llm" }>;

  return llmNode;
}
69
+
/**
 * Creates a `tool` node for use as a verification check target.
 *
 * The action is fixed to `test -f output.txt`; the spec path is `graph.nodes[1]`.
 *
 * @param id - Node id; also used as `source.specNodeId`.
 * @returns the node, asserted to the `tool` member of `CirNode`.
 */
function createToolVerifierNode(id: string): CirNode {
  const verifierNode = {
    id,
    kind: "tool",
    source: {
      specNodeId: id,
      specNodeKind: "tool",
      specPath: "graph.nodes[1]",
    },
    action: {
      tool: "test",
      args: ["-f", "output.txt"],
    },
  } as Extract<CirNode, { kind: "tool" }>;

  return verifierNode;
}
78
+
/**
 * Creates a `condition` node for use as an expression verification target.
 *
 * @param id - Node id; also used as `source.specNodeId`.
 * @param conditionExpr - Expression text stored verbatim in `action.conditionExpr`.
 * @returns the node, asserted to the `condition` member of `CirNode`.
 */
function createConditionNode(id: string, conditionExpr: string): CirNode {
  const conditionNode = {
    id,
    kind: "condition",
    source: {
      specNodeId: id,
      specNodeKind: "condition",
      specPath: "graph.nodes[1]",
    },
    action: { conditionExpr },
  } as Extract<CirNode, { kind: "condition" }>;

  return conditionNode;
}
87
+
/**
 * Creates an empty `ExecutionState`: no input, no outputs, an empty trace, a
 * harness state built from `createHarnessState({})`, and `startTimeMs` set to
 * the current wall-clock time.
 */
function createExecutionState(): ExecutionState {
  const harnessState = createHarnessState({});
  const freshState: ExecutionState = {
    input: {},
    outputs: {},
    trace: [],
    harnessState,
    startTimeMs: Date.now(),
  };
  return freshState;
}
97
+
/**
 * Runs a generator to completion, resuming every `yield` with `undefined`.
 *
 * @param gen - Generator to drain from its current position.
 * @returns every yielded item in order, plus the generator's return value.
 */
function collectGenerator<T>(gen: Generator<YieldItem, T, unknown>): { yields: YieldItem[]; result: T } {
  const yields: YieldItem[] = [];
  let step = gen.next();
  for (; !step.done; step = gen.next(undefined)) {
    yields.push(step.value);
  }
  return { yields, result: step.value };
}
107
+
describe("runVerification", () => {
  /**
   * Assembles the fixtures shared by every test: a mock context with its call
   * log, a node map containing the `action` tool node (carrying `hooks`) plus
   * one LLM verifier node per id in `llmVerifierIds`, and a fresh execution state.
   *
   * Tests that need a non-LLM check node add it to the returned `nodeMap` themselves.
   */
  function setup(hooks: CirVerificationHook[], llmVerifierIds: string[] = []) {
    const { context, calls } = createMockContext();
    const nodeMap = new Map<string, CirNode>([["action", createToolNode("action", hooks)]]);
    for (const id of llmVerifierIds) {
      nodeMap.set(id, createLlmVerifierNode(id));
    }
    return { context, calls, nodeMap, state: createExecutionState() };
  }

  /**
   * Two LLM hooks targeting `verifier-a` then `verifier-b`. The second hook
   * always blocks on failure; only the first hook's `onFail` varies.
   */
  function twoLlmHooks(firstOnFail: "block" | "warn"): CirVerificationHook[] {
    return [
      { kind: "llm", checkNodeId: "verifier-a", onFail: firstOnFail },
      { kind: "llm", checkNodeId: "verifier-b", onFail: "block" },
    ];
  }

  const twoVerifierIds = ["verifier-a", "verifier-b"];

  it("returns pass report when no hooks are provided", () => {
    const { context, nodeMap, state } = setup([]);

    const gen = runVerification("action", [], nodeMap, state, context);
    const { result } = collectGenerator(gen);

    expect(result).toEqual({
      nodeId: "action",
      hookResults: [],
      overallStatus: "pass",
    });
  });

  it("returns pass report when single hook passes", () => {
    const hook: CirVerificationHook = { kind: "llm", checkNodeId: "verifier", onFail: "block" };
    const { context, nodeMap, state } = setup([hook], ["verifier"]);

    const gen = runVerification("action", [hook], nodeMap, state, context);

    // Yields LLM call for verifier
    const first = gen.next();
    expect(first.done).toBe(false);
    expect(first.value).toMatchObject({ kind: "llm-call" });

    // Pass verifier result (boolean true)
    const { result } = collectGeneratorFrom(gen, true);

    expect(result.overallStatus).toBe("pass");
    expect(result.hookResults).toHaveLength(1);
    expect(result.hookResults[0].hook).toBe(hook);
    expect(result.hookResults[0].outcome).toEqual({ status: "pass" });
    expect(result.hookResults[0].durationMs).toBeGreaterThanOrEqual(0);
  });

  it("returns block report when single hook fails with block onFail", () => {
    const hook: CirVerificationHook = { kind: "llm", checkNodeId: "verifier", onFail: "block" };
    const { context, nodeMap, state } = setup([hook], ["verifier"]);

    const gen = runVerification("action", [hook], nodeMap, state, context);

    // Yields LLM call for verifier
    gen.next();

    // Fail verifier result (boolean false)
    const { result } = collectGeneratorFrom(gen, false);

    expect(result.overallStatus).toBe("block");
    expect(result.hookResults).toHaveLength(1);
    expect(result.hookResults[0].outcome).toEqual({
      status: "block",
      hook,
      message: "Verification failed via verifier",
    });
  });

  it("returns warn report when single hook fails with warn onFail", () => {
    const hook: CirVerificationHook = { kind: "llm", checkNodeId: "verifier", onFail: "warn" };
    const { context, nodeMap, state } = setup([hook], ["verifier"]);

    const gen = runVerification("action", [hook], nodeMap, state, context);
    gen.next();
    const { result } = collectGeneratorFrom(gen, false);

    expect(result.overallStatus).toBe("pass");
    expect(result.hookResults).toHaveLength(1);
    expect(result.hookResults[0].outcome).toEqual({ status: "warn", hook });
  });

  it("all-must-pass strategy: stops at first block", () => {
    const hooks = twoLlmHooks("block");
    const { context, calls, nodeMap, state } = setup(hooks, twoVerifierIds);

    const gen = runVerification("action", hooks, nodeMap, state, context, "all-must-pass");

    // First verifier yields
    gen.next();
    // First verifier fails
    const { result } = collectGeneratorFrom(gen, false);

    expect(result.overallStatus).toBe("block");
    expect(result.hookResults).toHaveLength(1);
    expect(result.hookResults[0].hook).toBe(hooks[0]);
    // Second verifier should not have been called
    expect(calls.llm).toHaveLength(1);
  });

  it("all-must-pass strategy: all pass yields pass report", () => {
    const hooks = twoLlmHooks("block");
    const { context, calls, nodeMap, state } = setup(hooks, twoVerifierIds);

    const gen = runVerification("action", hooks, nodeMap, state, context, "all-must-pass");

    // First verifier yields
    gen.next();
    // First verifier passes, second verifier yields
    gen.next(true);
    // Second verifier passes
    const { result } = collectGeneratorFrom(gen, true);

    expect(result.overallStatus).toBe("pass");
    expect(result.hookResults).toHaveLength(2);
    expect(calls.llm).toHaveLength(2);
  });

  it("first-pass strategy: exits on first pass", () => {
    const hooks = twoLlmHooks("block");
    const { context, calls, nodeMap, state } = setup(hooks, twoVerifierIds);

    const gen = runVerification("action", hooks, nodeMap, state, context, "first-pass");

    // First verifier yields
    gen.next();
    // First verifier passes
    const { result } = collectGeneratorFrom(gen, true);

    expect(result.overallStatus).toBe("pass");
    expect(result.hookResults).toHaveLength(1);
    expect(result.hookResults[0].hook).toBe(hooks[0]);
    // Second verifier should not have been called
    expect(calls.llm).toHaveLength(1);
  });

  it("first-pass strategy: continues to second hook when first fails", () => {
    const hooks = twoLlmHooks("block");
    const { context, calls, nodeMap, state } = setup(hooks, twoVerifierIds);

    const gen = runVerification("action", hooks, nodeMap, state, context, "first-pass");

    // First verifier yields
    gen.next();
    // First verifier fails, second verifier yields
    gen.next(false);
    // Second verifier passes
    const { result } = collectGeneratorFrom(gen, true);

    expect(result.overallStatus).toBe("pass");
    expect(result.hookResults).toHaveLength(2);
    expect(calls.llm).toHaveLength(2);
  });

  it("any-block strategy: exits on first block", () => {
    const hooks = twoLlmHooks("block");
    const { context, calls, nodeMap, state } = setup(hooks, twoVerifierIds);

    const gen = runVerification("action", hooks, nodeMap, state, context, "any-block");

    // First verifier yields
    gen.next();
    // First verifier blocks
    const { result } = collectGeneratorFrom(gen, false);

    expect(result.overallStatus).toBe("block");
    expect(result.hookResults).toHaveLength(1);
    expect(calls.llm).toHaveLength(1);
  });

  it("any-block strategy: continues when hook warns", () => {
    const hooks = twoLlmHooks("warn");
    const { context, calls, nodeMap, state } = setup(hooks, twoVerifierIds);

    const gen = runVerification("action", hooks, nodeMap, state, context, "any-block");

    // First verifier yields
    gen.next();
    // First verifier warns (not a block, continue), second verifier yields
    gen.next(false);
    // Second verifier passes
    const { result } = collectGeneratorFrom(gen, true);

    expect(result.overallStatus).toBe("pass");
    expect(result.hookResults).toHaveLength(2);
    expect(calls.llm).toHaveLength(2);
  });

  it("tracks duration for each hook", () => {
    const hooks: CirVerificationHook[] = [
      { kind: "llm", checkNodeId: "verifier", onFail: "block" },
    ];
    const { context, nodeMap, state } = setup(hooks, ["verifier"]);

    const gen = runVerification("action", hooks, nodeMap, state, context);
    gen.next();
    const { result } = collectGeneratorFrom(gen, true);

    expect(result.hookResults[0].durationMs).toBeGreaterThanOrEqual(0);
    expect(typeof result.hookResults[0].durationMs).toBe("number");
  });

  it("handles expression verification hooks without yielding", () => {
    const hook: CirVerificationHook = { kind: "expression", checkNodeId: "check-expr", onFail: "block" };
    const { context, nodeMap, state } = setup([hook]);
    // The check target here is a condition node, not an LLM verifier.
    nodeMap.set("check-expr", createConditionNode("check-expr", "outputs.action.ok"));
    state.outputs["action"] = { ok: true };

    const gen = runVerification("action", [hook], nodeMap, state, context);
    const { result, yields } = collectGenerator(gen);

    // Expression verification should not yield any external actions
    expect(yields).toHaveLength(0);
    expect(result.overallStatus).toBe("pass");
    expect(result.hookResults).toHaveLength(1);
    expect(result.hookResults[0].outcome).toEqual({ status: "pass" });
  });

  it("defaults to all-must-pass strategy when not specified", () => {
    const hooks = twoLlmHooks("block");
    const { context, calls, nodeMap, state } = setup(hooks, twoVerifierIds);

    const gen = runVerification("action", hooks, nodeMap, state, context);

    // First verifier yields
    gen.next();
    // First verifier fails -> should block immediately (all-must-pass default)
    const { result } = collectGeneratorFrom(gen, false);

    expect(result.overallStatus).toBe("block");
    expect(result.hookResults).toHaveLength(1);
    expect(calls.llm).toHaveLength(1);
  });

  it("stores verifier output in state.outputs", () => {
    const hook: CirVerificationHook = { kind: "llm", checkNodeId: "verifier", onFail: "block" };
    const { context, nodeMap, state } = setup([hook], ["verifier"]);

    const gen = runVerification("action", [hook], nodeMap, state, context);
    gen.next();
    collectGeneratorFrom(gen, { passed: true });

    expect(state.outputs["verifier"]).toEqual({ passed: true });
  });

  it("warn outcome does not block even with all-must-pass strategy", () => {
    const hooks = twoLlmHooks("warn");
    const { context, calls, nodeMap, state } = setup(hooks, twoVerifierIds);

    const gen = runVerification("action", hooks, nodeMap, state, context, "all-must-pass");

    // First verifier yields
    gen.next();
    // First verifier warns (not a block), second verifier yields
    gen.next(false);
    // Second verifier passes
    const { result } = collectGeneratorFrom(gen, true);

    expect(result.overallStatus).toBe("pass");
    expect(result.hookResults).toHaveLength(2);
    expect(calls.llm).toHaveLength(2);
  });
});
498
+
/**
 * Drains a generator that has already been started, feeding it scripted resume values.
 *
 * The first entry of `nextValues` is sent to the `yield` the generator is
 * currently suspended on; each later entry answers the next `yield` in turn.
 * Once the script runs out, remaining yields are resumed with `undefined`.
 * Values are forwarded exactly as given, so an explicit `null` reaches the
 * generator as `null` at every position.
 *
 * @param gen - Generator to resume and run to completion.
 * @param nextValues - Resume values, in the order they should be delivered.
 * @returns the items yielded after the first resume, plus the generator's return value.
 */
function collectGeneratorFrom<T>(gen: Generator<YieldItem, T, unknown>, ...nextValues: unknown[]): { yields: YieldItem[]; result: T } {
  const yields: YieldItem[] = [];
  let valueIndex = 0;
  let current = gen.next(nextValues[valueIndex++]);
  while (!current.done) {
    yields.push(current.value);
    // Reading past the end of the script yields `undefined`, so no fallback is needed.
    current = gen.next(nextValues[valueIndex++]);
  }
  return { yields, result: current.value };
}
@@ -0,0 +1,144 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { createInitialVersion, createNextVersion, createLineageEntry } from "../../src/versioning/history.js";
3
+ import type { HarnessSpec } from "../../src/spec/types.js";
4
+ import type { CompiledHarnessResult } from "../../src/compiler/compile.js";
5
+
describe("versioning/history", () => {
  // Minimal single-node spec reused by every test in this suite.
  const mockSpec: HarnessSpec = {
    name: "test-workflow",
    graph: {
      nodes: [
        {
          id: "start",
          label: "Start",
          task: {
            kind: "shell",
            tool: "bash",
            args: ["echo test"],
          },
        },
      ],
      edges: [],
    },
  };

  describe("createInitialVersion", () => {
    it("should create version 1 with no parent", () => {
      const version = createInitialVersion(mockSpec);

      expect(version.version).toBe(1);
      expect(version.parentVersion).toBeUndefined();
      expect(version.reason).toBe("initial");
      expect(version.spec).toEqual(mockSpec);
      expect(version.generatedAt).toBeGreaterThan(0);
    });

    it("should deep clone the spec", () => {
      const version = createInitialVersion(mockSpec);

      expect(version.spec).not.toBe(mockSpec);
      expect(version.spec).toEqual(mockSpec);
    });
  });

  describe("createNextVersion", () => {
    it("should increment version and set parent", () => {
      const initial = createInitialVersion(mockSpec);
      const next = createNextVersion(initial, mockSpec, "escalation");

      expect(next.version).toBe(2);
      expect(next.parentVersion).toBe(1);
      expect(next.reason).toBe("escalation");
      expect(next.spec).toEqual(mockSpec);
      expect(next.generatedAt).toBeGreaterThanOrEqual(initial.generatedAt);
    });

    it("should chain multiple versions", () => {
      const v1 = createInitialVersion(mockSpec);
      const v2 = createNextVersion(v1, mockSpec, "escalation");
      const v3 = createNextVersion(v2, mockSpec, "retry");

      expect(v3.version).toBe(3);
      expect(v3.parentVersion).toBe(2);
      expect(v3.reason).toBe("retry");
    });
  });

  describe("createLineageEntry", () => {
    it("should capture completed result data", () => {
      // Read the clock once so startTimeMs/endTimeMs stay exactly 100 ms apart,
      // matching totalDurationMs below.
      const now = Date.now();
      const mockResult: CompiledHarnessResult = {
        status: "completed",
        terminalNodeId: "success",
        result: { output: "test" },
        outputs: { start: { stdout: "test" } },
        trace: {
          entries: [
            {
              nodeId: "start",
              source: { kind: "user-task", nodeId: "start" },
              phase: "enter",
            },
          ],
          totalDurationMs: 100,
          nodeCount: 1,
          failureCount: 0,
          startTimeMs: now - 100,
          endTimeMs: now,
        },
        harnessState: {
          inputs: {},
          outputs: { start: { stdout: "test" } },
          nodeResults: { start: { stdout: "test" } },
          failures: [],
          metrics: {
            retries: 0,
            durationMs: 100,
          },
        },
      };

      const version = createInitialVersion(mockSpec);
      const lineage = createLineageEntry(version, mockResult);

      expect(lineage.version).toBe(1);
      expect(lineage.terminalNodeId).toBe("success");
      expect(lineage.outputs).toEqual({ start: { stdout: "test" } });
      expect(lineage.nodeResults).toEqual({ start: { stdout: "test" } });
      expect(lineage.failures).toEqual([]);
      expect(lineage.metrics).toEqual({ retries: 0, durationMs: 100 });
      expect(lineage.trace.entries).toHaveLength(1);
      expect(lineage.completedAt).toBeGreaterThan(0);
    });

    it("should deep clone result data", () => {
      const now = Date.now();
      const mockResult: CompiledHarnessResult = {
        status: "completed",
        terminalNodeId: "success",
        result: { output: "test" },
        outputs: { start: { stdout: "test" } },
        trace: {
          entries: [],
          totalDurationMs: 100,
          nodeCount: 0,
          failureCount: 0,
          startTimeMs: now - 100,
          endTimeMs: now,
        },
        harnessState: {
          inputs: {},
          outputs: { start: { stdout: "test" } },
          nodeResults: { start: { stdout: "test" } },
          failures: [],
          metrics: { retries: 0, durationMs: 100 },
        },
      };

      const version = createInitialVersion(mockSpec);
      const lineage = createLineageEntry(version, mockResult);

      // Only top-level identity is checked here: the entry must not alias the input objects.
      expect(lineage.outputs).not.toBe(mockResult.outputs);
      expect(lineage.nodeResults).not.toBe(mockResult.harnessState.nodeResults);
    });
  });
});