@mhingston5/lasso 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +707 -0
- package/docs/agent-wrangling.png +0 -0
- package/package.json +26 -0
- package/src/capabilities/matcher.ts +25 -0
- package/src/capabilities/registry.ts +103 -0
- package/src/capabilities/types.ts +15 -0
- package/src/cir/lower.ts +253 -0
- package/src/cir/optimize.ts +251 -0
- package/src/cir/types.ts +131 -0
- package/src/cir/validate.ts +265 -0
- package/src/compiler/compile.ts +601 -0
- package/src/compiler/feedback.ts +471 -0
- package/src/compiler/runtime-helpers.ts +455 -0
- package/src/composition/chain.ts +58 -0
- package/src/composition/conditional.ts +76 -0
- package/src/composition/parallel.ts +75 -0
- package/src/composition/types.ts +105 -0
- package/src/environment/analyzer.ts +56 -0
- package/src/environment/discovery.ts +179 -0
- package/src/environment/types.ts +68 -0
- package/src/failures/classifiers.ts +134 -0
- package/src/failures/generator.ts +421 -0
- package/src/failures/map-reference-failures.ts +23 -0
- package/src/failures/ontology.ts +210 -0
- package/src/failures/recovery.ts +214 -0
- package/src/failures/types.ts +14 -0
- package/src/index.ts +67 -0
- package/src/memory/advisor.ts +132 -0
- package/src/memory/extractor.ts +166 -0
- package/src/memory/store.ts +107 -0
- package/src/memory/types.ts +53 -0
- package/src/metaharness/engine.ts +256 -0
- package/src/metaharness/predictor.ts +168 -0
- package/src/metaharness/types.ts +40 -0
- package/src/mutation/derive.ts +308 -0
- package/src/mutation/diff.ts +52 -0
- package/src/mutation/engine.ts +256 -0
- package/src/mutation/types.ts +84 -0
- package/src/pi/command-input.ts +209 -0
- package/src/pi/commands.ts +351 -0
- package/src/pi/extension.ts +16 -0
- package/src/planner/synthesize.ts +83 -0
- package/src/planner/template-rules.ts +183 -0
- package/src/planner/types.ts +42 -0
- package/src/reference/catalog.ts +128 -0
- package/src/reference/patch-validation-strategies.ts +170 -0
- package/src/reference/patch-validation.ts +174 -0
- package/src/reference/pr-review-merge.ts +155 -0
- package/src/reference/strategies.ts +126 -0
- package/src/reference/types.ts +33 -0
- package/src/replanner/risk-rules.ts +161 -0
- package/src/replanner/runtime.ts +308 -0
- package/src/replanner/synthesize.ts +619 -0
- package/src/replanner/types.ts +73 -0
- package/src/spec/schema.ts +254 -0
- package/src/spec/types.ts +319 -0
- package/src/spec/validate.ts +296 -0
- package/src/state/snapshots.ts +43 -0
- package/src/state/types.ts +12 -0
- package/src/synthesis/graph-builder.ts +267 -0
- package/src/synthesis/harness-builder.ts +113 -0
- package/src/synthesis/intent-ir.ts +63 -0
- package/src/synthesis/policy-builder.ts +320 -0
- package/src/synthesis/risk-analyzer.ts +182 -0
- package/src/synthesis/skill-parser.ts +441 -0
- package/src/verification/engine.ts +230 -0
- package/src/versioning/file-store.ts +103 -0
- package/src/versioning/history.ts +43 -0
- package/src/versioning/store.ts +16 -0
- package/src/versioning/types.ts +31 -0
- package/test/capabilities/matcher.test.ts +67 -0
- package/test/capabilities/registry.test.ts +136 -0
- package/test/capabilities/synthesis.test.ts +264 -0
- package/test/cir/lower.test.ts +417 -0
- package/test/cir/optimize.test.ts +266 -0
- package/test/cir/validate.test.ts +368 -0
- package/test/compiler/adaptive-runtime.test.ts +157 -0
- package/test/compiler/compile.test.ts +1198 -0
- package/test/compiler/feedback.test.ts +784 -0
- package/test/compiler/guardrails.test.ts +191 -0
- package/test/compiler/trace.test.ts +404 -0
- package/test/composition/chain.test.ts +328 -0
- package/test/composition/conditional.test.ts +241 -0
- package/test/composition/parallel.test.ts +215 -0
- package/test/environment/analyzer.test.ts +204 -0
- package/test/environment/discovery.test.ts +149 -0
- package/test/failures/classifiers.test.ts +287 -0
- package/test/failures/generator.test.ts +203 -0
- package/test/failures/ontology.test.ts +439 -0
- package/test/failures/recovery.test.ts +300 -0
- package/test/helpers/createFixtureRepo.ts +84 -0
- package/test/helpers/createPatchValidationFixture.ts +144 -0
- package/test/helpers/runCompiledWorkflow.ts +208 -0
- package/test/memory/advisor.test.ts +332 -0
- package/test/memory/extractor.test.ts +295 -0
- package/test/memory/store.test.ts +244 -0
- package/test/metaharness/engine.test.ts +575 -0
- package/test/metaharness/predictor.test.ts +436 -0
- package/test/mutation/derive-failure.test.ts +209 -0
- package/test/mutation/engine.test.ts +622 -0
- package/test/package-smoke.test.ts +29 -0
- package/test/pi/command-input.test.ts +153 -0
- package/test/pi/commands.test.ts +623 -0
- package/test/planner/classify-template.test.ts +32 -0
- package/test/planner/synthesize.test.ts +901 -0
- package/test/reference/PatchValidation.failures.test.ts +137 -0
- package/test/reference/PatchValidation.test.ts +326 -0
- package/test/reference/PrReviewMerge.failures.test.ts +121 -0
- package/test/reference/PrReviewMerge.test.ts +55 -0
- package/test/reference/catalog-open.test.ts +70 -0
- package/test/replanner/runtime.test.ts +207 -0
- package/test/replanner/synthesize.test.ts +303 -0
- package/test/spec/validate.test.ts +1056 -0
- package/test/state/snapshots.test.ts +264 -0
- package/test/synthesis/custom-workflow.test.ts +264 -0
- package/test/synthesis/graph-builder.test.ts +370 -0
- package/test/synthesis/harness-builder.test.ts +128 -0
- package/test/synthesis/policy-builder.test.ts +149 -0
- package/test/synthesis/risk-analyzer.test.ts +230 -0
- package/test/synthesis/skill-parser.test.ts +796 -0
- package/test/verification/engine.test.ts +509 -0
- package/test/versioning/history.test.ts +144 -0
- package/test/versioning/store.test.ts +254 -0
- package/vitest.config.ts +9 -0
|
@@ -0,0 +1,509 @@
|
|
|
1
|
+
import { describe, expect, it, vi } from "vitest";
|
|
2
|
+
import { runVerification } from "../../src/verification/engine.js";
|
|
3
|
+
import type { CirNode, CirVerificationHook, CirWorkflow } from "../../src/cir/types.js";
|
|
4
|
+
import type { ExecutionState } from "../../src/compiler/runtime-helpers.js";
|
|
5
|
+
import type { WorkflowContext, YieldItem } from "pi-duroxide";
|
|
6
|
+
import { createHarnessState } from "../../src/state/snapshots.js";
|
|
7
|
+
|
|
8
|
+
/**
 * Creates a stub WorkflowContext together with a log of what was invoked on it.
 *
 * `pi.tool`, `pi.llm` and `setCustomStatus` append to the returned `calls`
 * log; `pi.tool` and `pi.llm` additionally return a plain descriptor object
 * (`kind: "tool-call"` / `kind: "llm-call"`). `utcNow` and `newGuid` return
 * fixed values, and every other member is a bare `vi.fn()`.
 */
function createMockContext() {
  type ToolCall = { name: string; args: unknown };
  type LlmCall = { messages: unknown[]; options?: unknown };

  // Shared by reference with the stubs below so tests can inspect it afterwards.
  const calls: { tools: ToolCall[]; llm: LlmCall[]; statuses: unknown[] } = {
    tools: [],
    llm: [],
    statuses: [],
  };

  return {
    calls,
    context: {
      scheduleTimer: vi.fn(),
      waitForEvent: vi.fn(),
      scheduleSubOrchestration: vi.fn(),
      all: vi.fn(),
      race: vi.fn(),
      utcNow: () => 0,
      newGuid: () => "guid-1",
      continueAsNew: vi.fn(),
      setCustomStatus: (status: unknown) => {
        calls.statuses.push(status);
      },
      traceInfo: vi.fn(),
      traceWarn: vi.fn(),
      traceError: vi.fn(),
      traceDebug: vi.fn(),
      kv: { get: vi.fn(), set: vi.fn(), clear: vi.fn() },
      pi: {
        // Record the request, then hand back a descriptor the caller can yield.
        tool: (name: string, args: unknown) => {
          calls.tools.push({ name, args });
          return { kind: "tool-call", name, args };
        },
        llm: (messages: unknown[], options?: unknown) => {
          calls.llm.push({ messages, options });
          return { kind: "llm-call", messages, options };
        },
        skill: vi.fn(),
        sendMessage: vi.fn(),
        prompt: vi.fn(),
      },
    } satisfies WorkflowContext,
  };
}
|
|
50
|
+
|
|
51
|
+
/**
 * Builds a "tool" node that runs `echo hello`.
 *
 * @param id - Used as both the node id and `source.specNodeId`.
 * @param verification - Hooks to attach; the `verification` key is always
 *   present on the result, holding `undefined` when the argument is omitted.
 */
function createToolNode(id: string, verification?: CirVerificationHook[]): CirNode {
  type ToolNode = Extract<CirNode, { kind: "tool" }>;

  const toolNode = {
    id,
    kind: "tool",
    source: { specNodeId: id, specNodeKind: "tool", specPath: `graph.nodes[0]` },
    verification,
    action: { tool: "echo", args: ["hello"] },
  } as ToolNode;

  return toolNode;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Builds an "llm" node with a fixed provider, model and "Verify?" prompt.
 *
 * @param id - Used as both the node id and `source.specNodeId`.
 */
function createLlmVerifierNode(id: string): CirNode {
  type LlmNode = Extract<CirNode, { kind: "llm" }>;

  const llmNode = {
    id,
    kind: "llm",
    source: { specNodeId: id, specNodeKind: "llm", specPath: `graph.nodes[1]` },
    action: { provider: "anthropic", model: "claude-sonnet", prompt: "Verify?" },
  } as LlmNode;

  return llmNode;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Builds a "tool" node whose action is `test -f output.txt`.
 *
 * @param id - Used as both the node id and `source.specNodeId`.
 */
function createToolVerifierNode(id: string): CirNode {
  type ToolNode = Extract<CirNode, { kind: "tool" }>;

  const verifierNode = {
    id,
    kind: "tool",
    source: { specNodeId: id, specNodeKind: "tool", specPath: `graph.nodes[1]` },
    action: { tool: "test", args: ["-f", "output.txt"] },
  } as ToolNode;

  return verifierNode;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Builds a "condition" node carrying the given expression.
 *
 * @param id - Used as both the node id and `source.specNodeId`.
 * @param conditionExpr - Stored verbatim as `action.conditionExpr`.
 */
function createConditionNode(id: string, conditionExpr: string): CirNode {
  type ConditionNode = Extract<CirNode, { kind: "condition" }>;

  const conditionNode = {
    id,
    kind: "condition",
    source: { specNodeId: id, specNodeKind: "condition", specPath: `graph.nodes[1]` },
    action: { conditionExpr },
  } as ConditionNode;

  return conditionNode;
}
|
|
87
|
+
|
|
88
|
+
/**
 * Returns a fresh ExecutionState: empty input, outputs and trace, a harness
 * state created from an empty object, and the current wall-clock time as the
 * start time.
 */
function createExecutionState(): ExecutionState {
  const harnessState = createHarnessState({});
  const startTimeMs = Date.now();

  return { input: {}, outputs: {}, trace: [], harnessState, startTimeMs };
}
|
|
97
|
+
|
|
98
|
+
/**
 * Runs a generator to completion, resuming it with `undefined` each time.
 *
 * @param gen - The generator to drain.
 * @returns Every yielded item in order, plus the generator's return value.
 */
function collectGenerator<T>(gen: Generator<YieldItem, T, unknown>): { yields: YieldItem[]; result: T } {
  const seen: YieldItem[] = [];
  let step = gen.next();

  for (;;) {
    if (step.done) {
      return { yields: seen, result: step.value };
    }
    seen.push(step.value);
    step = gen.next(undefined);
  }
}
|
|
107
|
+
|
|
108
|
+
describe("runVerification", () => {
  /**
   * Wires up what a runVerification call needs in these tests: a mock
   * context, an "action" tool node carrying `hooks`, a node map holding that
   * node plus the supplied verifier nodes (keyed by their ids), and a fresh
   * execution state.
   */
  function setupVerification(hooks: CirVerificationHook[], verifierNodes: CirNode[]) {
    const { context, calls } = createMockContext();
    const actionNode = createToolNode("action", hooks);
    const nodeMap = new Map<string, CirNode>([
      ["action", actionNode],
      ...verifierNodes.map((node): [string, CirNode] => [node.id, node]),
    ]);
    return { context, calls, nodeMap, state: createExecutionState() };
  }

  /** One LLM verifier node per id, in the order given. */
  function llmVerifiers(...ids: string[]): CirNode[] {
    return ids.map((id) => createLlmVerifierNode(id));
  }

  /** Two LLM hooks (verifier-a, verifier-b) that both block on failure. */
  function blockingPair(): CirVerificationHook[] {
    return [
      { kind: "llm", checkNodeId: "verifier-a", onFail: "block" },
      { kind: "llm", checkNodeId: "verifier-b", onFail: "block" },
    ];
  }

  /** verifier-a only warns on failure; verifier-b blocks. */
  function warnThenBlock(): CirVerificationHook[] {
    return [
      { kind: "llm", checkNodeId: "verifier-a", onFail: "warn" },
      { kind: "llm", checkNodeId: "verifier-b", onFail: "block" },
    ];
  }

  it("returns pass report when no hooks are provided", () => {
    const { context, nodeMap, state } = setupVerification([], []);

    const gen = runVerification("action", [], nodeMap, state, context);
    const { result } = collectGenerator(gen);

    expect(result).toEqual({
      nodeId: "action",
      hookResults: [],
      overallStatus: "pass",
    });
  });

  it("returns pass report when single hook passes", () => {
    const hook: CirVerificationHook = { kind: "llm", checkNodeId: "verifier", onFail: "block" };
    const { context, nodeMap, state } = setupVerification([hook], llmVerifiers("verifier"));

    const gen = runVerification("action", [hook], nodeMap, state, context);

    // Yields LLM call for verifier
    const first = gen.next();
    expect(first.done).toBe(false);
    expect(first.value).toMatchObject({ kind: "llm-call" });

    // Pass verifier result (boolean true)
    const { result } = collectGeneratorFrom(gen, true);

    expect(result.overallStatus).toBe("pass");
    expect(result.hookResults).toHaveLength(1);
    expect(result.hookResults[0].hook).toBe(hook);
    expect(result.hookResults[0].outcome).toEqual({ status: "pass" });
    expect(result.hookResults[0].durationMs).toBeGreaterThanOrEqual(0);
  });

  it("returns block report when single hook fails with block onFail", () => {
    const hook: CirVerificationHook = { kind: "llm", checkNodeId: "verifier", onFail: "block" };
    const { context, nodeMap, state } = setupVerification([hook], llmVerifiers("verifier"));

    const gen = runVerification("action", [hook], nodeMap, state, context);

    // Yields LLM call for verifier
    gen.next();

    // Fail verifier result (boolean false)
    const { result } = collectGeneratorFrom(gen, false);

    expect(result.overallStatus).toBe("block");
    expect(result.hookResults).toHaveLength(1);
    expect(result.hookResults[0].outcome).toEqual({
      status: "block",
      hook,
      message: "Verification failed via verifier",
    });
  });

  it("returns warn report when single hook fails with warn onFail", () => {
    const hook: CirVerificationHook = { kind: "llm", checkNodeId: "verifier", onFail: "warn" };
    const { context, nodeMap, state } = setupVerification([hook], llmVerifiers("verifier"));

    const gen = runVerification("action", [hook], nodeMap, state, context);
    gen.next();
    const { result } = collectGeneratorFrom(gen, false);

    // A warn-only failure is recorded on the hook but leaves the overall status at pass.
    expect(result.overallStatus).toBe("pass");
    expect(result.hookResults).toHaveLength(1);
    expect(result.hookResults[0].outcome).toEqual({ status: "warn", hook });
  });

  it("all-must-pass strategy: stops at first block", () => {
    const hooks = blockingPair();
    const { context, calls, nodeMap, state } = setupVerification(hooks, llmVerifiers("verifier-a", "verifier-b"));

    const gen = runVerification("action", hooks, nodeMap, state, context, "all-must-pass");

    // First verifier yields
    gen.next();
    // First verifier fails
    const { result } = collectGeneratorFrom(gen, false);

    expect(result.overallStatus).toBe("block");
    expect(result.hookResults).toHaveLength(1);
    expect(result.hookResults[0].hook).toBe(hooks[0]);
    // Second verifier should not have been called
    expect(calls.llm).toHaveLength(1);
  });

  it("all-must-pass strategy: all pass yields pass report", () => {
    const hooks = blockingPair();
    const { context, calls, nodeMap, state } = setupVerification(hooks, llmVerifiers("verifier-a", "verifier-b"));

    const gen = runVerification("action", hooks, nodeMap, state, context, "all-must-pass");

    // First verifier yields
    gen.next();
    // First verifier passes, second verifier yields
    gen.next(true);
    // Second verifier passes
    const { result } = collectGeneratorFrom(gen, true);

    expect(result.overallStatus).toBe("pass");
    expect(result.hookResults).toHaveLength(2);
    expect(calls.llm).toHaveLength(2);
  });

  it("first-pass strategy: exits on first pass", () => {
    const hooks = blockingPair();
    const { context, calls, nodeMap, state } = setupVerification(hooks, llmVerifiers("verifier-a", "verifier-b"));

    const gen = runVerification("action", hooks, nodeMap, state, context, "first-pass");

    // First verifier yields
    gen.next();
    // First verifier passes
    const { result } = collectGeneratorFrom(gen, true);

    expect(result.overallStatus).toBe("pass");
    expect(result.hookResults).toHaveLength(1);
    expect(result.hookResults[0].hook).toBe(hooks[0]);
    // Second verifier should not have been called
    expect(calls.llm).toHaveLength(1);
  });

  it("first-pass strategy: continues to second hook when first fails", () => {
    const hooks = blockingPair();
    const { context, calls, nodeMap, state } = setupVerification(hooks, llmVerifiers("verifier-a", "verifier-b"));

    const gen = runVerification("action", hooks, nodeMap, state, context, "first-pass");

    // First verifier yields
    gen.next();
    // First verifier fails, second verifier yields
    gen.next(false);
    // Second verifier passes
    const { result } = collectGeneratorFrom(gen, true);

    expect(result.overallStatus).toBe("pass");
    expect(result.hookResults).toHaveLength(2);
    expect(calls.llm).toHaveLength(2);
  });

  it("any-block strategy: exits on first block", () => {
    const hooks = blockingPair();
    const { context, calls, nodeMap, state } = setupVerification(hooks, llmVerifiers("verifier-a", "verifier-b"));

    const gen = runVerification("action", hooks, nodeMap, state, context, "any-block");

    // First verifier yields
    gen.next();
    // First verifier blocks
    const { result } = collectGeneratorFrom(gen, false);

    expect(result.overallStatus).toBe("block");
    expect(result.hookResults).toHaveLength(1);
    expect(calls.llm).toHaveLength(1);
  });

  it("any-block strategy: continues when hook warns", () => {
    const hooks = warnThenBlock();
    const { context, calls, nodeMap, state } = setupVerification(hooks, llmVerifiers("verifier-a", "verifier-b"));

    const gen = runVerification("action", hooks, nodeMap, state, context, "any-block");

    // First verifier yields
    gen.next();
    // First verifier warns (not a block, continue), second verifier yields
    gen.next(false);
    // Second verifier passes
    const { result } = collectGeneratorFrom(gen, true);

    expect(result.overallStatus).toBe("pass");
    expect(result.hookResults).toHaveLength(2);
    expect(calls.llm).toHaveLength(2);
  });

  it("tracks duration for each hook", () => {
    const hooks: CirVerificationHook[] = [
      { kind: "llm", checkNodeId: "verifier", onFail: "block" },
    ];
    const { context, nodeMap, state } = setupVerification(hooks, llmVerifiers("verifier"));

    const gen = runVerification("action", hooks, nodeMap, state, context);
    gen.next();
    const { result } = collectGeneratorFrom(gen, true);

    expect(result.hookResults[0].durationMs).toBeGreaterThanOrEqual(0);
    expect(typeof result.hookResults[0].durationMs).toBe("number");
  });

  it("handles expression verification hooks without yielding", () => {
    const hook: CirVerificationHook = { kind: "expression", checkNodeId: "check-expr", onFail: "block" };
    const { context, nodeMap, state } = setupVerification(
      [hook],
      [createConditionNode("check-expr", "outputs.action.ok")],
    );
    // The condition expression reads this output.
    state.outputs["action"] = { ok: true };

    const gen = runVerification("action", [hook], nodeMap, state, context);
    const { result, yields } = collectGenerator(gen);

    // Expression verification should not yield any external actions
    expect(yields).toHaveLength(0);
    expect(result.overallStatus).toBe("pass");
    expect(result.hookResults).toHaveLength(1);
    expect(result.hookResults[0].outcome).toEqual({ status: "pass" });
  });

  it("defaults to all-must-pass strategy when not specified", () => {
    const hooks = blockingPair();
    const { context, calls, nodeMap, state } = setupVerification(hooks, llmVerifiers("verifier-a", "verifier-b"));

    const gen = runVerification("action", hooks, nodeMap, state, context);

    // First verifier yields
    gen.next();
    // First verifier fails -> should block immediately (all-must-pass default)
    const { result } = collectGeneratorFrom(gen, false);

    expect(result.overallStatus).toBe("block");
    expect(result.hookResults).toHaveLength(1);
    expect(calls.llm).toHaveLength(1);
  });

  it("stores verifier output in state.outputs", () => {
    const hook: CirVerificationHook = { kind: "llm", checkNodeId: "verifier", onFail: "block" };
    const { context, nodeMap, state } = setupVerification([hook], llmVerifiers("verifier"));

    const gen = runVerification("action", [hook], nodeMap, state, context);
    gen.next();
    collectGeneratorFrom(gen, { passed: true });

    expect(state.outputs["verifier"]).toEqual({ passed: true });
  });

  it("warn outcome does not block even with all-must-pass strategy", () => {
    const hooks = warnThenBlock();
    const { context, calls, nodeMap, state } = setupVerification(hooks, llmVerifiers("verifier-a", "verifier-b"));

    const gen = runVerification("action", hooks, nodeMap, state, context, "all-must-pass");

    // First verifier yields
    gen.next();
    // First verifier warns (not a block), second verifier yields
    gen.next(false);
    // Second verifier passes
    const { result } = collectGeneratorFrom(gen, true);

    expect(result.overallStatus).toBe("pass");
    expect(result.hookResults).toHaveLength(2);
    expect(calls.llm).toHaveLength(2);
  });
});
|
|
498
|
+
|
|
499
|
+
/**
 * Drains an already-started generator, feeding it a scripted list of resume
 * values.
 *
 * The first resume receives `nextValues[0]`, the second `nextValues[1]`, and
 * so on; once the script is exhausted the generator is resumed with
 * `undefined`. Values are passed through untouched, so a scripted `null`
 * reaches the generator as `null` at any position.
 *
 * @param gen - A generator that is currently suspended at a `yield`.
 * @param nextValues - Values to send into the generator, one per resume.
 * @returns Items yielded after the first resume, plus the generator's return value.
 */
function collectGeneratorFrom<T>(gen: Generator<YieldItem, T, unknown>, ...nextValues: unknown[]): { yields: YieldItem[]; result: T } {
  const yields: YieldItem[] = [];
  let valueIndex = 0;
  let current = gen.next(nextValues[valueIndex++]);
  while (!current.done) {
    yields.push(current.value);
    // Reading past the end of the script is simply `undefined`; no fallback operator needed.
    current = gen.next(nextValues[valueIndex++]);
  }
  return { yields, result: current.value };
}
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { createInitialVersion, createNextVersion, createLineageEntry } from "../../src/versioning/history.js";
|
|
3
|
+
import type { HarnessSpec } from "../../src/spec/types.js";
|
|
4
|
+
import type { CompiledHarnessResult } from "../../src/compiler/compile.js";
|
|
5
|
+
|
|
6
|
+
describe("versioning/history", () => {
  // Minimal single-node spec shared by every test below.
  const mockSpec: HarnessSpec = {
    name: "test-workflow",
    graph: {
      nodes: [
        {
          id: "start",
          label: "Start",
          task: {
            kind: "shell",
            tool: "bash",
            args: ["echo test"],
          },
        },
      ],
      edges: [],
    },
  };

  /**
   * Builds a fresh "completed" result for each caller so tests never share
   * fixture objects. The clock is read once so that startTimeMs, endTimeMs
   * and totalDurationMs always agree.
   */
  function createCompletedResult(): CompiledHarnessResult {
    const now = Date.now();
    return {
      status: "completed",
      terminalNodeId: "success",
      result: { output: "test" },
      outputs: { start: { stdout: "test" } },
      trace: {
        entries: [
          {
            nodeId: "start",
            source: { kind: "user-task", nodeId: "start" },
            phase: "enter",
          },
        ],
        totalDurationMs: 100,
        nodeCount: 1,
        failureCount: 0,
        startTimeMs: now - 100,
        endTimeMs: now,
      },
      harnessState: {
        inputs: {},
        outputs: { start: { stdout: "test" } },
        nodeResults: { start: { stdout: "test" } },
        failures: [],
        metrics: {
          retries: 0,
          durationMs: 100,
        },
      },
    };
  }

  describe("createInitialVersion", () => {
    it("should create version 1 with no parent", () => {
      const version = createInitialVersion(mockSpec);

      expect(version.version).toBe(1);
      expect(version.parentVersion).toBeUndefined();
      expect(version.reason).toBe("initial");
      expect(version.spec).toEqual(mockSpec);
      expect(version.generatedAt).toBeGreaterThan(0);
    });

    it("should deep clone the spec", () => {
      const version = createInitialVersion(mockSpec);

      // Equal by value but not the same object reference.
      expect(version.spec).not.toBe(mockSpec);
      expect(version.spec).toEqual(mockSpec);
    });
  });

  describe("createNextVersion", () => {
    it("should increment version and set parent", () => {
      const initial = createInitialVersion(mockSpec);
      const next = createNextVersion(initial, mockSpec, "escalation");

      expect(next.version).toBe(2);
      expect(next.parentVersion).toBe(1);
      expect(next.reason).toBe("escalation");
      expect(next.spec).toEqual(mockSpec);
      expect(next.generatedAt).toBeGreaterThanOrEqual(initial.generatedAt);
    });

    it("should chain multiple versions", () => {
      const v1 = createInitialVersion(mockSpec);
      const v2 = createNextVersion(v1, mockSpec, "escalation");
      const v3 = createNextVersion(v2, mockSpec, "retry");

      expect(v3.version).toBe(3);
      expect(v3.parentVersion).toBe(2);
      expect(v3.reason).toBe("retry");
    });
  });

  describe("createLineageEntry", () => {
    it("should capture completed result data", () => {
      const mockResult = createCompletedResult();

      const version = createInitialVersion(mockSpec);
      const lineage = createLineageEntry(version, mockResult);

      expect(lineage.version).toBe(1);
      expect(lineage.terminalNodeId).toBe("success");
      expect(lineage.outputs).toEqual({ start: { stdout: "test" } });
      expect(lineage.nodeResults).toEqual({ start: { stdout: "test" } });
      expect(lineage.failures).toEqual([]);
      expect(lineage.metrics).toEqual({ retries: 0, durationMs: 100 });
      expect(lineage.trace.entries).toHaveLength(1);
      expect(lineage.completedAt).toBeGreaterThan(0);
    });

    it("should deep clone result data", () => {
      const mockResult = createCompletedResult();

      const version = createInitialVersion(mockSpec);
      const lineage = createLineageEntry(version, mockResult);

      // The lineage entry must not hand back the caller's own objects.
      expect(lineage.outputs).not.toBe(mockResult.outputs);
      expect(lineage.nodeResults).not.toBe(mockResult.harnessState.nodeResults);
    });
  });
});
|