@mhingston5/lasso 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.lean-ctx/graph.db +0 -0
- package/.lean-ctx/graph.db-shm +0 -0
- package/.lean-ctx/graph.db-wal +0 -0
- package/README.md +150 -5
- package/package.json +1 -1
- package/src/cir/lower.ts +2 -0
- package/src/cir/types.ts +6 -0
- package/src/compiler/compile.ts +272 -2
- package/src/failures/generator.ts +78 -2
- package/src/failures/types.ts +21 -0
- package/src/index.ts +1 -0
- package/src/metaharness/engine.ts +146 -3
- package/src/metaharness/trace-adapter.ts +34 -0
- package/src/metaharness/types.ts +41 -0
- package/src/replanner/runtime.ts +181 -0
- package/src/spec/schema.ts +46 -6
- package/src/spec/types.ts +39 -0
- package/test/compiler/per-node-harness.test.ts +955 -0
- package/test/failures/risk.test.ts +285 -0
- package/test/metaharness/synthesize-from-trace.test.ts +372 -0
- package/test/replanner/runtime.test.ts +134 -0
|
@@ -0,0 +1,955 @@
|
|
|
1
|
+
import { beforeEach, describe, expect, it, vi } from "vitest";
|
|
2
|
+
|
|
3
|
+
// Replace the "pi-duroxide" module with a stub that exposes only a spy
// `registerWorkflow`, so nothing in this file touches the real package.
vi.mock("pi-duroxide", () => {
  const registerWorkflow = vi.fn();
  return { registerWorkflow };
});
|
|
6
|
+
|
|
7
|
+
import { compileHarnessSpec } from "../../src/compiler/compile.js";
|
|
8
|
+
import { GuardrailExceededError } from "../../src/compiler/runtime-helpers.js";
|
|
9
|
+
import type { HarnessSpec } from "../../src/spec/types.js";
|
|
10
|
+
|
|
11
|
+
/**
 * Builds a fresh stand-in for the context object passed to a compiled
 * workflow generator.
 *
 * Scheduling, tracing, status and key-value members are bare `vi.fn()` spies.
 * `utcNow` always returns 0 and `newGuid` always returns "guid-1". `pi.tool`
 * and `pi.llm` return plain descriptor objects tagged `kind: "tool-call"` and
 * `kind: "llm-call"`; the tests in this file match yielded values against
 * those tags.
 *
 * @returns a new mock context; no state is shared between calls.
 */
function createMockContext() {
  const scheduling = {
    scheduleActivity: vi.fn(),
    scheduleActivityWithRetry: vi.fn(),
    scheduleTimer: vi.fn(),
    waitForEvent: vi.fn(),
    scheduleSubOrchestration: vi.fn(),
    all: vi.fn(),
    race: vi.fn(),
  };

  const runtime = {
    utcNow: () => 0,
    newGuid: () => "guid-1",
    continueAsNew: vi.fn(),
    setCustomStatus: vi.fn(),
  };

  const tracing = {
    traceInfo: vi.fn(),
    traceWarn: vi.fn(),
    traceError: vi.fn(),
    traceDebug: vi.fn(),
  };

  const pi = {
    // Descriptor objects rather than spies: the tests inspect their `kind`.
    tool: (name: string, args: unknown) => {
      return { kind: "tool-call", name, args };
    },
    llm: (messages: unknown[], options?: unknown) => {
      return { kind: "llm-call", messages, options };
    },
    skill: vi.fn(),
    sendMessage: vi.fn(),
    prompt: vi.fn(),
  };

  return {
    ...scheduling,
    ...runtime,
    ...tracing,
    kv: { get: vi.fn(), set: vi.fn(), clear: vi.fn() },
    pi,
  };
}
|
|
38
|
+
|
|
39
|
+
describe("per-node guardrails", () => {
|
|
40
|
+
it("enforces per-node maxRetries overriding global retryPolicy", () => {
  // The node declares a retryPolicy allowing 5 attempts and a stricter
  // guardrails.maxRetries of 1. The test expects the guardrail to win:
  // one retry, i.e. two attempts in total.
  const spec: HarnessSpec = {
    name: "per-node-retry-override",
    executionPolicy: {
      failureClassification: [
        { pattern: "transient", category: "transient", retry: true },
      ],
    },
    graph: {
      entryNodeId: "action",
      nodes: [
        {
          id: "action",
          kind: "tool",
          tool: "bash",
          args: ["echo fail"],
          retryPolicy: {
            maxAttempts: 5,
            backoff: "constant",
            initialDelay: 2,
            retryOn: ["transient"],
          },
          guardrails: {
            maxRetries: 1,
          },
        },
      ],
      edges: [],
    },
  };

  const compiled = compileHarnessSpec(spec);

  // Record every requested timer delay and hand back a recognisable
  // descriptor. The override is applied when the context is built instead of
  // reassigning a property on the mock afterwards.
  const timers: number[] = [];
  const context = {
    ...createMockContext(),
    scheduleTimer: (delayMs: number) => {
      timers.push(delayMs);
      return { kind: "timer", delayMs };
    },
  };
  const iterator = compiled.workflows[0].generator(context as any, {});

  // First attempt.
  expect(iterator.next().value).toMatchObject({ kind: "tool-call" });

  // Injecting a failure must yield a back-off timer; the test expects
  // initialDelay: 2 to surface as 2000 ms.
  const retryYield = iterator.throw(new Error("transient failure"));
  expect(retryYield.value).toEqual({ kind: "timer", delayMs: 2000 });
  expect(timers).toEqual([2000]);

  // Second attempt (the single permitted retry).
  expect(iterator.next().value).toMatchObject({ kind: "tool-call" });

  // A second failure exhausts the retry budget, so the generator must throw.
  expect(() => iterator.throw(new Error("transient failure"))).toThrow();
});
|
|
102
|
+
|
|
103
|
+
it("enforces per-node maxCostUsd on LLM nodes (per-node delta, not cumulative)", () => {
  // Two chained LLM nodes; only the second one carries a cost ceiling.
  const spec: HarnessSpec = {
    name: "per-node-cost",
    graph: {
      entryNodeId: "llm-first",
      nodes: [
        { id: "llm-first", kind: "llm", provider: "anthropic", model: "claude-sonnet", prompt: "Do something first" },
        {
          id: "llm-second",
          kind: "llm",
          provider: "anthropic",
          model: "claude-sonnet",
          prompt: "Do something second",
          guardrails: { maxCostUsd: 0.005 },
        },
      ],
      edges: [{ from: "llm-first", to: "llm-second" }],
    },
  };

  const { workflows } = compileHarnessSpec(spec);
  const steps = workflows[0].generator(createMockContext() as any, {});

  // llm-first yields its call.
  const firstYield = steps.next();
  expect(firstYield.value).toMatchObject({ kind: "llm-call" });

  // Feeding llm-first's output advances the workflow to llm-second, which
  // is expected to yield as well (its limit is not tripped before it runs).
  const secondYield = steps.next("output1");
  expect(secondYield.value).toMatchObject({ kind: "llm-call" });

  // Resuming llm-second is expected to trip the per-node cost check.
  // NOTE(review): this presumably relies on the compiled workflow charging
  // 0.01 per LLM call (node delta 0.01 > 0.005) — confirm against the compiler.
  let caught: unknown;
  try {
    steps.next("output2");
  } catch (err) {
    caught = err;
  }

  expect(caught).toBeInstanceOf(GuardrailExceededError);
  const { message } = caught as GuardrailExceededError;
  expect(message).toContain("Per-node cost limit exceeded");
  expect(message).toContain("llm-second");
});
|
|
157
|
+
|
|
158
|
+
it("evaluates per-node constraints before executing the node", () => {
  // start -> guarded -> fallback; "guarded" requires outputs.start.ok.
  const spec: HarnessSpec = {
    name: "per-node-constraints",
    graph: {
      entryNodeId: "start",
      nodes: [
        { id: "start", kind: "tool", tool: "echo", args: ["hello"] },
        {
          id: "guarded",
          kind: "tool",
          tool: "echo",
          args: ["should not run"],
          guardrails: { constraints: ["outputs.start.ok"] },
        },
        { id: "fallback", kind: "tool", tool: "echo", args: ["fallback"] },
      ],
      edges: [
        { from: "start", to: "guarded" },
        { from: "guarded", to: "fallback" },
      ],
    },
  };

  // Runs `action` and returns whatever it throws (undefined if it returns).
  const captureThrown = (action: () => unknown): unknown => {
    try {
      action();
    } catch (err) {
      return err;
    }
    return undefined;
  };

  const { workflows } = compileHarnessSpec(spec);
  const steps = workflows[0].generator(createMockContext() as any, {});

  // "start" runs first.
  expect(steps.next().value).toMatchObject({ kind: "tool-call" });

  // Its output has ok: false, so the constraint on "guarded" is falsy and
  // the test expects a guardrail error instead of a second tool call.
  const failure = captureThrown(() => steps.next({ ok: false }));

  expect(failure).toBeInstanceOf(GuardrailExceededError);
  const { message } = failure as GuardrailExceededError;
  expect(message).toContain("Constraint failed");
  expect(message).toContain("outputs.start.ok");
});
|
|
212
|
+
|
|
213
|
+
it("allows execution when per-node constraints pass", () => {
  // Same shape as the failing case, but "start" will report ok: true.
  const spec: HarnessSpec = {
    name: "per-node-constraints-pass",
    graph: {
      entryNodeId: "start",
      nodes: [
        { id: "start", kind: "tool", tool: "echo", args: ["hello"] },
        {
          id: "guarded",
          kind: "tool",
          tool: "echo",
          args: ["should run"],
          guardrails: { constraints: ["outputs.start.ok"] },
        },
      ],
      edges: [{ from: "start", to: "guarded" }],
    },
  };

  const { workflows } = compileHarnessSpec(spec);
  const steps = workflows[0].generator(createMockContext() as any, {});

  const startYield = steps.next();
  expect(startYield.value).toMatchObject({ kind: "tool-call" });

  // ok: true satisfies the constraint, so "guarded" yields its tool call.
  const guardedYield = steps.next({ ok: true });
  expect(guardedYield.value).toMatchObject({ kind: "tool-call" });

  const { done, value } = steps.next("done");
  expect(done).toBe(true);
  expect(value.status).toBe("completed");
});
|
|
252
|
+
|
|
253
|
+
it("enforces per-node timeoutSeconds by checking elapsed time after yield", () => {
  // "slow-node" has a 1 second budget; "start" has none.
  const spec: HarnessSpec = {
    name: "per-node-timeout",
    graph: {
      entryNodeId: "start",
      nodes: [
        {
          id: "start",
          kind: "tool",
          tool: "echo",
          args: ["first"],
        },
        {
          id: "slow-node",
          kind: "tool",
          tool: "echo",
          args: ["too slow"],
          guardrails: {
            timeoutSeconds: 1,
          },
        },
      ],
      edges: [
        { from: "start", to: "slow-node" },
      ],
    },
  };

  const compiled = compileHarnessSpec(spec);
  const ctx = createMockContext();
  const iterator = compiled.workflows[0].generator(ctx as any, {});

  // Control the clock through a spy instead of overwriting Date.now by hand.
  // The spy is installed before the generator is first advanced, so every
  // Date.now() read during iteration sees the fake time.
  let fakeTime = 1000;
  const nowSpy = vi.spyOn(Date, "now").mockImplementation(() => fakeTime);

  try {
    // Both nodes yield while the clock still reads 1000.
    expect(iterator.next().value).toMatchObject({ kind: "tool-call" });
    expect(iterator.next("ok").value).toMatchObject({ kind: "tool-call" });

    // slow-node has yielded. Advance the clock by 2.5 seconds, well past
    // its 1 second timeout.
    fakeTime = 3500;

    let thrownError: unknown;
    try {
      iterator.next("done");
    } catch (error) {
      thrownError = error;
    }
    expect(thrownError).toBeInstanceOf(GuardrailExceededError);
    expect((thrownError as GuardrailExceededError).message).toContain("timeout exceeded");
    expect((thrownError as GuardrailExceededError).message).toContain("slow-node");
  } finally {
    // Always give the real Date.now back, even when an assertion fails.
    nowSpy.mockRestore();
  }
});
|
|
311
|
+
|
|
312
|
+
it("global guardrails still work alongside per-node guardrails", () => {
  // NOTE(review): the spec sets a global maxSteps of 3, but the only
  // assertion below is on the per-node constraint of step-0. The global
  // limit is never reached — consider adding a case that exercises it.
  const spec: HarnessSpec = {
    name: "mixed-guardrails",
    executionPolicy: { maxSteps: 3 },
    graph: {
      entryNodeId: "step-0",
      nodes: [
        {
          id: "step-0",
          kind: "tool",
          tool: "echo",
          args: ["0"],
          guardrails: { constraints: ["outputs.step-0.ok"] },
        },
        { id: "step-1", kind: "tool", tool: "echo", args: ["1"] },
        { id: "step-2", kind: "tool", tool: "echo", args: ["2"] },
        { id: "step-3", kind: "tool", tool: "echo", args: ["3"] },
      ],
      edges: [
        { from: "step-0", to: "step-1" },
        { from: "step-1", to: "step-2" },
        { from: "step-2", to: "step-3" },
      ],
    },
  };

  const { workflows } = compileHarnessSpec(spec);
  const steps = workflows[0].generator(createMockContext() as any, {});

  // step-0 constrains on its own output. Nothing has run yet, so the test
  // expects the very first advance to fail the constraint.
  let caught: unknown;
  try {
    steps.next();
  } catch (err) {
    caught = err;
  }

  expect(caught).toBeInstanceOf(GuardrailExceededError);
  const { message } = caught as GuardrailExceededError;
  expect(message).toContain("Constraint failed");
});
|
|
373
|
+
});
|
|
374
|
+
|
|
375
|
+
describe("per-node verification hooks", () => {
|
|
376
|
+
it("runs verification hooks after node execution and blocks on failure", () => {
  // "action" is followed by an LLM verifier whose failure policy is "block".
  const spec: HarnessSpec = {
    name: "verify-block",
    graph: {
      entryNodeId: "action",
      nodes: [
        {
          id: "action",
          kind: "tool",
          tool: "bash",
          args: ["echo test"],
          verificationHooks: [
            { name: "check-output", kind: "llm", check: "Did the test pass?", onFail: "block" },
          ],
        },
        { id: "after", kind: "tool", tool: "echo", args: ["done"] },
      ],
      edges: [{ from: "action", to: "after" }],
    },
  };

  // Runs `action` and returns whatever it throws (undefined if it returns).
  const captureThrown = (action: () => unknown): unknown => {
    try {
      action();
    } catch (err) {
      return err;
    }
    return undefined;
  };

  const { workflows } = compileHarnessSpec(spec);
  const steps = workflows[0].generator(createMockContext() as any, {});

  // The primary node yields its tool call.
  const primary = steps.next();
  expect(primary.value).toMatchObject({ kind: "tool-call" });

  // Once the tool result is fed back, the verifier's LLM call is yielded.
  const verifier = steps.next({ ok: true });
  expect(verifier.value).toMatchObject({ kind: "llm-call" });

  // A negative verdict is expected to halt the workflow.
  const failure = captureThrown(() => steps.next({ passed: false }));

  expect(failure).toBeDefined();
  const { message } = failure as Error;
  expect(message).toContain("Verification hook");
  expect(message).toContain("check-output");
  expect(message).toContain("blocked");
});
|
|
431
|
+
|
|
432
|
+
it("runs verification hooks and warns on failure without halting", () => {
  // Same layout as the blocking case, but the hook only warns.
  const spec: HarnessSpec = {
    name: "verify-warn",
    graph: {
      entryNodeId: "action",
      nodes: [
        {
          id: "action",
          kind: "tool",
          tool: "bash",
          args: ["echo test"],
          verificationHooks: [
            { name: "soft-check", kind: "llm", check: "Is this OK?", onFail: "warn" },
          ],
        },
        { id: "after", kind: "tool", tool: "echo", args: ["done"] },
      ],
      edges: [{ from: "action", to: "after" }],
    },
  };

  const { workflows } = compileHarnessSpec(spec);
  const steps = workflows[0].generator(createMockContext() as any, {});

  // Primary node, then its verifier.
  const primary = steps.next();
  expect(primary.value).toMatchObject({ kind: "tool-call" });
  const verifier = steps.next({ ok: true });
  expect(verifier.value).toMatchObject({ kind: "llm-call" });

  // A negative verdict must not stop the run: "after" yields next.
  const followUp = steps.next({ passed: false });
  expect(followUp.value).toMatchObject({ kind: "tool-call" });

  const { done, value } = steps.next("done");
  expect(done).toBe(true);
  expect(value.status).toBe("completed");
});
|
|
482
|
+
|
|
483
|
+
it("runs verification hooks and retries the node on failure", () => {
  // The verifier asks for a retry on failure, capped at two attempts.
  const spec: HarnessSpec = {
    name: "verify-retry",
    graph: {
      entryNodeId: "action",
      nodes: [
        {
          id: "action",
          kind: "tool",
          tool: "bash",
          args: ["echo test"],
          verificationHooks: [
            { name: "retry-check", kind: "llm", check: "Did it work?", onFail: "retry", maxAttempts: 2 },
          ],
        },
        { id: "after", kind: "tool", tool: "echo", args: ["done"] },
      ],
      edges: [{ from: "action", to: "after" }],
    },
  };

  const { workflows } = compileHarnessSpec(spec);
  const steps = workflows[0].generator(createMockContext() as any, {});

  // Attempt 1: primary node, then verifier.
  expect(steps.next().value).toMatchObject({ kind: "tool-call" });
  expect(steps.next({ ok: true }).value).toMatchObject({ kind: "llm-call" });

  // Verifier says no: the primary node is expected to be yielded again.
  const rerun = steps.next({ passed: false });
  expect(rerun.value).toMatchObject({ kind: "tool-call" });

  // Attempt 2: verifier runs once more.
  const secondVerifier = steps.next({ ok: true });
  expect(secondVerifier.value).toMatchObject({ kind: "llm-call" });

  // A second rejection uses up the attempts and must throw.
  let caught: unknown;
  try {
    steps.next({ passed: false });
  } catch (err) {
    caught = err;
  }

  expect(caught).toBeDefined();
  const { message } = caught as Error;
  expect(message).toContain("Verification hook");
  expect(message).toContain("retry exhausted");
});
|
|
545
|
+
|
|
546
|
+
it("defaults maxAttempts to 2 for retry verification hooks", () => {
  // No maxAttempts on the hook: the test expects exactly one retry.
  const spec: HarnessSpec = {
    name: "verify-retry-default",
    graph: {
      entryNodeId: "action",
      nodes: [
        {
          id: "action",
          kind: "tool",
          tool: "bash",
          args: ["echo test"],
          verificationHooks: [
            { name: "default-retry", kind: "llm", check: "Did it work?", onFail: "retry" },
          ],
        },
      ],
      edges: [],
    },
  };

  // Runs `action` and returns whatever it throws (undefined if it returns).
  const captureThrown = (action: () => unknown): unknown => {
    try {
      action();
    } catch (err) {
      return err;
    }
    return undefined;
  };

  const { workflows } = compileHarnessSpec(spec);
  const steps = workflows[0].generator(createMockContext() as any, {});

  // Attempt 1.
  expect(steps.next().value).toMatchObject({ kind: "tool-call" });
  expect(steps.next({ ok: true }).value).toMatchObject({ kind: "llm-call" });

  // Rejected once: the node is yielded again.
  expect(steps.next({ passed: false }).value).toMatchObject({ kind: "tool-call" });

  // Attempt 2.
  expect(steps.next({ ok: true }).value).toMatchObject({ kind: "llm-call" });

  // Rejected twice: no attempts left.
  const failure = captureThrown(() => steps.next({ passed: false }));

  expect(failure).toBeDefined();
  expect((failure as Error).message).toContain("retry exhausted");
});
|
|
595
|
+
|
|
596
|
+
it("runs multiple verification hooks in order", () => {
  // Two blocking LLM verifiers are attached to the same node.
  const spec: HarnessSpec = {
    name: "verify-multiple",
    graph: {
      entryNodeId: "action",
      nodes: [
        {
          id: "action",
          kind: "tool",
          tool: "bash",
          args: ["echo test"],
          verificationHooks: [
            { name: "first-check", kind: "llm", check: "First check?", onFail: "block" },
            { name: "second-check", kind: "llm", check: "Second check?", onFail: "block" },
          ],
        },
        { id: "after", kind: "tool", tool: "echo", args: ["done"] },
      ],
      edges: [{ from: "action", to: "after" }],
    },
  };

  const { workflows } = compileHarnessSpec(spec);
  const steps = workflows[0].generator(createMockContext() as any, {});

  // Primary node.
  const primary = steps.next();
  expect(primary.value).toMatchObject({ kind: "tool-call" });

  // First verifier is yielded after the tool result.
  const firstHook = steps.next({ ok: true });
  expect(firstHook.value).toMatchObject({ kind: "llm-call" });

  // A `passed: true` verdict lets the second verifier run.
  const secondHook = steps.next({ passed: true });
  expect(secondHook.value).toMatchObject({ kind: "llm-call" });

  // An `approved: true` verdict is also accepted; "after" is yielded next.
  const followUp = steps.next({ approved: true });
  expect(followUp.value).toMatchObject({ kind: "tool-call" });

  const { done, value } = steps.next("done");
  expect(done).toBe(true);
  expect(value.status).toBe("completed");
});
|
|
655
|
+
|
|
656
|
+
it("stops early when first verification hook fails with block", () => {
  // Two blocking verifiers; the first one is going to reject.
  const spec: HarnessSpec = {
    name: "verify-early-stop",
    graph: {
      entryNodeId: "action",
      nodes: [
        {
          id: "action",
          kind: "tool",
          tool: "bash",
          args: ["echo test"],
          verificationHooks: [
            { name: "first-check", kind: "llm", check: "First check?", onFail: "block" },
            { name: "second-check", kind: "llm", check: "Second check?", onFail: "block" },
          ],
        },
      ],
      edges: [],
    },
  };

  const { workflows } = compileHarnessSpec(spec);
  const steps = workflows[0].generator(createMockContext() as any, {});

  // Primary node, then the first verifier.
  expect(steps.next().value).toMatchObject({ kind: "tool-call" });
  expect(steps.next({ ok: true }).value).toMatchObject({ kind: "llm-call" });

  // The rejection must throw straight away, naming the first hook, rather
  // than yielding the second verifier.
  let caught: unknown;
  try {
    steps.next({ passed: false });
  } catch (err) {
    caught = err;
  }

  expect(caught).toBeDefined();
  expect((caught as Error).message).toContain("first-check");
});
|
|
707
|
+
|
|
708
|
+
it("runs expression-based verification hooks without yielding", () => {
  // The hook is an expression over the node's own output, not an LLM call.
  const spec: HarnessSpec = {
    name: "verify-expression",
    graph: {
      entryNodeId: "action",
      nodes: [
        {
          id: "action",
          kind: "tool",
          tool: "bash",
          args: ["echo test"],
          verificationHooks: [
            { name: "expr-check", kind: "expression", check: "outputs.action.ok", onFail: "block" },
          ],
        },
        { id: "after", kind: "tool", tool: "echo", args: ["done"] },
      ],
      edges: [{ from: "action", to: "after" }],
    },
  };

  const { workflows } = compileHarnessSpec(spec);
  const steps = workflows[0].generator(createMockContext() as any, {});

  // Primary node.
  const primary = steps.next();
  expect(primary.value).toMatchObject({ kind: "tool-call" });

  // With ok: true the very next yield is already the "after" tool call:
  // no intermediate llm-call is produced for the expression hook.
  const followUp = steps.next({ ok: true });
  expect(followUp.value).toMatchObject({ kind: "tool-call" });

  const { done, value } = steps.next("done");
  expect(done).toBe(true);
  expect(value.status).toBe("completed");
});
|
|
755
|
+
|
|
756
|
+
it("expression verification hook fails when expression is falsy", () => {
  // Single node guarded by a blocking expression hook on its own output.
  const spec: HarnessSpec = {
    name: "verify-expression-fail",
    graph: {
      entryNodeId: "action",
      nodes: [
        {
          id: "action",
          kind: "tool",
          tool: "bash",
          args: ["echo test"],
          verificationHooks: [
            { name: "expr-check", kind: "expression", check: "outputs.action.ok", onFail: "block" },
          ],
        },
      ],
      edges: [],
    },
  };

  // Runs `action` and returns whatever it throws (undefined if it returns).
  const captureThrown = (action: () => unknown): unknown => {
    try {
      action();
    } catch (err) {
      return err;
    }
    return undefined;
  };

  const { workflows } = compileHarnessSpec(spec);
  const steps = workflows[0].generator(createMockContext() as any, {});

  // Primary node.
  expect(steps.next().value).toMatchObject({ kind: "tool-call" });

  // ok: false makes the expression falsy, so the hook is expected to block.
  const failure = captureThrown(() => steps.next({ ok: false }));

  expect(failure).toBeDefined();
  const { message } = failure as Error;
  expect(message).toContain("expr-check");
  expect(message).toContain("blocked");
});
|
|
799
|
+
|
|
800
|
+
it("nodes without verificationHooks work normally", () => {
  // Baseline: two plain tool nodes, no hooks and no guardrails.
  const spec: HarnessSpec = {
    name: "no-hooks",
    graph: {
      entryNodeId: "action",
      nodes: [
        { id: "action", kind: "tool", tool: "bash", args: ["echo test"] },
        { id: "after", kind: "tool", tool: "echo", args: ["done"] },
      ],
      edges: [{ from: "action", to: "after" }],
    },
  };

  const { workflows } = compileHarnessSpec(spec);
  const steps = workflows[0].generator(createMockContext() as any, {});

  // Each node yields one tool call, in edge order.
  const first = steps.next();
  expect(first.value).toMatchObject({ kind: "tool-call" });
  const second = steps.next("ok");
  expect(second.value).toMatchObject({ kind: "tool-call" });

  const { done, value } = steps.next("done");
  expect(done).toBe(true);
  expect(value.status).toBe("completed");
});
|
|
835
|
+
});
|
|
836
|
+
|
|
837
|
+
describe("per-node guardrails and verification hooks combined", () => {
|
|
838
|
+
it("enforces guardrails before verification hooks", () => {
  // "action" has both a constraint on start's output and a blocking verifier.
  const spec: HarnessSpec = {
    name: "guardrails-before-verify",
    graph: {
      entryNodeId: "start",
      nodes: [
        { id: "start", kind: "tool", tool: "echo", args: ["go"] },
        {
          id: "action",
          kind: "tool",
          tool: "bash",
          args: ["echo action"],
          guardrails: { constraints: ["outputs.start.proceed"] },
          verificationHooks: [
            { name: "post-check", kind: "llm", check: "Was it correct?", onFail: "block" },
          ],
        },
      ],
      edges: [{ from: "start", to: "action" }],
    },
  };

  const { workflows } = compileHarnessSpec(spec);
  const steps = workflows[0].generator(createMockContext() as any, {});

  // "start" runs.
  expect(steps.next().value).toMatchObject({ kind: "tool-call" });

  // proceed: false fails the constraint; the test expects a guardrail
  // error rather than any yield from "action" or its verifier.
  let caught: unknown;
  try {
    steps.next({ proceed: false });
  } catch (err) {
    caught = err;
  }

  expect(caught).toBeInstanceOf(GuardrailExceededError);
  expect((caught as GuardrailExceededError).message).toContain("Constraint failed");
});
|
|
891
|
+
|
|
892
|
+
it("verification hooks run after guardrails pass", () => {
  // start -> action (constraint + blocking verifier) -> after.
  const spec: HarnessSpec = {
    name: "guardrails-pass-verify-runs",
    graph: {
      entryNodeId: "start",
      nodes: [
        { id: "start", kind: "tool", tool: "echo", args: ["go"] },
        {
          id: "action",
          kind: "tool",
          tool: "bash",
          args: ["echo action"],
          guardrails: { constraints: ["outputs.start.proceed"] },
          verificationHooks: [
            { name: "post-check", kind: "llm", check: "Was it correct?", onFail: "block" },
          ],
        },
        { id: "after", kind: "tool", tool: "echo", args: ["done"] },
      ],
      edges: [
        { from: "start", to: "action" },
        { from: "action", to: "after" },
      ],
    },
  };

  const { workflows } = compileHarnessSpec(spec);
  const steps = workflows[0].generator(createMockContext() as any, {});

  // "start" runs.
  const startYield = steps.next();
  expect(startYield.value).toMatchObject({ kind: "tool-call" });

  // proceed: true satisfies the constraint, so "action" yields its tool call.
  const actionYield = steps.next({ proceed: true });
  expect(actionYield.value).toMatchObject({ kind: "tool-call" });

  // After the action's result comes back, the verifier's LLM call is yielded.
  const verifierYield = steps.next({ result: "ok" });
  expect(verifierYield.value).toMatchObject({ kind: "llm-call" });

  // The verifier approves, and the workflow moves on to "after".
  const afterYield = steps.next({ approved: true });
  expect(afterYield.value).toMatchObject({ kind: "tool-call" });

  const { done, value } = steps.next("done");
  expect(done).toBe(true);
  expect(value.status).toBe("completed");
});
|
|
955
|
+
});
|