@mhingston5/lasso 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +707 -0
- package/docs/agent-wrangling.png +0 -0
- package/package.json +26 -0
- package/src/capabilities/matcher.ts +25 -0
- package/src/capabilities/registry.ts +103 -0
- package/src/capabilities/types.ts +15 -0
- package/src/cir/lower.ts +253 -0
- package/src/cir/optimize.ts +251 -0
- package/src/cir/types.ts +131 -0
- package/src/cir/validate.ts +265 -0
- package/src/compiler/compile.ts +601 -0
- package/src/compiler/feedback.ts +471 -0
- package/src/compiler/runtime-helpers.ts +455 -0
- package/src/composition/chain.ts +58 -0
- package/src/composition/conditional.ts +76 -0
- package/src/composition/parallel.ts +75 -0
- package/src/composition/types.ts +105 -0
- package/src/environment/analyzer.ts +56 -0
- package/src/environment/discovery.ts +179 -0
- package/src/environment/types.ts +68 -0
- package/src/failures/classifiers.ts +134 -0
- package/src/failures/generator.ts +421 -0
- package/src/failures/map-reference-failures.ts +23 -0
- package/src/failures/ontology.ts +210 -0
- package/src/failures/recovery.ts +214 -0
- package/src/failures/types.ts +14 -0
- package/src/index.ts +67 -0
- package/src/memory/advisor.ts +132 -0
- package/src/memory/extractor.ts +166 -0
- package/src/memory/store.ts +107 -0
- package/src/memory/types.ts +53 -0
- package/src/metaharness/engine.ts +256 -0
- package/src/metaharness/predictor.ts +168 -0
- package/src/metaharness/types.ts +40 -0
- package/src/mutation/derive.ts +308 -0
- package/src/mutation/diff.ts +52 -0
- package/src/mutation/engine.ts +256 -0
- package/src/mutation/types.ts +84 -0
- package/src/pi/command-input.ts +209 -0
- package/src/pi/commands.ts +351 -0
- package/src/pi/extension.ts +16 -0
- package/src/planner/synthesize.ts +83 -0
- package/src/planner/template-rules.ts +183 -0
- package/src/planner/types.ts +42 -0
- package/src/reference/catalog.ts +128 -0
- package/src/reference/patch-validation-strategies.ts +170 -0
- package/src/reference/patch-validation.ts +174 -0
- package/src/reference/pr-review-merge.ts +155 -0
- package/src/reference/strategies.ts +126 -0
- package/src/reference/types.ts +33 -0
- package/src/replanner/risk-rules.ts +161 -0
- package/src/replanner/runtime.ts +308 -0
- package/src/replanner/synthesize.ts +619 -0
- package/src/replanner/types.ts +73 -0
- package/src/spec/schema.ts +254 -0
- package/src/spec/types.ts +319 -0
- package/src/spec/validate.ts +296 -0
- package/src/state/snapshots.ts +43 -0
- package/src/state/types.ts +12 -0
- package/src/synthesis/graph-builder.ts +267 -0
- package/src/synthesis/harness-builder.ts +113 -0
- package/src/synthesis/intent-ir.ts +63 -0
- package/src/synthesis/policy-builder.ts +320 -0
- package/src/synthesis/risk-analyzer.ts +182 -0
- package/src/synthesis/skill-parser.ts +441 -0
- package/src/verification/engine.ts +230 -0
- package/src/versioning/file-store.ts +103 -0
- package/src/versioning/history.ts +43 -0
- package/src/versioning/store.ts +16 -0
- package/src/versioning/types.ts +31 -0
- package/test/capabilities/matcher.test.ts +67 -0
- package/test/capabilities/registry.test.ts +136 -0
- package/test/capabilities/synthesis.test.ts +264 -0
- package/test/cir/lower.test.ts +417 -0
- package/test/cir/optimize.test.ts +266 -0
- package/test/cir/validate.test.ts +368 -0
- package/test/compiler/adaptive-runtime.test.ts +157 -0
- package/test/compiler/compile.test.ts +1198 -0
- package/test/compiler/feedback.test.ts +784 -0
- package/test/compiler/guardrails.test.ts +191 -0
- package/test/compiler/trace.test.ts +404 -0
- package/test/composition/chain.test.ts +328 -0
- package/test/composition/conditional.test.ts +241 -0
- package/test/composition/parallel.test.ts +215 -0
- package/test/environment/analyzer.test.ts +204 -0
- package/test/environment/discovery.test.ts +149 -0
- package/test/failures/classifiers.test.ts +287 -0
- package/test/failures/generator.test.ts +203 -0
- package/test/failures/ontology.test.ts +439 -0
- package/test/failures/recovery.test.ts +300 -0
- package/test/helpers/createFixtureRepo.ts +84 -0
- package/test/helpers/createPatchValidationFixture.ts +144 -0
- package/test/helpers/runCompiledWorkflow.ts +208 -0
- package/test/memory/advisor.test.ts +332 -0
- package/test/memory/extractor.test.ts +295 -0
- package/test/memory/store.test.ts +244 -0
- package/test/metaharness/engine.test.ts +575 -0
- package/test/metaharness/predictor.test.ts +436 -0
- package/test/mutation/derive-failure.test.ts +209 -0
- package/test/mutation/engine.test.ts +622 -0
- package/test/package-smoke.test.ts +29 -0
- package/test/pi/command-input.test.ts +153 -0
- package/test/pi/commands.test.ts +623 -0
- package/test/planner/classify-template.test.ts +32 -0
- package/test/planner/synthesize.test.ts +901 -0
- package/test/reference/PatchValidation.failures.test.ts +137 -0
- package/test/reference/PatchValidation.test.ts +326 -0
- package/test/reference/PrReviewMerge.failures.test.ts +121 -0
- package/test/reference/PrReviewMerge.test.ts +55 -0
- package/test/reference/catalog-open.test.ts +70 -0
- package/test/replanner/runtime.test.ts +207 -0
- package/test/replanner/synthesize.test.ts +303 -0
- package/test/spec/validate.test.ts +1056 -0
- package/test/state/snapshots.test.ts +264 -0
- package/test/synthesis/custom-workflow.test.ts +264 -0
- package/test/synthesis/graph-builder.test.ts +370 -0
- package/test/synthesis/harness-builder.test.ts +128 -0
- package/test/synthesis/policy-builder.test.ts +149 -0
- package/test/synthesis/risk-analyzer.test.ts +230 -0
- package/test/synthesis/skill-parser.test.ts +796 -0
- package/test/verification/engine.test.ts +509 -0
- package/test/versioning/history.test.ts +144 -0
- package/test/versioning/store.test.ts +254 -0
- package/vitest.config.ts +9 -0
|
@@ -0,0 +1,1198 @@
|
|
|
1
|
+
import { beforeEach, describe, expect, it, vi } from "vitest";
|
|
2
|
+
|
|
3
|
+
vi.mock("pi-duroxide", () => ({
|
|
4
|
+
registerWorkflow: vi.fn(),
|
|
5
|
+
}));
|
|
6
|
+
|
|
7
|
+
import { registerWorkflow } from "pi-duroxide";
|
|
8
|
+
import { compileHarnessSpec } from "../../src/compiler/compile.js";
|
|
9
|
+
import type { HarnessSpec } from "../../src/spec/types.js";
|
|
10
|
+
|
|
11
|
+
/** Call log filled in by the recording fakes that `createMockContext` installs. */
interface MockContextCalls {
  /** One entry per `pi.tool(name, args)` call. */
  tools: { name: string; args: unknown }[];
  /** One entry per `pi.llm(messages, options)` call. */
  llm: { messages: unknown[]; options?: unknown }[];
  /** Event names passed to `waitForEvent`. */
  events: string[];
  /** Task lists passed to `all`. */
  merges: unknown[][];
  /** One entry per `scheduleSubOrchestration(name, input)` call. */
  subworkflows: { name: string; input: unknown }[];
  /** Delays passed to `scheduleTimer`. */
  timers: number[];
  /** Values passed to `setCustomStatus`. */
  statuses: unknown[];
}
|
|
20
|
+
|
|
21
|
+
/**
 * Builds a fake orchestration context together with a log of the calls made
 * against it.
 *
 * `scheduleTimer`, `waitForEvent`, `scheduleSubOrchestration`, `all`,
 * `pi.tool` and `pi.llm` record their arguments in `calls` and return a plain
 * descriptor object tagged with `kind`. `setCustomStatus` only records.
 * `utcNow` always returns 0 and `newGuid` always returns "guid-1". Every other
 * member is a bare `vi.fn()` stub.
 *
 * @returns `{ calls, context }`, where `calls` is the shared call log.
 */
function createMockContext() {
  const calls: MockContextCalls = {
    tools: [],
    llm: [],
    events: [],
    merges: [],
    subworkflows: [],
    timers: [],
    statuses: [],
  };

  // Key/value store: nothing is recorded, only stubbed.
  const kv = { get: vi.fn(), set: vi.fn(), clear: vi.fn() };

  const pi = {
    tool(name: string, args: unknown) {
      calls.tools.push({ name, args });
      return { kind: "tool-call", name, args };
    },
    llm(messages: unknown[], options?: unknown) {
      calls.llm.push({ messages, options });
      return { kind: "llm-call", messages, options };
    },
    skill: vi.fn(),
    sendMessage: vi.fn(),
    prompt: vi.fn(),
  };

  const context = {
    scheduleActivity: vi.fn(),
    scheduleActivityWithRetry: vi.fn(),
    scheduleTimer(delayMs: number) {
      calls.timers.push(delayMs);
      return { kind: "timer", delayMs };
    },
    waitForEvent(eventName: string) {
      calls.events.push(eventName);
      return { kind: "wait-for-event", eventName };
    },
    scheduleSubOrchestration(name: string, input: unknown) {
      calls.subworkflows.push({ name, input });
      return { kind: "subworkflow", name, input };
    },
    all(tasks: unknown[]) {
      calls.merges.push(tasks);
      return { kind: "all", tasks };
    },
    race: vi.fn(),
    utcNow: () => 0,
    newGuid: () => "guid-1",
    continueAsNew: vi.fn(),
    setCustomStatus(status: unknown) {
      calls.statuses.push(status);
    },
    traceInfo: vi.fn(),
    traceWarn: vi.fn(),
    traceError: vi.fn(),
    traceDebug: vi.fn(),
    kv,
    pi,
  };

  return { calls, context };
}
|
|
85
|
+
|
|
86
|
+
describe("compileHarnessSpec", () => {
|
|
87
|
+
// Reset the mocked `registerWorkflow` before every test so call counts do not
// leak from one test into the next.
beforeEach(() => {
  const registerMock = vi.mocked(registerWorkflow);
  registerMock.mockReset();
});
|
|
90
|
+
|
|
91
|
+
it("compiles tool nodes into bash tool calls", () => {
  const compiled = compileHarnessSpec(createToolSpec());

  // Check the workflow list before indexing into it: an unexpected shape then
  // fails on this expectation instead of a TypeError from `workflows[0]`.
  expect(compiled.workflows).toHaveLength(1);
  const workflow = compiled.workflows[0];
  expect(workflow.name).toBe("run-diff");
  expect(workflow.options).toMatchObject({
    description: "Compiled Lasso harness run-diff",
    timeoutMs: 30000,
  });

  const mock = createMockContext();
  const iterator = workflow.generator(mock.context as any, {});

  // First step: the git tool node is yielded as a bash tool call.
  expect(iterator.next().value).toEqual({
    kind: "tool-call",
    name: "bash",
    args: {
      command: "cd /repo && git diff main...feature",
      description: "Lasso tool node run-diff",
    },
  });

  // Feeding the tool result back in finishes the workflow.
  const completed = iterator.next({ stdout: "diff output" });
  expect(completed.done).toBe(true);
  expect(completed.value).toMatchObject({
    status: "completed",
    terminalNodeId: "run-diff",
    outputs: {
      "run-diff": { stdout: "diff output" },
    },
  });
});
|
|
122
|
+
|
|
123
|
+
it("compiles llm nodes into ctx.pi.llm calls", () => {
  const [workflow] = compileHarnessSpec(createLlmSpec()).workflows;
  const { calls, context } = createMockContext();
  const run = workflow.generator(context as any, {});

  // The first yielded value is the descriptor returned by the fake `pi.llm`.
  const { value: llmRequest } = run.next();
  expect(llmRequest).toMatchObject({
    kind: "llm-call",
    options: { model: "claude-sonnet" },
  });

  // System and user prompts are each wrapped as a single text content part.
  const expectedMessages = [
    { role: "system", content: [{ type: "text", text: "Be precise." }] },
    { role: "user", content: [{ type: "text", text: "Summarise the diff." }] },
  ];
  expect(calls.llm[0]?.messages).toEqual(expectedMessages);
});
|
|
146
|
+
|
|
147
|
+
it("compiles human nodes into ctx.waitForEvent calls", () => {
  const expectedEvent = "lasso:human:human-review:approve";
  const [workflow] = compileHarnessSpec(createHumanSpec()).workflows;
  const { calls, context } = createMockContext();
  const run = workflow.generator(context as any, {});

  // The human node yields a wait on one event, and the fake records its name.
  const pending = run.next();
  expect(pending.value).toEqual({ kind: "wait-for-event", eventName: expectedEvent });
  expect(calls.events).toEqual([expectedEvent]);
});
|
|
158
|
+
|
|
159
|
+
it("compiles supported merge branches into ctx.all joins", () => {
  const [workflow] = compileHarnessSpec(createMergeSpec()).workflows;
  const { calls, context } = createMockContext();
  const run = workflow.generator(context as any, {});

  // The entry tool node runs on its own first.
  const entryStep = run.next();
  expect(entryStep.value).toMatchObject({ kind: "tool-call", name: "bash" });

  // Both branches are then yielded together through a single `all` join.
  const expectedJoin = {
    kind: "all",
    tasks: [{ kind: "llm-call" }, { kind: "tool-call", name: "bash" }],
  };
  const joinStep = run.next({ loaded: true });
  expect(joinStep.value).toMatchObject(expectedJoin);

  const { merges } = calls;
  expect(merges).toHaveLength(1);
  expect(merges[0]).toHaveLength(2);
});
|
|
185
|
+
|
|
186
|
+
it("retries failed node executions with deterministic timers", () => {
  const bashCall = { kind: "tool-call", name: "bash" };
  const [workflow] = compileHarnessSpec(createRetrySpec()).workflows;
  const { calls, context } = createMockContext();
  const run = workflow.generator(context as any, {});

  // Attempt 1.
  expect(run.next().value).toMatchObject(bashCall);

  // A failure injected into the generator yields a backoff timer.
  const backoff = run.throw?.(new Error("timeout while running verification"));
  expect(backoff?.value).toEqual({ kind: "timer", delayMs: 2000 });

  // Attempt 2 starts once the timer step is resumed.
  expect(run.next().value).toMatchObject(bashCall);

  expect(calls.tools).toHaveLength(2);
  expect(calls.timers).toEqual([2000]);
});
|
|
208
|
+
|
|
209
|
+
it("injects verification nodes after the primary node executes", () => {
  const [workflow] = compileHarnessSpec(createVerificationSpec()).workflows;
  const { context } = createMockContext();
  const run = workflow.generator(context as any, {});

  // Primary tool node.
  const primaryStep = run.next();
  expect(primaryStep.value).toMatchObject({ kind: "tool-call", name: "bash" });

  // Its result is followed by the llm verifier.
  const verifierStep = run.next({ ok: true });
  expect(verifierStep.value).toMatchObject({
    kind: "llm-call",
    options: { model: "claude-sonnet" },
  });
});
|
|
226
|
+
|
|
227
|
+
it("defaults verification retries to one actual retry when maxAttempts is omitted", () => {
  const bashCall = { kind: "tool-call", name: "bash" };
  const [workflow] = compileHarnessSpec(createVerificationRetrySpec()).workflows;
  const { context } = createMockContext();
  const run = workflow.generator(context as any, {});

  // Primary node, first attempt.
  expect(run.next().value).toMatchObject(bashCall);

  // Verifier runs against the primary result.
  const verifierStep = run.next({ ok: true });
  expect(verifierStep.value).toMatchObject({
    kind: "llm-call",
    options: { model: "claude-sonnet" },
  });

  // A failed verdict sends execution back to the primary node.
  const retryStep = run.next({ passed: false });
  expect(retryStep.value).toMatchObject(bashCall);
});
|
|
249
|
+
|
|
250
|
+
it("rejects ambiguous boolean verification payloads", () => {
  const [workflow] = compileHarnessSpec(createVerificationSpec()).workflows;
  const { context } = createMockContext();
  const run = workflow.generator(context as any, {});

  const primaryStep = run.next();
  expect(primaryStep.value).toMatchObject({ kind: "tool-call", name: "bash" });

  const verifierStep = run.next({ ok: true });
  expect(verifierStep.value).toMatchObject({ kind: "llm-call" });

  // `passed` and `ok` disagree, so the verdict cannot be interpreted.
  const resumeWithConflictingFlags = () => run.next({ passed: true, ok: false });
  expect(resumeWithConflictingFlags).toThrow(/Ambiguous boolean status fields: passed, ok/);
});
|
|
265
|
+
|
|
266
|
+
it("throws on verification block and records verification_failed", () => {
  const compiled = compileHarnessSpec(createBlockVerificationSpec());
  const mock = createMockContext();
  const iterator = compiled.workflows[0].generator(mock.context as any, {});

  // Execute primary node
  expect(iterator.next().value).toMatchObject({
    kind: "tool-call",
    name: "bash",
  });

  // Execute verification node (LLM)
  expect(iterator.next({ ok: true }).value).toMatchObject({
    kind: "llm-call",
  });

  // The verifier reports failure, so resuming must throw the block message.
  // The generator throws instead of returning a final value, which means
  // harnessState cannot be inspected here; the error message is the only
  // observable signal of the verification failure.
  expect(() => iterator.next({ passed: false })).toThrow("Verification failed via confirm-output");
});
|
|
299
|
+
|
|
300
|
+
it("throws on verification retry exhaustion and records verification_failed", () => {
  const [workflow] = compileHarnessSpec(createExhaustRetryVerificationSpec()).workflows;
  const { context } = createMockContext();
  const run = workflow.generator(context as any, {});

  // Attempt 1: primary node.
  const firstPrimary = run.next();
  expect(firstPrimary.value).toMatchObject({ kind: "tool-call", name: "bash" });

  // Attempt 1: verification node is yielded as a tool call.
  const firstVerifier = run.next({ ok: true });
  expect(firstVerifier.value).toMatchObject({ kind: "tool-call" });

  // A `false` verdict triggers attempt 2 of the primary node.
  const secondPrimary = run.next(false);
  expect(secondPrimary.value).toMatchObject({ kind: "tool-call", name: "bash" });

  // Attempt 2: verification node.
  const secondVerifier = run.next({ ok: true });
  expect(secondVerifier.value).toMatchObject({ kind: "tool-call" });

  // A second `false` verdict exhausts the retries. The error is captured once
  // because two separate facts about its message are asserted below.
  let caught: Error | undefined;
  try {
    run.next(false);
  } catch (error) {
    caught = error as Error;
  }

  expect(caught).toBeDefined();
  const message = caught?.message;
  expect(message).toContain("Verification retry exhausted");
  expect(message).toContain("verify-check");
});
|
|
339
|
+
|
|
340
|
+
it("executes expression verification without yielding external action", () => {
  const [workflow] = compileHarnessSpec(createExpressionVerificationSpec()).workflows;
  const { context } = createMockContext();
  const run = workflow.generator(context as any, {});

  // Primary node.
  const primaryStep = run.next();
  expect(primaryStep.value).toMatchObject({ kind: "tool-call", name: "bash" });

  // With `ok: true` the next yield is already the subworkflow: no verifier
  // step is yielded in between.
  const nextStep = run.next({ ok: true });
  expect(nextStep.value).toMatchObject({ kind: "subworkflow", name: "finish-flow" });

  // Finish the workflow.
  const finished = run.next({});
  expect(finished.done).toBe(true);

  const result = finished.value;
  expect(result.status).toBe("completed");

  // The verifier's evaluation is stored under its own node id.
  const expectedVerdict = {
    evaluated: true,
    result: true,
    expression: "outputs.action.ok",
  };
  expect(result.outputs["check-expr"]).toMatchObject(expectedVerdict);
});
|
|
372
|
+
|
|
373
|
+
it("expression verification records failure when expression evaluates to false", () => {
  const compiled = compileHarnessSpec(createExpressionVerificationSpec());
  const mock = createMockContext();
  const iterator = compiled.workflows[0].generator(mock.context as any, {});

  // Execute primary node
  expect(iterator.next().value).toMatchObject({
    kind: "tool-call",
    name: "bash",
  });

  // The primary node reports `ok: false`, so resuming must throw with the
  // verifier's id in the message.
  expect(() => iterator.next({ ok: false })).toThrow("Verification failed via check-expr");
});
|
|
395
|
+
|
|
396
|
+
it("routes condition nodes using stored node outputs", () => {
  const [workflow] = compileHarnessSpec(createConditionSpec()).workflows;
  const { context } = createMockContext();
  const run = workflow.generator(context as any, {});

  const toolStep = run.next();
  expect(toolStep.value).toMatchObject({ kind: "tool-call", name: "bash" });

  // With `ok: true` stored as the tool output, the "yes" branch is scheduled.
  const expectedBranch = { kind: "subworkflow", name: "yes-branch", input: {} };
  const branchStep = run.next({ ok: true });
  expect(branchStep.value).toEqual(expectedBranch);
});
|
|
412
|
+
|
|
413
|
+
it("registers compiled workflows through pi-duroxide", () => {
  const compiled = compileHarnessSpec(createToolSpec());
  compiled.register({} as any);

  // One registration: workflow name, a function, and the compiled options.
  const expectedOptions = expect.objectContaining({
    description: "Compiled Lasso harness run-diff",
    timeoutMs: 30000,
  });
  expect(registerWorkflow).toHaveBeenCalledTimes(1);
  expect(registerWorkflow).toHaveBeenCalledWith("run-diff", expect.any(Function), expectedOptions);
});
|
|
428
|
+
|
|
429
|
+
it("rejects unsupported multi-step merge shapes at compile time", () => {
  // Compilation itself must throw; no workflow is ever started.
  const compileUnsupported = () => compileHarnessSpec(createUnsupportedMergeSpec());
  expect(compileUnsupported).toThrow(/Unsupported parallel merge shape/);
});
|
|
432
|
+
|
|
433
|
+
it("rejects parallel merge branches that rely on failure-routing metadata", () => {
  // Compilation must throw, and the message must name failure-routing metadata.
  const compileFailureRouted = () => compileHarnessSpec(createFailureRoutedMergeSpec());
  expect(compileFailureRouted).toThrow(/Unsupported parallel merge shape.*failure-routing metadata/);
});
|
|
438
|
+
|
|
439
|
+
it("rejects verifier nodes with nested verification hooks", () => {
  // Compilation must throw and name the offending verifier node.
  const compileNestedVerifier = () => compileHarnessSpec(createNestedVerifierSpec());
  expect(compileNestedVerifier).toThrow(/Verifier node nested-check cannot carry nested verification hooks/);
});
|
|
444
|
+
|
|
445
|
+
it("rejects invalid environment variable names before building bash commands", () => {
  // Compile, start and first step all run inside the thunk, so the test
  // passes wherever along that path the rejection is raised.
  const startInvalidEnvWorkflow = () => {
    const compiled = compileHarnessSpec(createInvalidEnvSpec());
    const { context } = createMockContext();
    return compiled.workflows[0].generator(context as any, {}).next();
  };

  expect(startInvalidEnvWorkflow).toThrow(/Invalid environment variable name: BAD=NAME/);
});
|
|
450
|
+
|
|
451
|
+
it("rejects merge convergence outside supported fork-join patterns", () => {
  // Compilation must throw and name the merge node `join`.
  const compileConditionalMerge = () => compileHarnessSpec(createConditionalMergeSpec());
  expect(compileConditionalMerge).toThrow(/Unsupported merge execution shape for merge node join/);
});
|
|
456
|
+
|
|
457
|
+
it("preserves trace and adds harnessState to result", () => {
  const toolOutput = { stdout: "diff output" };
  const [workflow] = compileHarnessSpec(createToolSpec()).workflows;
  const { context } = createMockContext();
  const run = workflow.generator(context as any, { testInput: 42 });

  // Step past the tool call, then resume with its output.
  run.next();
  const completed = run.next({ stdout: "diff output" });
  expect(completed.done).toBe(true);

  // The trace must contain an "enter" entry for the node plus summary fields.
  const expectedTrace = expect.objectContaining({
    entries: expect.arrayContaining([
      expect.objectContaining({ nodeId: "run-diff", phase: "enter" }),
    ]),
    totalDurationMs: expect.any(Number),
    nodeCount: expect.any(Number),
    failureCount: 0,
    startTimeMs: expect.any(Number),
    endTimeMs: expect.any(Number),
  });
  expect(completed.value).toMatchObject({
    status: "completed",
    terminalNodeId: "run-diff",
    outputs: { "run-diff": toolOutput },
    trace: expectedTrace,
  });

  // harnessState holds the inputs, outputs, node results, failures and metrics.
  expect(completed.value.harnessState).toBeDefined();
  const state = completed.value.harnessState;
  expect(state.inputs).toEqual({ testInput: 42 });
  expect(state.outputs).toEqual({ "run-diff": toolOutput });
  expect(state.nodeResults).toEqual({ "run-diff": toolOutput });
  expect(state.failures).toEqual([]);
  expect(state.metrics).toEqual({ retries: 0, durationMs: expect.any(Number) });
});
|
|
494
|
+
|
|
495
|
+
it("records failures in harnessState when retry occurs", () => {
  const errorMessage = "timeout while running verification";
  const bashCall = { kind: "tool-call", name: "bash" };
  const [workflow] = compileHarnessSpec(createRetrySpec()).workflows;
  const { context } = createMockContext();
  const run = workflow.generator(context as any, {});

  // Attempt 1.
  expect(run.next().value).toMatchObject(bashCall);

  // Injecting the error triggers a retry, starting with a backoff timer.
  const backoff = run.throw?.(new Error(errorMessage));
  expect(backoff?.value).toEqual({ kind: "timer", delayMs: 2000 });

  // Attempt 2 follows the timer.
  expect(run.next().value).toMatchObject(bashCall);

  // Attempt 2 succeeds and the workflow completes.
  const completed = run.next({ passed: true });
  expect(completed.done).toBe(true);

  // The first-attempt failure is recorded once, with its classification.
  const { failures, metrics } = completed.value.harnessState;
  expect(failures).toHaveLength(1);
  expect(failures[0]).toMatchObject({
    domainType: "lasso",
    rootCause: "tool_timeout",
    nodeId: "verify",
    message: errorMessage,
  });

  // The retry is counted.
  expect(metrics.retries).toBe(1);
});
|
|
541
|
+
|
|
542
|
+
it("accumulates multiple failures in harnessState during retries", () => {
  const timeoutMessage = "timeout while running verification";
  const [workflow] = compileHarnessSpec(createRetrySpec()).workflows;
  const { context } = createMockContext();
  const run = workflow.generator(context as any, {});

  // Attempt 1.
  run.next();

  // First failure: a backoff timer is yielded.
  const backoff = run.throw?.(new Error(timeoutMessage));
  expect(backoff?.value).toEqual({ kind: "timer", delayMs: 2000 });

  // Attempt 2 follows the timer.
  const retryStep = run.next();
  expect(retryStep.value).toMatchObject({ kind: "tool-call" });

  // Second failure: retries are exhausted, so the generator throws.
  // NOTE(review): the title mentions harnessState, but only the throw is
  // asserted here; the accumulated failures are never inspected.
  const failAgain = () => run.throw?.(new Error(timeoutMessage));
  expect(failAgain).toThrow();
});
|
|
561
|
+
|
|
562
|
+
it("increments retry metric in harnessState for each retry attempt", () => {
  const compiled = compileHarnessSpec(createRetrySpec());
  const mock = createMockContext();
  const iterator = compiled.workflows[0].generator(mock.context as any, {});

  // First attempt, then a failure that yields the backoff timer.
  iterator.next();
  const afterError = iterator.throw?.(new Error("timeout while running verification"));

  expect(afterError?.value).toEqual({ kind: "timer", delayMs: 2000 });

  // Second attempt follows the timer.
  const secondAttempt = iterator.next();
  expect(secondAttempt.value).toMatchObject({
    kind: "tool-call",
    name: "bash",
  });

  const completed = iterator.next({ passed: true });

  // Confirm the generator actually finished before reading its return value;
  // otherwise `value` would be a yielded step and not the final result.
  expect(completed.done).toBe(true);

  // Verify retry metric was incremented
  expect(completed.value.harnessState.metrics.retries).toBe(1);
  expect(mock.calls.timers).toEqual([2000]);
});
|
|
584
|
+
});
|
|
585
|
+
|
|
586
|
+
/**
 * Spec with a single `git diff main...feature` tool node run from `/repo`.
 * The execution policy sets `timeout: 30`; the tests in this file expect that
 * to compile to `timeoutMs: 30000`.
 */
function createToolSpec(): HarnessSpec {
  const nodeId = "run-diff";
  const graph: HarnessSpec["graph"] = {
    entryNodeId: nodeId,
    nodes: [
      { id: nodeId, kind: "tool", tool: "git", args: ["diff", "main...feature"], cwd: "/repo" },
    ],
    edges: [],
  };

  return { name: "run-diff", executionPolicy: { timeout: 30 }, graph };
}
|
|
607
|
+
|
|
608
|
+
/** Spec with a single llm node that carries both a system and a user prompt. */
function createLlmSpec(): HarnessSpec {
  const graph: HarnessSpec["graph"] = {
    entryNodeId: "summarise",
    nodes: [
      {
        id: "summarise",
        kind: "llm",
        provider: "anthropic",
        model: "claude-sonnet",
        system: "Be precise.",
        prompt: "Summarise the diff.",
      },
    ],
    edges: [],
  };

  return { name: "summarise-diff", graph };
}
|
|
627
|
+
|
|
628
|
+
/** Spec with a single human approval node; the harness is named "human-review". */
function createHumanSpec(): HarnessSpec {
  const graph: HarnessSpec["graph"] = {
    entryNodeId: "approve",
    nodes: [
      { id: "approve", kind: "human", prompt: "Approve the merge?", interactionType: "approval" },
    ],
    edges: [],
  };

  return { name: "human-review", graph };
}
|
|
645
|
+
|
|
646
|
+
/**
 * Fork-join spec: `load-pr` fans out to an llm `review` and a tool `verify`,
 * both feed the `join` merge node, which leads to the `finish` subworkflow.
 */
function createMergeSpec(): HarnessSpec {
  // Small local builder so the edge list reads as a list of arrows.
  const edge = (from: string, to: string) => ({ from, to });

  const graph: HarnessSpec["graph"] = {
    entryNodeId: "load-pr",
    nodes: [
      { id: "load-pr", kind: "tool", tool: "git", args: ["status"] },
      {
        id: "review",
        kind: "llm",
        provider: "anthropic",
        model: "claude-sonnet",
        prompt: "Review the pull request.",
      },
      { id: "verify", kind: "tool", tool: "npm", args: ["test"] },
      { id: "join", kind: "merge", waitFor: ["review", "verify"] },
      { id: "finish", kind: "subworkflow", specRef: "finish-flow" },
    ],
    edges: [
      edge("load-pr", "review"),
      edge("load-pr", "verify"),
      edge("review", "join"),
      edge("verify", "join"),
      edge("join", "finish"),
    ],
  };

  return { name: "parallel-review", graph };
}
|
|
692
|
+
|
|
693
|
+
/**
 * Builds the "retry-tool" spec: one tool node carrying a retry policy.
 *
 * The spec-level `executionPolicy` classifies the pattern "timeout" as the
 * "transient" category with `retry: true`. The single "verify" node has a
 * `retryPolicy` of two attempts with constant backoff that retries on the
 * "transient" category. There are no edges.
 *
 * @returns A freshly constructed spec object on every call.
 */
function createRetrySpec(): HarnessSpec {
  return {
    name: "retry-tool",
    executionPolicy: {
      failureClassification: [
        {
          pattern: "timeout",
          category: "transient",
          retry: true,
        },
      ],
    },
    graph: {
      entryNodeId: "verify",
      nodes: [
        {
          id: "verify",
          kind: "tool",
          tool: "npm",
          args: ["test"],
          retryPolicy: {
            maxAttempts: 2,
            backoff: "constant",
            // NOTE(review): the unit of initialDelay is not visible here — confirm against the type definition.
            initialDelay: 2,
            retryOn: ["transient"],
          },
        },
      ],
      edges: [],
    },
  };
}
|
|
725
|
+
|
|
726
|
+
/**
 * Builds the "verification-flow" spec.
 *
 * The entry node "run-check" (tool) carries a verification rule of kind
 * "llm" that points at the "confirm-output" node and uses `onFail: "block"`.
 * Edges run "run-check" -> "confirm-output" -> "finish".
 *
 * @returns A freshly constructed spec object on every call.
 */
function createVerificationSpec(): HarnessSpec {
  return {
    name: "verification-flow",
    graph: {
      entryNodeId: "run-check",
      nodes: [
        {
          id: "run-check",
          kind: "tool",
          tool: "npm",
          args: ["test"],
          verificationPolicy: {
            rules: [
              {
                kind: "llm",
                checkNodeId: "confirm-output",
                onFail: "block",
              },
            ],
          },
        },
        {
          id: "confirm-output",
          kind: "llm",
          provider: "anthropic",
          model: "claude-sonnet",
          prompt: "Did the verification pass?",
        },
        {
          id: "finish",
          kind: "subworkflow",
          specRef: "finish-flow",
        },
      ],
      edges: [
        { from: "run-check", to: "confirm-output" },
        { from: "confirm-output", to: "finish" },
      ],
    },
  };
}
|
|
767
|
+
|
|
768
|
+
/**
 * Builds the "condition-flow" spec.
 *
 * "start" (tool, entry) has an edge to the "decide" condition node, whose
 * expression is "start.ok". The two branch targets, "yes" and "no", are
 * subworkflow nodes named only through `thenNodeId` / `elseNodeId`; the
 * edge list contains no entries for them.
 *
 * @returns A freshly constructed spec object on every call.
 */
function createConditionSpec(): HarnessSpec {
  return {
    name: "condition-flow",
    graph: {
      entryNodeId: "start",
      nodes: [
        {
          id: "start",
          kind: "tool",
          tool: "npm",
          args: ["test"],
        },
        {
          id: "decide",
          kind: "condition",
          condition: "start.ok",
          thenNodeId: "yes",
          elseNodeId: "no",
        },
        {
          id: "yes",
          kind: "subworkflow",
          specRef: "yes-branch",
        },
        {
          id: "no",
          kind: "subworkflow",
          specRef: "no-branch",
        },
      ],
      edges: [
        { from: "start", to: "decide" },
      ],
    },
  };
}
|
|
804
|
+
|
|
805
|
+
/**
 * Builds the "verification-retry" spec.
 *
 * Same three-node shape as a run-check -> confirm-output -> finish chain,
 * but the "llm" verification rule on "run-check" uses `onFail: "retry"`.
 * The rule sets no `maxAttempts`.
 *
 * @returns A freshly constructed spec object on every call.
 */
function createVerificationRetrySpec(): HarnessSpec {
  return {
    name: "verification-retry",
    graph: {
      entryNodeId: "run-check",
      nodes: [
        {
          id: "run-check",
          kind: "tool",
          tool: "npm",
          args: ["test"],
          verificationPolicy: {
            rules: [
              {
                kind: "llm",
                checkNodeId: "confirm-output",
                onFail: "retry",
              },
            ],
          },
        },
        {
          id: "confirm-output",
          kind: "llm",
          provider: "anthropic",
          model: "claude-sonnet",
          prompt: "Did the verification pass?",
        },
        {
          id: "finish",
          kind: "subworkflow",
          specRef: "finish-flow",
        },
      ],
      edges: [
        { from: "run-check", to: "confirm-output" },
        { from: "confirm-output", to: "finish" },
      ],
    },
  };
}
|
|
846
|
+
|
|
847
|
+
/**
 * Builds the "unsupported-merge" spec.
 *
 * "start" fans out to "branch-a" and "branch-b". Branch A has an extra
 * step ("branch-a-followup") before the "join" merge node, so the merge
 * waits on "branch-a-followup" and "branch-b" rather than on the two
 * direct successors of "start".
 *
 * NOTE(review): judging by the name, this shape is presumably one the
 * compiler rejects — confirm against the caller.
 *
 * @returns A freshly constructed spec object on every call.
 */
function createUnsupportedMergeSpec(): HarnessSpec {
  return {
    name: "unsupported-merge",
    graph: {
      entryNodeId: "start",
      nodes: [
        {
          id: "start",
          kind: "tool",
          tool: "echo",
          args: ["start"],
        },
        {
          id: "branch-a",
          kind: "tool",
          tool: "echo",
          args: ["a"],
        },
        {
          id: "branch-a-followup",
          kind: "tool",
          tool: "echo",
          args: ["a2"],
        },
        {
          id: "branch-b",
          kind: "tool",
          tool: "echo",
          args: ["b"],
        },
        {
          id: "join",
          kind: "merge",
          waitFor: ["branch-a-followup", "branch-b"],
        },
        {
          id: "finish",
          kind: "subworkflow",
          specRef: "finish-flow",
        },
      ],
      edges: [
        { from: "start", to: "branch-a" },
        { from: "start", to: "branch-b" },
        { from: "branch-a", to: "branch-a-followup" },
        { from: "branch-a-followup", to: "join" },
        { from: "branch-b", to: "join" },
        { from: "join", to: "finish" },
      ],
    },
  };
}
|
|
899
|
+
|
|
900
|
+
/**
 * Builds the "failure-routed-merge" spec.
 *
 * A fan-out / fan-in graph ("start" -> "review" and "verify" -> "join" ->
 * "finish") in which the "review" branch carries a node-level
 * `executionPolicy` classifying "timeout" as "transient" with
 * `retry: true`. The "verify" branch has no such policy.
 *
 * @returns A freshly constructed spec object on every call.
 */
function createFailureRoutedMergeSpec(): HarnessSpec {
  return {
    name: "failure-routed-merge",
    graph: {
      entryNodeId: "start",
      nodes: [
        {
          id: "start",
          kind: "tool",
          tool: "echo",
          args: ["start"],
        },
        {
          id: "review",
          kind: "tool",
          tool: "echo",
          args: ["review"],
          executionPolicy: {
            failureClassification: [
              {
                pattern: "timeout",
                category: "transient",
                retry: true,
              },
            ],
          },
        },
        {
          id: "verify",
          kind: "tool",
          tool: "echo",
          args: ["verify"],
        },
        {
          id: "join",
          kind: "merge",
          waitFor: ["review", "verify"],
        },
        {
          id: "finish",
          kind: "subworkflow",
          specRef: "finish-flow",
        },
      ],
      edges: [
        { from: "start", to: "review" },
        { from: "start", to: "verify" },
        { from: "review", to: "join" },
        { from: "verify", to: "join" },
        { from: "join", to: "finish" },
      ],
    },
  };
}
|
|
954
|
+
|
|
955
|
+
/**
 * Builds the "nested-verifier" spec.
 *
 * "start" is verified by "nested-check", and "nested-check" itself carries
 * a verification rule pointing at "final-check" — i.e. a verifier node that
 * has its own verifier. All rules are of kind "tool" with
 * `onFail: "block"`. Edges run "start" -> "nested-check" -> "final-check".
 *
 * @returns A freshly constructed spec object on every call.
 */
function createNestedVerifierSpec(): HarnessSpec {
  return {
    name: "nested-verifier",
    graph: {
      entryNodeId: "start",
      nodes: [
        {
          id: "start",
          kind: "tool",
          tool: "echo",
          args: ["start"],
          verificationPolicy: {
            rules: [
              {
                kind: "tool",
                checkNodeId: "nested-check",
                onFail: "block",
              },
            ],
          },
        },
        {
          id: "nested-check",
          kind: "tool",
          tool: "echo",
          args: ["nested"],
          verificationPolicy: {
            rules: [
              {
                kind: "tool",
                checkNodeId: "final-check",
                onFail: "block",
              },
            ],
          },
        },
        {
          id: "final-check",
          kind: "tool",
          tool: "echo",
          args: ["final"],
        },
      ],
      edges: [
        { from: "start", to: "nested-check" },
        { from: "nested-check", to: "final-check" },
      ],
    },
  };
}
|
|
1005
|
+
|
|
1006
|
+
/**
 * Builds the "invalid-env" spec: a single `printenv HOME` tool node whose
 * `env` map contains the key "BAD=NAME".
 *
 * NOTE(review): the "=" inside the key is presumably what makes this spec
 * "invalid" for whatever validates environment variable names — confirm
 * against the consumer of this fixture.
 *
 * @returns A freshly constructed spec object on every call.
 */
function createInvalidEnvSpec(): HarnessSpec {
  return {
    name: "invalid-env",
    graph: {
      entryNodeId: "run",
      nodes: [
        {
          id: "run",
          kind: "tool",
          tool: "printenv",
          args: ["HOME"],
          env: {
            "BAD=NAME": "oops",
          },
        },
      ],
      edges: [],
    },
  };
}
|
|
1026
|
+
|
|
1027
|
+
/**
 * Builds the "conditional-merge" spec.
 *
 * "start" leads to the "decide" condition node (expression "start.ok"),
 * which names "yes-branch" and "no-branch" as its then/else targets. Both
 * branches have edges into the "join" merge node, which waits on both and
 * then leads to "finish". The edge list contains no entries from "decide"
 * to its branches.
 *
 * @returns A freshly constructed spec object on every call.
 */
function createConditionalMergeSpec(): HarnessSpec {
  return {
    name: "conditional-merge",
    graph: {
      entryNodeId: "start",
      nodes: [
        {
          id: "start",
          kind: "tool",
          tool: "echo",
          args: ["start"],
        },
        {
          id: "decide",
          kind: "condition",
          condition: "start.ok",
          thenNodeId: "yes-branch",
          elseNodeId: "no-branch",
        },
        {
          id: "yes-branch",
          kind: "tool",
          tool: "echo",
          args: ["yes"],
        },
        {
          id: "no-branch",
          kind: "tool",
          tool: "echo",
          args: ["no"],
        },
        {
          id: "join",
          kind: "merge",
          waitFor: ["yes-branch", "no-branch"],
        },
        {
          id: "finish",
          kind: "subworkflow",
          specRef: "finish-flow",
        },
      ],
      edges: [
        { from: "start", to: "decide" },
        { from: "yes-branch", to: "join" },
        { from: "no-branch", to: "join" },
        { from: "join", to: "finish" },
      ],
    },
  };
}
|
|
1078
|
+
|
|
1079
|
+
/**
 * Builds the "block-verification" spec.
 *
 * The entry node "action" (tool) has one verification rule of kind "llm"
 * that references "confirm-output" with `onFail: "block"`. The graph holds
 * just those two nodes and declares no edges.
 *
 * @returns A freshly constructed spec object on every call.
 */
function createBlockVerificationSpec(): HarnessSpec {
  return {
    name: "block-verification",
    graph: {
      entryNodeId: "action",
      nodes: [
        {
          id: "action",
          kind: "tool",
          tool: "npm",
          args: ["test"],
          verificationPolicy: {
            rules: [
              {
                kind: "llm",
                checkNodeId: "confirm-output",
                onFail: "block",
              },
            ],
          },
        },
        {
          id: "confirm-output",
          kind: "llm",
          provider: "anthropic",
          model: "claude-sonnet",
          prompt: "Did the test pass?",
        },
      ],
      edges: [],
    },
  };
}
|
|
1112
|
+
|
|
1113
|
+
/**
 * Builds the "exhaust-retry-verification" spec.
 *
 * The entry node "action" (tool) has one verification rule of kind "tool"
 * that references "verify-check" with `onFail: "retry"` and
 * `maxAttempts: 2`. "verify-check" runs `test -f output.txt`. The graph
 * declares no edges.
 *
 * @returns A freshly constructed spec object on every call.
 */
function createExhaustRetryVerificationSpec(): HarnessSpec {
  return {
    name: "exhaust-retry-verification",
    graph: {
      entryNodeId: "action",
      nodes: [
        {
          id: "action",
          kind: "tool",
          tool: "npm",
          args: ["test"],
          verificationPolicy: {
            rules: [
              {
                kind: "tool",
                checkNodeId: "verify-check",
                onFail: "retry",
                maxAttempts: 2,
              },
            ],
          },
        },
        {
          id: "verify-check",
          kind: "tool",
          tool: "test",
          args: ["-f", "output.txt"],
        },
      ],
      edges: [],
    },
  };
}
|
|
1146
|
+
|
|
1147
|
+
/**
 * Builds the "expression-verification" spec.
 *
 * The entry node "action" (tool) has one verification rule of kind
 * "expression" that references the "check-expr" condition node
 * (expression "outputs.action.ok", branching to "success-node" or
 * "failure-node") with `onFail: "block"`. The only declared edge is
 * "action" -> "finish"; the condition node and its branch targets appear
 * in `nodes` but not in `edges`.
 *
 * @returns A freshly constructed spec object on every call.
 */
function createExpressionVerificationSpec(): HarnessSpec {
  return {
    name: "expression-verification",
    graph: {
      entryNodeId: "action",
      nodes: [
        {
          id: "action",
          kind: "tool",
          tool: "npm",
          args: ["test"],
          verificationPolicy: {
            rules: [
              {
                kind: "expression",
                checkNodeId: "check-expr",
                onFail: "block",
              },
            ],
          },
        },
        {
          id: "check-expr",
          kind: "condition",
          condition: "outputs.action.ok",
          thenNodeId: "success-node",
          elseNodeId: "failure-node",
        },
        {
          id: "success-node",
          kind: "tool",
          tool: "echo",
          args: ["success"],
        },
        {
          id: "failure-node",
          kind: "tool",
          tool: "echo",
          args: ["failure"],
        },
        {
          id: "finish",
          kind: "subworkflow",
          specRef: "finish-flow",
        },
      ],
      edges: [
        { from: "action", to: "finish" },
      ],
    },
  };
}
|