@mhingston5/lasso 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +707 -0
- package/docs/agent-wrangling.png +0 -0
- package/package.json +26 -0
- package/src/capabilities/matcher.ts +25 -0
- package/src/capabilities/registry.ts +103 -0
- package/src/capabilities/types.ts +15 -0
- package/src/cir/lower.ts +253 -0
- package/src/cir/optimize.ts +251 -0
- package/src/cir/types.ts +131 -0
- package/src/cir/validate.ts +265 -0
- package/src/compiler/compile.ts +601 -0
- package/src/compiler/feedback.ts +471 -0
- package/src/compiler/runtime-helpers.ts +455 -0
- package/src/composition/chain.ts +58 -0
- package/src/composition/conditional.ts +76 -0
- package/src/composition/parallel.ts +75 -0
- package/src/composition/types.ts +105 -0
- package/src/environment/analyzer.ts +56 -0
- package/src/environment/discovery.ts +179 -0
- package/src/environment/types.ts +68 -0
- package/src/failures/classifiers.ts +134 -0
- package/src/failures/generator.ts +421 -0
- package/src/failures/map-reference-failures.ts +23 -0
- package/src/failures/ontology.ts +210 -0
- package/src/failures/recovery.ts +214 -0
- package/src/failures/types.ts +14 -0
- package/src/index.ts +67 -0
- package/src/memory/advisor.ts +132 -0
- package/src/memory/extractor.ts +166 -0
- package/src/memory/store.ts +107 -0
- package/src/memory/types.ts +53 -0
- package/src/metaharness/engine.ts +256 -0
- package/src/metaharness/predictor.ts +168 -0
- package/src/metaharness/types.ts +40 -0
- package/src/mutation/derive.ts +308 -0
- package/src/mutation/diff.ts +52 -0
- package/src/mutation/engine.ts +256 -0
- package/src/mutation/types.ts +84 -0
- package/src/pi/command-input.ts +209 -0
- package/src/pi/commands.ts +351 -0
- package/src/pi/extension.ts +16 -0
- package/src/planner/synthesize.ts +83 -0
- package/src/planner/template-rules.ts +183 -0
- package/src/planner/types.ts +42 -0
- package/src/reference/catalog.ts +128 -0
- package/src/reference/patch-validation-strategies.ts +170 -0
- package/src/reference/patch-validation.ts +174 -0
- package/src/reference/pr-review-merge.ts +155 -0
- package/src/reference/strategies.ts +126 -0
- package/src/reference/types.ts +33 -0
- package/src/replanner/risk-rules.ts +161 -0
- package/src/replanner/runtime.ts +308 -0
- package/src/replanner/synthesize.ts +619 -0
- package/src/replanner/types.ts +73 -0
- package/src/spec/schema.ts +254 -0
- package/src/spec/types.ts +319 -0
- package/src/spec/validate.ts +296 -0
- package/src/state/snapshots.ts +43 -0
- package/src/state/types.ts +12 -0
- package/src/synthesis/graph-builder.ts +267 -0
- package/src/synthesis/harness-builder.ts +113 -0
- package/src/synthesis/intent-ir.ts +63 -0
- package/src/synthesis/policy-builder.ts +320 -0
- package/src/synthesis/risk-analyzer.ts +182 -0
- package/src/synthesis/skill-parser.ts +441 -0
- package/src/verification/engine.ts +230 -0
- package/src/versioning/file-store.ts +103 -0
- package/src/versioning/history.ts +43 -0
- package/src/versioning/store.ts +16 -0
- package/src/versioning/types.ts +31 -0
- package/test/capabilities/matcher.test.ts +67 -0
- package/test/capabilities/registry.test.ts +136 -0
- package/test/capabilities/synthesis.test.ts +264 -0
- package/test/cir/lower.test.ts +417 -0
- package/test/cir/optimize.test.ts +266 -0
- package/test/cir/validate.test.ts +368 -0
- package/test/compiler/adaptive-runtime.test.ts +157 -0
- package/test/compiler/compile.test.ts +1198 -0
- package/test/compiler/feedback.test.ts +784 -0
- package/test/compiler/guardrails.test.ts +191 -0
- package/test/compiler/trace.test.ts +404 -0
- package/test/composition/chain.test.ts +328 -0
- package/test/composition/conditional.test.ts +241 -0
- package/test/composition/parallel.test.ts +215 -0
- package/test/environment/analyzer.test.ts +204 -0
- package/test/environment/discovery.test.ts +149 -0
- package/test/failures/classifiers.test.ts +287 -0
- package/test/failures/generator.test.ts +203 -0
- package/test/failures/ontology.test.ts +439 -0
- package/test/failures/recovery.test.ts +300 -0
- package/test/helpers/createFixtureRepo.ts +84 -0
- package/test/helpers/createPatchValidationFixture.ts +144 -0
- package/test/helpers/runCompiledWorkflow.ts +208 -0
- package/test/memory/advisor.test.ts +332 -0
- package/test/memory/extractor.test.ts +295 -0
- package/test/memory/store.test.ts +244 -0
- package/test/metaharness/engine.test.ts +575 -0
- package/test/metaharness/predictor.test.ts +436 -0
- package/test/mutation/derive-failure.test.ts +209 -0
- package/test/mutation/engine.test.ts +622 -0
- package/test/package-smoke.test.ts +29 -0
- package/test/pi/command-input.test.ts +153 -0
- package/test/pi/commands.test.ts +623 -0
- package/test/planner/classify-template.test.ts +32 -0
- package/test/planner/synthesize.test.ts +901 -0
- package/test/reference/PatchValidation.failures.test.ts +137 -0
- package/test/reference/PatchValidation.test.ts +326 -0
- package/test/reference/PrReviewMerge.failures.test.ts +121 -0
- package/test/reference/PrReviewMerge.test.ts +55 -0
- package/test/reference/catalog-open.test.ts +70 -0
- package/test/replanner/runtime.test.ts +207 -0
- package/test/replanner/synthesize.test.ts +303 -0
- package/test/spec/validate.test.ts +1056 -0
- package/test/state/snapshots.test.ts +264 -0
- package/test/synthesis/custom-workflow.test.ts +264 -0
- package/test/synthesis/graph-builder.test.ts +370 -0
- package/test/synthesis/harness-builder.test.ts +128 -0
- package/test/synthesis/policy-builder.test.ts +149 -0
- package/test/synthesis/risk-analyzer.test.ts +230 -0
- package/test/synthesis/skill-parser.test.ts +796 -0
- package/test/verification/engine.test.ts +509 -0
- package/test/versioning/history.test.ts +144 -0
- package/test/versioning/store.test.ts +254 -0
- package/vitest.config.ts +9 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import { describe, expect, it, vi } from "vitest";
|
|
2
|
+
|
|
3
|
+
// Swap the "pi-duroxide" module for a stub whose only export is a mocked registerWorkflow.
vi.mock("pi-duroxide", () => {
  return { registerWorkflow: vi.fn() };
});
|
|
6
|
+
|
|
7
|
+
import { compileHarnessSpec } from "../../src/compiler/compile.js";
|
|
8
|
+
import { checkGuardrails, GuardrailExceededError } from "../../src/compiler/runtime-helpers.js";
|
|
9
|
+
import type { HarnessSpec } from "../../src/spec/types.js";
|
|
10
|
+
|
|
11
|
+
/**
 * Builds a stand-in orchestration context for stepping compiled workflow generators by hand.
 *
 * Most members are bare `vi.fn()` mocks. `pi.tool` and `pi.llm` instead return plain
 * descriptor objects tagged with `kind`, so a test can match on what the generator yielded.
 * `utcNow` and `newGuid` return fixed values.
 */
function createMockContext() {
  const kv = { get: vi.fn(), set: vi.fn(), clear: vi.fn() };

  const pi = {
    tool(name: string, args: unknown) {
      return { kind: "tool-call", name, args };
    },
    llm(messages: unknown[], options?: unknown) {
      return { kind: "llm-call", messages, options };
    },
    skill: vi.fn(),
    sendMessage: vi.fn(),
    prompt: vi.fn(),
  };

  return {
    scheduleActivity: vi.fn(),
    scheduleActivityWithRetry: vi.fn(),
    scheduleTimer: vi.fn(),
    waitForEvent: vi.fn(),
    scheduleSubOrchestration: vi.fn(),
    all: vi.fn(),
    race: vi.fn(),
    utcNow() {
      return 0;
    },
    newGuid() {
      return "guid-1";
    },
    continueAsNew: vi.fn(),
    setCustomStatus: vi.fn(),
    traceInfo: vi.fn(),
    traceWarn: vi.fn(),
    traceError: vi.fn(),
    traceDebug: vi.fn(),
    kv,
    pi,
  };
}
|
|
38
|
+
|
|
39
|
+
/**
 * Builds a spec named "linear-chain" made of `count` "echo" tool nodes (`step-0` … `step-(count-1)`),
 * each linked to its predecessor by one edge, with `step-0` as the entry node.
 *
 * @param count  Number of tool nodes to generate.
 * @param policy Optional execution policy; the `executionPolicy` key is only present when this is given.
 * @returns The assembled spec.
 */
function createLinearSpec(count: number, policy?: import("../../src/spec/types.js").ExecutionPolicy): HarnessSpec {
  const stepId = (position: number) => `step-${position}`;

  const nodes: any[] = [];
  for (let position = 0; position < count; position += 1) {
    nodes.push({
      id: stepId(position),
      kind: "tool",
      tool: "echo",
      args: [`step ${position}`],
    });
  }

  // Every node except the first gets an incoming edge from the node before it.
  const edges: any[] = [];
  for (let position = 1; position < count; position += 1) {
    edges.push({ from: stepId(position - 1), to: stepId(position) });
  }

  const policyPart = policy ? { executionPolicy: policy } : {};
  return {
    name: "linear-chain",
    ...policyPart,
    graph: { entryNodeId: "step-0", nodes, edges },
  };
}
|
|
63
|
+
|
|
64
|
+
// Unit coverage for checkGuardrails: every case hands it a hand-built counter snapshot
// and inspects the returned withinLimits / reason pair.
describe("checkGuardrails", () => {
  it("returns withinLimits=true when no limits set", () => {
    // No maxSteps and no costLimitUsd, so even large counters are expected to pass.
    const unlimited = { stepCount: 100, estimatedCostUsd: 99.99 };

    const { withinLimits, reason } = checkGuardrails(unlimited);

    expect(withinLimits).toBe(true);
    expect(reason).toBeUndefined();
  });

  it("returns withinLimits=true when under step limit", () => {
    const oneBelowCap = { stepCount: 4, estimatedCostUsd: 0, maxSteps: 5 };

    expect(checkGuardrails(oneBelowCap).withinLimits).toBe(true);
  });

  it("returns withinLimits=false when step limit reached", () => {
    // stepCount equal to maxSteps already counts as hitting the limit.
    const atCap = { stepCount: 5, estimatedCostUsd: 0, maxSteps: 5 };

    const { withinLimits, reason } = checkGuardrails(atCap);

    expect(withinLimits).toBe(false);
    expect(reason).toContain("Step limit reached");
    expect(reason).toContain("5/5");
  });

  it("returns withinLimits=true when under cost limit", () => {
    const underBudget = { stepCount: 0, estimatedCostUsd: 0.24, costLimitUsd: 0.25 };

    expect(checkGuardrails(underBudget).withinLimits).toBe(true);
  });

  it("returns withinLimits=false when cost limit exceeded", () => {
    const overBudget = { stepCount: 0, estimatedCostUsd: 0.28, costLimitUsd: 0.25 };

    const { withinLimits, reason } = checkGuardrails(overBudget);

    expect(withinLimits).toBe(false);
    // The reason is expected to quote both the spent amount and the budget.
    expect(reason).toContain("Cost limit exceeded");
    expect(reason).toContain("$0.28");
    expect(reason).toContain("$0.25");
  });

  it("checks both limits and fails on step first if both exceeded", () => {
    const bothBlown = { stepCount: 5, estimatedCostUsd: 1.0, maxSteps: 5, costLimitUsd: 0.25 };

    const { withinLimits, reason } = checkGuardrails(bothBlown);

    expect(withinLimits).toBe(false);
    expect(reason).toContain("Step limit reached");
  });
});
|
|
108
|
+
|
|
109
|
+
// Checks that GuardrailExceededError carries the given message verbatim and reports its own name.
describe("GuardrailExceededError", () => {
  it("has descriptive message for step limit", () => {
    const text = "Step limit reached (5/5)";

    const error = new GuardrailExceededError(text);

    expect(error.message).toBe(text);
    expect(error.name).toBe("GuardrailExceededError");
    expect(error).toBeInstanceOf(Error);
  });

  it("has descriptive message for cost limit", () => {
    const text = "Cost limit exceeded ($0.28/$0.25)";

    const error = new GuardrailExceededError(text);

    expect(error.message).toBe(text);
    expect(error.name).toBe("GuardrailExceededError");
  });
});
|
|
123
|
+
|
|
124
|
+
// End-to-end guardrail checks: compiled workflow generators are stepped by hand with a mock
// context, and checkGuardrails is probed directly with counter snapshots.
describe("guardrail enforcement in compiler", () => {
  it("stops after maxSteps node executions", () => {
    const spec = createLinearSpec(10, { maxSteps: 3 });
    const compiled = compileHarnessSpec(spec);
    const ctx = createMockContext();
    const iterator = compiled.workflows[0].generator(ctx as any, {});

    // Steps 0, 1, 2 should execute; step 3 should throw
    expect(iterator.next().value).toMatchObject({ kind: "tool-call" }); // step-0
    expect(iterator.next("out0").value).toMatchObject({ kind: "tool-call" }); // step-1
    expect(iterator.next("out1").value).toMatchObject({ kind: "tool-call" }); // step-2

    // After 3 steps, the next call should throw GuardrailExceededError.
    // The error is captured once because a generator that has thrown is finished;
    // calling next() again would not throw a second time.
    let thrownError: unknown;
    try {
      iterator.next("out2");
    } catch (error) {
      thrownError = error;
    }

    expect(thrownError).toBeInstanceOf(GuardrailExceededError);
    const { message } = thrownError as GuardrailExceededError;
    expect(message).toContain("Step limit reached");
    expect(message).toContain("3/3");
  });

  it("runs normally without guardrails", () => {
    const spec = createLinearSpec(3);
    const compiled = compileHarnessSpec(spec);
    const ctx = createMockContext();
    const iterator = compiled.workflows[0].generator(ctx as any, {});

    expect(iterator.next().value).toMatchObject({ kind: "tool-call" });
    expect(iterator.next("a").value).toMatchObject({ kind: "tool-call" });
    expect(iterator.next("b").value).toMatchObject({ kind: "tool-call" });

    const completed = iterator.next("c");
    expect(completed.done).toBe(true);
    expect(completed.value.status).toBe("completed");
  });

  it("resets step count on continueAsNew (adaptive)", () => {
    // This test does not drive continueAsNew itself. It only shows that checkGuardrails
    // judges the snapshot it is handed, so a run that starts over with zeroed counters
    // is back within limits.
    const midRun = checkGuardrails({ stepCount: 3, estimatedCostUsd: 0, maxSteps: 5 });
    expect(midRun.withinLimits).toBe(true);

    // A run that has used up its step budget is rejected.
    const exhausted = checkGuardrails({ stepCount: 5, estimatedCostUsd: 0, maxSteps: 5 });
    expect(exhausted.withinLimits).toBe(false);

    // Same limit, counters back at zero: passes again.
    const fresh = checkGuardrails({ stepCount: 0, estimatedCostUsd: 0, maxSteps: 5 });
    expect(fresh.withinLimits).toBe(true);
  });

  it("cost accumulates across execution", () => {
    const state = { stepCount: 2, estimatedCostUsd: 0.20, costLimitUsd: 0.25 };
    const result = checkGuardrails(state);
    expect(result.withinLimits).toBe(true);

    // One more step pushes the accumulated estimate past the budget.
    const state2 = { stepCount: 3, estimatedCostUsd: 0.30, costLimitUsd: 0.25 };
    const result2 = checkGuardrails(state2);
    expect(result2.withinLimits).toBe(false);
  });
});
|
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
import { describe, expect, it, vi, beforeEach } from "vitest";
|
|
2
|
+
|
|
3
|
+
// Swap the "pi-duroxide" module for a stub whose only export is a mocked registerWorkflow.
vi.mock("pi-duroxide", () => {
  return { registerWorkflow: vi.fn() };
});
|
|
6
|
+
|
|
7
|
+
import { compileHarnessSpec } from "../../src/compiler/compile.js";
|
|
8
|
+
import { recordTrace, type ExecutionState, type ExecutionTraceEntry } from "../../src/compiler/runtime-helpers.js";
|
|
9
|
+
import type { CirNode } from "../../src/cir/types.js";
|
|
10
|
+
import type { HarnessSpec } from "../../src/spec/types.js";
|
|
11
|
+
import { createLineageEntry, createInitialVersion } from "../../src/versioning/history.js";
|
|
12
|
+
import type { HarnessExecutionTrace } from "../../src/versioning/types.js";
|
|
13
|
+
|
|
14
|
+
/** Call log filled in by the recording members of the mock context built in createMockContext. */
interface MockContextCalls {
  /** One record per `pi.tool` invocation. */
  tools: { name: string; args: unknown }[];
  /** One record per `pi.llm` invocation. */
  llm: { messages: unknown[]; options?: unknown }[];
  /** Event names passed to `waitForEvent`. */
  events: string[];
  /** Task lists passed to `all`. */
  merges: unknown[][];
  /** One record per `scheduleSubOrchestration` invocation. */
  subworkflows: { name: string; input: unknown }[];
  /** Delays passed to `scheduleTimer`. */
  timers: number[];
  /** Values passed to `setCustomStatus`. */
  statuses: unknown[];
}
|
|
23
|
+
|
|
24
|
+
/**
 * Builds a mock orchestration context together with a log of the calls made through it.
 *
 * The recording members (`scheduleTimer`, `waitForEvent`, `scheduleSubOrchestration`, `all`,
 * `setCustomStatus`, `pi.tool`, `pi.llm`) append to `calls`; all except `setCustomStatus`
 * also return a plain descriptor object tagged with `kind`. Everything else is a bare
 * `vi.fn()` or a fixed-value stub.
 *
 * @returns `{ calls, context }` — the call log and the context object to pass to a generator.
 */
function createMockContext() {
  const calls: MockContextCalls = {
    tools: [],
    llm: [],
    events: [],
    merges: [],
    subworkflows: [],
    timers: [],
    statuses: [],
  };

  const kv = { get: vi.fn(), set: vi.fn(), clear: vi.fn() };

  const pi = {
    tool(name: string, args: unknown) {
      calls.tools.push({ name, args });
      return { kind: "tool-call", name, args };
    },
    llm(messages: unknown[], options?: unknown) {
      calls.llm.push({ messages, options });
      return { kind: "llm-call", messages, options };
    },
    skill: vi.fn(),
    sendMessage: vi.fn(),
    prompt: vi.fn(),
  };

  const context = {
    scheduleActivity: vi.fn(),
    scheduleActivityWithRetry: vi.fn(),
    scheduleTimer(delayMs: number) {
      calls.timers.push(delayMs);
      return { kind: "timer", delayMs };
    },
    waitForEvent(eventName: string) {
      calls.events.push(eventName);
      return { kind: "wait-for-event", eventName };
    },
    scheduleSubOrchestration(name: string, input: unknown) {
      calls.subworkflows.push({ name, input });
      return { kind: "subworkflow", name, input };
    },
    all(tasks: unknown[]) {
      calls.merges.push(tasks);
      return { kind: "all", tasks };
    },
    race: vi.fn(),
    utcNow() {
      return 0;
    },
    newGuid() {
      return "guid-1";
    },
    continueAsNew: vi.fn(),
    setCustomStatus(status: unknown) {
      calls.statuses.push(status);
    },
    traceInfo: vi.fn(),
    traceWarn: vi.fn(),
    traceError: vi.fn(),
    traceDebug: vi.fn(),
    kv,
    pi,
  };

  return { calls, context };
}
|
|
88
|
+
|
|
89
|
+
/**
 * Builds a minimal "tool" node fixture whose `id` and `source.specNodeId` are both `id`.
 * The literal is asserted to `CirNode` rather than checked against it.
 */
function makeNode(id: string): CirNode {
  const fixture = {
    id,
    kind: "tool",
    source: {
      specNodeId: id,
      specNodeKind: "tool",
      specPath: "graph.nodes[0]",
    },
    action: {
      tool: "echo",
      args: ["test"],
    },
  } as CirNode;

  return fixture;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Builds an empty execution state fixture stamped with the current time.
 *
 * @param overrides Optional fields that replace the defaults (shallow, top-level only).
 * @returns A new state object on every call.
 */
function makeState(overrides?: Partial<ExecutionState>): ExecutionState {
  const harnessState = {
    inputs: {},
    outputs: {},
    nodeResults: {},
    failures: [],
    metrics: { retries: 0, durationMs: 0 },
  };

  return {
    input: {},
    outputs: {},
    trace: [],
    harnessState,
    startTimeMs: Date.now(),
    // Spread last so any caller-supplied field wins over the default above.
    ...overrides,
  };
}
|
|
114
|
+
|
|
115
|
+
// Exercises recordTrace directly: each case records one entry into a fresh state and
// inspects the first element of state.trace.
describe("ExecutionTraceEntry enrichment", () => {
  // Fresh mock context, empty state and tool node for a single recordTrace call.
  const arrange = (nodeId = "n1") => ({
    context: createMockContext().context as any,
    state: makeState(),
    node: makeNode(nodeId),
  });

  describe("recordTrace", () => {
    it("captures startedAt on enter phase", () => {
      const { context, state, node } = arrange();
      const before = Date.now();

      recordTrace(context, state, node, "enter");

      const [entry] = state.trace;
      expect(entry.startedAt).toBeDefined();
      expect(entry.startedAt!).toBeGreaterThanOrEqual(before);
      expect(entry.startedAt!).toBeLessThanOrEqual(Date.now());
      expect(entry.completedAt).toBeUndefined();
    });

    it("captures completedAt on success phase", () => {
      const { context, state, node } = arrange();
      const before = Date.now();

      recordTrace(context, state, node, "success");

      const [entry] = state.trace;
      expect(entry.completedAt).toBeDefined();
      expect(entry.completedAt!).toBeGreaterThanOrEqual(before);
      expect(entry.completedAt!).toBeLessThanOrEqual(Date.now());
      expect(entry.startedAt).toBeUndefined();
    });

    it("captures completedAt on failure phase", () => {
      const { context, state, node } = arrange();

      recordTrace(context, state, node, "failure", { message: "boom" });

      const [entry] = state.trace;
      expect(entry.completedAt).toBeDefined();
      expect(entry.startedAt).toBeUndefined();
    });

    it("does not set startedAt or completedAt for non-timestamp phases", () => {
      const { context, state, node } = arrange();

      recordTrace(context, state, node, "retry", { delayMs: 100 });

      const [entry] = state.trace;
      expect(entry.startedAt).toBeUndefined();
      expect(entry.completedAt).toBeUndefined();
    });

    it("accepts inputSnapshot and outputSnapshot", () => {
      const { context, state, node } = arrange();
      const input = { data: "input" };
      const output = { data: "output" };

      recordTrace(context, state, node, "enter", undefined, input, output);

      const [entry] = state.trace;
      expect(entry.inputSnapshot).toEqual({ data: "input" });
      expect(entry.outputSnapshot).toEqual({ data: "output" });
    });

    it("caps inputSnapshot to ~1KB", () => {
      const { context, state, node } = arrange();
      const largeInput = "x".repeat(2000);

      recordTrace(context, state, node, "enter", undefined, largeInput);

      // Measured on the JSON form; 1050 leaves a little slack above 1024.
      const [entry] = state.trace;
      expect(JSON.stringify(entry.inputSnapshot).length).toBeLessThanOrEqual(1050);
    });

    it("caps outputSnapshot to ~1KB", () => {
      const { context, state, node } = arrange();
      const largeOutput = { data: "y".repeat(2000) };

      recordTrace(context, state, node, "success", undefined, undefined, largeOutput);

      const [entry] = state.trace;
      expect(JSON.stringify(entry.outputSnapshot).length).toBeLessThanOrEqual(1050);
    });

    it("preserves parentNodeId in details", () => {
      const { context, state, node } = arrange("child-1");

      recordTrace(context, state, node, "enter", { parentNodeId: "parent-1" });

      const [entry] = state.trace;
      expect(entry.details?.parentNodeId).toBe("parent-1");
    });

    it("does not truncate snapshots under 1KB", () => {
      const { context, state, node } = arrange();
      const smallInput = { key: "value" };

      recordTrace(context, state, node, "enter", undefined, smallInput);

      const [entry] = state.trace;
      expect(entry.inputSnapshot).toEqual(smallInput);
    });
  });
});
|
|
233
|
+
|
|
234
|
+
// Runs compiled workflows to completion by hand and inspects the `trace` on the final value.
describe("HarnessExecutionTrace", () => {
  // Compiles the spec and starts its first workflow against a fresh mock context.
  const startWorkflow = (spec: HarnessSpec) => {
    const compiled = compileHarnessSpec(spec);
    const mock = createMockContext();
    return compiled.workflows[0].generator(mock.context as any, {});
  };

  it("buildCompletedResult wraps trace into HarnessExecutionTrace", () => {
    const iterator = startWorkflow(createSimpleToolSpec());

    iterator.next();
    const completed = iterator.next({ stdout: "ok" });

    expect(completed.done).toBe(true);
    const { trace } = completed.value;
    expect(trace).toBeDefined();

    expect(isHarnessExecutionTrace(trace)).toBe(true);
    // Narrowing guard only; the expectation above has already failed the test otherwise.
    if (!isHarnessExecutionTrace(trace)) {
      return;
    }
    expect(trace.entries).toBeInstanceOf(Array);
    expect(trace.entries.length).toBeGreaterThanOrEqual(2);
    expect(trace.totalDurationMs).toBeGreaterThanOrEqual(0);
    expect(trace.nodeCount).toBeGreaterThanOrEqual(1);
    expect(trace.failureCount).toBe(0);
    expect(trace.startTimeMs).toBeGreaterThan(0);
    expect(trace.endTimeMs).toBeGreaterThanOrEqual(trace.startTimeMs);
  });

  it("failureCount reflects failed entries", () => {
    const iterator = startWorkflow(createRetrySpec());

    // First attempt
    iterator.next();
    // Trigger failure then retry: the generator is expected to yield a 2000 ms timer.
    const afterTimer = iterator.throw?.(new Error("timeout while running verification"));
    expect(afterTimer?.value).toEqual({ kind: "timer", delayMs: 2000 });
    iterator.next();
    // Second attempt succeeds
    const completed = iterator.next({ passed: true });

    expect(completed.done).toBe(true);
    const { trace } = completed.value;
    expect(isHarnessExecutionTrace(trace)).toBe(true);
    if (!isHarnessExecutionTrace(trace)) {
      return;
    }
    expect(trace.failureCount).toBeGreaterThanOrEqual(1);
  });

  it("entries contain enriched trace fields", () => {
    const iterator = startWorkflow(createSimpleToolSpec());

    iterator.next();
    const completed = iterator.next({ stdout: "ok" });

    const { trace } = completed.value;
    expect(isHarnessExecutionTrace(trace)).toBe(true);
    if (!isHarnessExecutionTrace(trace)) {
      return;
    }

    const enterEntry = trace.entries.find((candidate) => candidate.phase === "enter");
    const successEntry = trace.entries.find((candidate) => candidate.phase === "success");

    expect(enterEntry).toBeDefined();
    expect(enterEntry!.startedAt).toBeDefined();
    expect(successEntry).toBeDefined();
    expect(successEntry!.completedAt).toBeDefined();
  });
});
|
|
300
|
+
|
|
301
|
+
// Feeds a hand-written completed result to createLineageEntry and checks that the
// trace summary fields come back unchanged on the lineage entry.
describe("LineageEntry with HarnessExecutionTrace", () => {
  it("createLineageEntry produces HarnessExecutionTrace in trace field", () => {
    const version = createInitialVersion(createSimpleToolSpec());
    const now = Date.now();
    const elapsedMs = 50;

    // Each trace entry gets its own source object, as a real run would not share them.
    const runDiffSource = () => ({
      specNodeId: "run-diff",
      specNodeKind: "tool",
      specPath: "graph.nodes[0]",
    });

    const mockResult = {
      status: "completed" as const,
      terminalNodeId: "run-diff",
      result: { stdout: "ok" },
      outputs: { "run-diff": { stdout: "ok" } },
      trace: {
        entries: [
          { nodeId: "run-diff", source: runDiffSource(), phase: "enter" as const, startedAt: now },
          { nodeId: "run-diff", source: runDiffSource(), phase: "success" as const, completedAt: now + elapsedMs },
        ],
        totalDurationMs: elapsedMs,
        nodeCount: 1,
        failureCount: 0,
        startTimeMs: now,
        endTimeMs: now + elapsedMs,
      },
      harnessState: {
        inputs: {},
        outputs: { "run-diff": { stdout: "ok" } },
        nodeResults: { "run-diff": { stdout: "ok" } },
        failures: [],
        metrics: { retries: 0, durationMs: elapsedMs },
      },
    };

    const { trace } = createLineageEntry(version, mockResult);

    expect(trace).toBeDefined();
    expect(isHarnessExecutionTrace(trace)).toBe(true);
    // Narrowing guard only; the expectation above has already failed the test otherwise.
    if (!isHarnessExecutionTrace(trace)) {
      return;
    }
    expect(trace.entries).toHaveLength(2);
    expect(trace.totalDurationMs).toBe(50);
    expect(trace.nodeCount).toBe(1);
    expect(trace.failureCount).toBe(0);
    expect(trace.startTimeMs).toBe(now);
    expect(trace.endTimeMs).toBe(now + 50);
  });
});
|
|
345
|
+
|
|
346
|
+
/**
 * Type guard used by the tests to tell a `HarnessExecutionTrace` from any other value.
 *
 * Checks the runtime types of the four summary fields rather than just their presence,
 * so a value such as `{ entries: null, ... }` is not narrowed to a trace.
 * `startTimeMs` and `endTimeMs` are not checked here.
 *
 * @param trace Any value, typically the `trace` property of a workflow result.
 * @returns `true` when `trace` is a non-null object with an array `entries` and numeric
 *          `totalDurationMs`, `nodeCount` and `failureCount`.
 */
function isHarnessExecutionTrace(trace: unknown): trace is HarnessExecutionTrace {
  if (typeof trace !== "object" || trace === null) {
    return false;
  }

  const candidate = trace as Record<string, unknown>;
  return (
    Array.isArray(candidate.entries) &&
    typeof candidate.totalDurationMs === "number" &&
    typeof candidate.nodeCount === "number" &&
    typeof candidate.failureCount === "number"
  );
}
|
|
356
|
+
|
|
357
|
+
/**
 * Builds a one-node spec named "run-diff": a single `git diff main...feature` tool node
 * run in `/repo`, no edges, with an execution policy of `{ timeout: 30 }`.
 */
function createSimpleToolSpec(): HarnessSpec {
  const graph: HarnessSpec["graph"] = {
    entryNodeId: "run-diff",
    nodes: [
      { id: "run-diff", kind: "tool", tool: "git", args: ["diff", "main...feature"], cwd: "/repo" },
    ],
    edges: [],
  };

  return { name: "run-diff", executionPolicy: { timeout: 30 }, graph };
}
|
|
376
|
+
|
|
377
|
+
/**
 * Builds a one-node spec named "retry-tool": an `npm test` tool node with a two-attempt,
 * constant-backoff retry policy limited to "transient" failures, plus a failure
 * classification rule that maps the pattern "timeout" to the "transient" category.
 */
function createRetrySpec(): HarnessSpec {
  const executionPolicy: HarnessSpec["executionPolicy"] = {
    failureClassification: [{ pattern: "timeout", category: "transient", retry: true }],
  };

  const graph: HarnessSpec["graph"] = {
    entryNodeId: "verify",
    nodes: [
      {
        id: "verify",
        kind: "tool",
        tool: "npm",
        args: ["test"],
        retryPolicy: { maxAttempts: 2, backoff: "constant", initialDelay: 2, retryOn: ["transient"] },
      },
    ],
    edges: [],
  };

  return { name: "retry-tool", executionPolicy, graph };
}
|