@mhingston5/lasso 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +707 -0
- package/docs/agent-wrangling.png +0 -0
- package/package.json +26 -0
- package/src/capabilities/matcher.ts +25 -0
- package/src/capabilities/registry.ts +103 -0
- package/src/capabilities/types.ts +15 -0
- package/src/cir/lower.ts +253 -0
- package/src/cir/optimize.ts +251 -0
- package/src/cir/types.ts +131 -0
- package/src/cir/validate.ts +265 -0
- package/src/compiler/compile.ts +601 -0
- package/src/compiler/feedback.ts +471 -0
- package/src/compiler/runtime-helpers.ts +455 -0
- package/src/composition/chain.ts +58 -0
- package/src/composition/conditional.ts +76 -0
- package/src/composition/parallel.ts +75 -0
- package/src/composition/types.ts +105 -0
- package/src/environment/analyzer.ts +56 -0
- package/src/environment/discovery.ts +179 -0
- package/src/environment/types.ts +68 -0
- package/src/failures/classifiers.ts +134 -0
- package/src/failures/generator.ts +421 -0
- package/src/failures/map-reference-failures.ts +23 -0
- package/src/failures/ontology.ts +210 -0
- package/src/failures/recovery.ts +214 -0
- package/src/failures/types.ts +14 -0
- package/src/index.ts +67 -0
- package/src/memory/advisor.ts +132 -0
- package/src/memory/extractor.ts +166 -0
- package/src/memory/store.ts +107 -0
- package/src/memory/types.ts +53 -0
- package/src/metaharness/engine.ts +256 -0
- package/src/metaharness/predictor.ts +168 -0
- package/src/metaharness/types.ts +40 -0
- package/src/mutation/derive.ts +308 -0
- package/src/mutation/diff.ts +52 -0
- package/src/mutation/engine.ts +256 -0
- package/src/mutation/types.ts +84 -0
- package/src/pi/command-input.ts +209 -0
- package/src/pi/commands.ts +351 -0
- package/src/pi/extension.ts +16 -0
- package/src/planner/synthesize.ts +83 -0
- package/src/planner/template-rules.ts +183 -0
- package/src/planner/types.ts +42 -0
- package/src/reference/catalog.ts +128 -0
- package/src/reference/patch-validation-strategies.ts +170 -0
- package/src/reference/patch-validation.ts +174 -0
- package/src/reference/pr-review-merge.ts +155 -0
- package/src/reference/strategies.ts +126 -0
- package/src/reference/types.ts +33 -0
- package/src/replanner/risk-rules.ts +161 -0
- package/src/replanner/runtime.ts +308 -0
- package/src/replanner/synthesize.ts +619 -0
- package/src/replanner/types.ts +73 -0
- package/src/spec/schema.ts +254 -0
- package/src/spec/types.ts +319 -0
- package/src/spec/validate.ts +296 -0
- package/src/state/snapshots.ts +43 -0
- package/src/state/types.ts +12 -0
- package/src/synthesis/graph-builder.ts +267 -0
- package/src/synthesis/harness-builder.ts +113 -0
- package/src/synthesis/intent-ir.ts +63 -0
- package/src/synthesis/policy-builder.ts +320 -0
- package/src/synthesis/risk-analyzer.ts +182 -0
- package/src/synthesis/skill-parser.ts +441 -0
- package/src/verification/engine.ts +230 -0
- package/src/versioning/file-store.ts +103 -0
- package/src/versioning/history.ts +43 -0
- package/src/versioning/store.ts +16 -0
- package/src/versioning/types.ts +31 -0
- package/test/capabilities/matcher.test.ts +67 -0
- package/test/capabilities/registry.test.ts +136 -0
- package/test/capabilities/synthesis.test.ts +264 -0
- package/test/cir/lower.test.ts +417 -0
- package/test/cir/optimize.test.ts +266 -0
- package/test/cir/validate.test.ts +368 -0
- package/test/compiler/adaptive-runtime.test.ts +157 -0
- package/test/compiler/compile.test.ts +1198 -0
- package/test/compiler/feedback.test.ts +784 -0
- package/test/compiler/guardrails.test.ts +191 -0
- package/test/compiler/trace.test.ts +404 -0
- package/test/composition/chain.test.ts +328 -0
- package/test/composition/conditional.test.ts +241 -0
- package/test/composition/parallel.test.ts +215 -0
- package/test/environment/analyzer.test.ts +204 -0
- package/test/environment/discovery.test.ts +149 -0
- package/test/failures/classifiers.test.ts +287 -0
- package/test/failures/generator.test.ts +203 -0
- package/test/failures/ontology.test.ts +439 -0
- package/test/failures/recovery.test.ts +300 -0
- package/test/helpers/createFixtureRepo.ts +84 -0
- package/test/helpers/createPatchValidationFixture.ts +144 -0
- package/test/helpers/runCompiledWorkflow.ts +208 -0
- package/test/memory/advisor.test.ts +332 -0
- package/test/memory/extractor.test.ts +295 -0
- package/test/memory/store.test.ts +244 -0
- package/test/metaharness/engine.test.ts +575 -0
- package/test/metaharness/predictor.test.ts +436 -0
- package/test/mutation/derive-failure.test.ts +209 -0
- package/test/mutation/engine.test.ts +622 -0
- package/test/package-smoke.test.ts +29 -0
- package/test/pi/command-input.test.ts +153 -0
- package/test/pi/commands.test.ts +623 -0
- package/test/planner/classify-template.test.ts +32 -0
- package/test/planner/synthesize.test.ts +901 -0
- package/test/reference/PatchValidation.failures.test.ts +137 -0
- package/test/reference/PatchValidation.test.ts +326 -0
- package/test/reference/PrReviewMerge.failures.test.ts +121 -0
- package/test/reference/PrReviewMerge.test.ts +55 -0
- package/test/reference/catalog-open.test.ts +70 -0
- package/test/replanner/runtime.test.ts +207 -0
- package/test/replanner/synthesize.test.ts +303 -0
- package/test/spec/validate.test.ts +1056 -0
- package/test/state/snapshots.test.ts +264 -0
- package/test/synthesis/custom-workflow.test.ts +264 -0
- package/test/synthesis/graph-builder.test.ts +370 -0
- package/test/synthesis/harness-builder.test.ts +128 -0
- package/test/synthesis/policy-builder.test.ts +149 -0
- package/test/synthesis/risk-analyzer.test.ts +230 -0
- package/test/synthesis/skill-parser.test.ts +796 -0
- package/test/verification/engine.test.ts +509 -0
- package/test/versioning/history.test.ts +144 -0
- package/test/versioning/store.test.ts +254 -0
- package/vitest.config.ts +9 -0
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
import { execFileSync } from "node:child_process";
|
|
2
|
+
import type { CompiledHarnessResult, CompiledHarnessWorkflow } from "../../src/compiler/compile.js";
|
|
3
|
+
|
|
4
|
+
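/** Input handed to the compiled workflow under test. It is typed as `unknown`; the runner itself only reads a string `repoPath` (top-level, or nested under `input`) to choose the working directory for tool calls. */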
|
|
5
|
+
export type WorkflowInput = unknown;
|
|
6
|
+
|
|
7
|
+
/** Canned responses and limits used by `runCompiledWorkflow` when driving a compiled workflow. */
export interface RunCompiledWorkflowOptions {
  /** Value resolved for every `"llm-call"` yield; `{ approved: true }` is used when omitted. */
  llmResult?: unknown;

  /** Value resolved for every `"wait-for-event"` yield; `{ approved: true }` is used when omitted. */
  humanResponse?: unknown;

  /** How many continue-as-new restarts the runner allows; treated as `0` when omitted. */
  maxContinuations?: number;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Descriptors a workflow generator yields to the test runner.
 * Each variant is tagged by `kind`, which the runner switches on to pick a canned result.
 */
type YieldItem =
  // A tool invocation; the runner executes `args.command` for the named tool.
  | { kind: "tool-call"; name: string; args: { command: string } }
  // A model call; answered with the configured LLM result.
  | { kind: "llm-call"; messages: Array<unknown>; options?: unknown }
  // A wait on an external event; answered with the configured human response.
  | { kind: "wait-for-event"; eventName: string }
  // A sub-workflow request; echoed back as `{ name, input }`.
  | { kind: "subworkflow"; name: string; input: unknown }
  // A group of items that are resolved one by one into an array of results.
  | { kind: "all"; tasks: Array<YieldItem> }
  // A timer; resolved immediately as `{ delayMs }`.
  | { kind: "timer"; delayMs: number };
|
|
20
|
+
|
|
21
|
+
/**
 * Drives the first workflow of a compiled harness to completion inside the test process.
 *
 * The workflow generator is stepped manually: every yielded item is resolved with
 * `executeYieldItem` and the result is sent back into the generator. An error raised while
 * resolving an item is thrown into the generator so the workflow's own error handling runs.
 *
 * When the workflow calls `continueAsNew` (allowed while fewer than `maxContinuations`
 * restarts have happened) the current run is abandoned and a fresh generator is started
 * with the new input.
 *
 * @param compiled Compiled harness; only `compiled.workflows[0]` is executed.
 * @param input Initial workflow input.
 * @param options Canned responses and the continuation limit; every field is optional.
 * @returns The value returned by the workflow generator.
 * @throws Error when the harness has no workflow, when the workflow itself throws, or when
 *   no run produced a result.
 */
export async function runCompiledWorkflow(
  compiled: CompiledHarnessWorkflow,
  input: WorkflowInput,
  options: RunCompiledWorkflowOptions = {},
): Promise<CompiledHarnessResult> {
  const workflow = compiled.workflows[0];
  if (!workflow) {
    throw new Error("Compiled harness does not contain a workflow to run");
  }

  const maxContinuations = options.maxContinuations ?? 0;
  let currentInput: unknown = input;
  let lastResult: CompiledHarnessResult | undefined;

  for (let continuationCount = 0; continuationCount <= maxContinuations; continuationCount++) {
    // continueAsNew is only enabled while another restart is still permitted.
    const context = createRuntimeContext(continuationCount < maxContinuations);
    const iterator = workflow.generator(context as any, currentInput);
    let continuationTriggered = false;

    // What the generator receives on its next step: a resolved value, or an error to throw into it.
    let pending: { error: unknown } | { value: unknown } = { value: undefined };

    for (;;) {
      let step: ReturnType<typeof iterator.next>;
      try {
        // Every step of the generator (including the very first one and error injection) runs
        // inside this try so that a continueAsNew raised at any point is recognised.
        if ("error" in pending) {
          if (!iterator.throw) {
            throw pending.error;
          }
          step = iterator.throw(pending.error);
        } else {
          step = iterator.next(pending.value);
        }
      } catch (error) {
        if (error instanceof ContinueAsNewError) {
          continuationTriggered = true;
          currentInput = error.input;
          break;
        }
        throw error;
      }

      if (step.done) {
        if (!continuationTriggered) {
          return step.value;
        }
        // Only a finished run yields a real result worth remembering.
        lastResult = step.value;
        break;
      }

      try {
        const resolved = executeYieldItem(step.value as YieldItem, currentInput, options, (nextInput) => {
          continuationTriggered = true;
          currentInput = nextInput;
        });
        pending = { value: resolved };
      } catch (error) {
        // Failures while resolving an item are delivered to the workflow on the next step.
        pending = { error };
      }
    }
  }

  if (lastResult) {
    return lastResult;
  }

  throw new Error("Workflow did not complete");
}
|
|
72
|
+
|
|
73
|
+
/**
 * Control-flow signal thrown by the test context's `continueAsNew`.
 * It carries the input the workflow asked to be restarted with.
 */
class ContinueAsNewError extends Error {
  /** @param input Input for the next workflow run. */
  constructor(public readonly input: unknown) {
    super("continueAsNew called");
    // Report a recognisable name in stack traces and logs instead of the inherited "Error".
    this.name = "ContinueAsNewError";
  }
}
|
|
78
|
+
|
|
79
|
+
/**
 * Builds the stub runtime context passed to a compiled workflow generator in tests.
 *
 * Scheduling helpers do no work: they return plain `YieldItem` descriptors for the runner
 * to resolve. APIs the reference workflows are not expected to use throw when called, the
 * clock and GUID source return fixed values, and tracing / key-value calls are no-ops.
 *
 * @param allowContinueAsNew When `true`, `continueAsNew` throws a `ContinueAsNewError`
 *   carrying the new input; otherwise it throws a plain `Error`.
 */
function createRuntimeContext(allowContinueAsNew: boolean = false) {
  // Factory for stubs of unsupported APIs, so accidental use fails loudly with a uniform message.
  const notUsed = (api: string) => (): never => {
    throw new Error(`${api} is not used in reference workflow tests`);
  };

  return {
    scheduleActivity: notUsed("scheduleActivity"),
    scheduleActivityWithRetry: notUsed("scheduleActivityWithRetry"),
    // Return types are annotated so each descriptor keeps its literal `kind` and is checked
    // against the YieldItem union instead of widening to `kind: string`.
    scheduleTimer: (delayMs: number): YieldItem => ({ kind: "timer", delayMs }),
    waitForEvent: (eventName: string): YieldItem => ({ kind: "wait-for-event", eventName }),
    scheduleSubOrchestration: (name: string, input: unknown): YieldItem => ({ kind: "subworkflow", name, input }),
    all: (tasks: YieldItem[]): YieldItem => ({ kind: "all", tasks }),
    race: notUsed("race"),
    // Fixed values keep runs deterministic.
    utcNow: () => 0,
    newGuid: () => "guid-1",
    continueAsNew: (input: unknown): never => {
      if (!allowContinueAsNew) {
        throw new Error("continueAsNew is not enabled in this test context");
      }
      throw new ContinueAsNewError(input);
    },
    setCustomStatus: () => {},
    traceInfo: () => {},
    traceWarn: () => {},
    traceError: () => {},
    traceDebug: () => {},
    kv: {
      get: () => undefined,
      set: () => undefined,
      clear: () => undefined,
    },
    pi: {
      tool: (name: string, args: { command: string }): YieldItem => ({ kind: "tool-call", name, args }),
      llm: (messages: unknown[], options?: unknown): YieldItem => ({ kind: "llm-call", messages, options }),
      skill: notUsed("skill"),
      sendMessage: notUsed("sendMessage"),
      prompt: notUsed("prompt"),
    },
  };
}
|
|
127
|
+
|
|
128
|
+
/**
 * Resolves one yielded descriptor to the value the workflow generator will receive.
 *
 * @param item Descriptor yielded by the workflow.
 * @param input Current workflow input; used to locate the repository path for tool calls.
 * @param options Canned LLM / human responses.
 * @param onContinueAsNew Forwarded unchanged to nested items of an `"all"` group; no case in
 *   this function invokes it.
 * @returns The tool output, the canned response, or an echo of the descriptor's payload.
 * @throws Error when the tool call fails or the descriptor has an unknown `kind`.
 */
function executeYieldItem(
  item: YieldItem,
  input: WorkflowInput,
  options: RunCompiledWorkflowOptions,
  onContinueAsNew?: (input: unknown) => void,
): unknown {
  switch (item.kind) {
    case "tool-call":
      return executeToolCall(item.name, item.args.command, resolveRepoPath(input));
    case "llm-call":
      return options.llmResult ?? { approved: true };
    case "wait-for-event":
      return options.humanResponse ?? { approved: true };
    case "subworkflow":
      return {
        name: item.name,
        input: item.input,
      };
    case "all":
      return item.tasks.map((task) => executeYieldItem(task, input, options, onContinueAsNew));
    case "timer":
      return { delayMs: item.delayMs };
    default: {
      // Compile-time exhaustiveness check: adding a YieldItem variant without a case fails here.
      const unreachable: never = item;
      // Runtime guard: callers cast yielded values to YieldItem, so an unknown kind can still
      // arrive; fail loudly rather than silently resolving it to `undefined`.
      throw new Error(
        `Unsupported yield item kind in reference workflow tests: ${String((unreachable as { kind?: unknown }).kind)}`,
      );
    }
  }
}
|
|
152
|
+
|
|
153
|
+
/**
 * Extracts the repository path from a workflow input.
 *
 * A string `repoPath` on the input itself wins; otherwise a string `repoPath` on a nested
 * `input` object is used.
 *
 * @returns The path, or `undefined` when the input is not an object or has no string `repoPath`.
 */
function resolveRepoPath(input: WorkflowInput): string | undefined {
  // Reads `repoPath` from a candidate, accepting it only when the candidate is a non-null
  // object and the property is a string.
  const readRepoPath = (candidate: unknown): string | undefined => {
    if (candidate === null || typeof candidate !== "object") {
      return undefined;
    }
    const value = (candidate as Record<string, unknown>).repoPath;
    return typeof value === "string" ? value : undefined;
  };

  if (input === null || typeof input !== "object") {
    return undefined;
  }

  const topLevel = readRepoPath(input);
  // `??` (not `||`) so an empty-string path at the top level is still returned as-is.
  return topLevel ?? readRepoPath((input as Record<string, unknown>).input);
}
|
|
172
|
+
|
|
173
|
+
/**
 * Runs a `bash` tool call synchronously and returns its output.
 *
 * The command is passed to `bash -lc`, with `cwd` as the working directory when one is given.
 * Trimmed stdout is returned parsed as JSON when it parses; otherwise it is wrapped as
 * `{ stdout }` (empty output becomes `{ stdout: "" }`).
 *
 * @param name Tool name; only `"bash"` is supported.
 * @param command Shell command line to execute.
 * @param cwd Optional working directory.
 * @throws Error for any tool other than `bash`, or when the command fails. The failure
 *   message is the trimmed stderr, else stdout, else the underlying error message.
 */
function executeToolCall(name: string, command: string, cwd?: string): unknown {
  if (name !== "bash") {
    throw new Error(`Unsupported tool in reference workflow tests: ${name}`);
  }

  // Phase 1: run the command and capture its trimmed stdout.
  let output: string;
  try {
    output = execFileSync("bash", ["-lc", command], {
      ...(cwd ? { cwd } : {}),
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"],
    }).trim();
  } catch (failure) {
    const details = failure as {
      stderr?: Buffer | string;
      stdout?: Buffer | string;
      message?: string;
    };
    const errText = details.stderr?.toString().trim();
    const outText = details.stdout?.toString().trim();
    throw new Error(errText || outText || details.message || "bash command failed");
  }

  // Phase 2: interpret the output.
  if (output.length === 0) {
    return { stdout: "" };
  }

  try {
    return JSON.parse(output);
  } catch {
    // Not JSON: hand back the raw text.
    return { stdout: output };
  }
}
|
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
|
2
|
+
import { adviseFromMemory } from "../../src/memory/advisor.js";
|
|
3
|
+
import { FileMemoryStore } from "../../src/memory/store.js";
|
|
4
|
+
import type { MemoryStore, HarnessMemory, MemoryAdvice } from "../../src/memory/types.js";
|
|
5
|
+
import type { HarnessSpec } from "../../src/spec/types.js";
|
|
6
|
+
import { mkdtemp, rm } from "node:fs/promises";
|
|
7
|
+
import { tmpdir } from "node:os";
|
|
8
|
+
import { join } from "node:path";
|
|
9
|
+
|
|
10
|
+
/**
 * Builds a two-node harness spec fixture: a `start` bash node followed by a `deploy` bash node.
 *
 * @param name Spec name; defaults to `"test-harness"`.
 */
function makeSpec(name: string = "test-harness"): HarnessSpec {
  // Both nodes are bash `echo <id>` tool nodes, so they share one builder.
  const echoNode = (id: string) => ({
    id,
    kind: "tool" as const,
    tool: "bash",
    args: ["echo", id],
  });

  const startNode = echoNode("start");
  const deployNode = echoNode("deploy");

  return {
    name,
    graph: {
      entryNodeId: startNode.id,
      nodes: [startNode, deployNode],
      edges: [{ from: startNode.id, to: deployNode.id }],
    },
  };
}
|
|
23
|
+
|
|
24
|
+
/**
 * Builds a `HarnessMemory` fixture for `taskId`.
 *
 * Defaults: embedding `hash-<taskId>`, empty pattern and mutation lists, effectiveness `0.5`
 * and `lastUpdated` set to the current time. Any field present in `overrides` replaces its default.
 */
function makeMemory(taskId: string, overrides?: Partial<HarnessMemory>): HarnessMemory {
  const defaults: HarnessMemory = {
    taskId,
    taskEmbedding: `hash-${taskId}`,
    successfulPatterns: [],
    failedPatterns: [],
    mutationHistory: [],
    effectivenessScore: 0.5,
    lastUpdated: Date.now(),
  };

  return { ...defaults, ...overrides };
}
|
|
36
|
+
|
|
37
|
+
// Exercises adviseFromMemory against a FileMemoryStore backed by a fresh temporary directory
// per test. Each test seeds the store with memories whose `taskEmbedding` equals the
// `taskSignature` it then queries with.
describe("memory/adviseFromMemory", () => {
  let storeDir: string;
  let store: MemoryStore;

  beforeEach(async () => {
    storeDir = await mkdtemp(join(tmpdir(), "advisor-test-"));
    store = new FileMemoryStore(storeDir);
  });

  // Requires `afterEach` from vitest (imported explicitly or provided as a global).
  afterEach(async () => {
    await rm(storeDir, { recursive: true, force: true });
  });

  /**
   * Saves `memories` to the store in order, then requests advice.
   * The spec argument is only passed through when one is supplied.
   */
  async function seedAndAdvise(
    memories: HarnessMemory[],
    options: Parameters<typeof adviseFromMemory>[2],
    spec?: HarnessSpec,
    taskId: string = "new-task",
  ) {
    for (const memory of memories) {
      await store.saveMemory(memory);
    }
    return spec === undefined
      ? adviseFromMemory(taskId, store, options)
      : adviseFromMemory(taskId, store, options, spec);
  }

  describe("basic advice generation", () => {
    it("should return empty advice when no memories exist", async () => {
      const advice = await adviseFromMemory("new-task", store);

      expect(advice.suggestions).toEqual([]);
      expect(advice.warnings).toEqual([]);
    });

    it("should return advice when matching memories exist", async () => {
      const advice = await seedAndAdvise(
        [
          makeMemory("similar-task", {
            taskEmbedding: "hash-similar",
            successfulPatterns: ["auth-check-before-deploy"],
            effectivenessScore: 0.8,
          }),
        ],
        { taskSignature: "hash-similar" },
      );

      expect(advice.suggestions.length).toBeGreaterThan(0);
    });
  });

  describe("successful pattern suggestions", () => {
    it("should suggest successful patterns from similar tasks", async () => {
      const advice = await seedAndAdvise(
        [
          makeMemory("task-with-success", {
            taskEmbedding: "hash-deploy",
            successfulPatterns: ["auth-check-before-deploy", "verify-after-deploy"],
            effectivenessScore: 0.9,
          }),
        ],
        { taskSignature: "hash-deploy" },
        undefined,
        "new-deploy-task",
      );

      expect(advice.suggestions.some(s => s.includes("auth-check-before-deploy"))).toBe(true);
      expect(advice.suggestions.some(s => s.includes("verify-after-deploy"))).toBe(true);
    });

    it("should include effectiveness context in suggestions", async () => {
      const advice = await seedAndAdvise(
        [
          makeMemory("high-perf-task", {
            taskEmbedding: "hash-perf",
            successfulPatterns: ["retry-with-backoff"],
            effectivenessScore: 0.95,
          }),
        ],
        { taskSignature: "hash-perf" },
      );

      // "95" matches both a percentage ("95%") and a fraction ("0.95") rendering of the score.
      expect(advice.suggestions.some(s => s.includes("95"))).toBe(true);
    });

    it("should aggregate patterns from multiple matching memories", async () => {
      const advice = await seedAndAdvise(
        [
          makeMemory("task-a", {
            taskEmbedding: "hash-multi",
            successfulPatterns: ["auth-check-before-deploy"],
            effectivenessScore: 0.7,
          }),
          makeMemory("task-b", {
            taskEmbedding: "hash-multi",
            successfulPatterns: ["verify-after-deploy"],
            effectivenessScore: 0.8,
          }),
        ],
        { taskSignature: "hash-multi" },
      );

      expect(advice.suggestions.length).toBeGreaterThanOrEqual(2);
    });
  });

  describe("failed pattern warnings", () => {
    it("should warn about failed patterns from similar tasks", async () => {
      const advice = await seedAndAdvise(
        [
          makeMemory("task-with-failures", {
            taskEmbedding: "hash-fail",
            failedPatterns: ["deploy-without-auth", "skip-verification"],
            effectivenessScore: 0.3,
          }),
        ],
        { taskSignature: "hash-fail" },
      );

      expect(advice.warnings.some(w => w.includes("deploy-without-auth"))).toBe(true);
      expect(advice.warnings.some(w => w.includes("skip-verification"))).toBe(true);
    });

    it("should include failure context in warnings", async () => {
      const advice = await seedAndAdvise(
        [
          makeMemory("failing-task", {
            taskEmbedding: "hash-warn",
            failedPatterns: ["deploy-without-auth"],
            mutationHistory: [
              {
                mutation: "add-node:auth-check",
                triggeredBy: "auth-failure",
                timestamp: Date.now(),
                outcome: "improved",
              },
            ],
            effectivenessScore: 0.4,
          }),
        ],
        { taskSignature: "hash-warn" },
      );

      expect(advice.warnings.some(w => w.includes("deploy-without-auth"))).toBe(true);
    });
  });

  describe("effectiveness-based filtering", () => {
    it("should prioritize high-effectiveness memories", async () => {
      const advice = await seedAndAdvise(
        [
          makeMemory("low-perf", {
            taskEmbedding: "hash-priority",
            successfulPatterns: ["basic-check"],
            effectivenessScore: 0.2,
          }),
          makeMemory("high-perf", {
            taskEmbedding: "hash-priority",
            successfulPatterns: ["advanced-check"],
            effectivenessScore: 0.9,
          }),
        ],
        { taskSignature: "hash-priority" },
      );

      expect(advice.suggestions[0]).toBeDefined();
      expect(advice.suggestions[0]).toContain("advanced-check");
    });

    it("should not suggest from low-effectiveness memories when minEffectiveness is set", async () => {
      const advice = await seedAndAdvise(
        [
          makeMemory("low-perf", {
            taskEmbedding: "hash-filter",
            successfulPatterns: ["bad-pattern"],
            effectivenessScore: 0.1,
          }),
        ],
        { taskSignature: "hash-filter", minEffectiveness: 0.5 },
      );

      expect(advice.suggestions.some(s => s.includes("bad-pattern"))).toBe(false);
    });
  });

  describe("mutation history insights", () => {
    it("should suggest mutations that previously improved outcomes", async () => {
      const advice = await seedAndAdvise(
        [
          makeMemory("mutation-task", {
            taskEmbedding: "hash-mutation",
            successfulPatterns: [],
            mutationHistory: [
              {
                mutation: "add-node:auth-check",
                triggeredBy: "auth-failure",
                timestamp: Date.now(),
                outcome: "improved",
              },
            ],
            effectivenessScore: 0.7,
          }),
        ],
        { taskSignature: "hash-mutation" },
      );

      expect(advice.suggestions.some(s => s.includes("add-node:auth-check"))).toBe(true);
    });

    it("should warn about mutations that made things worse", async () => {
      const advice = await seedAndAdvise(
        [
          makeMemory("bad-mutation-task", {
            taskEmbedding: "hash-bad-mutation",
            mutationHistory: [
              {
                mutation: "remove-node:verification",
                triggeredBy: "slow-execution",
                timestamp: Date.now(),
                outcome: "worse",
              },
            ],
            effectivenessScore: 0.3,
          }),
        ],
        { taskSignature: "hash-bad-mutation" },
      );

      expect(advice.warnings.some(w => w.includes("remove-node:verification"))).toBe(true);
    });
  });

  describe("spec-aware advice", () => {
    it("should tailor suggestions based on current spec nodes", async () => {
      const advice = await seedAndAdvise(
        [
          makeMemory("spec-aware-task", {
            taskEmbedding: "hash-spec",
            successfulPatterns: ["auth-check-before-deploy"],
            effectivenessScore: 0.8,
          }),
        ],
        { taskSignature: "hash-spec" },
        makeSpec("deploy-harness"),
      );

      expect(advice.suggestions.length).toBeGreaterThan(0);
    });

    it("should not suggest patterns for nodes that already exist", async () => {
      // The spec already contains an `auth-check` node ahead of `deploy`.
      const spec: HarnessSpec = {
        name: "harness-with-auth",
        graph: {
          entryNodeId: "auth-check",
          nodes: [
            { id: "auth-check", kind: "tool" as const, tool: "bash", args: ["echo", "auth"] },
            { id: "deploy", kind: "tool" as const, tool: "bash", args: ["echo", "deploy"] },
          ],
          edges: [{ from: "auth-check", to: "deploy" }],
        },
      };

      const advice = await seedAndAdvise(
        [
          makeMemory("existing-node-task", {
            taskEmbedding: "hash-existing",
            successfulPatterns: ["auth-check-before-deploy"],
            effectivenessScore: 0.8,
          }),
        ],
        { taskSignature: "hash-existing" },
        spec,
      );

      // The assertion checks for a suggestion whose text contains "already".
      expect(advice.suggestions.some(s => s.includes("already"))).toBe(true);
    });
  });

  describe("advice structure", () => {
    it("should include source task IDs in advice", async () => {
      const advice = await seedAndAdvise(
        [
          makeMemory("source-task-123", {
            taskEmbedding: "hash-source",
            successfulPatterns: ["good-pattern"],
            effectivenessScore: 0.75,
          }),
        ],
        { taskSignature: "hash-source" },
      );

      expect(advice.sourceTaskIds).toContain("source-task-123");
    });

    it("should include aggregate effectiveness score", async () => {
      const advice = await seedAndAdvise(
        [
          makeMemory("task-a", {
            taskEmbedding: "hash-agg",
            successfulPatterns: ["pattern-a"],
            effectivenessScore: 0.6,
          }),
          makeMemory("task-b", {
            taskEmbedding: "hash-agg",
            successfulPatterns: ["pattern-b"],
            effectivenessScore: 0.8,
          }),
        ],
        { taskSignature: "hash-agg" },
      );

      expect(advice.aggregateEffectiveness).toBeGreaterThan(0);
      expect(advice.aggregateEffectiveness).toBeLessThanOrEqual(1);
    });
  });
});
|