@mhingston5/lasso 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +707 -0
- package/docs/agent-wrangling.png +0 -0
- package/package.json +26 -0
- package/src/capabilities/matcher.ts +25 -0
- package/src/capabilities/registry.ts +103 -0
- package/src/capabilities/types.ts +15 -0
- package/src/cir/lower.ts +253 -0
- package/src/cir/optimize.ts +251 -0
- package/src/cir/types.ts +131 -0
- package/src/cir/validate.ts +265 -0
- package/src/compiler/compile.ts +601 -0
- package/src/compiler/feedback.ts +471 -0
- package/src/compiler/runtime-helpers.ts +455 -0
- package/src/composition/chain.ts +58 -0
- package/src/composition/conditional.ts +76 -0
- package/src/composition/parallel.ts +75 -0
- package/src/composition/types.ts +105 -0
- package/src/environment/analyzer.ts +56 -0
- package/src/environment/discovery.ts +179 -0
- package/src/environment/types.ts +68 -0
- package/src/failures/classifiers.ts +134 -0
- package/src/failures/generator.ts +421 -0
- package/src/failures/map-reference-failures.ts +23 -0
- package/src/failures/ontology.ts +210 -0
- package/src/failures/recovery.ts +214 -0
- package/src/failures/types.ts +14 -0
- package/src/index.ts +67 -0
- package/src/memory/advisor.ts +132 -0
- package/src/memory/extractor.ts +166 -0
- package/src/memory/store.ts +107 -0
- package/src/memory/types.ts +53 -0
- package/src/metaharness/engine.ts +256 -0
- package/src/metaharness/predictor.ts +168 -0
- package/src/metaharness/types.ts +40 -0
- package/src/mutation/derive.ts +308 -0
- package/src/mutation/diff.ts +52 -0
- package/src/mutation/engine.ts +256 -0
- package/src/mutation/types.ts +84 -0
- package/src/pi/command-input.ts +209 -0
- package/src/pi/commands.ts +351 -0
- package/src/pi/extension.ts +16 -0
- package/src/planner/synthesize.ts +83 -0
- package/src/planner/template-rules.ts +183 -0
- package/src/planner/types.ts +42 -0
- package/src/reference/catalog.ts +128 -0
- package/src/reference/patch-validation-strategies.ts +170 -0
- package/src/reference/patch-validation.ts +174 -0
- package/src/reference/pr-review-merge.ts +155 -0
- package/src/reference/strategies.ts +126 -0
- package/src/reference/types.ts +33 -0
- package/src/replanner/risk-rules.ts +161 -0
- package/src/replanner/runtime.ts +308 -0
- package/src/replanner/synthesize.ts +619 -0
- package/src/replanner/types.ts +73 -0
- package/src/spec/schema.ts +254 -0
- package/src/spec/types.ts +319 -0
- package/src/spec/validate.ts +296 -0
- package/src/state/snapshots.ts +43 -0
- package/src/state/types.ts +12 -0
- package/src/synthesis/graph-builder.ts +267 -0
- package/src/synthesis/harness-builder.ts +113 -0
- package/src/synthesis/intent-ir.ts +63 -0
- package/src/synthesis/policy-builder.ts +320 -0
- package/src/synthesis/risk-analyzer.ts +182 -0
- package/src/synthesis/skill-parser.ts +441 -0
- package/src/verification/engine.ts +230 -0
- package/src/versioning/file-store.ts +103 -0
- package/src/versioning/history.ts +43 -0
- package/src/versioning/store.ts +16 -0
- package/src/versioning/types.ts +31 -0
- package/test/capabilities/matcher.test.ts +67 -0
- package/test/capabilities/registry.test.ts +136 -0
- package/test/capabilities/synthesis.test.ts +264 -0
- package/test/cir/lower.test.ts +417 -0
- package/test/cir/optimize.test.ts +266 -0
- package/test/cir/validate.test.ts +368 -0
- package/test/compiler/adaptive-runtime.test.ts +157 -0
- package/test/compiler/compile.test.ts +1198 -0
- package/test/compiler/feedback.test.ts +784 -0
- package/test/compiler/guardrails.test.ts +191 -0
- package/test/compiler/trace.test.ts +404 -0
- package/test/composition/chain.test.ts +328 -0
- package/test/composition/conditional.test.ts +241 -0
- package/test/composition/parallel.test.ts +215 -0
- package/test/environment/analyzer.test.ts +204 -0
- package/test/environment/discovery.test.ts +149 -0
- package/test/failures/classifiers.test.ts +287 -0
- package/test/failures/generator.test.ts +203 -0
- package/test/failures/ontology.test.ts +439 -0
- package/test/failures/recovery.test.ts +300 -0
- package/test/helpers/createFixtureRepo.ts +84 -0
- package/test/helpers/createPatchValidationFixture.ts +144 -0
- package/test/helpers/runCompiledWorkflow.ts +208 -0
- package/test/memory/advisor.test.ts +332 -0
- package/test/memory/extractor.test.ts +295 -0
- package/test/memory/store.test.ts +244 -0
- package/test/metaharness/engine.test.ts +575 -0
- package/test/metaharness/predictor.test.ts +436 -0
- package/test/mutation/derive-failure.test.ts +209 -0
- package/test/mutation/engine.test.ts +622 -0
- package/test/package-smoke.test.ts +29 -0
- package/test/pi/command-input.test.ts +153 -0
- package/test/pi/commands.test.ts +623 -0
- package/test/planner/classify-template.test.ts +32 -0
- package/test/planner/synthesize.test.ts +901 -0
- package/test/reference/PatchValidation.failures.test.ts +137 -0
- package/test/reference/PatchValidation.test.ts +326 -0
- package/test/reference/PrReviewMerge.failures.test.ts +121 -0
- package/test/reference/PrReviewMerge.test.ts +55 -0
- package/test/reference/catalog-open.test.ts +70 -0
- package/test/replanner/runtime.test.ts +207 -0
- package/test/replanner/synthesize.test.ts +303 -0
- package/test/spec/validate.test.ts +1056 -0
- package/test/state/snapshots.test.ts +264 -0
- package/test/synthesis/custom-workflow.test.ts +264 -0
- package/test/synthesis/graph-builder.test.ts +370 -0
- package/test/synthesis/harness-builder.test.ts +128 -0
- package/test/synthesis/policy-builder.test.ts +149 -0
- package/test/synthesis/risk-analyzer.test.ts +230 -0
- package/test/synthesis/skill-parser.test.ts +796 -0
- package/test/verification/engine.test.ts +509 -0
- package/test/versioning/history.test.ts +144 -0
- package/test/versioning/store.test.ts +254 -0
- package/vitest.config.ts +9 -0
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import { mkdir, readFile, readdir, writeFile } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import type { HarnessVersion, LineageEntry } from "./types.js";
|
|
4
|
+
import type { LineageFilter, LineageStore } from "./store.js";
|
|
5
|
+
|
|
6
|
+
/**
 * File-system backed implementation of `LineageStore`.
 *
 * Layout beneath the directory passed to the constructor:
 * - `versions/<version>.json` holds one serialized `HarnessVersion` per file.
 * - `lineage/<version>.json` holds one serialized `LineageEntry` per file.
 *
 * Reads are best-effort: a file that is missing, unreadable or not valid JSON
 * is reported as absent (`null`, or skipped in bulk queries) instead of
 * throwing. Parsed JSON is trusted to have the expected shape; it is not
 * validated against a schema.
 */
export class FileLineageStore implements LineageStore {
  private readonly versionsDir: string;
  private readonly lineageDir: string;

  /**
   * @param storeDir Root directory of the store. Sub-directories are created
   *   lazily on the first write, not here.
   */
  constructor(private readonly storeDir: string) {
    this.versionsDir = join(storeDir, "versions");
    this.lineageDir = join(storeDir, "lineage");
  }

  /** Writes `version` to `versions/<version.version>.json`, replacing any existing file. */
  async saveVersion(version: HarnessVersion): Promise<void> {
    await this.writeJson(this.versionsDir, version.version, version);
  }

  /** Writes `entry` to `lineage/<entry.version>.json`, replacing any existing file. */
  async saveLineage(entry: LineageEntry): Promise<void> {
    await this.writeJson(this.lineageDir, entry.version, entry);
  }

  /** @returns The stored version, or `null` when its file cannot be read or parsed. */
  async getVersion(version: number): Promise<HarnessVersion | null> {
    return this.readJson<HarnessVersion>(join(this.versionsDir, `${version}.json`));
  }

  /** @returns The stored lineage entry, or `null` when its file cannot be read or parsed. */
  async getLineageForVersion(version: number): Promise<LineageEntry | null> {
    return this.readJson<LineageEntry>(join(this.lineageDir, `${version}.json`));
  }

  /**
   * Follows `parentVersion` links upwards starting at `version`.
   *
   * The walk stops at the first version that has no parent, whose version or
   * lineage file is unavailable, or that has already been visited. The last
   * condition guards against cyclic parent links in the stored files, which
   * would otherwise make the loop run forever.
   *
   * @returns Lineage entries ordered from the oldest reachable ancestor to `version`.
   */
  async getLineageChain(version: number): Promise<LineageEntry[]> {
    const newestFirst: LineageEntry[] = [];
    const visited = new Set<number>();
    let current: number | undefined = version;

    while (current !== undefined && !visited.has(current)) {
      visited.add(current);

      const [entry, versionData] = await Promise.all([
        this.getLineageForVersion(current),
        this.getVersion(current),
      ]);

      if (!entry || !versionData) {
        break;
      }

      newestFirst.push(entry);
      current = versionData.parentVersion;
    }

    // Collected child-to-ancestor; callers receive ancestor-to-child.
    return newestFirst.reverse();
  }

  /**
   * Returns stored lineage entries in ascending `version` order, narrowed by `filter`.
   *
   * - `terminalNodeId`: exact match; a falsy value (including `""`) applies no restriction.
   * - `since`: keeps entries with `completedAt >= since`.
   * - `limit`: keeps at most that many entries after the other filters; a
   *   negative limit yields an empty result.
   */
  async queryLineage(filter: LineageFilter): Promise<LineageEntry[]> {
    const { terminalNodeId, since, limit } = filter;
    let entries = await this.loadAllLineage();

    if (terminalNodeId) {
      entries = entries.filter((e) => e.terminalNodeId === terminalNodeId);
    }

    if (since !== undefined) {
      entries = entries.filter((e) => e.completedAt >= since);
    }

    if (limit !== undefined) {
      // Clamp so a negative limit cannot turn into slice's "drop from the end" form.
      entries = entries.slice(0, Math.max(0, limit));
    }

    return entries;
  }

  /**
   * Loads every `*.json` file in the lineage directory, sorted by ascending `version`.
   *
   * A missing or unreadable directory yields `[]`. Individual files that
   * cannot be read or parsed, or that do not contain a JSON object, are
   * skipped so one bad file does not hide the remaining entries.
   */
  private async loadAllLineage(): Promise<LineageEntry[]> {
    let files: string[];
    try {
      files = await readdir(this.lineageDir);
    } catch {
      return [];
    }

    const loaded = await Promise.all(
      files
        .filter((f) => f.endsWith(".json"))
        .map((f) => this.readJson<LineageEntry>(join(this.lineageDir, f))),
    );

    return loaded
      .filter((entry): entry is LineageEntry => typeof entry === "object" && entry !== null)
      .sort((a, b) => a.version - b.version);
  }

  /** Reads and parses one JSON file; any read or parse failure is reported as `null`. */
  private async readJson<T>(filePath: string): Promise<T | null> {
    try {
      const raw = await readFile(filePath, "utf-8");
      return JSON.parse(raw) as T;
    } catch {
      return null;
    }
  }

  /** Ensures `dir` exists, then writes a deep copy of `value` as pretty-printed JSON to `<dir>/<id>.json`. */
  private async writeJson(dir: string, id: number, value: unknown): Promise<void> {
    await mkdir(dir, { recursive: true });
    const data = structuredClone(value);
    await writeFile(join(dir, `${id}.json`), JSON.stringify(data, null, 2), "utf-8");
  }
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import type { CompiledHarnessResult } from "../compiler/compile.js";
|
|
2
|
+
import type { HarnessSpec } from "../spec/types.js";
|
|
3
|
+
import type { HarnessVersion, LineageEntry } from "./types.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Creates the root version record for a harness spec.
 *
 * The record has version number 1, no parent, and the reason `"initial"`.
 * The spec is deep-copied with `structuredClone`, so later changes to the
 * caller's object do not affect the stored record.
 *
 * @param spec Harness specification to snapshot.
 * @returns A new `HarnessVersion` stamped with the current `Date.now()` value.
 */
export function createInitialVersion(spec: HarnessSpec): HarnessVersion {
  const specSnapshot = structuredClone(spec);
  const generatedAt = Date.now();

  return {
    version: 1,
    parentVersion: undefined,
    reason: "initial",
    spec: specSnapshot,
    generatedAt,
  };
}
|
|
14
|
+
|
|
15
|
+
/**
 * Derives the successor of an existing version record.
 *
 * The new record's number is the parent's number plus one, and it points back
 * to the parent through `parentVersion`. The spec is deep-copied with
 * `structuredClone`; the parent record is not modified.
 *
 * @param parentVersion Version record the new one descends from.
 * @param spec Harness specification to snapshot for the new version.
 * @param reason Free-form explanation stored on the new record.
 * @returns A new `HarnessVersion` stamped with the current `Date.now()` value.
 */
export function createNextVersion(
  parentVersion: HarnessVersion,
  spec: HarnessSpec,
  reason: string,
): HarnessVersion {
  const parentNumber = parentVersion.version;
  const specSnapshot = structuredClone(spec);
  const generatedAt = Date.now();

  return {
    version: parentNumber + 1,
    parentVersion: parentNumber,
    reason,
    spec: specSnapshot,
    generatedAt,
  };
}
|
|
28
|
+
|
|
29
|
+
/**
 * Builds the lineage record for one run of a given harness version.
 *
 * Outputs, node results, failures, metrics and trace are each deep-copied
 * from `result` with `structuredClone`, so the entry shares no object
 * references with the live result.
 *
 * @param version Version record the run belongs to; only its number is used.
 * @param result Compiled harness result to snapshot.
 * @returns A new `LineageEntry` with `completedAt` set to the current `Date.now()` value.
 */
export function createLineageEntry(
  version: HarnessVersion,
  result: CompiledHarnessResult,
): LineageEntry {
  const versionNumber = version.version;
  const terminalNodeId = result.terminalNodeId;

  // Clone in field order so the first failing clone is the one that surfaces.
  const outputs = structuredClone(result.outputs);
  const nodeResults = structuredClone(result.harnessState.nodeResults);
  const failures = structuredClone(result.harnessState.failures);
  const metrics = structuredClone(result.harnessState.metrics);
  const trace = structuredClone(result.trace);
  const completedAt = Date.now();

  return {
    version: versionNumber,
    terminalNodeId,
    outputs,
    nodeResults,
    failures,
    metrics,
    trace,
    completedAt,
  };
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { HarnessVersion, LineageEntry } from "./types.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Optional criteria accepted by `LineageStore.queryLineage`.
 *
 * Every field may be omitted; an omitted field applies no restriction.
 */
export interface LineageFilter {
  /** Restrict results to entries with this `terminalNodeId`. */
  terminalNodeId?: string;

  /**
   * Lower bound compared against an entry's `completedAt`.
   * NOTE(review): presumably the same clock as `Date.now()` — confirm against callers.
   */
  since?: number;

  /** Upper bound on the number of entries returned. */
  limit?: number;
}
|
|
8
|
+
|
|
9
|
+
/**
 * Persistence contract for harness versions and the lineage entries recorded
 * for them. Every operation is asynchronous.
 */
export interface LineageStore {
  /** Persists a version record. */
  saveVersion(version: HarnessVersion): Promise<void>;

  /** Persists a lineage entry. */
  saveLineage(entry: LineageEntry): Promise<void>;

  /** Looks up a version record by number; resolves to `null` when none is available. */
  getVersion(version: number): Promise<HarnessVersion | null>;

  /** Looks up the lineage entry for a version number; resolves to `null` when none is available. */
  getLineageForVersion(version: number): Promise<LineageEntry | null>;

  /** Resolves to the lineage entries along the ancestry of `version`. */
  getLineageChain(version: number): Promise<Array<LineageEntry>>;

  /** Resolves to the lineage entries that satisfy `filter`. */
  queryLineage(filter: LineageFilter): Promise<Array<LineageEntry>>;
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { HarnessSpec } from "../spec/types.js";
|
|
2
|
+
import type { HarnessState } from "../state/types.js";
|
|
3
|
+
import type { ExecutionTraceEntry } from "../compiler/runtime-helpers.js";
|
|
4
|
+
|
|
5
|
+
/** One numbered snapshot of a harness spec, linked to the version it was derived from. */
export interface HarnessVersion {
  /** Number identifying this version. */
  version: number;

  /** Number of the version this one was derived from; absent when there is no parent. */
  parentVersion?: number;

  /** Explanation of why this version was created. */
  reason: string;

  /** The harness specification captured for this version. */
  spec: HarnessSpec;

  /**
   * Numeric timestamp of when the record was created.
   * NOTE(review): presumably epoch milliseconds — confirm against the code that builds it.
   */
  generatedAt: number;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Trace of one harness execution: the individual trace entries plus summary figures.
 *
 * NOTE(review): the `*Ms` fields are presumably millisecond values, going by
 * their names — confirm against the code that produces the trace.
 */
export interface HarnessExecutionTrace {
  /** Individual trace entries. */
  entries: Array<ExecutionTraceEntry>;

  /** Total duration of the execution. */
  totalDurationMs: number;

  /** Node count reported for the execution. */
  nodeCount: number;

  /** Failure count reported for the execution. */
  failureCount: number;

  /** Start time of the execution. */
  startTimeMs: number;

  /** End time of the execution. */
  endTimeMs: number;
}
|
|
21
|
+
|
|
22
|
+
/** Recorded outcome of running one harness version. */
export interface LineageEntry {
  /** Number of the harness version this entry belongs to. */
  version: number;

  /** Identifier of the terminal node reported by the run. */
  terminalNodeId: string;

  /** Outputs of the run, keyed by name. */
  outputs: Record<string, unknown>;

  /** Per-node results of the run. */
  nodeResults: Record<string, unknown>;

  /** Failures captured in the harness state. */
  failures: HarnessState["failures"];

  /** Metrics captured in the harness state. */
  metrics: HarnessState["metrics"];

  /** Execution trace of the run. */
  trace: HarnessExecutionTrace;

  /**
   * Numeric timestamp of when the entry was recorded.
   * NOTE(review): presumably epoch milliseconds — confirm against the code that builds it.
   */
  completedAt: number;
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { matchCapabilities } from "../../src/capabilities/matcher.js";
|
|
3
|
+
import { DefaultCapabilityRegistry } from "../../src/capabilities/registry.js";
|
|
4
|
+
import type { Capability } from "../../src/capabilities/types.js";
|
|
5
|
+
|
|
6
|
+
describe("capability matcher", () => {
  // Each case builds its own registry so registrations cannot leak between tests.
  const freshRegistry = () => new DefaultCapabilityRegistry();

  // Projects matched capabilities onto their ids for membership assertions.
  const idsOf = (capabilities: ReadonlyArray<{ id: string }>) =>
    capabilities.map((capability) => capability.id);

  describe("matchCapabilities", () => {
    it("should match all available capabilities", () => {
      const { matched, missing } = matchCapabilities(["bash", "git"], freshRegistry());

      expect(matched).toHaveLength(2);
      expect(missing).toHaveLength(0);
      expect(idsOf(matched)).toContain("bash");
      expect(idsOf(matched)).toContain("git");
    });

    it("should report missing capabilities", () => {
      const { matched, missing } = matchCapabilities(
        ["bash", "nonexistent-tool"],
        freshRegistry(),
      );

      expect(matched).toHaveLength(1);
      expect(missing).toHaveLength(1);
      expect(missing).toContain("nonexistent-tool");
      expect(idsOf(matched)).toContain("bash");
    });

    it("should handle empty required tools", () => {
      const { matched, missing } = matchCapabilities([], freshRegistry());

      expect(matched).toHaveLength(0);
      expect(missing).toHaveLength(0);
    });

    it("should match capability with prerequisites declared", () => {
      const registry = freshRegistry();
      // A user-registered capability that itself declares prerequisites.
      const custom: Capability = {
        id: "custom-tool",
        kind: "tool",
        name: "Custom Tool",
        prerequisites: ["bash", "git"],
        risks: [],
        verification: [],
      };
      registry.registerCapability(custom);

      const { matched, missing } = matchCapabilities(["custom-tool"], registry);

      expect(matched).toHaveLength(1);
      expect(missing).toHaveLength(0);
      expect(matched[0].id).toBe("custom-tool");
    });

    it("should include all pre-registered capabilities", () => {
      const builtIns = ["bash", "git", "node", "llm-review", "human-approval"];

      const { matched, missing } = matchCapabilities(builtIns, freshRegistry());

      expect(matched).toHaveLength(5);
      expect(missing).toHaveLength(0);
    });
  });
});
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { DefaultCapabilityRegistry } from "../../src/capabilities/registry.js";
|
|
3
|
+
import type { Capability } from "../../src/capabilities/types.js";
|
|
4
|
+
|
|
5
|
+
describe("capability registry", () => {
  describe("DefaultCapabilityRegistry", () => {
    // Builds a capability literal; the three list fields default to empty arrays.
    const makeCapability = (
      id: string,
      kind: Capability["kind"],
      name: string,
      lists: Partial<Pick<Capability, "prerequisites" | "risks" | "verification">> = {},
    ): Capability => ({
      id,
      kind,
      name,
      prerequisites: [],
      risks: [],
      verification: [],
      ...lists,
    });

    describe("CRUD operations", () => {
      it("should register and retrieve a capability", () => {
        const registry = new DefaultCapabilityRegistry();
        const cap = makeCapability("test-cap", "tool", "Test Capability");

        registry.registerCapability(cap);

        expect(registry.hasCapability("test-cap")).toBe(true);
        expect(registry.getCapability("test-cap")).toEqual(cap);
      });

      it("should return undefined for non-existent capability", () => {
        const registry = new DefaultCapabilityRegistry();

        expect(registry.hasCapability("nonexistent")).toBe(false);
        expect(registry.getCapability("nonexistent")).toBeUndefined();
      });

      it("should list all registered capabilities", () => {
        const registry = new DefaultCapabilityRegistry();
        // The registry is not empty to begin with, so counts are relative to this baseline.
        const initialCount = registry.getCapabilities().length;
        const cap1 = makeCapability("cap1", "tool", "Cap 1");
        const cap2 = makeCapability("cap2", "llm", "Cap 2");

        registry.registerCapability(cap1);
        registry.registerCapability(cap2);

        const all = registry.getCapabilities();
        expect(all).toHaveLength(initialCount + 2);
        expect(all).toContainEqual(cap1);
        expect(all).toContainEqual(cap2);
      });

      it("should overwrite existing capability on re-register", () => {
        const registry = new DefaultCapabilityRegistry();
        const initialCount = registry.getCapabilities().length;
        const original = makeCapability("test", "tool", "Original");
        const replacement = makeCapability("test", "llm", "Updated", {
          prerequisites: ["other"],
          risks: ["risk1"],
          verification: ["verify1"],
        });

        registry.registerCapability(original);
        registry.registerCapability(replacement);

        // Same id registered twice: only one additional entry, holding the later definition.
        expect(registry.getCapabilities()).toHaveLength(initialCount + 1);
        const stored = registry.getCapability("test");
        expect(stored?.name).toBe("Updated");
        expect(stored?.prerequisites).toEqual(["other"]);
      });
    });

    describe("pre-registered capabilities", () => {
      // Asserts that a fresh registry already knows `id` and hands back its definition.
      const expectPreRegistered = (id: string) => {
        const registry = new DefaultCapabilityRegistry();
        expect(registry.hasCapability(id)).toBe(true);
        return registry.getCapability(id);
      };

      it("should have bash capability pre-registered", () => {
        const bash = expectPreRegistered("bash");

        expect(bash?.kind).toBe("tool");
        expect(bash?.risks.length).toBeGreaterThan(0);
      });

      it("should have git capability pre-registered", () => {
        const git = expectPreRegistered("git");

        expect(git?.kind).toBe("tool");
        expect(git?.prerequisites).toContain("bash");
      });

      it("should have node capability pre-registered", () => {
        const node = expectPreRegistered("node");

        expect(node?.kind).toBe("tool");
        expect(node?.prerequisites).toContain("bash");
      });

      it("should have llm-review capability pre-registered", () => {
        const llmReview = expectPreRegistered("llm-review");

        expect(llmReview?.kind).toBe("llm");
        expect(llmReview?.verification.length).toBeGreaterThan(0);
      });

      it("should have human-approval capability pre-registered", () => {
        const humanApproval = expectPreRegistered("human-approval");

        expect(humanApproval?.kind).toBe("human");
      });
    });
  });
});
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { buildTaskGraph } from "../../src/synthesis/graph-builder.js";
|
|
3
|
+
import { analyzeRisks } from "../../src/synthesis/risk-analyzer.js";
|
|
4
|
+
import { synthesizePolicy } from "../../src/synthesis/policy-builder.js";
|
|
5
|
+
import { planWorkflowRequest } from "../../src/planner/synthesize.js";
|
|
6
|
+
import { DefaultCapabilityRegistry } from "../../src/capabilities/registry.js";
|
|
7
|
+
import type { IntentIR } from "../../src/synthesis/intent-ir.js";
|
|
8
|
+
import type { CapabilityRegistry } from "../../src/capabilities/types.js";
|
|
9
|
+
|
|
10
|
+
/** Returns a new `DefaultCapabilityRegistry`, typed as the `CapabilityRegistry` interface. */
function createRegistry(): CapabilityRegistry {
  const registry: CapabilityRegistry = new DefaultCapabilityRegistry();
  return registry;
}
|
|
13
|
+
|
|
14
|
+
describe("capability-driven synthesis", () => {
  // Builds a "custom"-family intent whose required tools and capabilities list the same ids.
  const customIntent = (tools: string[], goal = "Run custom workflow"): IntentIR => ({
    family: "custom",
    goal,
    inputs: {},
    requiredTools: [...tools],
    humanCheckpoints: [],
    verificationTargets: [],
    capabilities: [...tools],
  });

  // Builds the task graph and analyzes its risks against one shared, fresh registry.
  const graphAndRisks = (intent: IntentIR) => {
    const registry = createRegistry();
    const graph = buildTaskGraph(intent, registry);
    const risks = analyzeRisks(graph, registry);
    return { graph, risks };
  };

  describe("graph construction from capabilities", () => {
    it("should build capability-driven graph when capabilities are present", () => {
      const graph = buildTaskGraph(customIntent(["bash", "git"]), createRegistry());

      expect(graph.stages.length).toBeGreaterThan(0);
      expect(graph.stages.some((stage) => stage.type === "setup")).toBe(true);
    });

    it("should include verification stages from capability verification steps", () => {
      const graph = buildTaskGraph(customIntent(["git"]), createRegistry());

      const verifyStages = graph.stages.filter((stage) => stage.type === "verify");
      expect(verifyStages.length).toBeGreaterThan(0);
    });

    it("should include risk stages from capability risk declarations", () => {
      const graph = buildTaskGraph(customIntent(["bash"]), createRegistry());

      // A stage counts as risk-related if either its id or its description mentions "risk".
      const hasRiskStage = graph.stages.some(
        (stage) => stage.id.includes("risk") || stage.description.toLowerCase().includes("risk"),
      );
      expect(hasRiskStage).toBe(true);
    });

    it("should add human-approval stage when human-approval capability is required", () => {
      const intent = customIntent(["bash", "human-approval"], "Run custom workflow with approval");

      const graph = buildTaskGraph(intent, createRegistry());

      expect(graph.stages.some((stage) => stage.type === "approval")).toBe(true);
    });

    it("should handle missing capabilities gracefully", () => {
      const graph = buildTaskGraph(customIntent(["bash", "nonexistent"]), createRegistry());

      expect(graph.stages.length).toBeGreaterThan(0);
    });
  });

  describe("backward compatibility", () => {
    it("should still build patch-validation graph without capabilities", () => {
      const intent: IntentIR = {
        family: "patch-validation",
        goal: "Validate patch",
        inputs: {
          repoPath: "/repo",
          baselineRef: "main",
          candidateBranch: "fix",
          reproduceCommands: ["npm run fail"],
          verificationCommands: ["npm test"],
          reviewInstructions: "Check",
        },
        requiredTools: ["git"],
        humanCheckpoints: [],
        verificationTargets: ["npm test"],
      };

      // No registry argument: exercises the call shape that predates capabilities.
      const graph = buildTaskGraph(intent);

      expect(graph.family).toBe("patch-validation");
      const stageTypes = graph.stages.map((stage) => stage.type);
      expect(stageTypes).toEqual(["setup", "reproduce", "apply", "verify", "review"]);
    });

    it("should still build pr-review-merge graph without capabilities", () => {
      const intent: IntentIR = {
        family: "pr-review-merge",
        goal: "Review and merge PR",
        inputs: {
          repoPath: "/repo",
          sourceBranch: "feature",
          targetBranch: "main",
          reviewInstructions: "Check",
          verificationCommands: ["npm test"],
        },
        requiredTools: ["git"],
        humanCheckpoints: [],
        verificationTargets: ["npm test"],
      };

      const graph = buildTaskGraph(intent);

      expect(graph.family).toBe("pr-review-merge");
      const stageTypes = graph.stages.map((stage) => stage.type);
      expect(stageTypes).toEqual(["setup", "review", "verify", "merge"]);
    });
  });

  describe("risk analysis with capabilities", () => {
    it("should lower risk when all capabilities are available", () => {
      const { risks } = graphAndRisks(customIntent(["bash", "git"], "Run workflow"));

      expect(risks.overallRisk).toBe("low");
      expect(risks.capabilityRisk).toBeDefined();
      expect(risks.capabilityRisk?.allAvailable).toBe(true);
    });

    it("should escalate risk when capabilities are missing", () => {
      const { risks } = graphAndRisks(customIntent(["bash", "missing-tool"], "Run workflow"));

      expect(risks.capabilityRisk).toBeDefined();
      expect(risks.capabilityRisk?.allAvailable).toBe(false);
      expect(risks.capabilityRisk?.missing).toContain("missing-tool");
    });

    it("should incorporate capability risk declarations", () => {
      const { risks } = graphAndRisks(customIntent(["bash"], "Run workflow"));

      expect(risks.capabilityRisk?.riskFactors.length).toBeGreaterThan(0);
    });
  });

  describe("end-to-end capability-driven synthesis", () => {
    it("should synthesize policy from capability-driven intent", () => {
      const { graph, risks } = graphAndRisks(customIntent(["bash", "git"]));
      const policyResult = synthesizePolicy(graph, risks);

      expect(policyResult.success).toBe(true);
      // The guard narrows the result so `.policy` is accessible to the type checker.
      if (policyResult.success) {
        expect(policyResult.policy.workflow).toBe("custom");
        expect(policyResult.policy.rationale.length).toBeGreaterThan(0);
      }
    });

    it("should include capability information in rationale", () => {
      const { graph, risks } = graphAndRisks(customIntent(["bash", "git"]));
      const policyResult = synthesizePolicy(graph, risks);

      expect(policyResult.success).toBe(true);
      if (policyResult.success) {
        const mentionsCapability = (line: string) =>
          line.toLowerCase().includes("capability") || line.includes("bash") || line.includes("git");

        const hasCapabilityMention = policyResult.policy.rationale.some(mentionsCapability);
        expect(hasCapabilityMention).toBe(true);
      }
    });
  });
});
|