@fusionkit/ensemble 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.d.ts +21 -0
- package/dist/agent.js +186 -0
- package/dist/artifacts.d.ts +21 -0
- package/dist/artifacts.js +36 -0
- package/dist/claude-code.d.ts +25 -0
- package/dist/claude-code.js +398 -0
- package/dist/codex.d.ts +69 -0
- package/dist/codex.js +467 -0
- package/dist/command.d.ts +15 -0
- package/dist/command.js +82 -0
- package/dist/dashboard.d.ts +62 -0
- package/dist/dashboard.js +788 -0
- package/dist/external-executor.d.ts +56 -0
- package/dist/external-executor.js +288 -0
- package/dist/harness.d.ts +337 -0
- package/dist/harness.js +1 -0
- package/dist/index.d.ts +30 -0
- package/dist/index.js +15 -0
- package/dist/isolation.d.ts +25 -0
- package/dist/isolation.js +509 -0
- package/dist/judge.d.ts +77 -0
- package/dist/judge.js +16 -0
- package/dist/mock.d.ts +20 -0
- package/dist/mock.js +56 -0
- package/dist/run.d.ts +5 -0
- package/dist/run.js +520 -0
- package/dist/synthesis.d.ts +25 -0
- package/dist/synthesis.js +221 -0
- package/dist/test/codex.test.d.ts +1 -0
- package/dist/test/codex.test.js +237 -0
- package/dist/test/dashboard.test.d.ts +1 -0
- package/dist/test/dashboard.test.js +214 -0
- package/dist/test/ensemble.test.d.ts +1 -0
- package/dist/test/ensemble.test.js +780 -0
- package/dist/test/external-executor.test.d.ts +1 -0
- package/dist/test/external-executor.test.js +273 -0
- package/dist/test/isolation.test.d.ts +1 -0
- package/dist/test/isolation.test.js +359 -0
- package/dist/test/tool-executor.test.d.ts +1 -0
- package/dist/test/tool-executor.test.js +113 -0
- package/dist/test/unified.test.d.ts +1 -0
- package/dist/test/unified.test.js +150 -0
- package/dist/tool-executor.d.ts +14 -0
- package/dist/tool-executor.js +156 -0
- package/dist/trace.d.ts +8 -0
- package/dist/trace.js +7 -0
- package/dist/unified.d.ts +101 -0
- package/dist/unified.js +422 -0
- package/dist/worktree.d.ts +25 -0
- package/dist/worktree.js +75 -0
- package/package.json +35 -0
package/dist/mock.d.ts
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import type { HarnessAdapter, HarnessArtifact, HarnessCandidateOutput, HarnessToolRecord } from "./harness.js";
|
|
2
|
+
/**
 * Optional per-model overrides for the candidate output produced by the mock
 * harness. Any field left out falls back to whatever `createMockHarness`
 * generates for that candidate.
 */
export type MockCandidateFixture = {
    /** Transcript text reported for the candidate. */
    transcript?: string;
    /** Diff text reported for the candidate. */
    diff?: string;
    /** Model call identifier to attach to the candidate output. */
    modelCallId?: string;
    /** Model call record to attach to the candidate output. */
    modelCallRecord?: HarnessCandidateOutput["modelCallRecord"];
    /** Branch name to attach to the candidate output. */
    branchName?: string;
    /** Worktree path to attach to the candidate output. */
    worktreePath?: string;
    /** Summary text to attach to the candidate output. */
    summary?: string;
    /** Candidate status to report. */
    status?: HarnessCandidateOutput["status"];
    /** Candidate score to report. */
    score?: number;
    /** Artifacts to report instead of the generated ones. */
    artifacts?: HarnessArtifact[];
    /** Tool records to report for the candidate. */
    toolRecords?: HarnessToolRecord[];
    /** Verification result to report for the candidate. */
    verification?: HarnessCandidateOutput["verification"];
};
|
|
16
|
+
/** Options accepted by `createMockHarness`. */
export type MockHarnessOptions = {
    /** Identifier for the harness adapter. */
    id?: string;
    /** Candidate fixtures; presumably keyed by model id — confirm against the implementation. */
    candidates?: Record<string, MockCandidateFixture>;
};
|
|
20
|
+
/**
 * Creates a `HarnessAdapter` for use as a mock harness.
 *
 * @param options Optional harness id and candidate fixtures.
 * @returns The mock harness adapter.
 */
export declare function createMockHarness(options?: MockHarnessOptions): HarnessAdapter;
|
package/dist/mock.js
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { artifactHash } from "@fusionkit/protocol";
|
|
2
|
+
/**
 * Builds a synthetic artifact reference for mock content.
 *
 * @param kind Artifact kind to record.
 * @param id Identifier stored as `artifact_id`.
 * @param content Content passed to `artifactHash` to produce the `hash`.
 * @returns An artifact reference whose `redaction_status` is "synthetic".
 */
function artifactFor(kind, id, content) {
    const hash = artifactHash(content);
    const reference = { artifact_id: id, kind, hash, redaction_status: "synthetic" };
    return reference;
}
|
|
10
|
+
/**
 * Creates a harness adapter that fabricates candidate outputs instead of
 * running a model.
 *
 * @param options Optional `id` (defaults to "mock") and `candidates`, a map of
 *   per-model fixtures looked up by `model.id`.
 * @returns An adapter exposing `id`, `prepare`, `capabilities`,
 *   `verificationProfile`, `run` and `collectArtifacts`.
 */
export function createMockHarness(options = {}) {
    const id = options.id ?? "mock";
    // Emits `{ [key]: value }` only for truthy values so unset fixture fields
    // never appear on the candidate output.
    const whenSet = (key, value) => (value ? { [key]: value } : {});
    const run = ({ descriptor, model, ordinal }) => {
        const fixture = options.candidates?.[model.id] ?? {};
        const transcript = fixture.transcript ?? `mock transcript for ${descriptor.id}/${model.id}`;
        const diff = fixture.diff ?? `diff --git a/${model.id}.txt b/${model.id}.txt`;
        // Fixture artifacts win; otherwise synthesize one transcript and one patch artifact.
        const artifacts = fixture.artifacts ?? [
            artifactFor("transcript", `artifact_${descriptor.id}_${model.id}_transcript`, transcript),
            artifactFor("patch", `artifact_${descriptor.id}_${model.id}_diff`, diff)
        ];
        const defaultVerification = {
            status: "succeeded",
            evidence: ["mock verification passed"],
            exitCode: 0
        };
        return {
            candidateId: `${descriptor.id}_${model.id}_${ordinal}`,
            model,
            status: fixture.status ?? "succeeded",
            ...whenSet("modelCallId", fixture.modelCallId),
            ...whenSet("modelCallRecord", fixture.modelCallRecord),
            ...whenSet("branchName", fixture.branchName),
            ...whenSet("worktreePath", fixture.worktreePath),
            transcript,
            diff,
            ...whenSet("summary", fixture.summary),
            score: fixture.score ?? 1,
            artifacts,
            toolRecords: fixture.toolRecords ?? [],
            verification: fixture.verification ?? defaultVerification
        };
    };
    return {
        id,
        prepare: () => {
            const preparedAt = new Date().toISOString();
            return { preparedAt };
        },
        capabilities: () => ({
            workspace_read: "supported",
            apply_patch: "supported",
            tool_records: "supported",
            verification: "supported"
        }),
        verificationProfile: () => ({
            id: `${id}-verification`,
            requiredEvidence: ["transcript", "diff", "verification"]
        }),
        run,
        collectArtifacts: () => []
    };
}
|
package/dist/run.d.ts
ADDED
package/dist/run.js
ADDED
|
@@ -0,0 +1,520 @@
|
|
|
1
|
+
import { assertHarnessCandidateRecordV1, assertHarnessRunRequestV1, assertHarnessRunResultV1, MODEL_FUSION_SCHEMA_BUNDLE_HASH, requestHash } from "@fusionkit/protocol";
|
|
2
|
+
import { createArtifactStore } from "./artifacts.js";
|
|
3
|
+
import { runJudgeSynthesis } from "./synthesis.js";
|
|
4
|
+
import { cleanupWorktreePlan, createWorktreePlan, defaultOutputRoot, diffCandidateWorktree, sealCandidateWorktree } from "./worktree.js";
|
|
5
|
+
// Producer identity values used by metadata() in this module.
const PRODUCER = "handoffkit-ensemble";
const PRODUCER_VERSION = "0.1.0";
// Forty "0" characters.
const PRODUCER_GIT_SHA = "".padEnd(40, "0");
// Fallbacks used when a container isolation config omits a value.
const DEFAULT_CONTAINER_ENGINE = "docker";
const DEFAULT_CONTAINER_IMAGE = "node:22";
const DEFAULT_CONTAINER_WORKDIR = "/workspace";
// Fallbacks used when a microVM isolation config omits a value.
const DEFAULT_MICROVM_PROVIDER = "vercel-sandbox";
const DEFAULT_MICROVM_RUNTIME = "node24";
const UNKNOWN_RUNTIME_DIGEST = "unknown";
|
|
14
|
+
/**
 * Builds the envelope fields shared by the request, candidate and result
 * records created in this module.
 *
 * @param input Object carrying the record `schema` name and the `createdAt`
 *   timestamp.
 * @returns The schema, version, bundle hash, producer identity and creation
 *   time fields.
 */
function metadata(input) {
    const { schema, createdAt } = input;
    return {
        schema,
        schema_version: "v1",
        schema_bundle_hash: MODEL_FUSION_SCHEMA_BUNDLE_HASH,
        producer: PRODUCER,
        producer_version: PRODUCER_VERSION,
        producer_git_sha: PRODUCER_GIT_SHA,
        created_at: createdAt
    };
}
|
|
25
|
+
/**
 * Collapses per-candidate statuses into a single run status.
 *
 * Precedence: any "failed" wins, then any "requires_action", then "skipped"
 * when every candidate was skipped (including an empty list), otherwise
 * "succeeded".
 *
 * @param outputs Candidate outputs, each with a `status`.
 * @returns The aggregate status string.
 */
function terminalStatus(outputs) {
    const statuses = outputs.map((output) => output.status);
    for (const blocking of ["failed", "requires_action"]) {
        if (statuses.includes(blocking)) {
            return blocking;
        }
    }
    const everySkipped = statuses.every((status) => status === "skipped");
    return everySkipped ? "skipped" : "succeeded";
}
|
|
34
|
+
function assertDescriptor(descriptor) {
|
|
35
|
+
if ("checks" in descriptor && descriptor.checks !== undefined) {
|
|
36
|
+
throw new Error("ensemble descriptors do not accept ad hoc checks");
|
|
37
|
+
}
|
|
38
|
+
if (!descriptor.harness)
|
|
39
|
+
throw new Error("ensemble descriptor requires one harness");
|
|
40
|
+
if (!Array.isArray(descriptor.models) || descriptor.models.length === 0) {
|
|
41
|
+
throw new Error("ensemble descriptor requires at least one model");
|
|
42
|
+
}
|
|
43
|
+
for (const model of descriptor.models) {
|
|
44
|
+
if (!model.id || !model.model) {
|
|
45
|
+
throw new Error("each ensemble model requires id and model");
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
if (!descriptor.runtime?.id)
|
|
49
|
+
throw new Error("ensemble descriptor requires one runtime");
|
|
50
|
+
if (!descriptor.judge?.id)
|
|
51
|
+
throw new Error("ensemble descriptor requires one judge");
|
|
52
|
+
if (!descriptor.policy?.id)
|
|
53
|
+
throw new Error("ensemble descriptor requires one policy");
|
|
54
|
+
}
|
|
55
|
+
function freezeResult(result) {
|
|
56
|
+
for (const candidate of result.candidates) {
|
|
57
|
+
candidate.artifacts?.forEach((artifact) => Object.freeze(artifact));
|
|
58
|
+
Object.freeze(candidate.artifacts ?? []);
|
|
59
|
+
Object.freeze(candidate);
|
|
60
|
+
}
|
|
61
|
+
for (const artifact of result.artifacts)
|
|
62
|
+
Object.freeze(artifact);
|
|
63
|
+
for (const toolRecord of result.toolRecords)
|
|
64
|
+
Object.freeze(toolRecord);
|
|
65
|
+
for (const modelCallRecord of result.modelCallRecords)
|
|
66
|
+
Object.freeze(modelCallRecord);
|
|
67
|
+
if (result.harnessRunResult.artifacts) {
|
|
68
|
+
for (const artifact of result.harnessRunResult.artifacts)
|
|
69
|
+
Object.freeze(artifact);
|
|
70
|
+
Object.freeze(result.harnessRunResult.artifacts);
|
|
71
|
+
}
|
|
72
|
+
Object.freeze(result.harnessRunResult);
|
|
73
|
+
Object.freeze(result.harnessRunRequest);
|
|
74
|
+
Object.freeze(result.summary?.candidates ?? []);
|
|
75
|
+
Object.freeze(result.summary?.artifacts ?? []);
|
|
76
|
+
if (result.summary)
|
|
77
|
+
Object.freeze(result.summary);
|
|
78
|
+
Object.freeze(result.candidates);
|
|
79
|
+
Object.freeze(result.artifacts);
|
|
80
|
+
Object.freeze(result.toolRecords);
|
|
81
|
+
Object.freeze(result.modelCallRecords);
|
|
82
|
+
return Object.freeze(result);
|
|
83
|
+
}
|
|
84
|
+
/**
 * Assembles the `metadata` object stored on a candidate record.
 *
 * Starts from the model identity, adds verification, summary and worktree
 * snapshot fields when available, then overlays `output.metadata` (which may
 * override the earlier keys). A fallback hardening block is supplied when the
 * merged object has none, and two boolean markers are set when review
 * evidence or a model call record exists.
 *
 * @param output Candidate output from the harness.
 * @param descriptor Ensemble descriptor for the run.
 * @param worktree Worktree for the candidate, or undefined.
 * @returns The merged metadata object.
 */
function candidateMetadata(output, descriptor, worktree) {
    const { model } = output;
    const merged = {
        model_id: model.id,
        model: model.model,
        endpoint_id: model.endpointId ?? model.id,
        ...(output.verification !== undefined ? { verification: output.verification } : {}),
        ...(output.summary !== undefined ? { summary: output.summary } : {}),
        ...(worktree !== undefined
            ? { base_git_sha: worktree.baseGitSha, snapshot_hash: worktree.snapshotHash }
            : {}),
        // Harness-supplied metadata is applied last and therefore wins on key clashes.
        ...(output.metadata ?? {})
    };
    if (merged.hardening === undefined) {
        merged.hardening = fallbackCandidateHardening(descriptor);
    }
    if (descriptor.reviewEvidence !== undefined) {
        merged.review_evidence_attached = true;
    }
    if (output.modelCallRecord !== undefined) {
        merged.model_call_recorded = true;
    }
    return merged;
}
|
|
112
|
+
function artifactsForOutput(input) {
|
|
113
|
+
const artifacts = [...(input.output.artifacts ?? [])];
|
|
114
|
+
const prefix = `${input.descriptor.id}_${input.candidateId}`;
|
|
115
|
+
if (input.patch.length > 0) {
|
|
116
|
+
artifacts.push(input.store.writeText({
|
|
117
|
+
artifactId: `${prefix}_patch`,
|
|
118
|
+
kind: "patch",
|
|
119
|
+
content: input.patch,
|
|
120
|
+
suffix: ".patch"
|
|
121
|
+
}));
|
|
122
|
+
}
|
|
123
|
+
if (input.output.transcript !== undefined) {
|
|
124
|
+
artifacts.push(input.store.writeText({
|
|
125
|
+
artifactId: `${prefix}_transcript`,
|
|
126
|
+
kind: "transcript",
|
|
127
|
+
content: input.output.transcript,
|
|
128
|
+
suffix: ".txt"
|
|
129
|
+
}));
|
|
130
|
+
}
|
|
131
|
+
if (input.output.log !== undefined) {
|
|
132
|
+
artifacts.push(input.store.writeText({
|
|
133
|
+
artifactId: `${prefix}_log`,
|
|
134
|
+
kind: "log",
|
|
135
|
+
content: input.output.log,
|
|
136
|
+
suffix: ".log"
|
|
137
|
+
}));
|
|
138
|
+
}
|
|
139
|
+
if (input.output.toolRecords && input.output.toolRecords.length > 0) {
|
|
140
|
+
artifacts.push(input.store.writeJson({
|
|
141
|
+
artifactId: `${prefix}_tool_journal`,
|
|
142
|
+
kind: "other",
|
|
143
|
+
value: input.output.toolRecords
|
|
144
|
+
}));
|
|
145
|
+
}
|
|
146
|
+
if (input.output.verification !== undefined) {
|
|
147
|
+
artifacts.push(input.store.writeJson({
|
|
148
|
+
artifactId: `${prefix}_verification`,
|
|
149
|
+
kind: "metrics",
|
|
150
|
+
value: input.output.verification
|
|
151
|
+
}));
|
|
152
|
+
}
|
|
153
|
+
if (input.output.modelCallRecord !== undefined) {
|
|
154
|
+
artifacts.push(input.store.writeJson({
|
|
155
|
+
artifactId: `${prefix}_model_call_record`,
|
|
156
|
+
kind: "metrics",
|
|
157
|
+
value: input.output.modelCallRecord
|
|
158
|
+
}));
|
|
159
|
+
}
|
|
160
|
+
if (input.worktree !== undefined) {
|
|
161
|
+
artifacts.push(input.store.writeJson({
|
|
162
|
+
artifactId: `${prefix}_worktree`,
|
|
163
|
+
kind: "worktree",
|
|
164
|
+
value: {
|
|
165
|
+
path: input.worktree.path,
|
|
166
|
+
baseGitSha: input.worktree.baseGitSha,
|
|
167
|
+
snapshotHash: input.worktree.snapshotHash
|
|
168
|
+
}
|
|
169
|
+
}));
|
|
170
|
+
}
|
|
171
|
+
artifacts.push(...(input.output.screenshots ?? []));
|
|
172
|
+
return artifacts;
|
|
173
|
+
}
|
|
174
|
+
/**
 * Produces a copy of an artifact without its `path` property.
 *
 * @param artifact Artifact object, possibly carrying a `path`.
 * @returns A shallow copy with every own enumerable property except `path`.
 */
function artifactRef(artifact) {
    const ref = { ...artifact };
    delete ref.path;
    return ref;
}
|
|
178
|
+
/**
 * Formats a one-line summary of how many candidates a harness produced and
 * how many ended in each status.
 *
 * @param outputs Candidate outputs, each with a `status`.
 * @param harnessId Identifier of the harness named in the summary.
 * @returns Text of the form
 *   "N candidate(s) produced by ID; statuses a:1, b:2", with statuses ordered
 *   by `localeCompare`.
 */
function outputSummary(outputs, harnessId) {
    const tally = outputs.reduce((seen, { status }) => seen.set(status, (seen.get(status) ?? 0) + 1), new Map());
    const orderedStatuses = Array.from(tally.keys()).sort((left, right) => left.localeCompare(right));
    const parts = [];
    for (const status of orderedStatuses) {
        parts.push(`${status}:${tally.get(status)}`);
    }
    const countText = parts.join(", ");
    return `${outputs.length} candidate(s) produced by ${harnessId}; statuses ${countText}`;
}
|
|
188
|
+
/**
 * Describes the isolation requested for the run, for the request metadata.
 *
 * Always reports `requested_isolation` ("process" when no isolation kind is
 * set) and `runtime_id`, plus `environment_id` when the runtime defines one.
 * Container isolation adds image, engine and driver; microVM isolation adds
 * provider, runtime, driver, optional snapshot/sandbox/image-digest fields
 * and a runtime digest. Missing values fall back to the module defaults.
 *
 * @param descriptor Ensemble descriptor whose `runtime` is inspected.
 * @returns The hardening metadata object.
 */
function runtimeHardeningMetadata(descriptor) {
    const { runtime } = descriptor;
    const isolation = runtime.isolation;
    const kind = isolation?.kind;
    // Entries are collected in output order and materialised at the end.
    const entries = [
        ["requested_isolation", kind ?? "process"],
        ["runtime_id", runtime.id]
    ];
    if (runtime.environmentId !== undefined) {
        entries.push(["environment_id", runtime.environmentId]);
    }
    if (kind === "container") {
        const engine = isolation.engine ?? DEFAULT_CONTAINER_ENGINE;
        entries.push(["image", isolation.image ?? DEFAULT_CONTAINER_IMAGE]);
        entries.push(["engine", engine]);
        // The driver defaults to the engine name when no explicit driver is given.
        entries.push(["driver", isolation.driver?.id ?? engine]);
    }
    else if (kind === "microvm") {
        const provider = isolation.provider ?? DEFAULT_MICROVM_PROVIDER;
        entries.push(["provider", provider]);
        entries.push(["runtime", isolation.runtime ?? DEFAULT_MICROVM_RUNTIME]);
        entries.push(["driver", isolation.driver?.id ?? `${provider}-driver`]);
        if (isolation.snapshotId !== undefined) {
            entries.push(["snapshot_id", isolation.snapshotId]);
        }
        if (isolation.sandboxId !== undefined) {
            entries.push(["sandbox_id", isolation.sandboxId]);
        }
        if (isolation.imageDigest !== undefined) {
            entries.push(["image_digest", isolation.imageDigest]);
        }
        entries.push(["runtime_digest", isolation.runtimeDigest ?? UNKNOWN_RUNTIME_DIGEST]);
    }
    return Object.fromEntries(entries);
}
|
|
219
|
+
/**
 * Builds the hardening report used when a candidate output supplies none.
 *
 * The report echoes the requested isolation and its policies from the
 * descriptor but records `actual_isolation` as "process", network policy as
 * not enforced, cleanup as not required and the secret scan as not performed.
 *
 * @param descriptor Ensemble descriptor whose `runtime.isolation` is read.
 * @returns The fallback hardening object.
 */
function fallbackCandidateHardening(descriptor) {
    const isolation = descriptor.runtime.isolation;
    const kind = isolation?.kind;
    const mounts = isolation?.mountPolicy;
    const network = isolation?.networkPolicy;
    const secrets = isolation?.secretPolicy;
    // Runtime details depend on the isolation kind; `workdir` always comes last.
    const runtimeEntries = [];
    if (kind === "container") {
        runtimeEntries.push(["image", isolation.image ?? DEFAULT_CONTAINER_IMAGE]);
    }
    if (kind === "microvm") {
        runtimeEntries.push(["provider", isolation.provider ?? DEFAULT_MICROVM_PROVIDER]);
        runtimeEntries.push(["runtime", isolation.runtime ?? DEFAULT_MICROVM_RUNTIME]);
        if (isolation.snapshotId !== undefined) {
            runtimeEntries.push(["snapshot_id", isolation.snapshotId]);
        }
        if (isolation.sandboxId !== undefined) {
            runtimeEntries.push(["sandbox_id", isolation.sandboxId]);
        }
        if (isolation.imageDigest !== undefined) {
            runtimeEntries.push(["image_digest", isolation.imageDigest]);
        }
        runtimeEntries.push(["runtime_digest", isolation.runtimeDigest ?? UNKNOWN_RUNTIME_DIGEST]);
    }
    runtimeEntries.push(["workdir", mounts?.workdir ?? DEFAULT_CONTAINER_WORKDIR]);
    // Copies a policy list so the report never aliases the descriptor's arrays.
    const copyList = (list, fallback) => [...(list ?? fallback)];
    return {
        requested_isolation: kind ?? "process",
        actual_isolation: "process",
        runtime: Object.fromEntries(runtimeEntries),
        mount_policy: {
            worktree_writable: mounts?.worktreeWritable ?? true,
            read_only_caches: copyList(mounts?.readOnlyCachePaths, []),
            ignored_dirs: copyList(mounts?.ignoredDirs, [".git", "node_modules", ".warrant"])
        },
        network_policy: {
            default_deny: network?.defaultDeny ?? true,
            allow_hosts: copyList(network?.allowHosts, []),
            enforced: false
        },
        cleanup: {
            attempted: false,
            succeeded: true,
            status: "not_required"
        },
        secret_absence: {
            secret_names: copyList(secrets?.secretNames, []),
            secret_value_hashes: copyList(secrets?.secretValueHashes, []),
            injected_env_names: copyList(secrets?.injectedEnvNames, []),
            scanned: false,
            leaks_found: false,
            scan_scope: [],
            leak_count: 0
        }
    };
}
|
|
269
|
+
/**
 * Resolves the hardening report for a candidate output.
 *
 * @param output Candidate output, or undefined.
 * @param descriptor Ensemble descriptor used to build the fallback report.
 * @returns `output.metadata.hardening` when it is a non-null, non-array
 *   object; the fallback report when `output` is not undefined; otherwise
 *   undefined.
 */
function candidateHardening(output, descriptor) {
    if (output === undefined) {
        return undefined;
    }
    const reported = output?.metadata?.hardening;
    const isPlainRecord = reported !== null && typeof reported === "object" && !Array.isArray(reported);
    return isPlainRecord ? reported : fallbackCandidateHardening(descriptor);
}
|
|
278
|
+
/**
 * Runs one ensemble: validates the descriptor, asks the harness to produce a
 * candidate per model, records artifacts, runs judge synthesis and returns a
 * frozen result.
 *
 * Steps:
 *  1. Validate the descriptor and build/validate the harness run request.
 *  2. `prepare` the harness, then run every model concurrently and wait for
 *     all of them to settle; the first rejection is re-thrown afterwards.
 *  3. Collect harness artifacts, seal worktrees (when a worktree plan exists)
 *     and write per-candidate artifacts to the artifact store.
 *  4. Build and validate one candidate record per output.
 *  5. Run judge synthesis, write the summary artifact, build and validate the
 *     run result.
 *  6. Always call the optional `harness.cleanup`, and clean up worktrees when
 *     `descriptor.cleanupWorktrees === true`.
 *
 * @param descriptor Ensemble descriptor (harness, models, runtime, judge,
 *   policy, prompt and related settings).
 * @returns A promise for the frozen ensemble result.
 * @throws Whatever descriptor/record validation, the harness, synthesis or
 *   cleanup throws.
 */
export async function runEnsemble(descriptor) {
    assertDescriptor(descriptor);
    const createdAt = new Date().toISOString();
    const capabilities = descriptor.harness.capabilities(descriptor);
    const harnessKind = descriptor.harness.harnessKind ?? "generic";
    const outputRoot = defaultOutputRoot(descriptor);
    const store = createArtifactStore(`${outputRoot}/artifacts`);
    const worktreePlan = createWorktreePlan(descriptor);
    const request = {
        ...metadata({ schema: "harness-run-request.v1", createdAt }),
        request_id: `ensemble_req_${descriptor.id}`,
        harness_kind: harnessKind,
        source_repo: descriptor.sourceRepo,
        base_git_sha: descriptor.baseGitSha,
        prompt: descriptor.prompt,
        prompt_hash: requestHash({
            prompt: descriptor.prompt,
            descriptor_id: descriptor.id
        }),
        allowed_tools: descriptor.policy.allowedTools,
        side_effects: descriptor.policy.sideEffects,
        requested_capabilities: capabilities,
        metadata: {
            harness_id: descriptor.harness.id,
            runtime_id: descriptor.runtime.id,
            judge_id: descriptor.judge.id,
            policy_id: descriptor.policy.id,
            hardening: runtimeHardeningMetadata(descriptor),
            output_root: outputRoot,
            ...(worktreePlan
                ? {
                    snapshot_hash: worktreePlan.snapshotHash,
                    snapshot_base_git_sha: worktreePlan.baseGitSha
                }
                : {}),
            // Descriptor metadata is spread last, so it can override the keys above.
            ...(descriptor.metadata ?? {})
        }
    };
    assertHarnessRunRequestV1(request);
    let prepared;
    let outputs = [];
    let cleanupWorktrees = worktreePlan?.worktrees ?? [];
    try {
        prepared = await descriptor.harness.prepare({ descriptor, request });
        // Settle every candidate before continuing so a single failure cannot leave
        // siblings running while we tear down their worktrees. A hard failure still
        // aborts the run (re-thrown below) once all candidates have stopped.
        const settled = await Promise.allSettled(descriptor.models.map((model, ordinal) => descriptor.harness.run({
            descriptor,
            request,
            model,
            ordinal,
            prepared,
            worktree: worktreePlan?.worktrees[ordinal]
        })));
        const rejection = settled.find((result) => result.status === "rejected");
        if (rejection !== undefined)
            throw rejection.reason;
        outputs = settled.map((result) => result.value);
        const collectedArtifacts = await descriptor.harness.collectArtifacts({
            descriptor,
            request,
            candidates: outputs,
            prepared
        });
        const verification = descriptor.harness.verificationProfile(descriptor);
        const generatedArtifacts = new Map();
        const sealedWorktrees = worktreePlan?.worktrees.map((worktree) => sealCandidateWorktree(worktree));
        cleanupWorktrees = sealedWorktrees ?? cleanupWorktrees;
        // Candidate ids are resolved once: harness-provided id, then the worktree's
        // id, then a descriptor/model/ordinal fallback.
        const candidateIds = outputs.map((output, ordinal) => output.candidateId ??
            sealedWorktrees?.[ordinal]?.candidateId ??
            `${descriptor.id}_${output.model.id}_${ordinal}`);
        // Hardening is resolved once per output and reused by the summary and the
        // cleanup counters below.
        const hardenings = outputs.map((output) => candidateHardening(output, descriptor));
        for (const [ordinal, output] of outputs.entries()) {
            const worktree = sealedWorktrees?.[ordinal];
            const id = candidateIds[ordinal];
            // With a worktree the patch comes from its diff; otherwise from the
            // diff text reported by the harness.
            const patch = worktree ? diffCandidateWorktree(worktree) : (output.diff ?? "");
            generatedArtifacts.set(id, artifactsForOutput({
                descriptor,
                candidateId: id,
                output,
                patch,
                ...(worktree ? { worktree } : {}),
                store
            }));
        }
        const modelCallRecords = outputs.flatMap((output) => output.modelCallRecord ? [output.modelCallRecord] : []);
        const candidates = outputs.map((output, ordinal) => {
            const worktree = sealedWorktrees?.[ordinal];
            const id = candidateIds[ordinal];
            // Records carry artifact references without the `path` property.
            const artifacts = (generatedArtifacts.get(id) ?? output.artifacts ?? []).map(artifactRef);
            const record = {
                ...metadata({ schema: "harness-candidate-record.v1", createdAt }),
                candidate_id: id,
                request_id: request.request_id,
                harness_kind: harnessKind,
                model_call_id: output.modelCallId ?? output.modelCallRecord?.call_id ?? `${id}_model_call`,
                status: output.status,
                side_effects: descriptor.policy.sideEffects,
                artifacts,
                ...(output.branchName ?? worktree?.branchName
                    ? { branch_name: output.branchName ?? worktree?.branchName }
                    : {}),
                ...(output.worktreePath ?? worktree?.path
                    ? { worktree_path: output.worktreePath ?? worktree?.path }
                    : {}),
                ...(output.score !== undefined ? { score: output.score } : {}),
                ...(output.error ? { error: output.error } : {}),
                metadata: candidateMetadata(output, descriptor, worktree)
            };
            assertHarnessCandidateRecordV1(record);
            return record;
        });
        const baseArtifacts = [
            ...collectedArtifacts,
            ...candidates.flatMap((candidate) => candidate.artifacts ?? [])
        ];
        const toolRecords = outputs.flatMap((output) => output.toolRecords ?? []);
        const synthesis = await runJudgeSynthesis({
            descriptor,
            candidates,
            outputs,
            artifacts: baseArtifacts,
            toolRecords,
            modelCallRecords,
            ...(descriptor.reviewEvidence ? { reviewEvidence: descriptor.reviewEvidence } : {}),
            ...(worktreePlan?.workspace ? { workspace: worktreePlan.workspace } : {}),
            ...(worktreePlan?.baseGitSha ? { baseGitSha: worktreePlan.baseGitSha } : {}),
            store
        });
        const artifacts = [
            ...baseArtifacts,
            ...(synthesis?.artifacts ?? [])
        ];
        const summary = {
            descriptorId: descriptor.id,
            ...(worktreePlan
                ? {
                    snapshot: {
                        baseGitSha: worktreePlan.baseGitSha,
                        snapshotHash: worktreePlan.snapshotHash,
                        workspace: worktreePlan.workspace
                    }
                }
                : {}),
            candidates: candidates.map((candidate, ordinal) => {
                const output = outputs[ordinal];
                const hardening = hardenings[ordinal];
                const diffArtifacts = (candidate.artifacts ?? []).filter((artifact) => artifact.kind === "patch");
                return {
                    candidateId: candidate.candidate_id,
                    modelId: output?.model.id ?? "",
                    model: output?.model.model ?? "",
                    ...(candidate.model_call_id ? { modelCallId: candidate.model_call_id } : {}),
                    status: candidate.status,
                    ...(candidate.branch_name ? { branchName: candidate.branch_name } : {}),
                    ...(candidate.worktree_path ? { worktreePath: candidate.worktree_path } : {}),
                    toolExecutionIds: output?.toolRecords?.map((record) => record.execution_id) ?? [],
                    diffArtifacts,
                    ...(output?.verification ? { verification: output.verification } : {}),
                    ...(hardening ? { hardening } : {})
                };
            }),
            artifacts,
            modelCallRecords,
            ...(synthesis?.judgeSynthesisRecord
                ? { judgeSynthesisRecord: synthesis.judgeSynthesisRecord }
                : {}),
            finalPatchPath: synthesis?.finalPatchPath ?? null,
            ...(synthesis?.repairAttempts ? { repairAttempts: synthesis.repairAttempts } : {}),
            ...(synthesis?.failureSummary ? { failureSummary: synthesis.failureSummary } : {})
        };
        const summaryArtifact = store.writeJson({
            artifactId: `${descriptor.id}_summary`,
            kind: "metrics",
            value: summary
        });
        const summaryPath = summaryArtifact.path;
        const summaryArtifactRef = artifactRef(summaryArtifact);
        const result = {
            ...metadata({ schema: "harness-run-result.v1", createdAt }),
            result_id: `ensemble_result_${descriptor.id}`,
            request_id: request.request_id,
            harness_kind: harnessKind,
            status: terminalStatus(outputs),
            candidate_ids: candidates.map((candidate) => candidate.candidate_id),
            output_summary: outputSummary(outputs, descriptor.harness.id),
            artifacts: [...artifacts, summaryArtifactRef],
            capabilities,
            started_at: createdAt,
            finished_at: new Date().toISOString(),
            metadata: {
                descriptor_id: descriptor.id,
                summary_path: summaryPath,
                hardening: {
                    requested_isolation: descriptor.runtime.isolation?.kind ?? "process",
                    candidate_count: candidates.length,
                    // `cleanup` is read defensively: harness-supplied hardening
                    // metadata is not guaranteed to include it.
                    cleanup_succeeded: hardenings.filter((hardening) => hardening?.cleanup?.succeeded === true).length,
                    cleanup_failed: hardenings.filter((hardening) => hardening?.cleanup?.status === "failed").length
                },
                ...(descriptor.reviewEvidence !== undefined
                    ? { review_evidence: descriptor.reviewEvidence }
                    : {})
            }
        };
        assertHarnessRunResultV1(result);
        return freezeResult({
            descriptorId: descriptor.id,
            harnessRunRequest: request,
            harnessRunResult: result,
            candidates,
            artifacts: [...artifacts, summaryArtifactRef],
            toolRecords,
            modelCallRecords,
            verification,
            summaryPath,
            summary,
            ...(synthesis?.judgeSynthesisRecord
                ? { judgeSynthesisRecord: synthesis.judgeSynthesisRecord }
                : {}),
            ...(synthesis ? { finalPatchPath: synthesis.finalPatchPath } : {}),
            ...(synthesis?.repairAttempts ? { repairAttempts: synthesis.repairAttempts } : {}),
            ...(synthesis?.failureSummary ? { failureSummary: synthesis.failureSummary } : {}),
            ...(descriptor.reviewEvidence ? { reviewEvidence: descriptor.reviewEvidence } : {})
        });
    }
    finally {
        // Nested try/finally: worktree cleanup must still run when the harness
        // cleanup hook throws, otherwise the worktrees would be left behind.
        try {
            await descriptor.harness.cleanup?.({
                descriptor,
                request,
                candidates: outputs,
                prepared
            });
        }
        finally {
            if (worktreePlan && descriptor.cleanupWorktrees === true) {
                cleanupWorktrees = cleanupWorktreePlan({
                    ...worktreePlan,
                    worktrees: cleanupWorktrees
                });
            }
        }
    }
}
|
|
518
|
+
/** Object-style entry point: `ensemble.run` is `runEnsemble`. */
export const ensemble = { run: runEnsemble };
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { HarnessCandidateRecordV1, JudgeSynthesisRecordV1 } from "@fusionkit/protocol";
|
|
2
|
+
import type { ArtifactStore } from "./artifacts.js";
|
|
3
|
+
import type { EnsembleDescriptor, HarnessArtifact, HarnessCandidateOutput, HarnessToolRecord } from "./harness.js";
|
|
4
|
+
import type { JudgeInput, SynthesisFailureSummary, SynthesisRepairAttempt } from "./judge.js";
|
|
5
|
+
/** Value resolved by `runJudgeSynthesis` when it produces a result. */
export type SynthesisResult = {
    /** Input handed to the judge. */
    judgeInput: JudgeInput;
    /** Judge synthesis record for the run. */
    judgeSynthesisRecord: JudgeSynthesisRecordV1;
    /** Artifacts returned alongside the synthesis result. */
    artifacts: HarnessArtifact[];
    /** Path of the final patch, or null — NOTE(review): presumably null when no final patch exists; confirm. */
    finalPatchPath: string | null;
    /** Repair attempts recorded during synthesis. */
    repairAttempts: SynthesisRepairAttempt[];
    /** Optional failure summary. */
    failureSummary?: SynthesisFailureSummary;
};
|
|
13
|
+
/** Input accepted by `runJudgeSynthesis`. */
export type RunSynthesisInput = {
    /** Ensemble descriptor for the run. */
    descriptor: EnsembleDescriptor;
    /** Candidate records for the run. */
    candidates: readonly HarnessCandidateRecordV1[];
    /** Candidate outputs — presumably index-aligned with `candidates`; verify against the caller. */
    outputs: readonly HarnessCandidateOutput[];
    /** Artifacts available to synthesis. */
    artifacts: readonly HarnessArtifact[];
    /** Tool records for the run. */
    toolRecords: readonly HarnessToolRecord[];
    /** Model call records, typed as in `JudgeInput`. */
    modelCallRecords: JudgeInput["modelCallRecords"];
    /** Optional review evidence, typed as in `JudgeInput`. */
    reviewEvidence?: JudgeInput["reviewEvidence"];
    /** Optional workspace string — NOTE(review): looks like a filesystem path; confirm. */
    workspace?: string;
    /** Optional base git SHA. */
    baseGitSha?: string;
    /** Artifact store made available to synthesis. */
    store: ArtifactStore;
};
|
|
25
|
+
/**
 * Runs judge synthesis for an ensemble run.
 *
 * @param input Descriptor, candidates, outputs, records and artifact store.
 * @returns A promise resolving to a `SynthesisResult`, or `undefined` when no
 *   result is produced.
 */
export declare function runJudgeSynthesis(input: RunSynthesisInput): Promise<SynthesisResult | undefined>;
|