@fusionkit/ensemble 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/dist/agent.d.ts +21 -0
  2. package/dist/agent.js +186 -0
  3. package/dist/artifacts.d.ts +21 -0
  4. package/dist/artifacts.js +36 -0
  5. package/dist/claude-code.d.ts +25 -0
  6. package/dist/claude-code.js +398 -0
  7. package/dist/codex.d.ts +69 -0
  8. package/dist/codex.js +467 -0
  9. package/dist/command.d.ts +15 -0
  10. package/dist/command.js +82 -0
  11. package/dist/dashboard.d.ts +62 -0
  12. package/dist/dashboard.js +788 -0
  13. package/dist/external-executor.d.ts +56 -0
  14. package/dist/external-executor.js +288 -0
  15. package/dist/harness.d.ts +337 -0
  16. package/dist/harness.js +1 -0
  17. package/dist/index.d.ts +30 -0
  18. package/dist/index.js +15 -0
  19. package/dist/isolation.d.ts +25 -0
  20. package/dist/isolation.js +509 -0
  21. package/dist/judge.d.ts +77 -0
  22. package/dist/judge.js +16 -0
  23. package/dist/mock.d.ts +20 -0
  24. package/dist/mock.js +56 -0
  25. package/dist/run.d.ts +5 -0
  26. package/dist/run.js +520 -0
  27. package/dist/synthesis.d.ts +25 -0
  28. package/dist/synthesis.js +221 -0
  29. package/dist/test/codex.test.d.ts +1 -0
  30. package/dist/test/codex.test.js +237 -0
  31. package/dist/test/dashboard.test.d.ts +1 -0
  32. package/dist/test/dashboard.test.js +214 -0
  33. package/dist/test/ensemble.test.d.ts +1 -0
  34. package/dist/test/ensemble.test.js +780 -0
  35. package/dist/test/external-executor.test.d.ts +1 -0
  36. package/dist/test/external-executor.test.js +273 -0
  37. package/dist/test/isolation.test.d.ts +1 -0
  38. package/dist/test/isolation.test.js +359 -0
  39. package/dist/test/tool-executor.test.d.ts +1 -0
  40. package/dist/test/tool-executor.test.js +113 -0
  41. package/dist/test/unified.test.d.ts +1 -0
  42. package/dist/test/unified.test.js +150 -0
  43. package/dist/tool-executor.d.ts +14 -0
  44. package/dist/tool-executor.js +156 -0
  45. package/dist/trace.d.ts +8 -0
  46. package/dist/trace.js +7 -0
  47. package/dist/unified.d.ts +101 -0
  48. package/dist/unified.js +422 -0
  49. package/dist/worktree.d.ts +25 -0
  50. package/dist/worktree.js +75 -0
  51. package/package.json +35 -0
package/dist/mock.d.ts ADDED
@@ -0,0 +1,20 @@
1
+ import type { HarnessAdapter, HarnessArtifact, HarnessCandidateOutput, HarnessToolRecord } from "./harness.js";
/**
 * Canned values for one model's candidate, consumed by `createMockHarness`.
 *
 * Every field is optional. The mock harness substitutes a default for
 * `transcript`, `diff`, `status`, `score`, `artifacts`, `toolRecords` and
 * `verification`; the remaining fields are only copied onto the candidate
 * output when they hold a truthy value.
 */
export type MockCandidateFixture = {
    /** Transcript text; defaults to `mock transcript for <descriptor id>/<model id>`. */
    transcript?: string;
    /** Diff text; defaults to `diff --git a/<model id>.txt b/<model id>.txt`. */
    diff?: string;
    /** Model call identifier to report on the candidate output. */
    modelCallId?: string;
    /** Model call record to report on the candidate output. */
    modelCallRecord?: HarnessCandidateOutput["modelCallRecord"];
    /** Branch name to report on the candidate output. */
    branchName?: string;
    /** Worktree path to report on the candidate output. */
    worktreePath?: string;
    /** Summary text to report on the candidate output. */
    summary?: string;
    /** Candidate status; defaults to `"succeeded"`. */
    status?: HarnessCandidateOutput["status"];
    /** Candidate score; defaults to `1`. */
    score?: number;
    /** Artifacts to report; defaults to a synthetic transcript and patch artifact. */
    artifacts?: HarnessArtifact[];
    /** Tool records to report; defaults to an empty list. */
    toolRecords?: HarnessToolRecord[];
    /** Verification outcome; defaults to a succeeded result with exit code 0. */
    verification?: HarnessCandidateOutput["verification"];
};
/** Options accepted by `createMockHarness`. */
export type MockHarnessOptions = {
    /** Identifier of the created harness; the mock uses `"mock"` when omitted. */
    id?: string;
    /** Per-model fixtures, looked up by the `id` of the model being run. */
    candidates?: Record<string, MockCandidateFixture>;
};
/**
 * Create a harness adapter that returns fixture-driven candidate outputs.
 *
 * @param options Optional harness id and per-model candidate fixtures.
 * @returns A harness adapter backed entirely by the supplied fixtures and defaults.
 */
export declare function createMockHarness(
    options?: MockHarnessOptions
): HarnessAdapter;
package/dist/mock.js ADDED
@@ -0,0 +1,56 @@
1
+ import { artifactHash } from "@fusionkit/protocol";
/**
 * Build a synthetic artifact descriptor for mock content.
 *
 * @param kind Artifact kind to record (the mock uses "transcript" and "patch").
 * @param id Artifact identifier.
 * @param content Content passed to `artifactHash` to produce the `hash` field.
 * @returns Descriptor with `artifact_id`, `kind`, `hash` and a "synthetic" redaction status.
 */
function artifactFor(kind, id, content) {
    const hash = artifactHash(content);
    return { artifact_id: id, kind, hash, redaction_status: "synthetic" };
}
/**
 * Create a harness adapter whose candidates come from fixtures instead of a
 * real model run.
 *
 * @param options Optional `id` (defaults to "mock") and `candidates`, a map from
 *   model id to the fixture used for that model.
 * @returns An adapter exposing `id`, `prepare`, `capabilities`,
 *   `verificationProfile`, `run` and `collectArtifacts`.
 */
export function createMockHarness(options = {}) {
    const harnessId = options.id ?? "mock";
    // Only records when preparation happened.
    const prepare = () => ({ preparedAt: new Date().toISOString() });
    const capabilities = () => ({
        workspace_read: "supported",
        apply_patch: "supported",
        tool_records: "supported",
        verification: "supported"
    });
    const verificationProfile = () => ({
        id: `${harnessId}-verification`,
        requiredEvidence: ["transcript", "diff", "verification"]
    });
    const run = ({ descriptor, model, ordinal }) => {
        const fixture = options.candidates?.[model.id] ?? {};
        // Optional fields are emitted only when the fixture holds a truthy value.
        const whenSet = (key) => (fixture[key] ? { [key]: fixture[key] } : {});
        const runLabel = `${descriptor.id}_${model.id}`;
        const transcript = fixture.transcript ?? `mock transcript for ${descriptor.id}/${model.id}`;
        const diff = fixture.diff ?? `diff --git a/${model.id}.txt b/${model.id}.txt`;
        let artifacts = fixture.artifacts;
        if (artifacts === undefined || artifacts === null) {
            // No explicit artifacts: synthesize one per piece of default evidence.
            artifacts = [
                artifactFor("transcript", `artifact_${runLabel}_transcript`, transcript),
                artifactFor("patch", `artifact_${runLabel}_diff`, diff)
            ];
        }
        return {
            candidateId: `${runLabel}_${ordinal}`,
            model,
            status: fixture.status ?? "succeeded",
            ...whenSet("modelCallId"),
            ...whenSet("modelCallRecord"),
            ...whenSet("branchName"),
            ...whenSet("worktreePath"),
            transcript,
            diff,
            ...whenSet("summary"),
            score: fixture.score ?? 1,
            artifacts,
            toolRecords: fixture.toolRecords ?? [],
            verification: fixture.verification ?? {
                status: "succeeded",
                evidence: ["mock verification passed"],
                exitCode: 0
            }
        };
    };
    // The mock contributes no run-level artifacts.
    const collectArtifacts = () => [];
    return {
        id: harnessId,
        prepare,
        capabilities,
        verificationProfile,
        run,
        collectArtifacts
    };
}
package/dist/run.d.ts ADDED
@@ -0,0 +1,5 @@
1
+ import type { EnsembleDescriptor, EnsembleRunResult } from "./harness.js";
/**
 * Run the ensemble described by `descriptor`.
 *
 * @param descriptor Harness, models, runtime, judge and policy for the run.
 * @returns A promise for the run result.
 */
export declare function runEnsemble(
    descriptor: EnsembleDescriptor
): Promise<EnsembleRunResult>;
/** Object-style entry point for the ensemble runner. */
export declare const ensemble: {
    /** Same function as the named `runEnsemble` export. */
    readonly run: typeof runEnsemble;
};
package/dist/run.js ADDED
@@ -0,0 +1,520 @@
1
+ import { assertHarnessCandidateRecordV1, assertHarnessRunRequestV1, assertHarnessRunResultV1, MODEL_FUSION_SCHEMA_BUNDLE_HASH, requestHash } from "@fusionkit/protocol";
2
+ import { createArtifactStore } from "./artifacts.js";
3
+ import { runJudgeSynthesis } from "./synthesis.js";
4
+ import { cleanupWorktreePlan, createWorktreePlan, defaultOutputRoot, diffCandidateWorktree, sealCandidateWorktree } from "./worktree.js";
// Producer identity stamped into every record envelope built by metadata().
// NOTE(review): the git SHA is forty zeros, which looks like a placeholder; confirm.
const PRODUCER_GIT_SHA = "".padEnd(40, "0");
const PRODUCER = "handoffkit-ensemble";
const PRODUCER_VERSION = "0.1.0";

// Defaults reported when a container isolation request omits these fields.
const DEFAULT_CONTAINER_IMAGE = "node:22";
const DEFAULT_CONTAINER_ENGINE = "docker";
const DEFAULT_CONTAINER_WORKDIR = "/workspace";

// Defaults reported when a microvm isolation request omits these fields.
const DEFAULT_MICROVM_PROVIDER = "vercel-sandbox";
const DEFAULT_MICROVM_RUNTIME = "node24";

// Reported as the runtime digest when the isolation config does not supply one.
const UNKNOWN_RUNTIME_DIGEST = "unknown";
/**
 * Build the envelope fields shared by every record this module emits.
 *
 * @param input Object carrying the record's `schema` name and `createdAt` timestamp.
 * @returns Envelope with schema name, fixed "v1" schema version, schema bundle
 *   hash, producer identity and creation time.
 */
function metadata(input) {
    const { schema, createdAt: created_at } = input;
    return {
        schema,
        schema_version: "v1",
        schema_bundle_hash: MODEL_FUSION_SCHEMA_BUNDLE_HASH,
        producer: PRODUCER,
        producer_version: PRODUCER_VERSION,
        producer_git_sha: PRODUCER_GIT_SHA,
        created_at
    };
}
/**
 * Collapse per-candidate statuses into one run status.
 *
 * Precedence: any "failed" wins, then any "requires_action"; the run is
 * "skipped" only when every candidate is skipped (an empty list counts as
 * skipped); anything else is "succeeded".
 *
 * @param outputs Candidate outputs, each with a `status`.
 * @returns The aggregate status string.
 */
function terminalStatus(outputs) {
    const seen = new Set(outputs.map((output) => output.status));
    if (seen.has("failed")) {
        return "failed";
    }
    if (seen.has("requires_action")) {
        return "requires_action";
    }
    const onlySkipped = [...seen].every((status) => status === "skipped");
    return onlySkipped ? "skipped" : "succeeded";
}
/**
 * Validate the shape of an ensemble descriptor before a run starts.
 *
 * Checks, in order: the descriptor is an object, carries no `checks` value,
 * has a harness, has a non-empty `models` array whose entries each have `id`
 * and `model`, and has a runtime, judge and policy that each carry an `id`.
 *
 * @param descriptor Candidate descriptor to validate.
 * @throws {TypeError} When `descriptor` is null or not an object.
 * @throws {Error} On the first violated rule, with a message naming it.
 */
function assertDescriptor(descriptor) {
    // Guard first: the `in` operator below throws an unhelpful engine TypeError
    // on null/undefined/primitives.
    if (descriptor === null || typeof descriptor !== "object") {
        throw new TypeError("ensemble descriptor must be an object");
    }
    if ("checks" in descriptor && descriptor.checks !== undefined) {
        throw new Error("ensemble descriptors do not accept ad hoc checks");
    }
    if (!descriptor.harness) {
        throw new Error("ensemble descriptor requires one harness");
    }
    if (!Array.isArray(descriptor.models) || descriptor.models.length === 0) {
        throw new Error("ensemble descriptor requires at least one model");
    }
    for (const model of descriptor.models) {
        // Optional chaining turns a null/undefined entry into the same
        // validation error as an entry with missing fields.
        if (!model?.id || !model?.model) {
            throw new Error("each ensemble model requires id and model");
        }
    }
    const requiredParts = [
        ["runtime", "ensemble descriptor requires one runtime"],
        ["judge", "ensemble descriptor requires one judge"],
        ["policy", "ensemble descriptor requires one policy"]
    ];
    for (const [key, message] of requiredParts) {
        if (!descriptor[key]?.id) {
            throw new Error(message);
        }
    }
}
/**
 * Shallow-freeze a run result and the collections hanging off it.
 *
 * Frozen: each candidate and its artifact list/entries; every entry of the
 * top-level artifact, tool-record and model-call-record lists; the harness
 * request and result (plus the result's artifact list/entries); the summary
 * and its `candidates`/`artifacts` lists; the top-level lists; and the result
 * itself. Freezing is shallow, so nested objects not listed stay mutable.
 *
 * @param result Run result to freeze in place.
 * @returns The same `result` object, frozen.
 */
function freezeResult(result) {
    const freezeEach = (items) => {
        for (const item of items) {
            Object.freeze(item);
        }
    };
    for (const candidate of result.candidates) {
        const owned = candidate.artifacts;
        if (owned !== undefined && owned !== null) {
            freezeEach(owned);
            Object.freeze(owned);
        }
        Object.freeze(candidate);
    }
    [result.artifacts, result.toolRecords, result.modelCallRecords].forEach(freezeEach);
    const runArtifacts = result.harnessRunResult.artifacts;
    if (runArtifacts) {
        freezeEach(runArtifacts);
        Object.freeze(runArtifacts);
    }
    Object.freeze(result.harnessRunResult);
    Object.freeze(result.harnessRunRequest);
    const { summary } = result;
    if (summary) {
        // Only the summary's lists are frozen here, not their entries.
        Object.freeze(summary.candidates ?? []);
        Object.freeze(summary.artifacts ?? []);
        Object.freeze(summary);
    }
    const topLevelLists = [
        result.candidates,
        result.artifacts,
        result.toolRecords,
        result.modelCallRecords
    ];
    topLevelLists.forEach((list) => Object.freeze(list));
    return Object.freeze(result);
}
/**
 * Assemble the `metadata` object stored on a candidate record.
 *
 * Starts from the model identity, adds verification, summary and worktree
 * snapshot details when present, then overlays `output.metadata` (which may
 * override any of those keys). A fallback hardening block is filled in when
 * none was supplied, and marker flags are set for attached review evidence
 * and a recorded model call.
 *
 * @param output Candidate output returned by the harness.
 * @param descriptor Ensemble descriptor for the run.
 * @param worktree Sealed worktree for the candidate, if one was planned.
 * @returns The metadata object for the candidate record.
 */
function candidateMetadata(output, descriptor, worktree) {
    const { model, verification, summary } = output;
    const seed = {
        model_id: model.id,
        model: model.model,
        endpoint_id: model.endpointId ?? model.id,
        ...(verification !== undefined ? { verification } : {}),
        ...(summary !== undefined ? { summary } : {}),
        ...(worktree !== undefined
            ? { base_git_sha: worktree.baseGitSha, snapshot_hash: worktree.snapshotHash }
            : {})
    };
    // Harness-supplied metadata is applied last so it can override the seed.
    const record = Object.assign(seed, output.metadata ?? {});
    if (record.hardening === undefined) {
        record.hardening = fallbackCandidateHardening(descriptor);
    }
    if (descriptor.reviewEvidence !== undefined) {
        record.review_evidence_attached = true;
    }
    if (output.modelCallRecord !== undefined) {
        record.model_call_recorded = true;
    }
    return record;
}
112
+ function artifactsForOutput(input) {
113
+ const artifacts = [...(input.output.artifacts ?? [])];
114
+ const prefix = `${input.descriptor.id}_${input.candidateId}`;
115
+ if (input.patch.length > 0) {
116
+ artifacts.push(input.store.writeText({
117
+ artifactId: `${prefix}_patch`,
118
+ kind: "patch",
119
+ content: input.patch,
120
+ suffix: ".patch"
121
+ }));
122
+ }
123
+ if (input.output.transcript !== undefined) {
124
+ artifacts.push(input.store.writeText({
125
+ artifactId: `${prefix}_transcript`,
126
+ kind: "transcript",
127
+ content: input.output.transcript,
128
+ suffix: ".txt"
129
+ }));
130
+ }
131
+ if (input.output.log !== undefined) {
132
+ artifacts.push(input.store.writeText({
133
+ artifactId: `${prefix}_log`,
134
+ kind: "log",
135
+ content: input.output.log,
136
+ suffix: ".log"
137
+ }));
138
+ }
139
+ if (input.output.toolRecords && input.output.toolRecords.length > 0) {
140
+ artifacts.push(input.store.writeJson({
141
+ artifactId: `${prefix}_tool_journal`,
142
+ kind: "other",
143
+ value: input.output.toolRecords
144
+ }));
145
+ }
146
+ if (input.output.verification !== undefined) {
147
+ artifacts.push(input.store.writeJson({
148
+ artifactId: `${prefix}_verification`,
149
+ kind: "metrics",
150
+ value: input.output.verification
151
+ }));
152
+ }
153
+ if (input.output.modelCallRecord !== undefined) {
154
+ artifacts.push(input.store.writeJson({
155
+ artifactId: `${prefix}_model_call_record`,
156
+ kind: "metrics",
157
+ value: input.output.modelCallRecord
158
+ }));
159
+ }
160
+ if (input.worktree !== undefined) {
161
+ artifacts.push(input.store.writeJson({
162
+ artifactId: `${prefix}_worktree`,
163
+ kind: "worktree",
164
+ value: {
165
+ path: input.worktree.path,
166
+ baseGitSha: input.worktree.baseGitSha,
167
+ snapshotHash: input.worktree.snapshotHash
168
+ }
169
+ }));
170
+ }
171
+ artifacts.push(...(input.output.screenshots ?? []));
172
+ return artifacts;
173
+ }
/**
 * Produce a reference copy of an artifact without its `path` field.
 *
 * @param artifact Artifact descriptor, possibly carrying a `path`.
 * @returns A new object with every other own enumerable property of `artifact`.
 */
function artifactRef(artifact) {
    const { path, ...withoutPath } = artifact;
    // `path` is deliberately dropped from the returned reference.
    void path;
    return withoutPath;
}
/**
 * Render a one-line summary of how many candidates ended in each status.
 *
 * @param outputs Candidate outputs, each with a `status`.
 * @param harnessId Identifier of the harness that produced them.
 * @returns Text such as `2 candidate(s) produced by mock; statuses failed:1, succeeded:1`,
 *   with statuses ordered by `localeCompare`.
 */
function outputSummary(outputs, harnessId) {
    const tally = new Map();
    outputs.forEach(({ status }) => {
        tally.set(status, (tally.get(status) ?? 0) + 1);
    });
    const rows = Array.from(tally, ([status, count]) => ({ status, count }));
    rows.sort((a, b) => a.status.localeCompare(b.status));
    const statuses = rows.map((row) => `${row.status}:${row.count}`).join(", ");
    return `${outputs.length} candidate(s) produced by ${harnessId}; statuses ${statuses}`;
}
/**
 * Describe the isolation the descriptor asks for, for embedding in the run
 * request metadata.
 *
 * Always reports the requested isolation kind ("process" when none is
 * configured) and the runtime id, plus the environment id when set. Container
 * requests add image, engine and driver; microvm requests add provider,
 * runtime, driver, any snapshot/sandbox/image-digest identifiers and a
 * runtime digest. Missing values fall back to the module defaults.
 *
 * @param descriptor Ensemble descriptor whose `runtime` is inspected.
 * @returns A plain metadata object.
 */
function runtimeHardeningMetadata(descriptor) {
    const { id: runtimeId, environmentId, isolation } = descriptor.runtime;
    const kind = isolation?.kind;
    const whenDefined = (key, value) => (value === undefined ? {} : { [key]: value });
    const common = {
        requested_isolation: kind ?? "process",
        runtime_id: runtimeId,
        ...whenDefined("environment_id", environmentId)
    };
    switch (kind) {
        case "container": {
            const engine = isolation.engine ?? DEFAULT_CONTAINER_ENGINE;
            return {
                ...common,
                image: isolation.image ?? DEFAULT_CONTAINER_IMAGE,
                engine,
                // Without an explicit driver the engine name doubles as the driver id.
                driver: isolation.driver?.id ?? engine
            };
        }
        case "microvm": {
            const provider = isolation.provider ?? DEFAULT_MICROVM_PROVIDER;
            return {
                ...common,
                provider,
                runtime: isolation.runtime ?? DEFAULT_MICROVM_RUNTIME,
                driver: isolation.driver?.id ?? `${provider}-driver`,
                ...whenDefined("snapshot_id", isolation.snapshotId),
                ...whenDefined("sandbox_id", isolation.sandboxId),
                ...whenDefined("image_digest", isolation.imageDigest),
                runtime_digest: isolation.runtimeDigest ?? UNKNOWN_RUNTIME_DIGEST
            };
        }
        default:
            return common;
    }
}
/**
 * Build the hardening block used when a candidate output did not report one.
 *
 * It echoes the requested isolation and its mount, network and secret
 * policies, but always records the actual isolation as "process", the
 * network policy as not enforced, cleanup as not required, and the secret
 * scan as not performed.
 *
 * @param descriptor Ensemble descriptor whose `runtime.isolation` is inspected.
 * @returns A fresh hardening object (arrays are copies of the policy arrays).
 */
function fallbackCandidateHardening(descriptor) {
    const isolation = descriptor.runtime.isolation;
    const kind = isolation?.kind;
    const { mountPolicy, networkPolicy, secretPolicy } = isolation ?? {};
    const whenDefined = (key, value) => (value === undefined ? {} : { [key]: value });
    const containerDetails = kind === "container"
        ? { image: isolation.image ?? DEFAULT_CONTAINER_IMAGE }
        : {};
    const microvmDetails = kind === "microvm"
        ? {
            provider: isolation.provider ?? DEFAULT_MICROVM_PROVIDER,
            runtime: isolation.runtime ?? DEFAULT_MICROVM_RUNTIME,
            ...whenDefined("snapshot_id", isolation.snapshotId),
            ...whenDefined("sandbox_id", isolation.sandboxId),
            ...whenDefined("image_digest", isolation.imageDigest),
            runtime_digest: isolation.runtimeDigest ?? UNKNOWN_RUNTIME_DIGEST
        }
        : {};
    const runtime = {
        ...containerDetails,
        ...microvmDetails,
        workdir: mountPolicy?.workdir ?? DEFAULT_CONTAINER_WORKDIR
    };
    const mount_policy = {
        worktree_writable: mountPolicy?.worktreeWritable ?? true,
        read_only_caches: [...(mountPolicy?.readOnlyCachePaths ?? [])],
        ignored_dirs: [...(mountPolicy?.ignoredDirs ?? [".git", "node_modules", ".warrant"])]
    };
    const network_policy = {
        default_deny: networkPolicy?.defaultDeny ?? true,
        allow_hosts: [...(networkPolicy?.allowHosts ?? [])],
        enforced: false
    };
    const cleanup = {
        attempted: false,
        succeeded: true,
        status: "not_required"
    };
    const secret_absence = {
        secret_names: [...(secretPolicy?.secretNames ?? [])],
        secret_value_hashes: [...(secretPolicy?.secretValueHashes ?? [])],
        injected_env_names: [...(secretPolicy?.injectedEnvNames ?? [])],
        scanned: false,
        leaks_found: false,
        scan_scope: [],
        leak_count: 0
    };
    return {
        requested_isolation: kind ?? "process",
        actual_isolation: "process",
        runtime,
        mount_policy,
        network_policy,
        cleanup,
        secret_absence
    };
}
/**
 * Resolve the hardening block for a candidate output.
 *
 * @param output Candidate output, or `undefined` when there is none.
 * @param descriptor Ensemble descriptor used to build the fallback.
 * @returns `output.metadata.hardening` when it is a non-array object; the
 *   fallback hardening for any other defined output; `undefined` when
 *   `output` itself is `undefined`.
 */
function candidateHardening(output, descriptor) {
    if (output === undefined) {
        return undefined;
    }
    const reported = output?.metadata?.hardening;
    const isRecord = reported !== null && typeof reported === "object" && !Array.isArray(reported);
    return isRecord ? reported : fallbackCandidateHardening(descriptor);
}
/**
 * Execute one ensemble run end to end and return its frozen result.
 *
 * Steps, in order:
 *  1. Validate the descriptor, then build and validate the
 *     `harness-run-request.v1` envelope.
 *  2. Call `harness.prepare`, then `harness.run` once per model. All runs are
 *     started together and awaited with `Promise.allSettled`.
 *  3. Collect harness artifacts, seal each planned worktree, derive each
 *     candidate's patch and persist per-candidate artifacts via the store.
 *  4. Build and validate one `harness-candidate-record.v1` per output.
 *  5. Pass candidates, outputs and artifacts to `runJudgeSynthesis`.
 *  6. Write the summary artifact, build and validate the
 *     `harness-run-result.v1` envelope, and freeze the returned object.
 *
 * Cleanup always runs: `harness.cleanup` (when defined) is invoked first, and
 * worktree cleanup runs afterwards even if `harness.cleanup` throws. Worktree
 * cleanup only happens when `descriptor.cleanupWorktrees === true`.
 *
 * @param descriptor Ensemble descriptor (harness, models, runtime, judge, policy).
 * @returns The frozen run result.
 * @throws Whatever descriptor/schema validation throws, the rejection reason
 *   of the first rejected candidate run, or any error from the harness,
 *   store, synthesis or cleanup steps.
 */
export async function runEnsemble(descriptor) {
    assertDescriptor(descriptor);
    const createdAt = new Date().toISOString();
    const capabilities = descriptor.harness.capabilities(descriptor);
    const harnessKind = descriptor.harness.harnessKind ?? "generic";
    const outputRoot = defaultOutputRoot(descriptor);
    const store = createArtifactStore(`${outputRoot}/artifacts`);
    const worktreePlan = createWorktreePlan(descriptor);
    const request = {
        ...metadata({ schema: "harness-run-request.v1", createdAt }),
        request_id: `ensemble_req_${descriptor.id}`,
        harness_kind: harnessKind,
        source_repo: descriptor.sourceRepo,
        base_git_sha: descriptor.baseGitSha,
        prompt: descriptor.prompt,
        prompt_hash: requestHash({
            prompt: descriptor.prompt,
            descriptor_id: descriptor.id
        }),
        allowed_tools: descriptor.policy.allowedTools,
        side_effects: descriptor.policy.sideEffects,
        requested_capabilities: capabilities,
        metadata: {
            harness_id: descriptor.harness.id,
            runtime_id: descriptor.runtime.id,
            judge_id: descriptor.judge.id,
            policy_id: descriptor.policy.id,
            hardening: runtimeHardeningMetadata(descriptor),
            output_root: outputRoot,
            ...(worktreePlan
                ? {
                    snapshot_hash: worktreePlan.snapshotHash,
                    snapshot_base_git_sha: worktreePlan.baseGitSha
                }
                : {}),
            // Spread last: descriptor metadata may override the keys above.
            ...(descriptor.metadata ?? {})
        }
    };
    assertHarnessRunRequestV1(request);
    let prepared;
    let outputs = [];
    let cleanupWorktrees = worktreePlan?.worktrees ?? [];
    try {
        prepared = await descriptor.harness.prepare({ descriptor, request });
        // Settle every candidate before continuing so a single failure cannot leave
        // siblings running while we tear down their worktrees. A hard failure still
        // aborts the run (re-thrown below) once all candidates have stopped.
        const settled = await Promise.allSettled(descriptor.models.map((model, ordinal) => descriptor.harness.run({
            descriptor,
            request,
            model,
            ordinal,
            prepared,
            worktree: worktreePlan?.worktrees[ordinal]
        })));
        const rejection = settled.find((result) => result.status === "rejected");
        if (rejection !== undefined)
            throw rejection.reason;
        outputs = settled.map((result) => result.value);
        const collectedArtifacts = await descriptor.harness.collectArtifacts({
            descriptor,
            request,
            candidates: outputs,
            prepared
        });
        const verification = descriptor.harness.verificationProfile(descriptor);
        const sealedWorktrees = worktreePlan?.worktrees.map((worktree) => sealCandidateWorktree(worktree));
        // From here on, cleanup should target the sealed worktrees.
        cleanupWorktrees = sealedWorktrees ?? cleanupWorktrees;
        // Candidate ids are derived once and reused, so artifacts and records
        // can never disagree on the id.
        const candidateIds = outputs.map((output, ordinal) => output.candidateId ??
            sealedWorktrees?.[ordinal]?.candidateId ??
            `${descriptor.id}_${output.model.id}_${ordinal}`);
        // Indexed by ordinal rather than keyed by id, so two outputs reporting the
        // same candidate id still keep their own artifact lists.
        const generatedArtifacts = outputs.map((output, ordinal) => {
            const worktree = sealedWorktrees?.[ordinal];
            // A sealed worktree is the source of truth for the patch; otherwise
            // fall back to the diff text the harness reported.
            const patch = worktree ? diffCandidateWorktree(worktree) : (output.diff ?? "");
            return artifactsForOutput({
                descriptor,
                candidateId: candidateIds[ordinal],
                output,
                patch,
                ...(worktree ? { worktree } : {}),
                store
            });
        });
        const modelCallRecords = outputs.flatMap((output) => output.modelCallRecord ? [output.modelCallRecord] : []);
        const candidates = outputs.map((output, ordinal) => {
            const worktree = sealedWorktrees?.[ordinal];
            const id = candidateIds[ordinal];
            // Records carry artifact references, i.e. artifacts without `path`.
            const artifacts = (generatedArtifacts[ordinal] ?? output.artifacts ?? []).map(artifactRef);
            const record = {
                ...metadata({ schema: "harness-candidate-record.v1", createdAt }),
                candidate_id: id,
                request_id: request.request_id,
                harness_kind: harnessKind,
                model_call_id: output.modelCallId ?? output.modelCallRecord?.call_id ?? `${id}_model_call`,
                status: output.status,
                side_effects: descriptor.policy.sideEffects,
                artifacts,
                ...(output.branchName ?? worktree?.branchName
                    ? { branch_name: output.branchName ?? worktree?.branchName }
                    : {}),
                ...(output.worktreePath ?? worktree?.path
                    ? { worktree_path: output.worktreePath ?? worktree?.path }
                    : {}),
                ...(output.score !== undefined ? { score: output.score } : {}),
                ...(output.error ? { error: output.error } : {}),
                metadata: candidateMetadata(output, descriptor, worktree)
            };
            assertHarnessCandidateRecordV1(record);
            return record;
        });
        const baseArtifacts = [
            ...collectedArtifacts,
            ...candidates.flatMap((candidate) => candidate.artifacts ?? [])
        ];
        const toolRecords = outputs.flatMap((output) => output.toolRecords ?? []);
        const synthesis = await runJudgeSynthesis({
            descriptor,
            candidates,
            outputs,
            artifacts: baseArtifacts,
            toolRecords,
            modelCallRecords,
            ...(descriptor.reviewEvidence ? { reviewEvidence: descriptor.reviewEvidence } : {}),
            ...(worktreePlan?.workspace ? { workspace: worktreePlan.workspace } : {}),
            ...(worktreePlan?.baseGitSha ? { baseGitSha: worktreePlan.baseGitSha } : {}),
            store
        });
        const artifacts = [
            ...baseArtifacts,
            ...(synthesis?.artifacts ?? [])
        ];
        // Resolved once per output and reused by the summary and the result counters.
        const hardenings = outputs.map((output) => candidateHardening(output, descriptor));
        const summary = {
            descriptorId: descriptor.id,
            ...(worktreePlan
                ? {
                    snapshot: {
                        baseGitSha: worktreePlan.baseGitSha,
                        snapshotHash: worktreePlan.snapshotHash,
                        workspace: worktreePlan.workspace
                    }
                }
                : {}),
            candidates: candidates.map((candidate, ordinal) => {
                const output = outputs[ordinal];
                const hardening = hardenings[ordinal];
                const diffArtifacts = (candidate.artifacts ?? []).filter((artifact) => artifact.kind === "patch");
                return {
                    candidateId: candidate.candidate_id,
                    modelId: output?.model.id ?? "",
                    model: output?.model.model ?? "",
                    ...(candidate.model_call_id ? { modelCallId: candidate.model_call_id } : {}),
                    status: candidate.status,
                    ...(candidate.branch_name ? { branchName: candidate.branch_name } : {}),
                    ...(candidate.worktree_path ? { worktreePath: candidate.worktree_path } : {}),
                    toolExecutionIds: output?.toolRecords?.map((record) => record.execution_id) ?? [],
                    diffArtifacts,
                    ...(output?.verification ? { verification: output.verification } : {}),
                    ...(hardening ? { hardening } : {})
                };
            }),
            artifacts,
            modelCallRecords,
            ...(synthesis?.judgeSynthesisRecord
                ? { judgeSynthesisRecord: synthesis.judgeSynthesisRecord }
                : {}),
            finalPatchPath: synthesis?.finalPatchPath ?? null,
            ...(synthesis?.repairAttempts ? { repairAttempts: synthesis.repairAttempts } : {}),
            ...(synthesis?.failureSummary ? { failureSummary: synthesis.failureSummary } : {})
        };
        const summaryArtifact = store.writeJson({
            artifactId: `${descriptor.id}_summary`,
            kind: "metrics",
            value: summary
        });
        const summaryPath = summaryArtifact.path;
        const summaryArtifactRef = artifactRef(summaryArtifact);
        const result = {
            ...metadata({ schema: "harness-run-result.v1", createdAt }),
            result_id: `ensemble_result_${descriptor.id}`,
            request_id: request.request_id,
            harness_kind: harnessKind,
            status: terminalStatus(outputs),
            candidate_ids: candidates.map((candidate) => candidate.candidate_id),
            output_summary: outputSummary(outputs, descriptor.harness.id),
            artifacts: [...artifacts, summaryArtifactRef],
            capabilities,
            started_at: createdAt,
            finished_at: new Date().toISOString(),
            metadata: {
                descriptor_id: descriptor.id,
                summary_path: summaryPath,
                hardening: {
                    requested_isolation: descriptor.runtime.isolation?.kind ?? "process",
                    candidate_count: candidates.length,
                    cleanup_succeeded: hardenings.filter((hardening) => hardening?.cleanup.succeeded === true).length,
                    cleanup_failed: hardenings.filter((hardening) => hardening?.cleanup.status === "failed").length
                },
                ...(descriptor.reviewEvidence !== undefined
                    ? { review_evidence: descriptor.reviewEvidence }
                    : {})
            }
        };
        assertHarnessRunResultV1(result);
        return freezeResult({
            descriptorId: descriptor.id,
            harnessRunRequest: request,
            harnessRunResult: result,
            candidates,
            artifacts: [...artifacts, summaryArtifactRef],
            toolRecords,
            modelCallRecords,
            verification,
            summaryPath,
            summary,
            ...(synthesis?.judgeSynthesisRecord
                ? { judgeSynthesisRecord: synthesis.judgeSynthesisRecord }
                : {}),
            ...(synthesis ? { finalPatchPath: synthesis.finalPatchPath } : {}),
            ...(synthesis?.repairAttempts ? { repairAttempts: synthesis.repairAttempts } : {}),
            ...(synthesis?.failureSummary ? { failureSummary: synthesis.failureSummary } : {}),
            ...(descriptor.reviewEvidence ? { reviewEvidence: descriptor.reviewEvidence } : {})
        });
    }
    finally {
        try {
            await descriptor.harness.cleanup?.({
                descriptor,
                request,
                candidates: outputs,
                prepared
            });
        }
        finally {
            // Nested so a throwing harness cleanup cannot skip worktree cleanup.
            if (worktreePlan && descriptor.cleanupWorktrees === true) {
                cleanupWorktreePlan({
                    ...worktreePlan,
                    worktrees: cleanupWorktrees
                });
            }
        }
    }
}
/** Object-style entry point: `ensemble.run` is the same function as `runEnsemble`. */
export const ensemble = { run: runEnsemble };
package/dist/synthesis.d.ts ADDED
@@ -0,0 +1,25 @@
1
+ import type { HarnessCandidateRecordV1, JudgeSynthesisRecordV1 } from "@fusionkit/protocol";
2
+ import type { ArtifactStore } from "./artifacts.js";
3
+ import type { EnsembleDescriptor, HarnessArtifact, HarnessCandidateOutput, HarnessToolRecord } from "./harness.js";
4
+ import type { JudgeInput, SynthesisFailureSummary, SynthesisRepairAttempt } from "./judge.js";
/** Value produced by `runJudgeSynthesis` when it returns a result. */
export type SynthesisResult = {
    /** The judge input associated with this synthesis. */
    judgeInput: JudgeInput;
    /** Protocol record describing the judge synthesis. */
    judgeSynthesisRecord: JudgeSynthesisRecordV1;
    /** Artifacts produced by synthesis. */
    artifacts: HarnessArtifact[];
    /** Path of the final patch, or `null` when there is none. */
    finalPatchPath: string | null;
    /** Repair attempts recorded for this synthesis. */
    repairAttempts: SynthesisRepairAttempt[];
    /** Failure summary, when one was recorded. */
    failureSummary?: SynthesisFailureSummary;
};
/** Everything `runJudgeSynthesis` receives for one ensemble run. */
export type RunSynthesisInput = {
    /** Descriptor of the ensemble run. */
    descriptor: EnsembleDescriptor;
    /** Candidate records for the run. */
    candidates: readonly HarnessCandidateRecordV1[];
    /** Candidate outputs for the run. */
    outputs: readonly HarnessCandidateOutput[];
    /** Artifacts available to synthesis. */
    artifacts: readonly HarnessArtifact[];
    /** Tool records available to synthesis. */
    toolRecords: readonly HarnessToolRecord[];
    /** Model call records, typed as the judge input expects them. */
    modelCallRecords: JudgeInput["modelCallRecords"];
    /** Optional review evidence, typed as the judge input expects it. */
    reviewEvidence?: JudgeInput["reviewEvidence"];
    /** Optional workspace location. */
    workspace?: string;
    /** Optional base git SHA. */
    baseGitSha?: string;
    /** Artifact store passed through to synthesis. */
    store: ArtifactStore;
};
/**
 * Run judge synthesis for one ensemble run.
 *
 * @param input Candidates, outputs, artifacts and supporting context for the run.
 * @returns A promise for the synthesis result, or for `undefined` when no
 *   result is produced.
 */
export declare function runJudgeSynthesis(
    input: RunSynthesisInput
): Promise<SynthesisResult | undefined>;