@fusionkit/ensemble 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.d.ts +21 -0
- package/dist/agent.js +186 -0
- package/dist/artifacts.d.ts +21 -0
- package/dist/artifacts.js +36 -0
- package/dist/claude-code.d.ts +25 -0
- package/dist/claude-code.js +398 -0
- package/dist/codex.d.ts +69 -0
- package/dist/codex.js +467 -0
- package/dist/command.d.ts +15 -0
- package/dist/command.js +82 -0
- package/dist/dashboard.d.ts +62 -0
- package/dist/dashboard.js +788 -0
- package/dist/external-executor.d.ts +56 -0
- package/dist/external-executor.js +288 -0
- package/dist/harness.d.ts +337 -0
- package/dist/harness.js +1 -0
- package/dist/index.d.ts +30 -0
- package/dist/index.js +15 -0
- package/dist/isolation.d.ts +25 -0
- package/dist/isolation.js +509 -0
- package/dist/judge.d.ts +77 -0
- package/dist/judge.js +16 -0
- package/dist/mock.d.ts +20 -0
- package/dist/mock.js +56 -0
- package/dist/run.d.ts +5 -0
- package/dist/run.js +520 -0
- package/dist/synthesis.d.ts +25 -0
- package/dist/synthesis.js +221 -0
- package/dist/test/codex.test.d.ts +1 -0
- package/dist/test/codex.test.js +237 -0
- package/dist/test/dashboard.test.d.ts +1 -0
- package/dist/test/dashboard.test.js +214 -0
- package/dist/test/ensemble.test.d.ts +1 -0
- package/dist/test/ensemble.test.js +780 -0
- package/dist/test/external-executor.test.d.ts +1 -0
- package/dist/test/external-executor.test.js +273 -0
- package/dist/test/isolation.test.d.ts +1 -0
- package/dist/test/isolation.test.js +359 -0
- package/dist/test/tool-executor.test.d.ts +1 -0
- package/dist/test/tool-executor.test.js +113 -0
- package/dist/test/unified.test.d.ts +1 -0
- package/dist/test/unified.test.js +150 -0
- package/dist/tool-executor.d.ts +14 -0
- package/dist/tool-executor.js +156 -0
- package/dist/trace.d.ts +8 -0
- package/dist/trace.js +7 -0
- package/dist/unified.d.ts +101 -0
- package/dist/unified.js +422 -0
- package/dist/worktree.d.ts +25 -0
- package/dist/worktree.js +75 -0
- package/package.json +35 -0
package/dist/unified.js
ADDED
|
@@ -0,0 +1,422 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
import { mkdirSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { join, resolve } from "node:path";
|
|
4
|
+
import { newSpanId, TRACE_ID_HEADER, TRACE_SPAN_HEADER } from "@fusionkit/protocol";
|
|
5
|
+
import { gitText } from "@fusionkit/workspace";
|
|
6
|
+
import { createAgentHarness } from "./agent.js";
|
|
7
|
+
import { claudeCodeHarness } from "./claude-code.js";
|
|
8
|
+
import { createCommandHarness } from "./command.js";
|
|
9
|
+
import { codexHarness } from "./codex.js";
|
|
10
|
+
import { createMockHarness } from "./mock.js";
|
|
11
|
+
import { runEnsemble } from "./run.js";
|
|
12
|
+
/**
 * Remove every trailing "/" from a FusionKit backend URL.
 *
 * @param {string} value - Backend URL as supplied by the caller.
 * @returns {string} The same URL without any trailing slashes.
 */
function normalizeFusionBackendUrl(value) {
    // Walk back from the end until the last character is not a slash.
    let end = value.length;
    while (end > 0 && value.charAt(end - 1) === "/") {
        end -= 1;
    }
    return value.slice(0, end);
}
|
|
15
|
+
/**
 * Build the chat-completions endpoint for a backend URL.
 *
 * A base URL that already ends in "/v1" is not given a second version segment.
 *
 * @param {string} baseUrl - Backend URL, with or without trailing slashes.
 * @returns {string} URL ending in "/v1/chat/completions".
 */
function chatCompletionsUrl(baseUrl) {
    const root = normalizeFusionBackendUrl(baseUrl);
    if (root.endsWith("/v1")) {
        return `${root}/chat/completions`;
    }
    return `${root}/v1/chat/completions`;
}
|
|
21
|
+
/**
 * Map a unified harness kind to the side-effect level used in its policy.
 *
 * @param {string} kind - Harness kind identifier.
 * @returns {string} "read_only", "tool_execution" or "writes_workspace".
 * @throws {Error} When the kind is not one of the supported harnesses.
 */
function sideEffectsForHarness(kind) {
    const sideEffectsByKind = new Map([
        ["mock", "read_only"],
        ["command", "tool_execution"],
        ["agent", "writes_workspace"],
        ["codex", "writes_workspace"],
        ["claude-code", "writes_workspace"],
        ["cursor-acp", "writes_workspace"],
        ["cursor-desktop", "writes_workspace"]
    ]);
    const sideEffects = sideEffectsByKind.get(kind);
    if (sideEffects === undefined) {
        throw new Error(`unsupported unified harness: ${String(kind)}`);
    }
    return sideEffects;
}
|
|
40
|
+
/**
 * Construct the harness adapter for a unified harness kind.
 *
 * @param {string} kind - Harness kind identifier.
 * @param {object} options - Unified run options (backend URL, API key, timeouts, ...).
 * @returns {object} The harness produced by the matching factory.
 * @throws {Error} For the command harness without `options.command`, for the
 *   Cursor kinds (they run through a separate path), and for unknown kinds.
 */
function harnessAdapter(kind, options) {
    if (kind === "mock") {
        return createMockHarness();
    }
    if (kind === "agent") {
        const agentOptions = {
            modelEndpoints: options.modelEndpoints ?? {},
            fallbackBaseUrl: normalizeFusionBackendUrl(options.fusionBackendUrl)
        };
        // Optional settings are only forwarded when the caller defined them.
        if (options.fusionApiKey !== undefined) {
            agentOptions.apiKey = options.fusionApiKey;
        }
        if (options.timeoutMs !== undefined) {
            agentOptions.timeoutMs = options.timeoutMs;
        }
        if (options.traceId !== undefined) {
            agentOptions.traceId = options.traceId;
        }
        if (options.parentSpanId !== undefined) {
            agentOptions.parentSpanId = options.parentSpanId;
        }
        if (options.turn !== undefined) {
            agentOptions.turn = options.turn;
        }
        return createAgentHarness(agentOptions);
    }
    if (kind === "command") {
        if (!options.command) {
            throw new Error("--command is required for the command unified harness");
        }
        // Environment handed to the command for each model; a per-model
        // endpoint takes precedence over the shared backend URL.
        const envForModel = ({ model }) => {
            const backend = options.modelEndpoints?.[model.id] ?? options.fusionBackendUrl;
            const env = {
                FUSIONKIT_BASE_URL: normalizeFusionBackendUrl(backend),
                FUSIONKIT_CHAT_COMPLETIONS_URL: chatCompletionsUrl(backend),
                FUSIONKIT_MODEL: model.model,
                FUSIONKIT_MODEL_ID: model.id
            };
            if (options.fusionApiKey) {
                env.FUSIONKIT_API_KEY = options.fusionApiKey;
            }
            if (options.traceId) {
                env.FUSION_TRACE_ID = options.traceId;
            }
            return env;
        };
        return createCommandHarness({
            command: options.command,
            timeoutMs: options.timeoutMs,
            env: envForModel
        });
    }
    if (kind === "codex") {
        const provider = {
            kind: "openai-compatible",
            baseUrl: normalizeFusionBackendUrl(options.fusionBackendUrl)
        };
        if (options.fusionApiKey) {
            provider.apiKey = options.fusionApiKey;
        }
        return codexHarness({ timeoutMs: options.timeoutMs, provider });
    }
    if (kind === "claude-code") {
        return claudeCodeHarness({ timeoutMs: options.timeoutMs });
    }
    if (kind === "cursor-acp" || kind === "cursor-desktop") {
        throw new Error(`${kind} runs through the Cursor harness adapter path`);
    }
    throw new Error(`unsupported unified harness: ${String(kind)}`);
}
|
|
94
|
+
/**
 * Return the response-shape instruction given to the judge for a harness kind.
 *
 * @param {string} kind - Harness kind identifier.
 * @returns {string} Instruction text describing the expected answer shape.
 * @throws {Error} When the kind is not one of the supported harnesses.
 */
function responseShapeFor(kind) {
    const summaryShape = "Return a concise markdown summary and include any final patch guidance.";
    const cursorShape = "Return text suitable for Cursor ACP session/update plus route evidence notes.";
    const shapeByKind = new Map([
        ["mock", summaryShape],
        ["command", summaryShape],
        ["agent", "Respond to the user in the natural shape the request calls for: a direct answer, " +
            "a plan, or the concrete code change. Reply in first person as the assistant."],
        ["codex", "Return a Codex-style result summary with patch and verification evidence."],
        ["claude-code", "Return a Claude Code-style transcript summary with patch/worktree evidence."],
        ["cursor-acp", cursorShape],
        ["cursor-desktop", cursorShape]
    ]);
    const shape = shapeByKind.get(kind);
    if (shape === undefined) {
        throw new Error(`unsupported unified harness: ${String(kind)}`);
    }
    return shape;
}
|
|
115
|
+
/**
 * Build the trajectory-fusion endpoint for a backend URL.
 *
 * Mirrors `chatCompletionsUrl`: a base URL that already ends in "/v1" is used
 * as the API root instead of receiving a second "/v1" segment.
 *
 * @param {string} baseUrl - Backend URL, with or without trailing slashes.
 * @returns {string} URL ending in "/v1/fusion/trajectories:fuse".
 */
function trajectoryFuseUrl(baseUrl) {
    const normalized = normalizeFusionBackendUrl(baseUrl);
    const apiRoot = normalized.endsWith("/v1") ? normalized : `${normalized}/v1`;
    return `${apiRoot}/fusion/trajectories:fuse`;
}
|
|
118
|
+
/**
 * Convert a camelCase trajectory record into its snake_case wire form.
 *
 * Optional fields are only emitted when defined; `diff` is additionally
 * omitted when empty.
 *
 * @param {object} trajectory - In-memory trajectory record.
 * @returns {object} Wire-format trajectory.
 */
function trajectoryToWire(trajectory) {
    const { candidateId, model, harnessKind, diff, verification } = trajectory;
    const wire = {
        trajectory_id: trajectory.trajectoryId,
        model_id: trajectory.modelId,
        status: trajectory.status,
        steps: trajectory.steps,
        final_output: trajectory.finalOutput
    };
    if (candidateId !== undefined) {
        wire.candidate_id = candidateId;
    }
    if (model !== undefined) {
        wire.model = model;
    }
    if (harnessKind !== undefined) {
        wire.harness_kind = harnessKind;
    }
    if (diff !== undefined && diff.length > 0) {
        wire.diff = diff;
    }
    if (verification !== undefined) {
        const wireVerification = {
            status: verification.status,
            evidence: verification.evidence
        };
        if (verification.exitCode !== undefined) {
            wireVerification.exit_code = verification.exitCode;
        }
        wire.verification = wireVerification;
    }
    return wire;
}
|
|
142
|
+
/**
 * Create a judge synthesizer that delegates synthesis to a FusionKit backend.
 *
 * When at least one candidate carries a trajectory, the trajectories are
 * posted to the trajectory-fusion endpoint. Otherwise the candidate evidence
 * is sent to the chat-completions endpoint. Both requests carry the same
 * authorization and trace headers.
 *
 * @param {object} input
 * @param {string} input.fusionBackendUrl - Backend base URL.
 * @param {string} input.model - Model name used for the chat-completions fallback.
 * @param {string} [input.apiKey] - Sent as a Bearer token when truthy.
 * @param {string} input.responseShape - Instruction appended to the system prompt.
 * @param {string} [input.traceId] - When defined, trace headers are attached.
 * @returns {{ synthesize: (judgeInput: object) => Promise<object> }}
 */
export function createFusionKitJudgeSynthesizer(input) {
    const authHeaders = input.apiKey
        ? { authorization: `Bearer ${input.apiKey}` }
        : {};
    const traceHeaders = input.traceId !== undefined
        ? { [TRACE_ID_HEADER]: input.traceId, [TRACE_SPAN_HEADER]: newSpanId() }
        : {};
    // One header set for both backend calls so auth and trace propagation
    // cannot drift apart between the fusion and chat-completions paths.
    const requestHeaders = { "content-type": "application/json", ...authHeaders, ...traceHeaders };
    return {
        /**
         * @param {object} judgeInput - Descriptor, candidates, tool records and artifacts.
         * @returns {Promise<object>} Synthesis decision with final output and contributions.
         * @throws {Error} When the backend responds with a non-2xx status.
         */
        async synthesize(judgeInput) {
            // Trajectory-level fusion: when candidates carry agent trajectories, fuse
            // them through FusionKit's trajectory-aware, intent-agnostic synthesizer,
            // which returns the answer in the request's native shape and first person.
            const trajectories = judgeInput.candidates
                .map((candidate) => candidate.trajectory)
                .filter((trajectory) => trajectory !== undefined);
            if (trajectories.length > 0) {
                const fuseResponse = await fetch(trajectoryFuseUrl(input.fusionBackendUrl), {
                    method: "POST",
                    headers: requestHeaders,
                    body: JSON.stringify({
                        messages: [{ role: "user", content: judgeInput.descriptor.prompt }],
                        trajectories: trajectories.map(trajectoryToWire)
                    })
                });
                if (!fuseResponse.ok) {
                    throw new Error(`FusionKit trajectory fusion failed: ${fuseResponse.status} ${(await fuseResponse.text()).slice(0, 500)}`);
                }
                const fused = (await fuseResponse.json());
                return {
                    decision: "synthesize",
                    finalOutput: fused.final_output ?? "",
                    rationale: fused.rationale ?? "FusionKit trajectory fusion",
                    contributions: trajectories.map((trajectory) => ({
                        candidateId: trajectory.candidateId ?? trajectory.trajectoryId,
                        reason: `fused ${trajectory.status} trajectory`
                    }))
                };
            }
            // No trajectories available: fall back to a plain chat-completions
            // judge call over the serialized candidate evidence.
            const response = await fetch(chatCompletionsUrl(input.fusionBackendUrl), {
                method: "POST",
                headers: requestHeaders,
                body: JSON.stringify({
                    model: input.model,
                    messages: [
                        {
                            role: "system",
                            content: "You synthesize coding harness candidate evidence. " +
                                input.responseShape
                        },
                        {
                            role: "user",
                            content: JSON.stringify({
                                prompt: judgeInput.descriptor.prompt,
                                candidates: judgeInput.candidates,
                                toolRecords: judgeInput.toolRecords,
                                artifacts: judgeInput.artifacts
                            })
                        }
                    ],
                    temperature: 0,
                    max_tokens: 800
                })
            });
            if (!response.ok) {
                throw new Error(`FusionKit judge request failed: ${response.status}`);
            }
            const body = (await response.json());
            const finalOutput = body.choices?.[0]?.message?.content ?? "";
            return {
                decision: "synthesize",
                finalOutput,
                rationale: "FusionKit judge synthesis",
                contributions: judgeInput.candidates.map((candidate) => ({
                    candidateId: candidate.candidateId,
                    reason: `included ${candidate.status} candidate evidence`
                }))
            };
        }
    };
}
|
|
225
|
+
/**
 * Execute the panel a single time and return the captured trajectories.
 *
 * Every panel model runs the task through the "agent" harness via
 * `runEnsemble`. The descriptor's judge is swapped for a capturing synthesizer
 * that records each candidate's trajectory, so no fusion/synthesis request is
 * issued from here; the trajectories themselves are the result.
 *
 * @param {object} options - Panel options (backend URL, repo, prompt, models, ...).
 * @returns {Promise<object[]>} Captured trajectories in wire format.
 */
export async function runFusionPanels(options) {
    const panelOptions = {
        id: options.id ?? `panels_${Date.now()}`,
        fusionBackendUrl: options.fusionBackendUrl,
        repo: options.repo,
        outputRoot: options.outputRoot,
        prompt: options.prompt,
        harnesses: ["agent"],
        models: options.models
    };
    // Optional settings are forwarded only when the caller defined them.
    const passthroughKeys = ["modelEndpoints", "fusionApiKey", "timeoutMs", "traceId", "parentSpanId", "turn"];
    for (const key of passthroughKeys) {
        if (options[key] !== undefined) {
            panelOptions[key] = options[key];
        }
    }
    let capturedTrajectories = [];
    const captureSynthesizer = {
        synthesize(judgeInput) {
            capturedTrajectories = judgeInput.candidates
                .map((candidate) => candidate.trajectory)
                .filter((trajectory) => trajectory !== undefined);
            // The trajectories are the product; this output is discarded. A
            // non-empty final_output is required by the synthesis record contract.
            return {
                decision: "synthesize",
                finalOutput: `captured ${capturedTrajectories.length} panel trajectories`
            };
        }
    };
    const descriptor = descriptorFor("agent", panelOptions);
    descriptor.judge = { id: "panel-capture", synthesizer: captureSynthesizer };
    await runEnsemble(descriptor);
    return capturedTrajectories.map(trajectoryToWire);
}
|
|
266
|
+
/**
 * Assemble the ensemble descriptor for one harness kind.
 *
 * @param {string} kind - Harness kind identifier.
 * @param {object} options - Unified run options.
 * @returns {object} Descriptor holding the harness, models, judge, policy,
 *   workspace details and metadata.
 */
function descriptorFor(kind, options) {
    const idPrefix = options.id ?? "unified_e2e";
    const id = `${idPrefix}_${kind.replace(/-/g, "_")}`;
    const harness = harnessAdapter(kind, options);
    // Judge model: explicit override, else the first panel model, else the router.
    const judgeModel = options.judgeModel ?? options.models[0]?.model ?? "fusionkit/router";
    const synthesizerInput = {
        fusionBackendUrl: options.fusionBackendUrl,
        model: judgeModel,
        apiKey: options.fusionApiKey,
        responseShape: responseShapeFor(kind)
    };
    if (options.traceId !== undefined) {
        synthesizerInput.traceId = options.traceId;
    }
    const judge = {
        id: "fusionkit-judge",
        model: judgeModel,
        synthesizer: createFusionKitJudgeSynthesizer(synthesizerInput)
    };
    const policy = {
        id: "unified-e2e",
        allowedTools: ["read_file", "write_file", "apply_patch", "run_tests", "shell_command"],
        sideEffects: sideEffectsForHarness(kind),
        timeoutMs: options.timeoutMs
    };
    const baseGitSha = gitText(options.repo, ["rev-parse", "HEAD"]).trim();
    return {
        id,
        harness,
        models: options.models,
        runtime: { id: "unified-local" },
        judge,
        policy,
        prompt: options.prompt,
        sourceRepo: "handoffkit",
        baseGitSha,
        workspace: options.repo,
        outputRoot: options.outputRoot,
        cleanupWorktrees: true,
        metadata: {
            unified_harness_e2e: true,
            fusion_backend_url: normalizeFusionBackendUrl(options.fusionBackendUrl),
            response_shape: responseShapeFor(kind)
        }
    };
}
|
|
303
|
+
/**
 * Derive the reported status of an ensemble result.
 *
 * @param {object} result - Ensemble result.
 * @returns {string} "failed" when a failure summary is present, otherwise the
 *   status of the harness run result.
 */
function statusForResult(result) {
    if (result.failureSummary) {
        return "failed";
    }
    return result.harnessRunResult.status;
}
|
|
306
|
+
/**
 * Default Cursor probe runner: runs `pnpm test:harness` inside the Cursorkit
 * directory for one model and reports the outcome.
 *
 * Captured stdout/stderr are written to a log file under `input.outDir`.
 * The returned promise always resolves: a process that cannot be spawned, or a
 * log file that cannot be written, is reported in the result instead of
 * surfacing as an unhandled event or exception.
 *
 * @param {object} input
 * @param {string} input.kind - "cursor-acp" selects the "acp" suite; any other
 *   kind selects "desktop-route".
 * @param {{ id: string, model: string }} input.model - Model under test.
 * @param {string} input.fusionBackendUrl - Backend base URL passed as --base-url.
 * @param {string} input.outDir - Directory that receives the log file.
 * @param {number} [input.timeoutMs] - Passed as --timeout-ms (default 60000).
 * @param {string} [input.cursorKitDir] - Working directory for pnpm; when
 *   missing the probe is skipped.
 * @returns {Promise<object>} Result with status "skipped", "succeeded" or "failed".
 */
async function defaultCursorRunner(input) {
    if (!input.cursorKitDir) {
        return {
            status: "skipped",
            message: "Cursorkit directory not configured",
            details: { reason: "cursor_kit_dir_missing" }
        };
    }
    const suite = input.kind === "cursor-acp" ? "acp" : "desktop-route";
    const args = [
        "test:harness",
        "--",
        "--suite",
        suite,
        "--base-url",
        normalizeFusionBackendUrl(input.fusionBackendUrl),
        "--model",
        input.model.id,
        "--provider-model",
        input.model.model,
        "--timeout-ms",
        String(input.timeoutMs ?? 60_000)
    ];
    mkdirSync(input.outDir, { recursive: true });
    const logPath = join(input.outDir, `${input.kind}-${input.model.id}.log`);
    return await new Promise((resolveResult) => {
        const child = spawn("pnpm", args, {
            cwd: input.cursorKitDir,
            stdio: ["ignore", "pipe", "pipe"]
        });
        let stdout = "";
        let stderr = "";
        // "error" and "close" can both fire for one child; only the first
        // outcome is reported.
        let settled = false;
        const settle = (result) => {
            if (settled) {
                return;
            }
            settled = true;
            resolveResult(result);
        };
        child.stdout?.on("data", (chunk) => {
            stdout += chunk.toString("utf8");
        });
        child.stderr?.on("data", (chunk) => {
            stderr += chunk.toString("utf8");
        });
        // Without this listener a failed spawn (pnpm missing, invalid cwd)
        // raises an unhandled "error" event and the promise never settles.
        child.on("error", (error) => {
            settle({
                status: "failed",
                message: `${input.kind} failed: ${error.message}`,
                details: { exitCode: 1, error: error.message }
            });
        });
        // "close" fires once the stdio streams have ended, so the captured
        // output is complete when the log is written.
        child.on("close", (code) => {
            if (settled) {
                return;
            }
            const details = { exitCode: code ?? 1 };
            let artifacts = {};
            try {
                writeFileSync(logPath, [stdout, stderr].filter(Boolean).join("\n"));
                artifacts = { log: logPath };
            }
            catch (error) {
                // A log that cannot be written must not crash the run from
                // inside an event handler; record the problem instead.
                details.logError = error instanceof Error ? error.message : String(error);
            }
            settle({
                status: code === 0 ? "succeeded" : "failed",
                message: code === 0 ? `${input.kind} completed` : `${input.kind} failed`,
                artifacts,
                details
            });
        });
    });
}
|
|
355
|
+
/**
 * Run a Cursor harness probe for every model and aggregate the outcomes.
 *
 * The overall status is "failed" if any probe failed, "skipped" if every probe
 * was skipped, and "succeeded" otherwise. The top-level `artifacts` object is a
 * flat merge of every probe's artifacts, so identically named keys from later
 * models overwrite earlier ones; the full per-model artifacts are therefore
 * also kept, together with the model id, on each `details.perModel` entry.
 *
 * @param {string} kind - "cursor-acp" or "cursor-desktop".
 * @param {object} options - Unified run options; `options.cursorRunner`
 *   overrides the default runner.
 * @returns {Promise<object>} Aggregated harness result.
 */
async function runCursorHarness(kind, options) {
    const runner = options.cursorRunner ?? defaultCursorRunner;
    const perModel = await Promise.all(options.models.map((model) => runner({
        kind,
        model,
        fusionBackendUrl: options.fusionBackendUrl,
        repo: options.repo,
        outDir: join(options.outputRoot, `${kind}-${model.id}`),
        timeoutMs: options.timeoutMs,
        cursorKitDir: options.cursorKitDir
    })));
    const failed = perModel.find((result) => result.status === "failed");
    const skipped = perModel.every((result) => result.status === "skipped");
    return {
        harness: kind,
        modelIds: options.models.map((model) => model.id),
        status: failed ? "failed" : skipped ? "skipped" : "succeeded",
        message: failed?.message ?? (skipped ? "all Cursor model probes skipped" : "Cursor probes completed"),
        artifacts: Object.assign({}, ...perModel.map((result) => result.artifacts ?? {})),
        details: {
            // Promise.all preserves input order, so index i belongs to models[i].
            perModel: perModel.map((result, index) => ({
                modelId: options.models[index].id,
                status: result.status,
                message: result.message,
                ...(result.artifacts ? { artifacts: result.artifacts } : {}),
                ...(result.details ?? {})
            }))
        }
    };
}
|
|
383
|
+
/**
 * Run every requested harness in sequence and write a JSON report.
 *
 * Cursor kinds go through `runCursorHarness`; all other kinds are executed
 * with `runEnsemble`. The report is written to
 * `<outputRoot>/unified-e2e-report.json`.
 *
 * @param {object} options - Unified run options (harnesses, models, repo, outputRoot, ...).
 * @returns {Promise<object>} The report plus `reportPath`.
 */
export async function runUnifiedHarnessE2E(options) {
    const outputRoot = resolve(options.outputRoot);
    mkdirSync(outputRoot, { recursive: true });
    const results = [];
    for (const kind of options.harnesses) {
        const isCursorKind = kind === "cursor-acp" || kind === "cursor-desktop";
        if (isCursorKind) {
            results.push(await runCursorHarness(kind, options));
            continue;
        }
        const ensembleResult = await runEnsemble(descriptorFor(kind, options));
        const artifacts = {};
        if (ensembleResult.summaryPath) {
            artifacts.summary = ensembleResult.summaryPath;
        }
        results.push({
            harness: kind,
            modelIds: options.models.map((model) => model.id),
            status: statusForResult(ensembleResult),
            message: ensembleResult.harnessRunResult.output_summary ?? "",
            ensemble: ensembleResult,
            artifacts,
            details: {
                candidateCount: ensembleResult.candidates.length,
                artifactCount: ensembleResult.artifacts.length,
                toolRecordCount: ensembleResult.toolRecords.length,
                modelCallRecordCount: ensembleResult.modelCallRecords.length,
                judgeSynthesis: ensembleResult.judgeSynthesisRecord !== undefined
            }
        });
    }
    const report = {
        id: options.id ?? "unified_e2e",
        generatedAt: new Date().toISOString(),
        fusionBackendUrl: normalizeFusionBackendUrl(options.fusionBackendUrl),
        repo: resolve(options.repo),
        results
    };
    const reportPath = join(outputRoot, "unified-e2e-report.json");
    const serialized = JSON.stringify(report, null, 2);
    writeFileSync(reportPath, `${serialized}\n`);
    return { ...report, reportPath };
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { EnsembleDescriptor, EnsembleModel } from "./harness.js";
|
|
2
|
+
/**
 * Record describing the git worktree created for one ensemble candidate.
 * In worktree.js, createWorktreePlan builds these with `sealed: false` and
 * `cleaned: false`; sealCandidateWorktree and cleanupCandidateWorktree return
 * frozen copies with the respective flag set to true.
 */
export type CandidateWorktree = {
|
|
3
|
+
candidateId: string;
|
|
4
|
+
modelId: string;
|
|
5
|
+
branchName: string;
|
|
6
|
+
path: string;
|
|
7
|
+
baseGitSha: string;
|
|
8
|
+
snapshotHash: string;
|
|
9
|
+
sealed: boolean;
|
|
10
|
+
cleaned: boolean;
|
|
11
|
+
};
|
|
12
|
+
/**
 * Result of createWorktreePlan: the resolved workspace, the base commit, the
 * snapshot hash, the temporary root directory that holds the worktrees, and
 * one CandidateWorktree per descriptor model.
 */
export type WorktreePlan = {
|
|
13
|
+
workspace: string;
|
|
14
|
+
baseGitSha: string;
|
|
15
|
+
snapshotHash: string;
|
|
16
|
+
root: string;
|
|
17
|
+
worktrees: CandidateWorktree[];
|
|
18
|
+
};
|
|
19
|
+
/** Resolves the output directory for a descriptor: its `outputRoot` (or `.warrant/ensemble` under the workspace or the OS temp dir) plus the sanitized descriptor id. */
export declare function defaultOutputRoot(descriptor: EnsembleDescriptor): string;
|
|
20
|
+
/** Builds `<descriptor.id>_<model.id>_<ordinal>` with every character outside `[A-Za-z0-9_.:-]` replaced by "_". */
export declare function candidateId(descriptor: EnsembleDescriptor, model: EnsembleModel, ordinal: number): string;
|
|
21
|
+
/** Adds one detached git worktree per descriptor model under a fresh temp directory; returns undefined when the descriptor has no workspace. */
export declare function createWorktreePlan(descriptor: EnsembleDescriptor): WorktreePlan | undefined;
|
|
22
|
+
/** Returns a frozen copy of the worktree record with `sealed` set to true. */
export declare function sealCandidateWorktree(worktree: CandidateWorktree): CandidateWorktree;
|
|
23
|
+
/** Force-removes the git worktree, deletes its directory, and returns a frozen copy of the record with `cleaned` set to true. */
export declare function cleanupCandidateWorktree(workspace: string, worktree: CandidateWorktree): CandidateWorktree;
|
|
24
|
+
/** Cleans up every worktree in the plan, removes the plan's root directory, and returns the cleaned records. */
export declare function cleanupWorktreePlan(plan: WorktreePlan): CandidateWorktree[];
|
|
25
|
+
/** Stages all changes in the worktree (`git add -A`) and returns the output of `git diff --cached --binary <baseGitSha>`. */
export declare function diffCandidateWorktree(worktree: CandidateWorktree): string;
|
package/dist/worktree.js
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { mkdtempSync, rmSync } from "node:fs";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import { join, resolve } from "node:path";
|
|
4
|
+
import { hashCanonicalSha256 } from "@fusionkit/protocol";
|
|
5
|
+
import { gitText } from "@fusionkit/workspace";
|
|
6
|
+
/**
 * Sanitize a value for use as a path or name segment.
 *
 * @param {string} value - Raw identifier.
 * @returns {string} The value with every UTF-16 code unit outside
 *   `[A-Za-z0-9_.:-]` replaced by "_".
 */
function safeSegment(value) {
    const allowed = /^[A-Za-z0-9_.:-]$/;
    // split("") yields UTF-16 code units, so each disallowed unit maps to
    // exactly one underscore.
    return value
        .split("")
        .map((unit) => (allowed.test(unit) ? unit : "_"))
        .join("");
}
|
|
9
|
+
/**
 * Resolve the output directory for a descriptor.
 *
 * Uses `descriptor.outputRoot` when set; otherwise `.warrant/ensemble` under
 * the workspace (or the OS temp dir when there is no workspace). The sanitized
 * descriptor id is appended as the final segment.
 *
 * @param {object} descriptor - Ensemble descriptor.
 * @returns {string} Absolute output directory path.
 */
export function defaultOutputRoot(descriptor) {
    let base = descriptor.outputRoot;
    if (base === undefined || base === null) {
        const parent = descriptor.workspace ?? tmpdir();
        base = join(parent, ".warrant", "ensemble");
    }
    return resolve(base, safeSegment(descriptor.id));
}
|
|
13
|
+
/**
 * Build the identifier for one candidate of an ensemble run.
 *
 * @param {object} descriptor - Ensemble descriptor (its `id` is used).
 * @param {object} model - Panel model (its `id` is used).
 * @param {number} ordinal - Position of the model in the panel.
 * @returns {string} `<descriptor.id>_<model.id>_<ordinal>` with characters
 *   outside `[A-Za-z0-9_.:-]` replaced by "_".
 */
export function candidateId(descriptor, model, ordinal) {
    const unsafeCharacters = /[^A-Za-z0-9_.:-]/g;
    const rawId = `${descriptor.id}_${model.id}_${ordinal}`;
    return rawId.replace(unsafeCharacters, "_");
}
|
|
16
|
+
/**
 * Create one detached git worktree per descriptor model under a fresh
 * temporary directory.
 *
 * @param {object} descriptor - Ensemble descriptor; `workspace` is the git
 *   repository and `baseGitSha` (or the workspace HEAD) is the checkout point.
 * @returns {object | undefined} The worktree plan, or undefined when the
 *   descriptor has no workspace.
 * @throws Rethrows any failure from adding a worktree after removing the
 *   worktrees already added and the temporary root.
 */
export function createWorktreePlan(descriptor) {
    if (!descriptor.workspace) {
        return undefined;
    }
    const workspace = resolve(descriptor.workspace);
    const baseGitSha = descriptor.baseGitSha || gitText(workspace, ["rev-parse", "HEAD"]).trim();
    // Hash before creating the temp root: if hashing throws (for example on a
    // malformed `models` list) no directory has been created yet, so nothing leaks.
    const snapshotHash = hashCanonicalSha256({
        workspace,
        baseGitSha,
        descriptorId: descriptor.id,
        models: descriptor.models.map((model) => model.id)
    });
    const root = mkdtempSync(join(tmpdir(), `warrant-ensemble-${safeSegment(descriptor.id)}-`));
    const worktrees = [];
    try {
        for (const [ordinal, model] of descriptor.models.entries()) {
            const id = candidateId(descriptor, model, ordinal);
            const path = join(root, id);
            // NOTE(review): the worktree is added with --detach, so this branch
            // name is only recorded here, not created — confirm that is intended.
            const branchName = `warrant/ensemble/${safeSegment(descriptor.id)}/${safeSegment(model.id)}`;
            gitText(workspace, ["worktree", "add", "--detach", path, baseGitSha]);
            worktrees.push({
                candidateId: id,
                modelId: model.id,
                branchName,
                path,
                baseGitSha,
                snapshotHash,
                sealed: false,
                cleaned: false
            });
        }
    }
    catch (error) {
        // A mid-loop failure must not leak the worktrees already added or the root.
        for (const worktree of worktrees) {
            gitText(workspace, ["worktree", "remove", "--force", worktree.path], { allowFail: true });
        }
        rmSync(root, { recursive: true, force: true });
        throw error;
    }
    return { workspace, baseGitSha, snapshotHash, root, worktrees };
}
|
|
57
|
+
/**
 * Mark a candidate worktree as sealed without mutating the input.
 *
 * @param {object} worktree - Candidate worktree record.
 * @returns {object} Frozen copy of the record with `sealed: true`.
 */
export function sealCandidateWorktree(worktree) {
    const sealedRecord = { ...worktree, sealed: true };
    return Object.freeze(sealedRecord);
}
|
|
60
|
+
/**
 * Remove one candidate worktree from git and from disk.
 *
 * The git removal is allowed to fail; the directory is then deleted directly.
 *
 * @param {string} workspace - Repository the worktree belongs to.
 * @param {object} worktree - Candidate worktree record.
 * @returns {object} Frozen copy of the record with `cleaned: true`.
 */
export function cleanupCandidateWorktree(workspace, worktree) {
    const removeArgs = ["worktree", "remove", "--force", worktree.path];
    gitText(workspace, removeArgs, { allowFail: true });
    rmSync(worktree.path, { recursive: true, force: true });
    const cleanedRecord = { ...worktree, cleaned: true };
    return Object.freeze(cleanedRecord);
}
|
|
65
|
+
/**
 * Clean up every worktree of a plan and delete the plan's root directory.
 *
 * @param {object} plan - Worktree plan produced by createWorktreePlan.
 * @returns {object[]} The cleaned worktree records, in plan order.
 */
export function cleanupWorktreePlan(plan) {
    const cleanedWorktrees = [];
    for (const worktree of plan.worktrees) {
        cleanedWorktrees.push(cleanupCandidateWorktree(plan.workspace, worktree));
    }
    rmSync(plan.root, { recursive: true, force: true });
    return cleanedWorktrees;
}
|
|
70
|
+
/**
 * Produce the diff of a candidate worktree against its base commit.
 *
 * All changes are staged first (`git add -A`), then the staged diff against
 * `worktree.baseGitSha` is returned. Both git calls are allowed to fail.
 *
 * @param {object} worktree - Candidate worktree record.
 * @returns {string} Output of `git diff --cached --binary <baseGitSha>`.
 */
export function diffCandidateWorktree(worktree) {
    const { path, baseGitSha } = worktree;
    gitText(path, ["add", "-A"], { allowFail: true });
    const diffArgs = ["diff", "--cached", "--binary", baseGitSha];
    return gitText(path, diffArgs, { allowFail: true });
}
|
package/package.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@fusionkit/ensemble",
|
|
3
|
+
"private": false,
|
|
4
|
+
"version": "0.1.0",
|
|
5
|
+
"repository": {
|
|
6
|
+
"type": "git",
|
|
7
|
+
"url": "git+https://github.com/velum-labs/handoffkit.git",
|
|
8
|
+
"directory": "packages/ensemble"
|
|
9
|
+
},
|
|
10
|
+
"description": "Harness-agnostic ensemble runner that emits model-fusion protocol records.",
|
|
11
|
+
"license": "UNLICENSED",
|
|
12
|
+
"type": "module",
|
|
13
|
+
"exports": {
|
|
14
|
+
".": {
|
|
15
|
+
"types": "./dist/index.d.ts",
|
|
16
|
+
"default": "./dist/index.js"
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"files": [
|
|
20
|
+
"dist"
|
|
21
|
+
],
|
|
22
|
+
"publishConfig": {
|
|
23
|
+
"registry": "https://registry.npmjs.org",
|
|
24
|
+
"access": "public",
|
|
25
|
+
"provenance": true
|
|
26
|
+
},
|
|
27
|
+
"dependencies": {
|
|
28
|
+
"@fusionkit/adapter-ai-sdk": "0.1.0",
|
|
29
|
+
"@fusionkit/protocol": "0.1.0",
|
|
30
|
+
"@fusionkit/model-gateway": "0.1.0",
|
|
31
|
+
"@fusionkit/runner": "0.1.0",
|
|
32
|
+
"@fusionkit/session-harness": "0.1.0",
|
|
33
|
+
"@fusionkit/workspace": "0.1.0"
|
|
34
|
+
}
|
|
35
|
+
}
|