@fusionkit/ensemble 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.d.ts +21 -0
- package/dist/agent.js +186 -0
- package/dist/artifacts.d.ts +21 -0
- package/dist/artifacts.js +36 -0
- package/dist/claude-code.d.ts +25 -0
- package/dist/claude-code.js +398 -0
- package/dist/codex.d.ts +69 -0
- package/dist/codex.js +467 -0
- package/dist/command.d.ts +15 -0
- package/dist/command.js +82 -0
- package/dist/dashboard.d.ts +62 -0
- package/dist/dashboard.js +788 -0
- package/dist/external-executor.d.ts +56 -0
- package/dist/external-executor.js +288 -0
- package/dist/harness.d.ts +337 -0
- package/dist/harness.js +1 -0
- package/dist/index.d.ts +30 -0
- package/dist/index.js +15 -0
- package/dist/isolation.d.ts +25 -0
- package/dist/isolation.js +509 -0
- package/dist/judge.d.ts +77 -0
- package/dist/judge.js +16 -0
- package/dist/mock.d.ts +20 -0
- package/dist/mock.js +56 -0
- package/dist/run.d.ts +5 -0
- package/dist/run.js +520 -0
- package/dist/synthesis.d.ts +25 -0
- package/dist/synthesis.js +221 -0
- package/dist/test/codex.test.d.ts +1 -0
- package/dist/test/codex.test.js +237 -0
- package/dist/test/dashboard.test.d.ts +1 -0
- package/dist/test/dashboard.test.js +214 -0
- package/dist/test/ensemble.test.d.ts +1 -0
- package/dist/test/ensemble.test.js +780 -0
- package/dist/test/external-executor.test.d.ts +1 -0
- package/dist/test/external-executor.test.js +273 -0
- package/dist/test/isolation.test.d.ts +1 -0
- package/dist/test/isolation.test.js +359 -0
- package/dist/test/tool-executor.test.d.ts +1 -0
- package/dist/test/tool-executor.test.js +113 -0
- package/dist/test/unified.test.d.ts +1 -0
- package/dist/test/unified.test.js +150 -0
- package/dist/tool-executor.d.ts +14 -0
- package/dist/tool-executor.js +156 -0
- package/dist/trace.d.ts +8 -0
- package/dist/trace.js +7 -0
- package/dist/unified.d.ts +101 -0
- package/dist/unified.js +422 -0
- package/dist/worktree.d.ts +25 -0
- package/dist/worktree.js +75 -0
- package/package.json +35 -0
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
import { spawnSync } from "node:child_process";
|
|
2
|
+
import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { assertJudgeSynthesisRecordV1, MODEL_FUSION_SCHEMA_BUNDLE_HASH } from "@fusionkit/protocol";
|
|
6
|
+
import { gitText } from "@fusionkit/workspace";
|
|
7
|
+
// Provenance values stamped onto every judge-synthesis record built in this module.
const PRODUCER = "handoffkit-ensemble";
const PRODUCER_VERSION = "0.1.0";
// Fixed 40-character all-zero placeholder; this module never looks up a real commit id.
const PRODUCER_GIT_SHA = "".padEnd(40, "0");
|
|
10
|
+
/**
 * Builds the envelope fields shared by every judge-synthesis record.
 *
 * @param createdAt Value stored verbatim as `created_at` (the caller in this
 *   file passes an ISO-8601 timestamp string).
 * @returns A fresh object with the schema identification fields first, the
 *   producer identification fields next, and `created_at` last.
 */
function metadata(createdAt) {
    const schemaFields = {
        schema: "judge-synthesis-record.v1",
        schema_version: "v1",
        schema_bundle_hash: MODEL_FUSION_SCHEMA_BUNDLE_HASH
    };
    const producerFields = {
        producer: PRODUCER,
        producer_version: PRODUCER_VERSION,
        producer_git_sha: PRODUCER_GIT_SHA
    };
    // Spread order fixes the key order of the resulting record.
    return { ...schemaFields, ...producerFields, created_at: createdAt };
}
|
|
21
|
+
/**
 * Replaces every character outside `A-Z a-z 0-9 _ . : -` with an underscore.
 *
 * @param value String to sanitise.
 * @returns The sanitised string; its length in UTF-16 code units is unchanged.
 */
function safeSegment(value) {
    const disallowed = /[^A-Za-z0-9_.:-]/g;
    return value.replaceAll(disallowed, "_");
}
|
|
24
|
+
/**
 * Pairs each candidate record with the harness output at the same index and
 * flattens the two into the evidence entry handed to the judge.
 *
 * @param candidates Candidate records (`candidate_id`, `status`, optional
 *   `metadata` and `artifacts`).
 * @param outputs Harness outputs, index-aligned with `candidates`; an entry
 *   may be missing.
 * @returns One evidence object per candidate. `verification` and `trajectory`
 *   are present only when the matching output carries a truthy value.
 */
function candidateEvidence(candidates, outputs) {
    return candidates.map((candidate, position) => {
        const produced = outputs[position];
        // Candidate metadata wins; the harness output's model is the fallback.
        const entry = {
            candidateId: candidate.candidate_id,
            modelId: String(candidate.metadata?.model_id ?? produced?.model.id ?? ""),
            model: String(candidate.metadata?.model ?? produced?.model.model ?? ""),
            status: candidate.status,
            artifacts: candidate.artifacts ?? []
        };
        if (produced?.verification) {
            entry.verification = produced.verification;
        }
        if (produced?.trajectory) {
            entry.trajectory = produced.trajectory;
        }
        return entry;
    });
}
|
|
38
|
+
/**
 * Creates a detached git worktree, checked out at `input.baseGitSha`, inside a
 * fresh temporary directory.
 *
 * @param input Synthesis input; reads `workspace`, `baseGitSha` and
 *   `descriptor.id`.
 * @returns Path of the new worktree (`<temp root>/final`), or `undefined` when
 *   `workspace` or `baseGitSha` is missing.
 * @throws Rethrows whatever `gitText` throws when `git worktree add` fails;
 *   the temporary root is removed first so it does not leak.
 */
function createSynthesisWorktree(input) {
    if (!input.workspace || !input.baseGitSha)
        return undefined;
    const root = mkdtempSync(join(tmpdir(), `warrant-synthesis-${safeSegment(input.descriptor.id)}-`));
    const worktree = join(root, "final");
    try {
        gitText(input.workspace, ["worktree", "add", "--detach", worktree, input.baseGitSha]);
    }
    catch (error) {
        // The caller never learns `root` when this throws, so nothing else
        // could remove the directory afterwards.
        rmSync(root, { recursive: true, force: true });
        throw error;
    }
    return worktree;
}
|
|
46
|
+
/**
 * Tears down a worktree produced by `createSynthesisWorktree`.
 *
 * Does nothing when `worktree` is falsy. Otherwise it asks git to drop the
 * worktree registration (only when `workspace` is truthy; failures are
 * tolerated via `allowFail`) and then deletes the worktree's parent directory
 * recursively.
 *
 * @param workspace Repository the worktree was registered with, if any.
 * @param worktree Worktree path, or a falsy value when none was created.
 */
function removeSynthesisWorktree(workspace, worktree) {
    if (worktree) {
        if (workspace) {
            gitText(workspace, ["worktree", "remove", "--force", worktree], { allowFail: true });
        }
        // The parent is removed as well, not just the worktree folder itself.
        const parentDir = join(worktree, "..");
        rmSync(parentDir, { recursive: true, force: true });
    }
}
|
|
53
|
+
/**
 * Applies a unified diff to a worktree with `git apply`.
 *
 * The patch is streamed to git over stdin rather than being written to a
 * `judge.patch` file inside the worktree. A file left there would be staged by
 * the later `git add -A` in `diffWorktree` and would leak into the final
 * synthesized patch.
 *
 * @param worktree Directory to apply the patch in; falsy means nothing to do.
 * @param patch Patch text; `undefined` or empty means nothing to do.
 * @returns `true` when there was nothing to apply or git exited with status 0;
 *   `false` otherwise (including when git could not be started).
 */
function applyPatch(worktree, patch) {
    if (!worktree || !patch || patch.length === 0)
        return true;
    // A trailing "-" makes `git apply` read the patch from standard input.
    const applied = spawnSync("git", ["apply", "--binary", "--whitespace=nowarn", "-"], {
        cwd: worktree,
        encoding: "utf8",
        input: patch
    });
    return applied.status === 0;
}
|
|
64
|
+
/**
 * Stages everything in the worktree and returns the staged binary diff
 * against `baseGitSha`.
 *
 * Both git invocations pass `allowFail: true`.
 *
 * @param worktree Worktree path; falsy yields an empty string.
 * @param baseGitSha Commit to diff against; falsy yields an empty string.
 * @returns Whatever `gitText` returns for the diff command, or `""` when
 *   either argument is missing.
 */
function diffWorktree(worktree, baseGitSha) {
    const canDiff = Boolean(worktree) && Boolean(baseGitSha);
    if (!canDiff) {
        return "";
    }
    // Stage first so new (untracked) files show up in the cached diff.
    gitText(worktree, ["add", "-A"], { allowFail: true });
    const stagedDiff = gitText(worktree, ["diff", "--cached", "--binary", baseGitSha], { allowFail: true });
    return stagedDiff;
}
|
|
70
|
+
/**
 * Assembles a judge-synthesis record and passes it through
 * `assertJudgeSynthesisRecordV1` before returning it.
 *
 * @param input Synthesis input; reads `descriptor.id` and `candidates`.
 * @param output Synthesizer output; `judgeModelCallId`, `selectedCandidateId`
 *   and `rationale` are copied only when truthy, `score` only when it is not
 *   `undefined`, and `finalOutput` always.
 * @param status Record status.
 * @param decision Record decision.
 * @param metrics Metrics object stored as-is.
 * @returns The record. Presumably the assertion throws on a schema violation;
 *   confirm against `@fusionkit/protocol`.
 */
function recordFor(input, output, status, decision, metrics) {
    const stampedAt = new Date().toISOString();
    const record = { ...metadata(stampedAt) };
    record.synthesis_id = `synthesis_${input.descriptor.id}`;
    record.input_candidate_ids = input.candidates.map(({ candidate_id }) => candidate_id);
    record.status = status;
    record.decision = decision;
    // Optional fields are added in this fixed order so serialised output is stable.
    if (output.judgeModelCallId) {
        record.judge_model_call_id = output.judgeModelCallId;
    }
    if (output.selectedCandidateId) {
        record.selected_candidate_id = output.selectedCandidateId;
    }
    if (output.rationale) {
        record.rationale = output.rationale;
    }
    record.final_output = output.finalOutput;
    if (output.score !== undefined) {
        record.score = output.score;
    }
    record.metrics = metrics;
    assertJudgeSynthesisRecordV1(record);
    return record;
}
|
|
89
|
+
/**
 * Returns a shallow copy of an artifact with its `path` property left out.
 *
 * @param artifact Artifact object; it is not modified.
 * @returns A new object holding every own enumerable property except `path`.
 */
function artifactRef(artifact) {
    const { path: discardedPath, ...reference } = artifact;
    return reference;
}
|
|
93
|
+
/**
 * Runs the descriptor's judge synthesizer over the collected candidates.
 *
 * Steps, in order:
 *  1. Build the judge input and persist it as a `metrics` artifact.
 *  2. Call `synthesizer.synthesize`.
 *  3. Create a worktree only when the first output has a non-empty patch or
 *     the synthesizer defines `verify` / `repair`, then apply the patch.
 *     A patch that does not apply ends the run with a `patch_conflict` failure.
 *  4. Verify; on failure make at most one repair round and verify again.
 *  5. Persist the final worktree diff (when non-empty) and the verification.
 *  6. Build the judge-synthesis record.
 * The worktree, if one was created, is removed in `finally`.
 *
 * @param input Reads `descriptor`, `candidates`, `outputs`, `artifacts`,
 *   `toolRecords`, `modelCallRecords`, optional `reviewEvidence`, `store`,
 *   `workspace` and `baseGitSha`.
 * @returns `undefined` when the descriptor has no synthesizer; otherwise an
 *   object with `judgeInput`, `artifacts`, `finalPatchPath`, `repairAttempts`,
 *   `judgeSynthesisRecord` and, on failure, `failureSummary`.
 */
export async function runJudgeSynthesis(input) {
    const { synthesizer } = input.descriptor.judge;
    if (!synthesizer) {
        return undefined;
    }
    const judgeInput = {
        descriptor: input.descriptor,
        candidates: candidateEvidence(input.candidates, input.outputs),
        artifacts: input.artifacts,
        toolRecords: input.toolRecords,
        modelCallRecords: input.modelCallRecords
    };
    if (input.reviewEvidence) {
        judgeInput.reviewEvidence = input.reviewEvidence;
    }
    const judgeInputArtifact = input.store.writeJson({
        artifactId: `${input.descriptor.id}_judge_input`,
        kind: "metrics",
        value: judgeInput
    });
    const artifacts = [artifactRef(judgeInputArtifact)];
    const repairAttempts = [];
    let finalPatchPath = null;
    let failureSummary;
    // Created lazily: a capture-only synthesizer (no patch, no verify/repair)
    // never needs a worktree, so the git add/remove round trip is skipped.
    let worktree;
    // Resolves to undefined when the synthesizer has no `verify` hook.
    const verifyOutput = (candidateOutput, repairRound) => synthesizer.verify?.({
        descriptor: input.descriptor,
        worktreePath: worktree ?? "",
        output: candidateOutput,
        repairRound
    });
    try {
        const first = await synthesizer.synthesize(judgeInput);
        const firstPatch = first.patch?.content;
        const hasPatch = firstPatch !== undefined && firstPatch.length > 0;
        if (hasPatch || synthesizer.verify !== undefined || synthesizer.repair !== undefined) {
            worktree = createSynthesisWorktree(input);
        }
        if (!applyPatch(worktree, first.patch?.content)) {
            const conflictArtifact = input.store.writeJson({
                artifactId: `${input.descriptor.id}_patch_conflict`,
                kind: "other",
                value: {
                    type: "patch_conflict",
                    sourceCandidateIds: first.patch?.sourceCandidateIds ?? [],
                    patch: first.patch?.content ?? ""
                }
            });
            artifacts.push(artifactRef(conflictArtifact));
            failureSummary = { reason: "patch_conflict" };
            const conflictRecord = recordFor(input, first, "failed", "failed", {
                contributions: first.contributions ?? [],
                rejections: first.rejections ?? [],
                failure: failureSummary
            });
            return {
                judgeInput,
                artifacts,
                finalPatchPath,
                repairAttempts,
                failureSummary,
                judgeSynthesisRecord: conflictRecord
            };
        }
        let output = first;
        let verification = (await verifyOutput(first, 0)) ??
            { status: "succeeded", evidence: ["verification not configured"], exitCode: 0 };
        if (verification.status !== "succeeded") {
            repairAttempts.push({ round: 1, verification, status: "failed" });
            if (synthesizer.repair) {
                const repaired = await synthesizer.repair({
                    ...judgeInput,
                    failureEvidence: verification,
                    priorOutput: first
                });
                // NOTE(review): the result of this apply is ignored, so a repair
                // patch that fails to apply goes unnoticed here — confirm intended.
                applyPatch(worktree, repaired.patch?.content);
                output = repaired;
                verification = (await verifyOutput(repaired, 1)) ?? verification;
                repairAttempts[0] = { round: 1, verification, status: verification.status };
            }
        }
        const finalPatch = diffWorktree(worktree, input.baseGitSha);
        if (finalPatch.length > 0) {
            const patchArtifact = input.store.writeText({
                artifactId: `${input.descriptor.id}_final_patch`,
                kind: "patch",
                content: finalPatch,
                suffix: ".patch"
            });
            artifacts.push(artifactRef(patchArtifact));
            finalPatchPath = patchArtifact.uri ?? null;
        }
        const verificationArtifact = input.store.writeJson({
            artifactId: `${input.descriptor.id}_synthesis_verification`,
            kind: "metrics",
            value: verification
        });
        artifacts.push(artifactRef(verificationArtifact));
        const verified = verification.status === "succeeded";
        if (!verified) {
            failureSummary = { reason: "repair_failed", verification, repair: verification };
        }
        let decision;
        if (verified) {
            decision = output.decision;
        }
        else if (repairAttempts.length > 0) {
            decision = "repair_required";
        }
        else {
            // NOTE(review): looks unreachable — every failed verification pushes
            // a repair attempt above; confirm before relying on "failed" here.
            decision = "failed";
        }
        const metrics = {
            contributions: output.contributions ?? [],
            rejections: output.rejections ?? [],
            verification,
            repair_attempts: repairAttempts
        };
        if (failureSummary) {
            metrics.failure = failureSummary;
        }
        const judgeSynthesisRecord = recordFor(input, output, verified ? "succeeded" : "failed", decision, metrics);
        return {
            judgeInput,
            artifacts,
            finalPatchPath,
            repairAttempts,
            ...(failureSummary ? { failureSummary } : {}),
            judgeSynthesisRecord
        };
    }
    finally {
        removeSynthesisWorktree(input.workspace, worktree);
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { createServer } from "node:http";
|
|
3
|
+
import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
|
4
|
+
import { tmpdir } from "node:os";
|
|
5
|
+
import { join } from "node:path";
|
|
6
|
+
import { test } from "node:test";
|
|
7
|
+
import { codexConfigToml, codexHarness } from "../codex.js";
|
|
8
|
+
import { createMockHarness } from "../mock.js";
|
|
9
|
+
import { ensemble } from "../run.js";
|
|
10
|
+
/**
 * Creates a throw-away output directory under the OS temp dir.
 *
 * @returns `{ outputRoot, cleanup }`, where `cleanup()` force-removes the
 *   directory recursively.
 */
function tempOutputRoot() {
    const prefix = join(tmpdir(), "ensemble-codex-out-");
    const outputRoot = mkdtempSync(prefix);
    function cleanup() {
        return rmSync(outputRoot, { recursive: true, force: true });
    }
    return { outputRoot, cleanup };
}
|
|
17
|
+
/**
 * Builds the baseline ensemble descriptor used by the Codex tests.
 *
 * @param outputRoot Directory the run writes into.
 * @param overrides Properties merged last, so they replace any default.
 * @returns A new descriptor object backed by a fresh mock harness.
 */
function descriptor(outputRoot, overrides = {}) {
    const policy = {
        id: "policy",
        allowedTools: ["read_file", "apply_patch"],
        sideEffects: "writes_workspace",
        timeoutMs: 1_000
    };
    const defaults = {
        id: "codex_ensemble_test",
        harness: createMockHarness(),
        models: [{ id: "codex", model: "gpt-5.1-codex-max" }],
        runtime: { id: "local" },
        judge: { id: "judge", model: "fake-judge" },
        policy,
        prompt: "Summarize Codex harness evidence.",
        sourceRepo: "handoffkit",
        baseGitSha: "b".repeat(40),
        outputRoot
    };
    return { ...defaults, ...overrides };
}
|
|
37
|
+
/**
 * Drains an async-iterable request and returns its chunks joined together.
 *
 * @param req Async iterable yielding the chunks accepted by `Buffer.concat`.
 * @returns A Buffer holding the whole body.
 */
async function readBody(req) {
    const parts = [];
    for await (const part of req) {
        parts.push(part);
    }
    return Buffer.concat(parts);
}
|
|
43
|
+
/**
 * Promise wrapper around `server.close`.
 *
 * @param server Server exposing a callback-style `close`.
 * @returns Resolves once the close callback fires without an error; rejects
 *   with the error the callback reports.
 */
async function closeServer(server) {
    const closed = new Promise((resolve, reject) => {
        server.close((error) => {
            if (error) {
                reject(error);
                return;
            }
            resolve();
        });
    });
    await closed;
}
|
|
48
|
+
/**
 * Starts a loopback HTTP stub exposing two OpenAI-style routes:
 * `GET /v1/models` and `POST /v1/chat/completions`. Every other request gets
 * a 404 JSON error, and a handler failure is reported as a 500 JSON error.
 *
 * @returns `{ url, requests, close }`: the base URL on 127.0.0.1 with the
 *   OS-assigned port, the live array of parsed chat-completion request bodies,
 *   and a function that shuts the server down.
 */
async function startOpenAiCompatibleServer() {
    const requests = [];
    const sendJson = (res, statusCode, payload) => {
        res.writeHead(statusCode, { "content-type": "application/json" });
        res.end(JSON.stringify(payload));
    };
    const route = async (req, res) => {
        const { pathname } = new URL(req.url ?? "/", "http://localhost");
        if (req.method === "GET" && pathname === "/v1/models") {
            sendJson(res, 200, { data: [{ id: "local-model" }] });
            return;
        }
        if (req.method === "POST" && pathname === "/v1/chat/completions") {
            const rawBody = await readBody(req);
            const body = JSON.parse(rawBody.toString("utf8"));
            requests.push(body);
            // Echo the requested model back when the client supplied one.
            const model = typeof body.model === "string" ? body.model : "local-model";
            sendJson(res, 200, {
                id: "chatcmpl_test",
                model,
                choices: [{ message: { role: "assistant", content: "gateway-ok" } }],
                usage: { prompt_tokens: 3, completion_tokens: 2, total_tokens: 5 }
            });
            return;
        }
        sendJson(res, 404, { error: { message: "not found" } });
    };
    const server = createServer((req, res) => {
        void route(req, res).catch((error) => {
            sendJson(res, 500, { error: { message: String(error) } });
        });
    });
    await new Promise((resolve, reject) => {
        const onListening = () => {
            // Startup succeeded; later errors must not reject a settled promise.
            server.off("error", reject);
            resolve();
        };
        server.once("error", reject);
        server.listen(0, "127.0.0.1", onListening);
    });
    const address = server.address();
    assert.ok(typeof address === "object" && address !== null);
    const { port } = address;
    return {
        url: `http://127.0.0.1:${port}`,
        requests,
        close: () => closeServer(server)
    };
}
|
|
93
|
+
// The rendered config must name the model, route it through the
// "warrant-codex" provider on the Responses wire API with "/v1" appended to the
// base URL, and switch OpenAI auth off.
test("codexConfigToml declares a Responses provider without requiring auth", () => {
    const rendered = codexConfigToml({
        model: "local-model",
        sandboxMode: "workspace-write",
        approvalPolicy: "never",
        provider: {
            baseUrl: "http://127.0.0.1:9000",
            requiresOpenAiAuth: false
        }
    });
    const expectedFragments = [
        'model = "local-model"',
        'model_provider = "warrant-codex"',
        "[model_providers.warrant-codex]",
        'base_url = "http://127.0.0.1:9000/v1"',
        'wire_api = "responses"',
        "requires_openai_auth = false"
    ];
    for (const fragment of expectedFragments) {
        assert.ok(rendered.includes(fragment));
    }
});
|
|
110
|
+
// With an empty CODEX_HOME and no API key in the env, the adapter must report
// a skipped run and never call the runner.
test("codex adapter skips clearly when credentials are absent", async () => {
    const { outputRoot, cleanup } = tempOutputRoot();
    const emptyCodexHome = mkdtempSync(join(tmpdir(), "ensemble-codex-empty-home-"));
    let runnerCalls = 0;
    const runner = () => {
        runnerCalls += 1;
        return { stdout: "", stderr: "", exitCode: 0 };
    };
    try {
        const harness = codexHarness({ env: { CODEX_HOME: emptyCodexHome }, runner });
        const result = await ensemble.run(descriptor(outputRoot, { harness }));
        assert.equal(runnerCalls > 0, false);
        assert.equal(result.harnessRunResult.status, "skipped");
        const [candidate] = result.candidates;
        assert.equal(candidate?.status, "skipped");
        assert.equal(candidate?.error?.kind, "capability_missing");
        assert.match(candidate?.error?.message ?? "", /CODEX_API_KEY|OPENAI_API_KEY/);
    }
    finally {
        cleanup();
        rmSync(emptyCodexHome, { recursive: true, force: true });
    }
});
|
|
133
|
+
// An auth.json in the source CODEX_HOME counts as a credential: the runner
// must see a different CODEX_HOME that contains auth.json, and no API key
// variables in its env.
test("codex adapter accepts local CLI auth without exported API keys", async () => {
    const { outputRoot, cleanup } = tempOutputRoot();
    const sourceHome = mkdtempSync(join(tmpdir(), "ensemble-codex-source-home-"));
    writeFileSync(join(sourceHome, "auth.json"), "{\"auth\":\"redacted-test-token\"}\n");
    let seenAuthFile = false;
    const runner = ({ env }) => {
        const runnerHome = env.CODEX_HOME;
        assert.ok(runnerHome);
        assert.notEqual(runnerHome, sourceHome);
        assert.equal(env.CODEX_API_KEY, undefined);
        assert.equal(env.OPENAI_API_KEY, undefined);
        seenAuthFile = existsSync(join(runnerHome, "auth.json"));
        return { stdout: "codex local auth ok", stderr: "", exitCode: 0 };
    };
    try {
        const harness = codexHarness({ env: { CODEX_HOME: sourceHome }, runner });
        const result = await ensemble.run(descriptor(outputRoot, { harness }));
        assert.equal(seenAuthFile, true);
        assert.equal(result.harnessRunResult.status, "succeeded");
        assert.equal(result.candidates[0]?.metadata?.provider_kind, "ambient");
    }
    finally {
        cleanup();
        rmSync(sourceHome, { recursive: true, force: true });
    }
});
|
|
160
|
+
// The same descriptor must run under the mock harness and under the Codex
// harness; only the `harness` property changes between the two runs.
test("generic ensemble descriptor swaps mock harness for Codex harness", async () => {
    const { outputRoot, cleanup } = tempOutputRoot();
    let seenArgs;
    let seenConfig = "";
    const runner = ({ args, env }) => {
        seenArgs = args;
        const runnerHome = env.CODEX_HOME;
        assert.ok(runnerHome);
        seenConfig = readFileSync(join(runnerHome, "config.toml"), "utf8");
        assert.equal(env.CODEX_API_KEY, "test-key");
        return { stdout: '{"type":"message","message":"codex-ok"}\n', stderr: "", exitCode: 0 };
    };
    try {
        const base = descriptor(outputRoot);
        const mock = await ensemble.run(base);
        const codexDescriptor = {
            ...base,
            harness: codexHarness({ env: { CODEX_API_KEY: "test-key" }, runner })
        };
        const codex = await ensemble.run(codexDescriptor);
        assert.equal(mock.harnessRunResult.status, "succeeded");
        assert.equal(codex.harnessRunResult.status, "succeeded");
        // The CLI call starts with the exec flags and ends with the prompt.
        assert.deepEqual(seenArgs?.slice(0, 3), ["exec", "--json", "--skip-git-repo-check"]);
        assert.equal(seenArgs?.at(-1), base.prompt);
        assert.ok(seenConfig.includes('model = "gpt-5.1-codex-max"'));
        assert.equal(codex.candidates[0]?.metadata?.provider_kind, "ambient");
    }
    finally {
        cleanup();
    }
});
|
|
190
|
+
// With an openai-compatible provider, the config given to Codex must point at
// a local gateway. The fake runner posts one Responses request to it, which
// must reach the upstream stub exactly once and produce one model-call record.
test("Codex OpenAI-compatible provider goes through Responses gateway records", async () => {
    const { outputRoot, cleanup } = tempOutputRoot();
    const upstream = await startOpenAiCompatibleServer();
    let gatewayBaseUrl;
    const runner = async ({ env }) => {
        const runnerHome = env.CODEX_HOME;
        assert.ok(runnerHome);
        const config = readFileSync(join(runnerHome, "config.toml"), "utf8");
        const match = /base_url = "([^"]+)"/.exec(config);
        assert.ok(match);
        gatewayBaseUrl = match[1];
        assert.ok(gatewayBaseUrl);
        const requestBody = JSON.stringify({
            input: "hello from fake codex",
            stream: false
        });
        const response = await fetch(`${gatewayBaseUrl}/responses`, {
            method: "POST",
            headers: { "content-type": "application/json" },
            body: requestBody
        });
        assert.equal(response.status, 200);
        return { stdout: "codex gateway ok", stderr: "", exitCode: 0 };
    };
    try {
        const harness = codexHarness({
            env: {},
            provider: {
                kind: "openai-compatible",
                baseUrl: `${upstream.url}/v1`,
                defaultModel: "local-model"
            },
            runner
        });
        const result = await ensemble.run(descriptor(outputRoot, { harness }));
        assert.match(gatewayBaseUrl ?? "", /^http:\/\/127\.0\.0\.1:\d+\/v1$/);
        assert.equal(upstream.requests.length, 1);
        assert.equal(result.harnessRunResult.status, "succeeded");
        assert.equal(result.modelCallRecords.length, 1);
        assert.equal(result.modelCallRecords[0]?.metadata?.dialect, "openai-responses");
        assert.equal(result.modelCallRecords[0]?.model, "local-model");
        assert.equal(result.candidates[0]?.metadata?.model_call_count, 1);
    }
    finally {
        await upstream.close();
        cleanup();
    }
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { test } from "node:test";
|
|
6
|
+
import { assertHarnessRunResultV1 } from "@fusionkit/protocol";
|
|
7
|
+
import { gitText } from "@fusionkit/workspace";
|
|
8
|
+
import { createHarnessCapabilityMatrix, runHarnessSmokeDashboard } from "../dashboard.js";
|
|
9
|
+
import { createMockHarness } from "../mock.js";
|
|
10
|
+
/**
 * Creates a temporary git repository with a single committed README.
 *
 * @returns `{ repo, outputRoot, cleanup }`: the repository path, a sibling
 *   `dashboard-out` path (not created here), and a function that removes the
 *   whole temporary root.
 */
function makeRepo() {
    const root = mkdtempSync(join(tmpdir(), "ensemble-dashboard-"));
    const repo = join(root, "repo");
    mkdirSync(repo);
    const git = (...args) => gitText(repo, args);
    git("init", "--quiet", "--initial-branch=main");
    // A local identity is configured before the commit below.
    git("config", "user.email", "dashboard@warrant.local");
    git("config", "user.name", "dashboard");
    writeFileSync(join(repo, "README.md"), "# dashboard\n");
    git("add", "-A");
    git("commit", "--quiet", "-m", "init");
    const outputRoot = join(root, "dashboard-out");
    const cleanup = () => rmSync(root, { recursive: true, force: true });
    return { repo, outputRoot, cleanup };
}
|
|
26
|
+
// The matrix must list the five harnesses in a fixed order, expose every
// expected capability column, and with an empty env report Cursor as missing.
test("capability matrix covers Cursor, Claude Code, Codex, command, and mock", () => {
    const matrix = createHarnessCapabilityMatrix({ env: {} });
    const rowFor = (harnessId) => matrix.rows.find((row) => row.harnessId === harnessId);
    assert.deepEqual(matrix.rows.map((row) => row.harnessId), ["cursor", "claude-code", "codex", "command", "mock"]);
    const expectedCapabilities = [
        "model_override",
        "transcript_capture",
        "diff_capture",
        "tool_loop_capture",
        "patch_apply_visibility",
        "route_model_observation",
        "verification_hint",
        "replay_support",
        "workspace_read",
        "verification"
    ];
    for (const capability of expectedCapabilities) {
        assert.ok(matrix.capabilities.includes(capability));
    }
    assert.equal(rowFor("cursor")?.availability, "missing");
    assert.equal(rowFor("claude-code")?.harnessKind, "claude_code");
    assert.equal(rowFor("codex")?.harnessKind, "codex");
});
|
|
44
|
+
// A default run must emit six schema-valid records (in memory and on disk)
// covering every status, plus a markdown dashboard with the expected sections.
// NOTE(review): unlike the other dashboard tests, no `env` is passed here, so
// the outcome presumably depends on the ambient environment — confirm.
test("smoke dashboard writes schema-valid success, failure, skipped, and missing records", async () => {
    const fixture = makeRepo();
    try {
        const dashboard = await runHarnessSmokeDashboard({
            repo: fixture.repo,
            outputRoot: fixture.outputRoot,
            timeoutMs: 1_000,
            createdAt: "2026-06-16T00:00:00.000Z"
        });
        const findRecord = (taskId) => dashboard.records.find((record) => record.taskId === taskId);
        assert.equal(dashboard.records.length, 6);
        assert.equal(existsSync(dashboard.dashboardPath), true);
        for (const { result, resultPath } of dashboard.records) {
            assertHarnessRunResultV1(result);
            assert.equal(existsSync(resultPath), true);
            // The persisted copy must be valid as well, not only the in-memory one.
            const written = JSON.parse(readFileSync(resultPath, "utf8"));
            assertHarnessRunResultV1(written);
        }
        const statuses = dashboard.records.map((record) => record.result.status).sort();
        assert.deepEqual(statuses, [
            "failed",
            "skipped",
            "skipped",
            "succeeded",
            "succeeded",
            "unsupported"
        ]);
        assert.equal(findRecord("claude-code-skipped")?.result.harness_kind, "claude_code");
        assert.equal(findRecord("codex-skipped")?.result.harness_kind, "codex");
        assert.equal(findRecord("cursor-missing")?.result.errors?.[0]?.kind, "capability_missing");
        const markdown = readFileSync(dashboard.dashboardPath, "utf8");
        const expectedPatterns = [
            /# HandoffKit Harness Smoke Dashboard/,
            /## Capability Matrix/,
            /## Adapter Readiness/,
            /contract\/mock ready/,
            /credentials missing\/skipped/,
            /live smoke not requested/,
            /command-failure/,
            /cursor-missing/,
            /harness-run-results\/mock-success\.json/
        ];
        for (const pattern of expectedPatterns) {
            assert.match(markdown, pattern);
        }
        assert.equal(dashboard.readiness.length, 5);
    }
    finally {
        fixture.cleanup();
    }
});
|
|
91
|
+
// Requesting live smoke with an empty env must not add any record: the count
// stays at six and no record has purpose "live".
test("smoke dashboard only adds live records when explicit smoke env is enabled", async () => {
    const fixture = makeRepo();
    try {
        const options = {
            repo: fixture.repo,
            outputRoot: fixture.outputRoot,
            timeoutMs: 1_000,
            createdAt: "2026-06-16T00:00:00.000Z",
            env: {},
            liveSmoke: ["claude-code", "codex"]
        };
        const dashboard = await runHarnessSmokeDashboard(options);
        assert.equal(dashboard.records.length, 6);
        const hasLiveRecord = dashboard.records.some((record) => record.purpose === "live");
        assert.equal(hasLiveRecord, false);
    }
    finally {
        fixture.cleanup();
    }
});
|
|
109
|
+
// With WARRANT_CLAUDE_SMOKE set and nothing else in the env, the requested
// live run must be recorded as a failure that happened before launch.
test("explicit live smoke without credentials records a failed preflight", async () => {
    const fixture = makeRepo();
    try {
        const options = {
            repo: fixture.repo,
            outputRoot: fixture.outputRoot,
            timeoutMs: 1_000,
            createdAt: "2026-06-16T00:00:00.000Z",
            env: { WARRANT_CLAUDE_SMOKE: "1" },
            liveSmoke: ["claude-code"]
        };
        const dashboard = await runHarnessSmokeDashboard(options);
        const live = dashboard.records.find((record) => record.taskId === "claude-code-live");
        assert.equal(live?.purpose, "live");
        assert.equal(live?.result.status, "failed");
        assert.match(live?.result.output_summary ?? "", /Explicit live smoke failed before launch/);
        const claudeReadiness = dashboard.readiness.find((row) => row.harnessId === "claude-code");
        assert.equal(claudeReadiness?.liveSmoke, "live smoke failed");
    }
    finally {
        fixture.cleanup();
    }
});
|
|
130
|
+
// Live smoke runs against injected mock harnesses. The markdown may cite
// synthetic artifacts by kind/id/hash, must say that raw artifact refs were
// withheld, and must not contain the transcript text or the log file URIs.
test("live smoke readiness reports sanitized local evidence refs", async () => {
    const fixture = makeRepo();
    const privateTranscript = "raw private transcript should not render";
    const artifact = (artifactId, kind, hashChar, uri, redactionStatus) => ({
        artifact_id: artifactId,
        kind,
        hash: `sha256:${hashChar.repeat(64)}`,
        uri,
        redaction_status: redactionStatus
    });
    try {
        const claudeHarness = {
            ...createMockHarness({
                id: "claude-code-live-mock",
                candidates: {
                    claude: {
                        transcript: privateTranscript,
                        artifacts: [
                            artifact("claude_safe_log", "log", "a", "file:///tmp/private-claude.log", "synthetic"),
                            artifact("claude_raw_transcript", "transcript", "b", "file:///tmp/raw-claude.txt", "raw")
                        ]
                    }
                }
            }),
            harnessKind: "claude_code"
        };
        const codexHarness = {
            ...createMockHarness({
                id: "codex-live-mock",
                candidates: {
                    codex: {
                        transcript: "codex private transcript should not render",
                        artifacts: [
                            artifact("codex_safe_log", "log", "c", "file:///tmp/private-codex.log", "synthetic")
                        ]
                    }
                }
            }),
            harnessKind: "codex"
        };
        const dashboard = await runHarnessSmokeDashboard({
            repo: fixture.repo,
            outputRoot: fixture.outputRoot,
            timeoutMs: 1_000,
            createdAt: "2026-06-16T00:00:00.000Z",
            env: {
                WARRANT_ENSEMBLE_LIVE_SMOKE: "1",
                VERCEL_TOKEN: "vercel-test",
                ANTHROPIC_API_KEY: "anthropic-test",
                CODEX_API_KEY: "codex-test"
            },
            liveSmoke: ["claude-code", "codex"],
            liveSmokeHarnesses: {
                "claude-code": claudeHarness,
                codex: codexHarness
            }
        });
        const findRecord = (taskId) => dashboard.records.find((record) => record.taskId === taskId);
        const readinessFor = (harnessId) => dashboard.readiness.find((row) => row.harnessId === harnessId);
        // Two live records on top of the six baseline ones.
        assert.equal(dashboard.records.length, 8);
        assert.equal(findRecord("claude-code-live")?.result.status, "succeeded");
        assert.equal(findRecord("codex-live")?.result.status, "succeeded");
        assert.equal(readinessFor("claude-code")?.liveSmoke, "live smoke passed");
        assert.equal(readinessFor("codex")?.liveSmoke, "live smoke passed");
        const markdown = readFileSync(dashboard.dashboardPath, "utf8");
        assert.match(markdown, /log:claude_safe_log:sha256/);
        assert.match(markdown, /log:codex_safe_log:sha256/);
        assert.match(markdown, /raw artifact ref\(s\) withheld/);
        assert.equal(markdown.includes(privateTranscript), false);
        assert.equal(markdown.includes("file:///tmp/private-claude.log"), false);
        assert.equal(markdown.includes("file:///tmp/private-codex.log"), false);
    }
    finally {
        fixture.cleanup();
    }
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|