@fusionkit/ensemble 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51):
  1. package/dist/agent.d.ts +21 -0
  2. package/dist/agent.js +186 -0
  3. package/dist/artifacts.d.ts +21 -0
  4. package/dist/artifacts.js +36 -0
  5. package/dist/claude-code.d.ts +25 -0
  6. package/dist/claude-code.js +398 -0
  7. package/dist/codex.d.ts +69 -0
  8. package/dist/codex.js +467 -0
  9. package/dist/command.d.ts +15 -0
  10. package/dist/command.js +82 -0
  11. package/dist/dashboard.d.ts +62 -0
  12. package/dist/dashboard.js +788 -0
  13. package/dist/external-executor.d.ts +56 -0
  14. package/dist/external-executor.js +288 -0
  15. package/dist/harness.d.ts +337 -0
  16. package/dist/harness.js +1 -0
  17. package/dist/index.d.ts +30 -0
  18. package/dist/index.js +15 -0
  19. package/dist/isolation.d.ts +25 -0
  20. package/dist/isolation.js +509 -0
  21. package/dist/judge.d.ts +77 -0
  22. package/dist/judge.js +16 -0
  23. package/dist/mock.d.ts +20 -0
  24. package/dist/mock.js +56 -0
  25. package/dist/run.d.ts +5 -0
  26. package/dist/run.js +520 -0
  27. package/dist/synthesis.d.ts +25 -0
  28. package/dist/synthesis.js +221 -0
  29. package/dist/test/codex.test.d.ts +1 -0
  30. package/dist/test/codex.test.js +237 -0
  31. package/dist/test/dashboard.test.d.ts +1 -0
  32. package/dist/test/dashboard.test.js +214 -0
  33. package/dist/test/ensemble.test.d.ts +1 -0
  34. package/dist/test/ensemble.test.js +780 -0
  35. package/dist/test/external-executor.test.d.ts +1 -0
  36. package/dist/test/external-executor.test.js +273 -0
  37. package/dist/test/isolation.test.d.ts +1 -0
  38. package/dist/test/isolation.test.js +359 -0
  39. package/dist/test/tool-executor.test.d.ts +1 -0
  40. package/dist/test/tool-executor.test.js +113 -0
  41. package/dist/test/unified.test.d.ts +1 -0
  42. package/dist/test/unified.test.js +150 -0
  43. package/dist/tool-executor.d.ts +14 -0
  44. package/dist/tool-executor.js +156 -0
  45. package/dist/trace.d.ts +8 -0
  46. package/dist/trace.js +7 -0
  47. package/dist/unified.d.ts +101 -0
  48. package/dist/unified.js +422 -0
  49. package/dist/worktree.d.ts +25 -0
  50. package/dist/worktree.js +75 -0
  51. package/package.json +35 -0
@@ -0,0 +1,221 @@
1
+ import { spawnSync } from "node:child_process";
2
+ import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
3
+ import { tmpdir } from "node:os";
4
+ import { join } from "node:path";
5
+ import { assertJudgeSynthesisRecordV1, MODEL_FUSION_SCHEMA_BUNDLE_HASH } from "@fusionkit/protocol";
6
+ import { gitText } from "@fusionkit/workspace";
7
// 40-character string of zeros reported as `producer_git_sha` in record metadata.
// NOTE(review): looks like a placeholder rather than a real commit id — confirm.
+ const PRODUCER_GIT_SHA = "0".repeat(40);
8
// Producer name copied into the `producer` field of every record's metadata.
+ const PRODUCER = "handoffkit-ensemble";
9
// Producer version copied into the `producer_version` field of every record's metadata.
+ const PRODUCER_VERSION = "0.1.0";
10
/**
 * Builds the envelope fields placed at the front of a judge-synthesis record.
 *
 * @param {string} createdAt - Value stored verbatim as `created_at`.
 * @returns {object} Schema identification, producer identification and the
 *   creation timestamp, in that key order.
 */
function metadata(createdAt) {
    const envelope = {};
    envelope.schema = "judge-synthesis-record.v1";
    envelope.schema_version = "v1";
    envelope.schema_bundle_hash = MODEL_FUSION_SCHEMA_BUNDLE_HASH;
    envelope.producer = PRODUCER;
    envelope.producer_version = PRODUCER_VERSION;
    envelope.producer_git_sha = PRODUCER_GIT_SHA;
    envelope.created_at = createdAt;
    return envelope;
}
21
/**
 * Replaces every UTF-16 code unit that is not a letter, digit, `_`, `.`, `:`
 * or `-` with an underscore.
 *
 * @param {string} value - Text to sanitize.
 * @returns {string} The sanitized text, same length as the input.
 */
function safeSegment(value) {
    const allowedUnit = /^[A-Za-z0-9_.:-]$/;
    // Split by code unit (not code point) so each half of a surrogate pair
    // yields its own underscore.
    return value
        .split("")
        .map((unit) => (allowedUnit.test(unit) ? unit : "_"))
        .join("");
}
24
/**
 * Pairs each candidate with the output at the same index and flattens the
 * pair into the evidence entry handed to the judge.
 *
 * @param {Array<object>} candidates - Candidate records; `candidate_id` and
 *   `status` are copied, `metadata` and `artifacts` are optional.
 * @param {Array<object>} outputs - Outputs looked up by candidate index; an
 *   entry may be missing.
 * @returns {Array<object>} One evidence entry per candidate. `verification`
 *   and `trajectory` are present only when the matching output has a truthy
 *   value for them.
 */
function candidateEvidence(candidates, outputs) {
    return candidates.map((candidate, position) => {
        const produced = outputs[position];
        // Candidate metadata wins over the output's model info; an empty
        // string is the last resort.
        const entry = {
            candidateId: candidate.candidate_id,
            modelId: String(candidate.metadata?.model_id ?? produced?.model.id ?? ""),
            model: String(candidate.metadata?.model ?? produced?.model.model ?? ""),
            status: candidate.status,
            artifacts: candidate.artifacts ?? []
        };
        if (produced?.verification) {
            entry.verification = produced.verification;
        }
        if (produced?.trajectory) {
            entry.trajectory = produced.trajectory;
        }
        return entry;
    });
}
38
/**
 * Creates a detached git worktree at `input.baseGitSha` inside a fresh
 * temporary directory.
 *
 * @param {object} input - Synthesis input; reads `workspace`, `baseGitSha`
 *   and `descriptor.id`.
 * @returns {string|undefined} Path of the new worktree (`<temp root>/final`),
 *   or `undefined` when no workspace or base SHA is configured.
 * @throws Whatever `gitText` throws when `git worktree add` fails; the
 *   temporary root is removed before the error propagates.
 */
function createSynthesisWorktree(input) {
    if (!input.workspace || !input.baseGitSha)
        return undefined;
    const root = mkdtempSync(join(tmpdir(), `warrant-synthesis-${safeSegment(input.descriptor.id)}-`));
    const worktree = join(root, "final");
    try {
        gitText(input.workspace, ["worktree", "add", "--detach", worktree, input.baseGitSha]);
    }
    catch (error) {
        // The caller only learns the path from the return value, so a failure
        // here would otherwise leave the temporary root behind forever.
        rmSync(root, { recursive: true, force: true });
        throw error;
    }
    return worktree;
}
46
/**
 * Tears down a synthesis worktree and the directory that contains it.
 *
 * @param {string|undefined} workspace - Repository the worktree was added to;
 *   when set, `git worktree remove --force` is attempted with failures allowed.
 * @param {string|undefined} worktree - Worktree path; nothing happens when it
 *   is falsy.
 * @returns {void}
 */
function removeSynthesisWorktree(workspace, worktree) {
    if (worktree) {
        if (workspace) {
            gitText(workspace, ["worktree", "remove", "--force", worktree], { allowFail: true });
        }
        // The parent of the worktree is deleted as well, whether or not git
        // removed the worktree itself.
        const containingDir = join(worktree, "..");
        rmSync(containingDir, { recursive: true, force: true });
    }
}
53
/**
 * Applies a patch to the synthesis worktree with `git apply`.
 *
 * The patch is passed on standard input instead of being written to a file
 * inside the worktree, so no scratch file is left behind to be staged when the
 * worktree is later diffed.
 *
 * @param {string|undefined} worktree - Directory to run `git apply` in.
 * @param {string|undefined} patch - Patch text; falsy or empty means there is
 *   nothing to apply.
 * @returns {boolean} `true` when there was nothing to apply or `git apply`
 *   exited with status 0, otherwise `false`.
 */
function applyPatch(worktree, patch) {
    if (!worktree || !patch || patch.length === 0)
        return true;
    // "-" tells git apply to read the patch from standard input.
    const applied = spawnSync("git", ["apply", "--binary", "--whitespace=nowarn", "-"], {
        cwd: worktree,
        input: patch,
        encoding: "utf8"
    });
    return applied.status === 0;
}
64
/**
 * Stages everything in the worktree and returns the staged binary diff
 * against the base commit.
 *
 * @param {string|undefined} worktree - Worktree to diff.
 * @param {string|undefined} baseGitSha - Commit to diff against.
 * @returns {string} Output of `git diff --cached --binary <baseGitSha>`, or
 *   the empty string when either argument is falsy.
 */
function diffWorktree(worktree, baseGitSha) {
    const canDiff = Boolean(worktree) && Boolean(baseGitSha);
    if (canDiff) {
        // Both git calls run with allowFail set.
        gitText(worktree, ["add", "-A"], { allowFail: true });
        const stagedDiffArgs = ["diff", "--cached", "--binary", baseGitSha];
        return gitText(worktree, stagedDiffArgs, { allowFail: true });
    }
    return "";
}
70
/**
 * Assembles a judge-synthesis record and validates it with
 * `assertJudgeSynthesisRecordV1` before returning it.
 *
 * @param {object} input - Synthesis input; reads `descriptor.id` and
 *   `candidates[].candidate_id`.
 * @param {object} output - Synthesizer output; `judgeModelCallId`,
 *   `selectedCandidateId` and `rationale` are copied only when truthy, `score`
 *   only when not `undefined`, and `finalOutput` always.
 * @param {string} status - Stored as `status`.
 * @param {string} decision - Stored as `decision`.
 * @param {object} metrics - Stored as `metrics`.
 * @returns {object} The validated record.
 * @throws Whatever `assertJudgeSynthesisRecordV1` throws for an invalid record.
 */
function recordFor(input, output, status, decision, metrics) {
    // Keys are added in a fixed order so the record's layout is stable.
    const record = { ...metadata(new Date().toISOString()) };
    record.synthesis_id = `synthesis_${input.descriptor.id}`;
    record.input_candidate_ids = input.candidates.map((candidate) => candidate.candidate_id);
    record.status = status;
    record.decision = decision;
    if (output.judgeModelCallId) {
        record.judge_model_call_id = output.judgeModelCallId;
    }
    if (output.selectedCandidateId) {
        record.selected_candidate_id = output.selectedCandidateId;
    }
    if (output.rationale) {
        record.rationale = output.rationale;
    }
    record.final_output = output.finalOutput;
    if (output.score !== undefined) {
        record.score = output.score;
    }
    record.metrics = metrics;
    assertJudgeSynthesisRecordV1(record);
    return record;
}
89
/**
 * Returns a shallow copy of an artifact without its `path` property.
 *
 * @param {object} artifact - Stored artifact description.
 * @returns {object} Copy of the artifact's own enumerable properties except
 *   `path`.
 */
function artifactRef(artifact) {
    const ref = { ...artifact };
    delete ref.path;
    return ref;
}
93
/**
 * Runs the descriptor's judge synthesizer over the candidate evidence,
 * optionally verifies and repairs its output in a scratch worktree, and
 * returns the stored artifacts together with a judge-synthesis record.
 *
 * @param {object} input - Reads `descriptor`, `candidates`, `outputs`,
 *   `artifacts`, `toolRecords`, `modelCallRecords`, optional `reviewEvidence`,
 *   `store` (`writeJson` / `writeText`), `workspace` and `baseGitSha`.
 * @returns {Promise<object|undefined>} `undefined` when the descriptor has no
 *   synthesizer; otherwise `{ judgeInput, artifacts, finalPatchPath,
 *   repairAttempts, failureSummary?, judgeSynthesisRecord }`.
 */
export async function runJudgeSynthesis(input) {
    const synthesizer = input.descriptor.judge.synthesizer;
    if (!synthesizer) {
        return undefined;
    }

    const judgeInput = {
        descriptor: input.descriptor,
        candidates: candidateEvidence(input.candidates, input.outputs),
        artifacts: input.artifacts,
        toolRecords: input.toolRecords,
        modelCallRecords: input.modelCallRecords
    };
    if (input.reviewEvidence) {
        judgeInput.reviewEvidence = input.reviewEvidence;
    }

    // The synthesis worktree is created lazily: a capture-only synthesizer
    // (the panel trajectory capture used by `runFusionPanels`) produces no
    // patch and has no verify/repair, so it never needs one and the git
    // add/remove round trip is skipped.
    let worktree;

    // Writes a JSON artifact named after the descriptor and returns its ref.
    const storeJson = (suffix, kind, value) => artifactRef(input.store.writeJson({
        artifactId: `${input.descriptor.id}_${suffix}`,
        kind,
        value
    }));
    // Calls the optional verify hook for one round; yields undefined when the
    // synthesizer has no verify hook.
    const runVerify = (candidateOutput, repairRound) => synthesizer.verify?.({
        descriptor: input.descriptor,
        worktreePath: worktree ?? "",
        output: candidateOutput,
        repairRound
    });

    const artifacts = [storeJson("judge_input", "metrics", judgeInput)];
    const repairAttempts = [];
    let finalPatchPath = null;
    let failureSummary;
    try {
        const first = await synthesizer.synthesize(judgeInput);
        const hasPatchContent = first.patch?.content !== undefined && first.patch.content.length > 0;
        if (hasPatchContent || synthesizer.verify !== undefined || synthesizer.repair !== undefined) {
            worktree = createSynthesisWorktree(input);
        }

        if (!applyPatch(worktree, first.patch?.content)) {
            // The first patch did not apply: record the conflict and stop.
            artifacts.push(storeJson("patch_conflict", "other", {
                type: "patch_conflict",
                sourceCandidateIds: first.patch?.sourceCandidateIds ?? [],
                patch: first.patch?.content ?? ""
            }));
            failureSummary = { reason: "patch_conflict" };
            const conflictRecord = recordFor(input, first, "failed", "failed", {
                contributions: first.contributions ?? [],
                rejections: first.rejections ?? [],
                failure: failureSummary
            });
            return {
                judgeInput,
                artifacts,
                finalPatchPath,
                repairAttempts,
                failureSummary,
                judgeSynthesisRecord: conflictRecord
            };
        }

        const unconfigured = { status: "succeeded", evidence: ["verification not configured"], exitCode: 0 };
        let verification = (await runVerify(first, 0)) ?? unconfigured;
        let output = first;
        if (verification.status !== "succeeded") {
            repairAttempts.push({ round: 1, verification, status: "failed" });
            if (synthesizer.repair) {
                const repaired = await synthesizer.repair({
                    ...judgeInput,
                    failureEvidence: verification,
                    priorOutput: first
                });
                applyPatch(worktree, repaired.patch?.content);
                output = repaired;
                verification = (await runVerify(repaired, 1)) ?? verification;
                // Replace the provisional attempt with the post-repair result.
                repairAttempts[0] = { round: 1, verification, status: verification.status };
            }
        }

        const finalPatch = diffWorktree(worktree, input.baseGitSha);
        if (finalPatch.length > 0) {
            const patchArtifact = input.store.writeText({
                artifactId: `${input.descriptor.id}_final_patch`,
                kind: "patch",
                content: finalPatch,
                suffix: ".patch"
            });
            artifacts.push(artifactRef(patchArtifact));
            finalPatchPath = patchArtifact.uri ?? null;
        }
        artifacts.push(storeJson("synthesis_verification", "metrics", verification));

        const succeeded = verification.status === "succeeded";
        if (!succeeded) {
            failureSummary = { reason: "repair_failed", verification, repair: verification };
        }
        let decision;
        if (succeeded) {
            decision = output.decision;
        }
        else if (repairAttempts.length > 0) {
            decision = "repair_required";
        }
        else {
            decision = "failed";
        }

        const metrics = {
            contributions: output.contributions ?? [],
            rejections: output.rejections ?? [],
            verification,
            repair_attempts: repairAttempts
        };
        if (failureSummary) {
            metrics.failure = failureSummary;
        }
        const result = { judgeInput, artifacts, finalPatchPath, repairAttempts };
        if (failureSummary) {
            result.failureSummary = failureSummary;
        }
        result.judgeSynthesisRecord = recordFor(input, output, succeeded ? "succeeded" : "failed", decision, metrics);
        return result;
    }
    finally {
        removeSynthesisWorktree(input.workspace, worktree);
    }
}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,237 @@
1
+ import assert from "node:assert/strict";
2
+ import { createServer } from "node:http";
3
+ import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
4
+ import { tmpdir } from "node:os";
5
+ import { join } from "node:path";
6
+ import { test } from "node:test";
7
+ import { codexConfigToml, codexHarness } from "../codex.js";
8
+ import { createMockHarness } from "../mock.js";
9
+ import { ensemble } from "../run.js";
10
/**
 * Creates a temporary output directory for one test.
 *
 * @returns {{outputRoot: string, cleanup: Function}} The directory path and a
 *   function that deletes it recursively.
 */
function tempOutputRoot() {
    const prefix = join(tmpdir(), "ensemble-codex-out-");
    const outputRoot = mkdtempSync(prefix);
    const cleanup = () => rmSync(outputRoot, { recursive: true, force: true });
    return { outputRoot, cleanup };
}
17
/**
 * Builds the ensemble descriptor shared by the Codex tests.
 *
 * @param {string} outputRoot - Directory stored as `outputRoot`.
 * @param {object} [overrides] - Properties that replace the defaults.
 * @returns {object} Descriptor using the mock harness unless overridden.
 */
function descriptor(outputRoot, overrides = {}) {
    const policy = {
        id: "policy",
        allowedTools: ["read_file", "apply_patch"],
        sideEffects: "writes_workspace",
        timeoutMs: 1_000
    };
    const defaults = {
        id: "codex_ensemble_test",
        harness: createMockHarness(),
        models: [{ id: "codex", model: "gpt-5.1-codex-max" }],
        runtime: { id: "local" },
        judge: { id: "judge", model: "fake-judge" },
        policy,
        prompt: "Summarize Codex harness evidence.",
        sourceRepo: "handoffkit",
        baseGitSha: "b".repeat(40),
        outputRoot
    };
    return { ...defaults, ...overrides };
}
37
/**
 * Collects every chunk of an async-iterable request into a single Buffer.
 *
 * @param {AsyncIterable<Buffer>} req - Request stream to drain.
 * @returns {Promise<Buffer>} Concatenation of all chunks in arrival order.
 */
async function readBody(req) {
    const received = [];
    for await (const piece of req) {
        received.push(piece);
    }
    return Buffer.concat(received);
}
43
/**
 * Promise wrapper around `server.close`.
 *
 * @param {{close: Function}} server - Server to shut down.
 * @returns {Promise<void>} Resolves once the close callback fires without an
 *   error; rejects with the error passed to the callback otherwise.
 */
async function closeServer(server) {
    await new Promise((resolve, reject) => {
        const onClosed = (error) => {
            if (error) {
                reject(error);
                return;
            }
            resolve();
        };
        server.close(onClosed);
    });
}
48
/**
 * Starts a loopback HTTP server that imitates a small part of an
 * OpenAI-compatible API: `GET /v1/models` and `POST /v1/chat/completions`.
 * Any other request gets a 404 JSON error, and a handler failure gets a 500.
 *
 * @returns {Promise<{url: string, requests: Array<object>, close: Function}>}
 *   The base URL, the parsed chat-completion request bodies received so far,
 *   and a function that closes the server.
 */
async function startOpenAiCompatibleServer() {
    const requests = [];
    const sendJson = (res, statusCode, payload) => {
        res.writeHead(statusCode, { "content-type": "application/json" });
        res.end(JSON.stringify(payload));
    };
    const handle = async (req, res) => {
        const { pathname } = new URL(req.url ?? "/", "http://localhost");
        if (req.method === "GET" && pathname === "/v1/models") {
            sendJson(res, 200, { data: [{ id: "local-model" }] });
            return;
        }
        if (req.method === "POST" && pathname === "/v1/chat/completions") {
            const rawBody = await readBody(req);
            const body = JSON.parse(rawBody.toString("utf8"));
            requests.push(body);
            // Echo the requested model back, defaulting when none was sent.
            const model = typeof body.model === "string" ? body.model : "local-model";
            sendJson(res, 200, {
                id: "chatcmpl_test",
                model,
                choices: [{ message: { role: "assistant", content: "gateway-ok" } }],
                usage: { prompt_tokens: 3, completion_tokens: 2, total_tokens: 5 }
            });
            return;
        }
        sendJson(res, 404, { error: { message: "not found" } });
    };
    const server = createServer((req, res) => {
        void handle(req, res).catch((error) => {
            sendJson(res, 500, { error: { message: String(error) } });
        });
    });
    // Port 0 asks the OS for a free port; a listen error rejects the wait.
    await new Promise((resolve, reject) => {
        server.once("error", reject);
        server.listen(0, "127.0.0.1", () => {
            server.off("error", reject);
            resolve();
        });
    });
    const address = server.address();
    assert.ok(typeof address === "object" && address !== null);
    const url = `http://127.0.0.1:${address.port}`;
    return {
        url,
        requests,
        close: () => closeServer(server)
    };
}
93
// The rendered config.toml must name the model, select the "warrant-codex"
// provider, point it at the base URL with "/v1" appended, use the Responses
// wire API and disable the OpenAI auth requirement.
test("codexConfigToml declares a Responses provider without requiring auth", () => {
    const provider = {
        baseUrl: "http://127.0.0.1:9000",
        requiresOpenAiAuth: false
    };
    const rendered = codexConfigToml({
        model: "local-model",
        sandboxMode: "workspace-write",
        approvalPolicy: "never",
        provider
    });
    const has = (fragment) => rendered.includes(fragment);
    assert.ok(has('model = "local-model"'));
    assert.ok(has('model_provider = "warrant-codex"'));
    assert.ok(has("[model_providers.warrant-codex]"));
    assert.ok(has('base_url = "http://127.0.0.1:9000/v1"'));
    assert.ok(has('wire_api = "responses"'));
    assert.ok(has("requires_openai_auth = false"));
});
110
// With an empty CODEX_HOME and no API key in the harness env, the run must be
// reported as skipped with a capability_missing error, and the runner must
// never be called.
test("codex adapter skips clearly when credentials are absent", async () => {
    const { outputRoot, cleanup } = tempOutputRoot();
    const emptyCodexHome = mkdtempSync(join(tmpdir(), "ensemble-codex-empty-home-"));
    let runnerWasCalled = false;
    const runner = () => {
        runnerWasCalled = true;
        return { stdout: "", stderr: "", exitCode: 0 };
    };
    try {
        const harness = codexHarness({ env: { CODEX_HOME: emptyCodexHome }, runner });
        const result = await ensemble.run(descriptor(outputRoot, { harness }));
        const [firstCandidate] = result.candidates;
        assert.equal(runnerWasCalled, false);
        assert.equal(result.harnessRunResult.status, "skipped");
        assert.equal(firstCandidate?.status, "skipped");
        assert.equal(firstCandidate?.error?.kind, "capability_missing");
        assert.match(firstCandidate?.error?.message ?? "", /CODEX_API_KEY|OPENAI_API_KEY/);
    }
    finally {
        cleanup();
        rmSync(emptyCodexHome, { recursive: true, force: true });
    }
});
133
// A CODEX_HOME containing auth.json counts as credentials: the runner must see
// a different CODEX_HOME that still contains auth.json, with no API key
// variables set, and the run must succeed with provider kind "ambient".
test("codex adapter accepts local CLI auth without exported API keys", async () => {
    const { outputRoot, cleanup } = tempOutputRoot();
    const sourceHome = mkdtempSync(join(tmpdir(), "ensemble-codex-source-home-"));
    writeFileSync(join(sourceHome, "auth.json"), "{\"auth\":\"redacted-test-token\"}\n");
    let authFileVisible = false;
    const runner = ({ env }) => {
        const runnerHome = env.CODEX_HOME;
        assert.ok(runnerHome);
        assert.notEqual(runnerHome, sourceHome);
        assert.equal(env.CODEX_API_KEY, undefined);
        assert.equal(env.OPENAI_API_KEY, undefined);
        authFileVisible = existsSync(join(runnerHome, "auth.json"));
        return { stdout: "codex local auth ok", stderr: "", exitCode: 0 };
    };
    try {
        const harness = codexHarness({ env: { CODEX_HOME: sourceHome }, runner });
        const result = await ensemble.run(descriptor(outputRoot, { harness }));
        assert.equal(authFileVisible, true);
        assert.equal(result.harnessRunResult.status, "succeeded");
        assert.equal(result.candidates[0]?.metadata?.provider_kind, "ambient");
    }
    finally {
        cleanup();
        rmSync(sourceHome, { recursive: true, force: true });
    }
});
160
// The same descriptor must run under the mock harness and, with only the
// harness replaced, under the Codex harness; the Codex runner must receive the
// expected CLI arguments, the API key and a config naming the model.
test("generic ensemble descriptor swaps mock harness for Codex harness", async () => {
    const { outputRoot, cleanup } = tempOutputRoot();
    let capturedArgs;
    let capturedConfig = "";
    const runner = ({ args, env }) => {
        capturedArgs = args;
        const runnerHome = env.CODEX_HOME;
        assert.ok(runnerHome);
        capturedConfig = readFileSync(join(runnerHome, "config.toml"), "utf8");
        assert.equal(env.CODEX_API_KEY, "test-key");
        return { stdout: '{"type":"message","message":"codex-ok"}\n', stderr: "", exitCode: 0 };
    };
    try {
        const base = descriptor(outputRoot);
        const mockRun = await ensemble.run(base);
        const harness = codexHarness({ env: { CODEX_API_KEY: "test-key" }, runner });
        const codexRun = await ensemble.run({ ...base, harness });
        assert.equal(mockRun.harnessRunResult.status, "succeeded");
        assert.equal(codexRun.harnessRunResult.status, "succeeded");
        assert.deepEqual(capturedArgs?.slice(0, 3), ["exec", "--json", "--skip-git-repo-check"]);
        // The prompt is passed as the final CLI argument.
        assert.equal(capturedArgs?.at(-1), base.prompt);
        assert.ok(capturedConfig.includes('model = "gpt-5.1-codex-max"'));
        assert.equal(codexRun.candidates[0]?.metadata?.provider_kind, "ambient");
    }
    finally {
        cleanup();
    }
});
190
// With an openai-compatible provider, the config handed to Codex must point at
// a local gateway URL. The fake runner posts one Responses request to that
// gateway; exactly one request must reach the upstream server and exactly one
// model-call record must be produced.
test("Codex OpenAI-compatible provider goes through Responses gateway records", async () => {
    const { outputRoot, cleanup } = tempOutputRoot();
    const upstream = await startOpenAiCompatibleServer();
    let gatewayBaseUrl;
    const runner = async ({ env }) => {
        const runnerHome = env.CODEX_HOME;
        assert.ok(runnerHome);
        const config = readFileSync(join(runnerHome, "config.toml"), "utf8");
        const match = /base_url = "([^"]+)"/.exec(config);
        assert.ok(match);
        gatewayBaseUrl = match[1];
        assert.ok(gatewayBaseUrl);
        const payload = {
            input: "hello from fake codex",
            stream: false
        };
        const response = await fetch(`${gatewayBaseUrl}/responses`, {
            method: "POST",
            headers: { "content-type": "application/json" },
            body: JSON.stringify(payload)
        });
        assert.equal(response.status, 200);
        return { stdout: "codex gateway ok", stderr: "", exitCode: 0 };
    };
    try {
        const harness = codexHarness({
            env: {},
            provider: {
                kind: "openai-compatible",
                baseUrl: `${upstream.url}/v1`,
                defaultModel: "local-model"
            },
            runner
        });
        const result = await ensemble.run(descriptor(outputRoot, { harness }));
        const [firstCall] = result.modelCallRecords;
        assert.match(gatewayBaseUrl ?? "", /^http:\/\/127\.0\.0\.1:\d+\/v1$/);
        assert.equal(upstream.requests.length, 1);
        assert.equal(result.harnessRunResult.status, "succeeded");
        assert.equal(result.modelCallRecords.length, 1);
        assert.equal(firstCall?.metadata?.dialect, "openai-responses");
        assert.equal(firstCall?.model, "local-model");
        assert.equal(result.candidates[0]?.metadata?.model_call_count, 1);
    }
    finally {
        await upstream.close();
        cleanup();
    }
});
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,214 @@
1
+ import assert from "node:assert/strict";
2
+ import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
3
+ import { tmpdir } from "node:os";
4
+ import { join } from "node:path";
5
+ import { test } from "node:test";
6
+ import { assertHarnessRunResultV1 } from "@fusionkit/protocol";
7
+ import { gitText } from "@fusionkit/workspace";
8
+ import { createHarnessCapabilityMatrix, runHarnessSmokeDashboard } from "../dashboard.js";
9
+ import { createMockHarness } from "../mock.js";
10
/**
 * Creates a throwaway git repository containing one committed README.
 *
 * @returns {{repo: string, outputRoot: string, cleanup: Function}} The
 *   repository path, a sibling output path (`dashboard-out`, not created
 *   here) and a function that deletes the whole temporary root.
 */
function makeRepo() {
    const root = mkdtempSync(join(tmpdir(), "ensemble-dashboard-"));
    const repo = join(root, "repo");
    mkdirSync(repo);
    const git = (...args) => gitText(repo, args);
    git("init", "--quiet", "--initial-branch=main");
    // A local identity is configured before the commit below is made.
    git("config", "user.email", "dashboard@warrant.local");
    git("config", "user.name", "dashboard");
    writeFileSync(join(repo, "README.md"), "# dashboard\n");
    git("add", "-A");
    git("commit", "--quiet", "-m", "init");
    const outputRoot = join(root, "dashboard-out");
    const cleanup = () => rmSync(root, { recursive: true, force: true });
    return { repo, outputRoot, cleanup };
}
26
// With an empty env, the matrix must list the five harnesses in a fixed
// order, advertise the expected capability names, mark Cursor as missing and
// report the harness kinds of Claude Code and Codex.
test("capability matrix covers Cursor, Claude Code, Codex, command, and mock", () => {
    const matrix = createHarnessCapabilityMatrix({ env: {} });
    const rowFor = (harnessId) => matrix.rows.find((row) => row.harnessId === harnessId);
    const advertises = (capability) => matrix.capabilities.includes(capability);
    assert.deepEqual(matrix.rows.map((row) => row.harnessId), ["cursor", "claude-code", "codex", "command", "mock"]);
    assert.ok(advertises("model_override"));
    assert.ok(advertises("transcript_capture"));
    assert.ok(advertises("diff_capture"));
    assert.ok(advertises("tool_loop_capture"));
    assert.ok(advertises("patch_apply_visibility"));
    assert.ok(advertises("route_model_observation"));
    assert.ok(advertises("verification_hint"));
    assert.ok(advertises("replay_support"));
    assert.ok(advertises("workspace_read"));
    assert.ok(advertises("verification"));
    assert.equal(rowFor("cursor")?.availability, "missing");
    assert.equal(rowFor("claude-code")?.harnessKind, "claude_code");
    assert.equal(rowFor("codex")?.harnessKind, "codex");
});
44
// A default dashboard run must produce six records whose in-memory and
// on-disk results both pass the schema assertion, cover the expected mix of
// statuses, and render a markdown file with the expected sections.
test("smoke dashboard writes schema-valid success, failure, skipped, and missing records", async () => {
    const fixture = makeRepo();
    try {
        const dashboard = await runHarnessSmokeDashboard({
            repo: fixture.repo,
            outputRoot: fixture.outputRoot,
            timeoutMs: 1_000,
            createdAt: "2026-06-16T00:00:00.000Z"
        });
        const recordFor = (taskId) => dashboard.records.find((record) => record.taskId === taskId);
        assert.equal(dashboard.records.length, 6);
        assert.equal(existsSync(dashboard.dashboardPath), true);
        for (const { result, resultPath } of dashboard.records) {
            assertHarnessRunResultV1(result);
            assert.equal(existsSync(resultPath), true);
            // The file on disk must validate as well, not just the object.
            const persisted = JSON.parse(readFileSync(resultPath, "utf8"));
            assertHarnessRunResultV1(persisted);
        }
        const sortedStatuses = dashboard.records.map((record) => record.result.status).sort();
        assert.deepEqual(sortedStatuses, [
            "failed",
            "skipped",
            "skipped",
            "succeeded",
            "succeeded",
            "unsupported"
        ]);
        assert.equal(recordFor("claude-code-skipped")?.result.harness_kind, "claude_code");
        assert.equal(recordFor("codex-skipped")?.result.harness_kind, "codex");
        assert.equal(recordFor("cursor-missing")?.result.errors?.[0]?.kind, "capability_missing");
        const markdown = readFileSync(dashboard.dashboardPath, "utf8");
        assert.match(markdown, /# HandoffKit Harness Smoke Dashboard/);
        assert.match(markdown, /## Capability Matrix/);
        assert.match(markdown, /## Adapter Readiness/);
        assert.match(markdown, /contract\/mock ready/);
        assert.match(markdown, /credentials missing\/skipped/);
        assert.match(markdown, /live smoke not requested/);
        assert.match(markdown, /command-failure/);
        assert.match(markdown, /cursor-missing/);
        assert.match(markdown, /harness-run-results\/mock-success\.json/);
        assert.equal(dashboard.readiness.length, 5);
    }
    finally {
        fixture.cleanup();
    }
});
91
// Requesting live smoke for two harnesses with an empty env must not add any
// record whose purpose is "live"; the record count stays at six.
test("smoke dashboard only adds live records when explicit smoke env is enabled", async () => {
    const fixture = makeRepo();
    try {
        const options = {
            repo: fixture.repo,
            outputRoot: fixture.outputRoot,
            timeoutMs: 1_000,
            createdAt: "2026-06-16T00:00:00.000Z",
            env: {},
            liveSmoke: ["claude-code", "codex"]
        };
        const { records } = await runHarnessSmokeDashboard(options);
        assert.equal(records.length, 6);
        const anyLive = records.some((record) => record.purpose === "live");
        assert.equal(anyLive, false);
    }
    finally {
        fixture.cleanup();
    }
});
109
// With WARRANT_CLAUDE_SMOKE set but no credentials in the env, the Claude Code
// live record must be a failed one whose summary mentions the pre-launch
// failure, and readiness must report "live smoke failed".
test("explicit live smoke without credentials records a failed preflight", async () => {
    const fixture = makeRepo();
    try {
        const dashboard = await runHarnessSmokeDashboard({
            repo: fixture.repo,
            outputRoot: fixture.outputRoot,
            timeoutMs: 1_000,
            createdAt: "2026-06-16T00:00:00.000Z",
            env: { WARRANT_CLAUDE_SMOKE: "1" },
            liveSmoke: ["claude-code"]
        });
        const liveRecord = dashboard.records.find((record) => record.taskId === "claude-code-live");
        const claudeReadiness = dashboard.readiness.find((row) => row.harnessId === "claude-code");
        assert.equal(liveRecord?.purpose, "live");
        assert.equal(liveRecord?.result.status, "failed");
        assert.match(liveRecord?.result.output_summary ?? "", /Explicit live smoke failed before launch/);
        assert.equal(claudeReadiness?.liveSmoke, "live smoke failed");
    }
    finally {
        fixture.cleanup();
    }
});
130
// Live smoke with injected mock harnesses must add two succeeded live records
// (eight in total). The markdown must show the sanitized log artifact refs,
// note that raw artifact refs were withheld, and must not contain the private
// transcript text or the artifact file URIs.
test("live smoke readiness reports sanitized local evidence refs", async () => {
    const fixture = makeRepo();
    const privateTranscript = "raw private transcript should not render";
    // Artifact description with a hash made of one repeated hex character.
    const artifact = (artifactId, kind, hashChar, uri, redactionStatus) => ({
        artifact_id: artifactId,
        kind,
        hash: `sha256:${hashChar.repeat(64)}`,
        uri,
        redaction_status: redactionStatus
    });
    try {
        const claudeHarness = {
            ...createMockHarness({
                id: "claude-code-live-mock",
                candidates: {
                    claude: {
                        transcript: privateTranscript,
                        artifacts: [
                            artifact("claude_safe_log", "log", "a", "file:///tmp/private-claude.log", "synthetic"),
                            artifact("claude_raw_transcript", "transcript", "b", "file:///tmp/raw-claude.txt", "raw")
                        ]
                    }
                }
            }),
            harnessKind: "claude_code"
        };
        const codexHarness = {
            ...createMockHarness({
                id: "codex-live-mock",
                candidates: {
                    codex: {
                        transcript: "codex private transcript should not render",
                        artifacts: [
                            artifact("codex_safe_log", "log", "c", "file:///tmp/private-codex.log", "synthetic")
                        ]
                    }
                }
            }),
            harnessKind: "codex"
        };
        const env = {
            WARRANT_ENSEMBLE_LIVE_SMOKE: "1",
            VERCEL_TOKEN: "vercel-test",
            ANTHROPIC_API_KEY: "anthropic-test",
            CODEX_API_KEY: "codex-test"
        };
        const dashboard = await runHarnessSmokeDashboard({
            repo: fixture.repo,
            outputRoot: fixture.outputRoot,
            timeoutMs: 1_000,
            createdAt: "2026-06-16T00:00:00.000Z",
            env,
            liveSmoke: ["claude-code", "codex"],
            liveSmokeHarnesses: {
                "claude-code": claudeHarness,
                codex: codexHarness
            }
        });
        const recordFor = (taskId) => dashboard.records.find((record) => record.taskId === taskId);
        const readinessFor = (harnessId) => dashboard.readiness.find((row) => row.harnessId === harnessId);
        assert.equal(dashboard.records.length, 8);
        assert.equal(recordFor("claude-code-live")?.result.status, "succeeded");
        assert.equal(recordFor("codex-live")?.result.status, "succeeded");
        assert.equal(readinessFor("claude-code")?.liveSmoke, "live smoke passed");
        assert.equal(readinessFor("codex")?.liveSmoke, "live smoke passed");
        const markdown = readFileSync(dashboard.dashboardPath, "utf8");
        assert.match(markdown, /log:claude_safe_log:sha256/);
        assert.match(markdown, /log:codex_safe_log:sha256/);
        assert.match(markdown, /raw artifact ref\(s\) withheld/);
        assert.equal(markdown.includes(privateTranscript), false);
        assert.equal(markdown.includes("file:///tmp/private-claude.log"), false);
        assert.equal(markdown.includes("file:///tmp/private-codex.log"), false);
    }
    finally {
        fixture.cleanup();
    }
});
@@ -0,0 +1 @@
1
+ export {};