@fusionkit/ensemble 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +0,0 @@
1
- export {};
@@ -1,237 +0,0 @@
1
- import assert from "node:assert/strict";
2
- import { createServer } from "node:http";
3
- import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
4
- import { tmpdir } from "node:os";
5
- import { join } from "node:path";
6
- import { test } from "node:test";
7
- import { codexConfigToml, codexHarness } from "../codex.js";
8
- import { createMockHarness } from "../mock.js";
9
- import { ensemble } from "../run.js";
10
- function tempOutputRoot() {
11
- const outputRoot = mkdtempSync(join(tmpdir(), "ensemble-codex-out-"));
12
- return {
13
- outputRoot,
14
- cleanup: () => rmSync(outputRoot, { recursive: true, force: true })
15
- };
16
- }
17
- function descriptor(outputRoot, overrides = {}) {
18
- return {
19
- id: "codex_ensemble_test",
20
- harness: createMockHarness(),
21
- models: [{ id: "codex", model: "gpt-5.1-codex-max" }],
22
- runtime: { id: "local" },
23
- judge: { id: "judge", model: "fake-judge" },
24
- policy: {
25
- id: "policy",
26
- allowedTools: ["read_file", "apply_patch"],
27
- sideEffects: "writes_workspace",
28
- timeoutMs: 1_000
29
- },
30
- prompt: "Summarize Codex harness evidence.",
31
- sourceRepo: "handoffkit",
32
- baseGitSha: "b".repeat(40),
33
- outputRoot,
34
- ...overrides
35
- };
36
- }
37
- async function readBody(req) {
38
- const chunks = [];
39
- for await (const chunk of req)
40
- chunks.push(chunk);
41
- return Buffer.concat(chunks);
42
- }
43
- async function closeServer(server) {
44
- await new Promise((resolve, reject) => {
45
- server.close((error) => (error ? reject(error) : resolve()));
46
- });
47
- }
48
- async function startOpenAiCompatibleServer() {
49
- const requests = [];
50
- const server = createServer((req, res) => {
51
- void (async () => {
52
- const path = new URL(req.url ?? "/", "http://localhost").pathname;
53
- if (req.method === "GET" && path === "/v1/models") {
54
- res.writeHead(200, { "content-type": "application/json" });
55
- res.end(JSON.stringify({ data: [{ id: "local-model" }] }));
56
- return;
57
- }
58
- if (req.method === "POST" && path === "/v1/chat/completions") {
59
- const body = JSON.parse((await readBody(req)).toString("utf8"));
60
- requests.push(body);
61
- const model = typeof body.model === "string" ? body.model : "local-model";
62
- res.writeHead(200, { "content-type": "application/json" });
63
- res.end(JSON.stringify({
64
- id: "chatcmpl_test",
65
- model,
66
- choices: [{ message: { role: "assistant", content: "gateway-ok" } }],
67
- usage: { prompt_tokens: 3, completion_tokens: 2, total_tokens: 5 }
68
- }));
69
- return;
70
- }
71
- res.writeHead(404, { "content-type": "application/json" });
72
- res.end(JSON.stringify({ error: { message: "not found" } }));
73
- })().catch((error) => {
74
- res.writeHead(500, { "content-type": "application/json" });
75
- res.end(JSON.stringify({ error: { message: String(error) } }));
76
- });
77
- });
78
- await new Promise((resolve, reject) => {
79
- server.once("error", reject);
80
- server.listen(0, "127.0.0.1", () => {
81
- server.off("error", reject);
82
- resolve();
83
- });
84
- });
85
- const address = server.address();
86
- assert.ok(typeof address === "object" && address !== null);
87
- return {
88
- url: `http://127.0.0.1:${address.port}`,
89
- requests,
90
- close: () => closeServer(server)
91
- };
92
- }
93
- test("codexConfigToml declares a Responses provider without requiring auth", () => {
94
- const toml = codexConfigToml({
95
- model: "local-model",
96
- sandboxMode: "workspace-write",
97
- approvalPolicy: "never",
98
- provider: {
99
- baseUrl: "http://127.0.0.1:9000",
100
- requiresOpenAiAuth: false
101
- }
102
- });
103
- assert.ok(toml.includes('model = "local-model"'));
104
- assert.ok(toml.includes('model_provider = "warrant-codex"'));
105
- assert.ok(toml.includes("[model_providers.warrant-codex]"));
106
- assert.ok(toml.includes('base_url = "http://127.0.0.1:9000/v1"'));
107
- assert.ok(toml.includes('wire_api = "responses"'));
108
- assert.ok(toml.includes("requires_openai_auth = false"));
109
- });
110
- test("codex adapter skips clearly when credentials are absent", async () => {
111
- const { outputRoot, cleanup } = tempOutputRoot();
112
- const emptyCodexHome = mkdtempSync(join(tmpdir(), "ensemble-codex-empty-home-"));
113
- let invoked = false;
114
- const runner = () => {
115
- invoked = true;
116
- return { stdout: "", stderr: "", exitCode: 0 };
117
- };
118
- try {
119
- const result = await ensemble.run(descriptor(outputRoot, {
120
- harness: codexHarness({ env: { CODEX_HOME: emptyCodexHome }, runner })
121
- }));
122
- assert.equal(invoked, false);
123
- assert.equal(result.harnessRunResult.status, "skipped");
124
- assert.equal(result.candidates[0]?.status, "skipped");
125
- assert.equal(result.candidates[0]?.error?.kind, "capability_missing");
126
- assert.match(result.candidates[0]?.error?.message ?? "", /CODEX_API_KEY|OPENAI_API_KEY/);
127
- }
128
- finally {
129
- cleanup();
130
- rmSync(emptyCodexHome, { recursive: true, force: true });
131
- }
132
- });
133
- test("codex adapter accepts local CLI auth without exported API keys", async () => {
134
- const { outputRoot, cleanup } = tempOutputRoot();
135
- const sourceHome = mkdtempSync(join(tmpdir(), "ensemble-codex-source-home-"));
136
- writeFileSync(join(sourceHome, "auth.json"), "{\"auth\":\"redacted-test-token\"}\n");
137
- let seenAuthFile = false;
138
- const runner = (input) => {
139
- const codexHome = input.env.CODEX_HOME;
140
- assert.ok(codexHome);
141
- assert.notEqual(codexHome, sourceHome);
142
- assert.equal(input.env.CODEX_API_KEY, undefined);
143
- assert.equal(input.env.OPENAI_API_KEY, undefined);
144
- seenAuthFile = existsSync(join(codexHome, "auth.json"));
145
- return { stdout: "codex local auth ok", stderr: "", exitCode: 0 };
146
- };
147
- try {
148
- const result = await ensemble.run(descriptor(outputRoot, {
149
- harness: codexHarness({ env: { CODEX_HOME: sourceHome }, runner })
150
- }));
151
- assert.equal(seenAuthFile, true);
152
- assert.equal(result.harnessRunResult.status, "succeeded");
153
- assert.equal(result.candidates[0]?.metadata?.provider_kind, "ambient");
154
- }
155
- finally {
156
- cleanup();
157
- rmSync(sourceHome, { recursive: true, force: true });
158
- }
159
- });
160
- test("generic ensemble descriptor swaps mock harness for Codex harness", async () => {
161
- const { outputRoot, cleanup } = tempOutputRoot();
162
- let seenArgs;
163
- let seenConfig = "";
164
- const runner = (input) => {
165
- seenArgs = input.args;
166
- const codexHome = input.env.CODEX_HOME;
167
- assert.ok(codexHome);
168
- seenConfig = readFileSync(join(codexHome, "config.toml"), "utf8");
169
- assert.equal(input.env.CODEX_API_KEY, "test-key");
170
- return { stdout: '{"type":"message","message":"codex-ok"}\n', stderr: "", exitCode: 0 };
171
- };
172
- try {
173
- const base = descriptor(outputRoot);
174
- const mock = await ensemble.run(base);
175
- const codex = await ensemble.run({
176
- ...base,
177
- harness: codexHarness({ env: { CODEX_API_KEY: "test-key" }, runner })
178
- });
179
- assert.equal(mock.harnessRunResult.status, "succeeded");
180
- assert.equal(codex.harnessRunResult.status, "succeeded");
181
- assert.deepEqual(seenArgs?.slice(0, 3), ["exec", "--json", "--skip-git-repo-check"]);
182
- assert.equal(seenArgs?.at(-1), base.prompt);
183
- assert.ok(seenConfig.includes('model = "gpt-5.1-codex-max"'));
184
- assert.equal(codex.candidates[0]?.metadata?.provider_kind, "ambient");
185
- }
186
- finally {
187
- cleanup();
188
- }
189
- });
190
- test("Codex OpenAI-compatible provider goes through Responses gateway records", async () => {
191
- const { outputRoot, cleanup } = tempOutputRoot();
192
- const upstream = await startOpenAiCompatibleServer();
193
- let gatewayBaseUrl;
194
- const runner = async (input) => {
195
- const codexHome = input.env.CODEX_HOME;
196
- assert.ok(codexHome);
197
- const config = readFileSync(join(codexHome, "config.toml"), "utf8");
198
- const match = /base_url = "([^"]+)"/.exec(config);
199
- assert.ok(match);
200
- gatewayBaseUrl = match[1];
201
- assert.ok(gatewayBaseUrl);
202
- const response = await fetch(`${gatewayBaseUrl}/responses`, {
203
- method: "POST",
204
- headers: { "content-type": "application/json" },
205
- body: JSON.stringify({
206
- input: "hello from fake codex",
207
- stream: false
208
- })
209
- });
210
- assert.equal(response.status, 200);
211
- return { stdout: "codex gateway ok", stderr: "", exitCode: 0 };
212
- };
213
- try {
214
- const result = await ensemble.run(descriptor(outputRoot, {
215
- harness: codexHarness({
216
- env: {},
217
- provider: {
218
- kind: "openai-compatible",
219
- baseUrl: `${upstream.url}/v1`,
220
- defaultModel: "local-model"
221
- },
222
- runner
223
- })
224
- }));
225
- assert.match(gatewayBaseUrl ?? "", /^http:\/\/127\.0\.0\.1:\d+\/v1$/);
226
- assert.equal(upstream.requests.length, 1);
227
- assert.equal(result.harnessRunResult.status, "succeeded");
228
- assert.equal(result.modelCallRecords.length, 1);
229
- assert.equal(result.modelCallRecords[0]?.metadata?.dialect, "openai-responses");
230
- assert.equal(result.modelCallRecords[0]?.model, "local-model");
231
- assert.equal(result.candidates[0]?.metadata?.model_call_count, 1);
232
- }
233
- finally {
234
- await upstream.close();
235
- cleanup();
236
- }
237
- });
@@ -1 +0,0 @@
1
- export {};
@@ -1,214 +0,0 @@
1
- import assert from "node:assert/strict";
2
- import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
3
- import { tmpdir } from "node:os";
4
- import { join } from "node:path";
5
- import { test } from "node:test";
6
- import { assertHarnessRunResultV1 } from "@fusionkit/protocol";
7
- import { gitText } from "@fusionkit/workspace";
8
- import { createHarnessCapabilityMatrix, runHarnessSmokeDashboard } from "../dashboard.js";
9
- import { createMockHarness } from "../mock.js";
10
- function makeRepo() {
11
- const root = mkdtempSync(join(tmpdir(), "ensemble-dashboard-"));
12
- const repo = join(root, "repo");
13
- mkdirSync(repo);
14
- gitText(repo, ["init", "--quiet", "--initial-branch=main"]);
15
- gitText(repo, ["config", "user.email", "dashboard@warrant.local"]);
16
- gitText(repo, ["config", "user.name", "dashboard"]);
17
- writeFileSync(join(repo, "README.md"), "# dashboard\n");
18
- gitText(repo, ["add", "-A"]);
19
- gitText(repo, ["commit", "--quiet", "-m", "init"]);
20
- return {
21
- repo,
22
- outputRoot: join(root, "dashboard-out"),
23
- cleanup: () => rmSync(root, { recursive: true, force: true })
24
- };
25
- }
26
- test("capability matrix covers Cursor, Claude Code, Codex, command, and mock", () => {
27
- const matrix = createHarnessCapabilityMatrix({ env: {} });
28
- const harnessIds = matrix.rows.map((row) => row.harnessId);
29
- assert.deepEqual(harnessIds, ["cursor", "claude-code", "codex", "command", "mock"]);
30
- assert.ok(matrix.capabilities.includes("model_override"));
31
- assert.ok(matrix.capabilities.includes("transcript_capture"));
32
- assert.ok(matrix.capabilities.includes("diff_capture"));
33
- assert.ok(matrix.capabilities.includes("tool_loop_capture"));
34
- assert.ok(matrix.capabilities.includes("patch_apply_visibility"));
35
- assert.ok(matrix.capabilities.includes("route_model_observation"));
36
- assert.ok(matrix.capabilities.includes("verification_hint"));
37
- assert.ok(matrix.capabilities.includes("replay_support"));
38
- assert.ok(matrix.capabilities.includes("workspace_read"));
39
- assert.ok(matrix.capabilities.includes("verification"));
40
- assert.equal(matrix.rows.find((row) => row.harnessId === "cursor")?.availability, "missing");
41
- assert.equal(matrix.rows.find((row) => row.harnessId === "claude-code")?.harnessKind, "claude_code");
42
- assert.equal(matrix.rows.find((row) => row.harnessId === "codex")?.harnessKind, "codex");
43
- });
44
- test("smoke dashboard writes schema-valid success, failure, skipped, and missing records", async () => {
45
- const fixture = makeRepo();
46
- try {
47
- const dashboard = await runHarnessSmokeDashboard({
48
- repo: fixture.repo,
49
- outputRoot: fixture.outputRoot,
50
- timeoutMs: 1_000,
51
- createdAt: "2026-06-16T00:00:00.000Z"
52
- });
53
- assert.equal(dashboard.records.length, 6);
54
- assert.equal(existsSync(dashboard.dashboardPath), true);
55
- for (const record of dashboard.records) {
56
- assertHarnessRunResultV1(record.result);
57
- assert.equal(existsSync(record.resultPath), true);
58
- const written = JSON.parse(readFileSync(record.resultPath, "utf8"));
59
- assertHarnessRunResultV1(written);
60
- }
61
- const statuses = dashboard.records.map((record) => record.result.status).sort();
62
- assert.deepEqual(statuses, [
63
- "failed",
64
- "skipped",
65
- "skipped",
66
- "succeeded",
67
- "succeeded",
68
- "unsupported"
69
- ]);
70
- assert.equal(dashboard.records.find((record) => record.taskId === "claude-code-skipped")?.result
71
- .harness_kind, "claude_code");
72
- assert.equal(dashboard.records.find((record) => record.taskId === "codex-skipped")?.result.harness_kind, "codex");
73
- assert.equal(dashboard.records.find((record) => record.taskId === "cursor-missing")?.result
74
- .errors?.[0]?.kind, "capability_missing");
75
- const markdown = readFileSync(dashboard.dashboardPath, "utf8");
76
- assert.match(markdown, /# HandoffKit Harness Smoke Dashboard/);
77
- assert.match(markdown, /## Capability Matrix/);
78
- assert.match(markdown, /## Adapter Readiness/);
79
- assert.match(markdown, /contract\/mock ready/);
80
- assert.match(markdown, /credentials missing\/skipped/);
81
- assert.match(markdown, /live smoke not requested/);
82
- assert.match(markdown, /command-failure/);
83
- assert.match(markdown, /cursor-missing/);
84
- assert.match(markdown, /harness-run-results\/mock-success\.json/);
85
- assert.equal(dashboard.readiness.length, 5);
86
- }
87
- finally {
88
- fixture.cleanup();
89
- }
90
- });
91
- test("smoke dashboard only adds live records when explicit smoke env is enabled", async () => {
92
- const fixture = makeRepo();
93
- try {
94
- const dashboard = await runHarnessSmokeDashboard({
95
- repo: fixture.repo,
96
- outputRoot: fixture.outputRoot,
97
- timeoutMs: 1_000,
98
- createdAt: "2026-06-16T00:00:00.000Z",
99
- env: {},
100
- liveSmoke: ["claude-code", "codex"]
101
- });
102
- assert.equal(dashboard.records.length, 6);
103
- assert.equal(dashboard.records.some((record) => record.purpose === "live"), false);
104
- }
105
- finally {
106
- fixture.cleanup();
107
- }
108
- });
109
- test("explicit live smoke without credentials records a failed preflight", async () => {
110
- const fixture = makeRepo();
111
- try {
112
- const dashboard = await runHarnessSmokeDashboard({
113
- repo: fixture.repo,
114
- outputRoot: fixture.outputRoot,
115
- timeoutMs: 1_000,
116
- createdAt: "2026-06-16T00:00:00.000Z",
117
- env: { WARRANT_CLAUDE_SMOKE: "1" },
118
- liveSmoke: ["claude-code"]
119
- });
120
- const live = dashboard.records.find((record) => record.taskId === "claude-code-live");
121
- assert.equal(live?.purpose, "live");
122
- assert.equal(live?.result.status, "failed");
123
- assert.match(live?.result.output_summary ?? "", /Explicit live smoke failed before launch/);
124
- assert.equal(dashboard.readiness.find((row) => row.harnessId === "claude-code")?.liveSmoke, "live smoke failed");
125
- }
126
- finally {
127
- fixture.cleanup();
128
- }
129
- });
130
- test("live smoke readiness reports sanitized local evidence refs", async () => {
131
- const fixture = makeRepo();
132
- const privateTranscript = "raw private transcript should not render";
133
- try {
134
- const claudeHarness = {
135
- ...createMockHarness({
136
- id: "claude-code-live-mock",
137
- candidates: {
138
- claude: {
139
- transcript: privateTranscript,
140
- artifacts: [
141
- {
142
- artifact_id: "claude_safe_log",
143
- kind: "log",
144
- hash: `sha256:${"a".repeat(64)}`,
145
- uri: "file:///tmp/private-claude.log",
146
- redaction_status: "synthetic"
147
- },
148
- {
149
- artifact_id: "claude_raw_transcript",
150
- kind: "transcript",
151
- hash: `sha256:${"b".repeat(64)}`,
152
- uri: "file:///tmp/raw-claude.txt",
153
- redaction_status: "raw"
154
- }
155
- ]
156
- }
157
- }
158
- }),
159
- harnessKind: "claude_code"
160
- };
161
- const codexHarness = {
162
- ...createMockHarness({
163
- id: "codex-live-mock",
164
- candidates: {
165
- codex: {
166
- transcript: "codex private transcript should not render",
167
- artifacts: [
168
- {
169
- artifact_id: "codex_safe_log",
170
- kind: "log",
171
- hash: `sha256:${"c".repeat(64)}`,
172
- uri: "file:///tmp/private-codex.log",
173
- redaction_status: "synthetic"
174
- }
175
- ]
176
- }
177
- }
178
- }),
179
- harnessKind: "codex"
180
- };
181
- const dashboard = await runHarnessSmokeDashboard({
182
- repo: fixture.repo,
183
- outputRoot: fixture.outputRoot,
184
- timeoutMs: 1_000,
185
- createdAt: "2026-06-16T00:00:00.000Z",
186
- env: {
187
- WARRANT_ENSEMBLE_LIVE_SMOKE: "1",
188
- VERCEL_TOKEN: "vercel-test",
189
- ANTHROPIC_API_KEY: "anthropic-test",
190
- CODEX_API_KEY: "codex-test"
191
- },
192
- liveSmoke: ["claude-code", "codex"],
193
- liveSmokeHarnesses: {
194
- "claude-code": claudeHarness,
195
- codex: codexHarness
196
- }
197
- });
198
- assert.equal(dashboard.records.length, 8);
199
- assert.equal(dashboard.records.find((record) => record.taskId === "claude-code-live")?.result.status, "succeeded");
200
- assert.equal(dashboard.records.find((record) => record.taskId === "codex-live")?.result.status, "succeeded");
201
- assert.equal(dashboard.readiness.find((row) => row.harnessId === "claude-code")?.liveSmoke, "live smoke passed");
202
- assert.equal(dashboard.readiness.find((row) => row.harnessId === "codex")?.liveSmoke, "live smoke passed");
203
- const markdown = readFileSync(dashboard.dashboardPath, "utf8");
204
- assert.match(markdown, /log:claude_safe_log:sha256/);
205
- assert.match(markdown, /log:codex_safe_log:sha256/);
206
- assert.match(markdown, /raw artifact ref\(s\) withheld/);
207
- assert.equal(markdown.includes(privateTranscript), false);
208
- assert.equal(markdown.includes("file:///tmp/private-claude.log"), false);
209
- assert.equal(markdown.includes("file:///tmp/private-codex.log"), false);
210
- }
211
- finally {
212
- fixture.cleanup();
213
- }
214
- });