@fusionkit/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/dist/cli.d.ts +8 -0
  2. package/dist/cli.js +34 -0
  3. package/dist/commands/ensemble-gateway.d.ts +2 -0
  4. package/dist/commands/ensemble-gateway.js +114 -0
  5. package/dist/commands/ensemble-records.d.ts +33 -0
  6. package/dist/commands/ensemble-records.js +207 -0
  7. package/dist/commands/ensemble.d.ts +2 -0
  8. package/dist/commands/ensemble.js +254 -0
  9. package/dist/commands/fusion.d.ts +2 -0
  10. package/dist/commands/fusion.js +112 -0
  11. package/dist/commands/init.d.ts +2 -0
  12. package/dist/commands/init.js +24 -0
  13. package/dist/commands/lifecycle.d.ts +2 -0
  14. package/dist/commands/lifecycle.js +124 -0
  15. package/dist/commands/local.d.ts +2 -0
  16. package/dist/commands/local.js +25 -0
  17. package/dist/commands/plane.d.ts +2 -0
  18. package/dist/commands/plane.js +30 -0
  19. package/dist/commands/run.d.ts +2 -0
  20. package/dist/commands/run.js +149 -0
  21. package/dist/commands/runner.d.ts +2 -0
  22. package/dist/commands/runner.js +33 -0
  23. package/dist/commands/secrets.d.ts +2 -0
  24. package/dist/commands/secrets.js +21 -0
  25. package/dist/config.d.ts +30 -0
  26. package/dist/config.js +69 -0
  27. package/dist/fusion-quickstart.d.ts +182 -0
  28. package/dist/fusion-quickstart.js +673 -0
  29. package/dist/gateway.d.ts +63 -0
  30. package/dist/gateway.js +304 -0
  31. package/dist/index.d.ts +2 -0
  32. package/dist/index.js +28 -0
  33. package/dist/local.d.ts +40 -0
  34. package/dist/local.js +144 -0
  35. package/dist/render.d.ts +7 -0
  36. package/dist/render.js +131 -0
  37. package/dist/shared/errors.d.ts +6 -0
  38. package/dist/shared/errors.js +9 -0
  39. package/dist/shared/options.d.ts +24 -0
  40. package/dist/shared/options.js +106 -0
  41. package/dist/shared/plane.d.ts +13 -0
  42. package/dist/shared/plane.js +46 -0
  43. package/dist/shared/preflight.d.ts +15 -0
  44. package/dist/shared/preflight.js +48 -0
  45. package/dist/shared/proc.d.ts +41 -0
  46. package/dist/shared/proc.js +122 -0
  47. package/dist/test/cli.test.d.ts +1 -0
  48. package/dist/test/cli.test.js +867 -0
  49. package/dist/test/e2e.test.d.ts +1 -0
  50. package/dist/test/e2e.test.js +250 -0
  51. package/dist/test/fusion-quickstart.test.d.ts +1 -0
  52. package/dist/test/fusion-quickstart.test.js +189 -0
  53. package/dist/test/gateway-e2e.test.d.ts +1 -0
  54. package/dist/test/gateway-e2e.test.js +606 -0
  55. package/dist/test/handoff.test.d.ts +1 -0
  56. package/dist/test/handoff.test.js +212 -0
  57. package/dist/test/local.test.d.ts +1 -0
  58. package/dist/test/local.test.js +39 -0
  59. package/dist/test/proc.test.d.ts +1 -0
  60. package/dist/test/proc.test.js +22 -0
  61. package/package.json +48 -0
@@ -0,0 +1,867 @@
1
+ import assert from "node:assert/strict";
2
+ import { createHash } from "node:crypto";
3
+ import { spawn, spawnSync } from "node:child_process";
4
+ import { createServer } from "node:http";
5
+ import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync, mkdirSync } from "node:fs";
6
+ import { tmpdir } from "node:os";
7
+ import { join } from "node:path";
8
+ import { fileURLToPath } from "node:url";
9
+ import { after, before, test } from "node:test";
10
+ import { MODEL_FUSION_SCHEMA_BUNDLE_HASH } from "@fusionkit/protocol";
11
+ import { makeRepo as makeStackRepo, mockRunRequest, startStack, uploadWorkspace } from "@fusionkit/testkit";
12
// Absolute filesystem path of the CLI entry point (`../index.js` relative to this test file).
const CLI = fileURLToPath(new URL("../index.js", import.meta.url));

// Environment variables that the spawn helpers below always delete from the child environment.
const SMOKE_ENV_KEYS = ["WARRANT_CLAUDE_SMOKE", "WARRANT_CODEX_SMOKE", "WARRANT_ENSEMBLE_LIVE_SMOKE"];

// Default `--dir` value handed to the CLI; assigned by the `before` hook.
let home;
19
/**
 * Collect every chunk yielded by an async-iterable request stream and
 * return them concatenated into a single Buffer.
 *
 * @param {AsyncIterable<Uint8Array>} req - stream to drain (e.g. an incoming HTTP request).
 * @returns {Promise<Buffer>} the complete payload.
 */
async function readBody(req) {
    const pieces = [];
    for await (const piece of req) {
        pieces.push(piece);
    }
    const payload = Buffer.concat(pieces);
    return payload;
}
25
/**
 * Promise wrapper around the callback-style `server.close()`.
 *
 * @param {{ close(callback: (error?: Error) => void): unknown }} server - server to shut down.
 * @returns {Promise<void>} resolves once the close callback fires without an
 *   error; rejects with the error the callback reports otherwise.
 */
async function closeServer(server) {
    const closed = new Promise((done, fail) => {
        server.close((failure) => {
            if (failure) {
                fail(failure);
                return;
            }
            done();
        });
    });
    await closed;
}
30
/**
 * Start a loopback-only HTTP server that emulates a chat-completions backend.
 *
 * Only `POST /v1/chat/completions` is served; every other request receives an
 * empty 404. The request body is read in full and handed to `buildReply` as
 * UTF-8 text; whatever `buildReply` returns is sent back as JSON with status
 * 200. If reading the body, building the reply, or serializing it throws, the
 * client receives a 500 whose JSON body carries the stringified error.
 *
 * @param {(rawBody: string) => unknown} buildReply - produces the JSON reply object.
 * @returns {Promise<{ url: string, close: () => Promise<void> }>} base URL of
 *   the server (ephemeral port on 127.0.0.1) and a function that shuts it down.
 */
async function startChatCompletionsBackend(buildReply) {
    const server = createServer((req, res) => {
        void (async () => {
            if (req.method !== "POST" || req.url !== "/v1/chat/completions") {
                res.writeHead(404).end();
                return;
            }
            const rawBody = (await readBody(req)).toString("utf8");
            // Serialize before touching the response so a failure here can
            // still be reported as a clean 500.
            const payload = JSON.stringify(buildReply(rawBody));
            res.writeHead(200, { "content-type": "application/json" });
            res.end(payload);
        })().catch((error) => {
            // writeHead() throws once headers are on the wire; throwing inside
            // this handler would surface as an unhandled rejection, so drop
            // the connection instead.
            if (res.headersSent) {
                res.destroy();
                return;
            }
            res.writeHead(500, { "content-type": "application/json" });
            res.end(JSON.stringify({ error: String(error) }));
        });
    });
    await new Promise((resolve, reject) => {
        server.once("error", reject);
        // Port 0 lets the OS pick a free port.
        server.listen(0, "127.0.0.1", () => {
            server.off("error", reject);
            resolve();
        });
    });
    const address = server.address();
    assert.ok(typeof address === "object" && address !== null);
    return {
        url: `http://127.0.0.1:${address.port}`,
        close: () => closeServer(server)
    };
}

/**
 * Start a backend whose assistant reply echoes the requested model as
 * `CLI_FUSION:<model>`. A body that is not valid JSON yields a 500.
 *
 * @returns {Promise<{ url: string, close: () => Promise<void> }>}
 */
async function startFusionBackend() {
    return startChatCompletionsBackend((rawBody) => {
        const body = JSON.parse(rawBody);
        return {
            choices: [{ message: { role: "assistant", content: `CLI_FUSION:${body.model}` } }]
        };
    });
}

/**
 * Start a backend that ignores the request content and always answers with
 * `<sentinel> fusion synthesis` plus a fixed token-usage record.
 *
 * @param {string} sentinel - marker text placed at the start of every reply.
 * @returns {Promise<{ url: string, close: () => Promise<void> }>}
 */
async function startSentinelBackend(sentinel) {
    return startChatCompletionsBackend(() => ({
        choices: [{ message: { role: "assistant", content: `${sentinel} fusion synthesis` } }],
        usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }
    }));
}
93
/**
 * Build the environment for a spawned CLI process: a copy of `process.env`
 * with every `SMOKE_ENV_KEYS` entry removed, then `overrides` applied on top.
 *
 * @param {Record<string, string | undefined> | null | undefined} overrides -
 *   a string value sets the variable, `undefined` deletes it.
 * @returns {Record<string, string | undefined>} a fresh environment object.
 */
function buildCliEnv(overrides) {
    const env = { ...process.env };
    for (const key of SMOKE_ENV_KEYS) {
        delete env[key];
    }
    for (const [key, value] of Object.entries(overrides ?? {})) {
        if (value === undefined) {
            delete env[key];
        }
        else {
            env[key] = value;
        }
    }
    return env;
}

/**
 * Run the CLI synchronously as `node <CLI> --dir <dir> ...args`.
 *
 * @param {string[]} args - CLI arguments placed after `--dir <dir>`.
 * @param {{ dir?: string, env?: Record<string, string | undefined>, input?: string }} [options]
 *   `dir` defaults to the shared `home`; `env` is applied as described on
 *   `buildCliEnv`; `input` is written to the child's stdin.
 * @returns {{ status: number, stdout: string, stderr: string }} exit status
 *   (1 when the child reported no exit code) and the captured output.
 */
function warrant(args, options = {}) {
    const result = spawnSync(process.execPath, [CLI, "--dir", options.dir ?? home, ...args], {
        encoding: "utf8",
        env: buildCliEnv(options.env),
        input: options.input
    });
    return {
        status: result.status ?? 1,
        stdout: result.stdout,
        stderr: result.stderr
    };
}

/**
 * Asynchronous counterpart of `warrant`: same arguments and result shape, but
 * the child runs without blocking the event loop of the calling process.
 *
 * @param {string[]} args - CLI arguments placed after `--dir <dir>`.
 * @param {{ dir?: string, env?: Record<string, string | undefined>, input?: string }} [options]
 * @returns {Promise<{ status: number, stdout: string, stderr: string }>}
 *   rejects if the child process cannot be spawned.
 */
async function warrantAsync(args, options = {}) {
    const env = buildCliEnv(options.env);
    return await new Promise((resolve, reject) => {
        const child = spawn(process.execPath, [CLI, "--dir", options.dir ?? home, ...args], {
            env,
            stdio: ["pipe", "pipe", "pipe"]
        });
        let stdout = "";
        let stderr = "";
        // Decode at the stream level so a multi-byte character split across
        // two chunks is not corrupted.
        child.stdout.setEncoding("utf8");
        child.stderr.setEncoding("utf8");
        child.stdout.on("data", (chunk) => {
            stdout += chunk;
        });
        child.stderr.on("data", (chunk) => {
            stderr += chunk;
        });
        // Without a listener a spawn failure is thrown as an unhandled
        // 'error' event and this promise would never settle.
        child.once("error", reject);
        // 'close' fires only after the stdio streams have ended; 'exit' can
        // fire while output is still buffered in the pipes.
        child.once("close", (code) => {
            resolve({ status: code ?? 1, stdout, stderr });
        });
        if (options.input !== undefined) {
            child.stdin.end(options.input);
        }
        else {
            child.stdin.end();
        }
    });
}
148
// Suite setup: reserve a unique temp path for the CLI home. The directory is
// deleted again straight away, so `home` names a path that does not exist when
// the first test runs (presumably so `init` creates it itself — confirm).
before(() => {
    const prefix = join(tmpdir(), "warrant-cli-test-");
    home = mkdtempSync(prefix);
    rmSync(home, { force: true, recursive: true });
});

// Suite teardown: remove whatever the tests left under `home`.
after(() => {
    rmSync(home, { force: true, recursive: true });
});
155
// Top-level help: exits 0, shows the tagline, and names each listed command as a whole word.
test("help prints usage and lists the top-level commands", () => {
    const { status, stdout } = warrant(["help"]);
    assert.equal(status, 0);
    assert.match(stdout, /real model fusion behind your coding agent/);
    const commands = ["run", "continue", "ensemble", "local", "fusion", "ui", "codex", "claude", "cursor", "serve"];
    commands.forEach((name) => {
        assert.match(stdout, new RegExp(`\\b${name}\\b`));
    });
});

// `ensemble --help` names each listed subcommand.
test("ensemble help lists its subcommands", () => {
    const { status, stdout } = warrant(["ensemble", "--help"]);
    assert.equal(status, 0);
    const subcommands = ["run", "handoff", "dashboard", "e2e", "gateway"];
    subcommands.forEach((name) => {
        assert.match(stdout, new RegExp(`\\b${name}\\b`));
    });
});

// `ensemble dashboard --help` mentions `--live-smoke`.
test("ensemble dashboard help documents the live-smoke flag", () => {
    const { status, stdout } = warrant(["ensemble", "dashboard", "--help"]);
    assert.equal(status, 0);
    assert.match(stdout, /--live-smoke/);
});

// `ensemble gateway --help` names each listed front-door subcommand.
test("gateway help lists the front-door subcommands", () => {
    const { status, stdout } = warrant(["ensemble", "gateway", "--help"]);
    assert.equal(status, 0);
    const subcommands = ["serve", "acp", "acp-registry", "test", "codex-config"];
    subcommands.forEach((name) => {
        assert.match(stdout, new RegExp(`\\b${name}\\b`));
    });
});

// An unrecognized acp-registry action must exit non-zero.
test("gateway acp-registry rejects an unknown action", () => {
    const { status } = warrant(["ensemble", "gateway", "acp-registry", "bogus"]);
    assert.notEqual(status, 0);
});

// `local` with no tool: exit 1 plus the usage line on stderr.
test("local without a tool prints usage and fails", () => {
    const { status, stderr } = warrant(["local"]);
    assert.equal(status, 1);
    assert.match(stderr, /usage: warrant local </);
});

// `local` with an unknown tool: same failure shape as the no-tool case.
test("local rejects an unknown tool", () => {
    const { status, stderr } = warrant(["local", "frobnicate"]);
    assert.equal(status, 1);
    assert.match(stderr, /usage: warrant local </);
});

// `local --help` states that flags must come before the tool name.
test("local help documents the flags-before-tool contract", () => {
    const { status, stdout } = warrant(["local", "--help"]);
    assert.equal(status, 0);
    assert.match(stdout, /must precede the tool name/);
});

// `fusion --help` states the same flag-ordering rule.
test("fusion help documents the flags-before-tool contract", () => {
    const { status, stdout } = warrant(["fusion", "--help"]);
    assert.equal(status, 0);
    assert.match(stdout, /must precede the tool name/);
});
206
// First `init` populates the home directory; a second `init` is refused.
test("init creates keys, config, and policy; refuses to re-init", () => {
    const first = warrant(["init"]);
    assert.equal(first.status, 0, first.stderr);
    assert.match(first.stdout, /initialized warrant home/);
    assert.match(first.stdout, /admin token \(for the control panel\)/);
    // The org private key is sealed at rest; a master key file is generated.
    const expectedFiles = [
        ["config.json"],
        ["policy.json"],
        ["keys", "plane.key.enc"],
        ["keys", "plane.pub.pem"],
        ["master.key"]
    ];
    for (const segments of expectedFiles) {
        assert.ok(existsSync(join(home, ...segments)));
    }
    const configText = readFileSync(join(home, "config.json"), "utf8");
    const config = JSON.parse(configText);
    assert.equal(config.version, "warrant.config.v2");
    assert.equal(config.host, "127.0.0.1");
    // No key material lives in config.json anymore.
    assert.equal(config.secretsKeyHex, undefined);
    const second = warrant(["init"]);
    assert.equal(second.status, 1);
    assert.match(second.stderr, /already initialized/);
});

// A stored secret is listed by name, and its plaintext is absent from secrets.enc.
test("secrets are stored encrypted and listed by name only", () => {
    const plaintext = "super-secret-value";
    const stored = warrant(["secrets", "set", "NPM_TOKEN", plaintext]);
    assert.equal(stored.status, 0, stored.stderr);
    assert.match(stored.stdout, /encrypted at rest/);
    const listed = warrant(["secrets", "list"]);
    assert.equal(listed.status, 0);
    assert.equal(listed.stdout.trim(), "NPM_TOKEN");
    const onDisk = readFileSync(join(home, "secrets.enc"), "utf8");
    assert.ok(!onDisk.includes(plaintext), "value must be encrypted");
});

// `ui` prints the control-panel URL and a login token.
test("ui prints the control panel address and login token", () => {
    const { status, stdout } = warrant(["ui"]);
    assert.equal(status, 0);
    assert.match(stdout, /control panel: http:\/\/127\.0\.0\.1:7172\/ui\//);
    assert.match(stdout, /login token: {3}\S+/);
});

// Each malformed invocation exits 1 with a specific message on stderr.
test("unknown commands and missing arguments fail with guidance", () => {
    const cases = [
        { args: ["frobnicate"], message: /unknown command/ },
        { args: ["run", "do things"], message: /--agent is required/ },
        { args: ["continue", "--agent", "mock"], message: /task prompt is required/ },
        { args: ["continue", "--agent", "nonsense", "task"], message: /unknown agent kind/ }
    ];
    for (const { args, message } of cases) {
        const outcome = warrant(args);
        assert.equal(outcome.status, 1);
        assert.match(outcome.stderr, message);
    }
});

// A hand-written bundle with empty signatures and mismatched refs must fail verification.
test("verify fails closed on a tampered bundle file", () => {
    const bundlePath = join(home, "garbage.bundle.json");
    const receipt = {
        contractHash: "0".repeat(64),
        signatures: [],
        status: "completed",
        workspaceIn: { baseRef: "y", manifestHash: "z" },
        workspaceOut: { diffHash: "", artifactHashes: [] },
        secretsReleased: [],
        eventsHead: "",
        eventCount: 0
    };
    const bundle = {
        version: "warrant.bundle.v1",
        contract: { signatures: [], workspace: { baseRef: "x" } },
        receipt,
        events: [],
        keys: { planePublicKeyPem: "", runnerPublicKeyPem: "" }
    };
    writeFileSync(bundlePath, JSON.stringify(bundle));
    const { status, stderr } = warrant(["verify", bundlePath]);
    assert.equal(status, 1);
    assert.match(stderr, /VERIFICATION FAILED/);
});
279
/**
 * Run `git` with `args` inside `cwd` and fail loudly when it does not succeed.
 *
 * A fixture built from silently failed git commands (git missing, an option
 * the installed git does not support, nothing to commit) only shows up later
 * as an unrelated-looking test failure, so the failure is raised here instead.
 *
 * @param {string} cwd - working directory for the git process.
 * @param {string[]} args - git arguments.
 * @throws {Error} if git cannot be started or exits with a non-zero status.
 */
function runGit(cwd, args) {
    const result = spawnSync("git", args, { cwd, encoding: "utf8" });
    const label = `git ${args.join(" ")}`;
    if (result.error) {
        throw new Error(`${label} could not be started`, { cause: result.error });
    }
    if (result.status !== 0) {
        throw new Error(`${label} failed (status ${result.status}): ${result.stderr.trim()}`);
    }
}

/**
 * Create a throwaway git repository containing one committed README.
 *
 * @returns {{ repo: string, output: string, cleanup: () => void }} `repo` is
 *   the initialized repository, `output` is a sibling path that is NOT created
 *   here, and `cleanup` removes the temp root holding both.
 * @throws {Error} if any git command fails.
 */
function makeRepo() {
    const root = mkdtempSync(join(tmpdir(), "warrant-ensemble-cli-"));
    const repo = join(root, "repo");
    const output = join(root, "out");
    mkdirSync(repo);
    runGit(repo, ["init", "--quiet", "--initial-branch=main"]);
    // Repo-local identity so commits work without any global git config.
    runGit(repo, ["config", "user.email", "cli@warrant.local"]);
    runGit(repo, ["config", "user.name", "warrant-cli"]);
    writeFileSync(join(repo, "README.md"), "# cli ensemble\n");
    runGit(repo, ["add", "-A"]);
    runGit(repo, ["commit", "--quiet", "-m", "init"]);
    return { repo, output, cleanup: () => rmSync(root, { recursive: true, force: true }) };
}

/**
 * Create a `makeRepo()` fixture with a deliberately broken calculator
 * (`add` subtracts), a test that exposes the bug, and a `fix-and-test.js`
 * script that rewrites the calculator correctly and then runs that test.
 * All three files are committed.
 *
 * @returns {{ repo: string, output: string, cleanup: () => void }}
 * @throws {Error} if any git command fails.
 */
function makeCodingRepo() {
    const fixture = makeRepo();
    writeFileSync(join(fixture.repo, "calculator.js"), "exports.add = (left, right) => left - right;\n");
    writeFileSync(join(fixture.repo, "calculator.test.js"), [
        "const assert = require('node:assert/strict');",
        "const { add } = require('./calculator.js');",
        "assert.equal(add(2, 3), 5);",
        "console.log('TEST_OK');",
        ""
    ].join("\n"));
    writeFileSync(join(fixture.repo, "fix-and-test.js"), [
        "const fs = require('node:fs');",
        "fs.writeFileSync('calculator.js', 'exports.add = (left, right) => left + right;\\n');",
        "require('./calculator.test.js');",
        "console.log('PATCH_TEST_OK');",
        ""
    ].join("\n"));
    runGit(fixture.repo, ["add", "-A"]);
    runGit(fixture.repo, ["commit", "--quiet", "-m", "add failing coding fixture"]);
    return fixture;
}

/**
 * Commit a `fusion-probe.js` script into `repo`. The script POSTs one chat
 * message to `FUSIONKIT_CHAT_COMPLETIONS_URL` using model `FUSIONKIT_MODEL`,
 * writes the reply text to `fusion-result.txt`, and prints `FUSION_PROBE_OK`;
 * on any error it logs the error and exits with status 1.
 *
 * @param {string} repo - path of an existing git repository.
 * @throws {Error} if any git command fails.
 */
function addFusionCommandProbe(repo) {
    writeFileSync(join(repo, "fusion-probe.js"), [
        "const fs = require('node:fs');",
        "(async () => {",
        "  const response = await fetch(process.env.FUSIONKIT_CHAT_COMPLETIONS_URL, {",
        "    method: 'POST',",
        "    headers: { 'content-type': 'application/json' },",
        "    body: JSON.stringify({",
        "      model: process.env.FUSIONKIT_MODEL,",
        "      messages: [{ role: 'user', content: 'probe' }]",
        "    })",
        "  });",
        "  const body = await response.json();",
        "  fs.writeFileSync('fusion-result.txt', body.choices[0].message.content);",
        "  console.log('FUSION_PROBE_OK');",
        "})().catch((error) => { console.error(error); process.exit(1); });",
        ""
    ].join("\n"));
    runGit(repo, ["add", "-A"]);
    runGit(repo, ["commit", "--quiet", "-m", "add fusion probe"]);
}
334
// Mock harness run: prints a short summary (never the prompt) and writes the record files.
test("ensemble mock smoke writes records and concise summary", () => {
    const { repo, output, cleanup } = makeRepo();
    try {
        const prompt = "this prompt should not be printed in full";
        const cliArgs = [
            "ensemble", "run",
            "--harness", "mock",
            "--repo", repo,
            "--out", output,
            "--id", "cli_mock",
            prompt
        ];
        const outcome = warrant(cliArgs);
        assert.equal(outcome.status, 0, outcome.stderr);
        assert.match(outcome.stdout, /ensemble cli_mock \[succeeded\]/);
        assert.match(outcome.stdout, /candidates: 2/);
        assert.ok(!outcome.stdout.includes(prompt));
        const summaryPath = join(output, "summary.json");
        assert.ok(existsSync(summaryPath));
        assert.ok(existsSync(join(output, "harness-run-request.json")));
        assert.ok(existsSync(join(output, "harness-run-result.json")));
        assert.ok(existsSync(join(output, "candidates", "cli_mock_fast_0.json")));
        const summary = JSON.parse(readFileSync(summaryPath, "utf8"));
        assert.equal(summary.candidates.length, 2);
        // finalPatchPath must be either a string or exactly null.
        const patchPathIsValid = typeof summary.finalPatchPath === "string" || summary.finalPatchPath === null;
        assert.equal(patchPathIsValid, true);
    }
    finally {
        cleanup();
    }
});

// Command harness with a succeeding shell command: the candidate record reports success.
test("ensemble command smoke records command output", () => {
    const { repo, output, cleanup } = makeRepo();
    try {
        const cliArgs = [
            "ensemble", "run",
            "--harness", "command",
            "--command", "printf command-ok",
            "--repo", repo,
            "--out", output,
            "--id", "cli_command",
            "command prompt"
        ];
        const outcome = warrant(cliArgs);
        assert.equal(outcome.status, 0, outcome.stderr);
        assert.match(outcome.stdout, /ensemble cli_command \[succeeded\]/);
        const summaryText = readFileSync(join(output, "summary.json"), "utf8");
        assert.ok(summaryText.includes("cli_command_command_0"));
        const candidateText = readFileSync(join(output, "candidates", "cli_command_command_0.json"), "utf8");
        assert.ok(candidateText.includes("succeeded"));
    }
    finally {
        cleanup();
    }
});

// Command harness with a failing shell command: exit 1, but summary.json is still written.
test("ensemble command failure exits nonzero but writes summary", () => {
    const { repo, output, cleanup } = makeRepo();
    try {
        const cliArgs = [
            "ensemble", "run",
            "--harness", "command",
            "--command", "exit 7",
            "--repo", repo,
            "--out", output,
            "--id", "cli_fail",
            "command prompt"
        ];
        const outcome = warrant(cliArgs);
        assert.equal(outcome.status, 1);
        assert.match(outcome.stdout, /ensemble cli_fail \[failed\]/);
        assert.ok(existsSync(join(output, "summary.json")));
    }
    finally {
        cleanup();
    }
});
421
/**
 * Build a synthetic `benchmark-task-record.v1` object for handoff tests.
 *
 * `prompt_hash` is the SHA-256 of `prompt` and `setup_hash` the SHA-256 of
 * `"<taskId>:setup"`, both hex-encoded and prefixed with `sha256:`. Every
 * other field is a fixed placeholder value.
 *
 * @param {string} taskId - value for `task_id` (also seeds `setup_hash`).
 * @param {string} prompt - value for `prompt` (also seeds `prompt_hash`).
 * @returns {object} the task record.
 */
function benchmarkTask(taskId, prompt) {
    const sha256Tag = (text) => {
        const digest = createHash("sha256").update(text).digest("hex");
        return `sha256:${digest}`;
    };
    const promptHash = sha256Tag(prompt);
    const setupHash = sha256Tag(`${taskId}:setup`);
    return {
        schema: "benchmark-task-record.v1",
        schema_version: "v1",
        schema_bundle_hash: MODEL_FUSION_SCHEMA_BUNDLE_HASH,
        producer: "fusionkit-evals",
        producer_version: "0.1.0",
        producer_git_sha: "a".repeat(40),
        created_at: "2026-01-01T00:00:00.000Z",
        task_id: taskId,
        task_kind: "harness_coding",
        source_repo: "fusionkit",
        source_sha: "b".repeat(40),
        prompt,
        prompt_hash: promptHash,
        setup_hash: setupHash,
        expected_evidence: ["harness records join"],
        scorer: { kind: "record_join" },
        holdout: false,
        contamination_notes: "synthetic cli handoff test",
        allowed_tools: ["read_file"]
    };
}
445
// Handoff takes its task from stdin only; a positional prompt is an error and nothing is printed on stdout.
test("ensemble handoff rejects positional prompts", () => {
    const stdinPayload = JSON.stringify({
        category: "coding",
        manifest_path: "/tmp/handoff-positional-task.json",
        task: benchmarkTask("handoff_positional", "should come from stdin")
    });
    const outcome = warrant(["ensemble", "handoff", "unexpected prompt"], { input: stdinPayload });
    assert.equal(outcome.status, 1);
    assert.match(outcome.stderr, /does not accept positional arguments/);
    assert.equal(outcome.stdout, "");
});

// Mock harness handoff: stdout is pure JSON whose first three records have a fixed schema order.
test("ensemble handoff emits FusionKit-compatible contract records on stdout", () => {
    const { repo, output, cleanup } = makeRepo();
    try {
        const stdinPayload = JSON.stringify({
            category: "coding",
            manifest_path: "/tmp/handoff-cli-task.json",
            task: benchmarkTask("handoff_cli_task", "summarize the repo for handoff")
        });
        const cliArgs = [
            "ensemble", "handoff",
            "--harness", "mock",
            "--repo", repo,
            "--out", output,
            "--id", "cli_handoff"
        ];
        const outcome = warrant(cliArgs, { input: stdinPayload });
        assert.equal(outcome.status, 0, outcome.stderr);
        const { records } = JSON.parse(outcome.stdout);
        const schemas = records.map((entry) => entry.schema);
        assert.deepEqual(schemas.slice(0, 3), [
            "benchmark-task-record.v1",
            "harness-run-request.v1",
            "harness-run-result.v1"
        ]);
        assert.ok(schemas.includes("harness-candidate-record.v1"));
        assert.ok(schemas.includes("judge-synthesis-record.v1"));
        assert.ok(existsSync(join(output, "harness-run-result.json")));
        // The human-readable summary line must not leak into the JSON stream.
        assert.ok(!outcome.stdout.includes("ensemble cli_handoff"));
    }
    finally {
        cleanup();
    }
});

// A failing command harness still exits 0 in handoff mode; the failure is carried in the records.
test("ensemble handoff exits zero with failed command harness records for FusionKit ingestion", () => {
    const { repo, output, cleanup } = makeRepo();
    try {
        const stdinPayload = JSON.stringify({
            category: "coding",
            manifest_path: "/tmp/handoff-command-fail-task.json",
            task: benchmarkTask("handoff_command_fail", "run a failing command harness")
        });
        const cliArgs = [
            "ensemble", "handoff",
            "--harness", "command",
            "--command", "exit 7",
            "--repo", repo,
            "--out", output,
            "--id", "cli_command_fail"
        ];
        const outcome = warrant(cliArgs, { input: stdinPayload });
        assert.equal(outcome.status, 0, outcome.stderr);
        const { records } = JSON.parse(outcome.stdout);
        const runResult = records.find((entry) => entry.schema === "harness-run-result.v1");
        assert.equal(runResult?.status, "failed");
        assert.equal(runResult?.harness_kind, "generic");
        assert.ok(records.some((entry) => entry.schema === "harness-candidate-record.v1"));
    }
    finally {
        cleanup();
    }
});

// A command that really patches the repo and runs its test must yield patch and transcript artifacts.
test("ensemble handoff command harness records real patch and test evidence", () => {
    const { repo, output, cleanup } = makeCodingRepo();
    try {
        const task = {
            ...benchmarkTask("handoff_command_patch", "Fix calculator.js so calculator.test.js passes, then run the test."),
            allowed_tools: ["read_file", "write_file", "run_tests"]
        };
        const stdinPayload = JSON.stringify({
            category: "coding",
            manifest_path: "/tmp/handoff-command-patch-task.json",
            task
        });
        const cliArgs = [
            "ensemble", "handoff",
            "--harness", "command",
            "--command", "node fix-and-test.js",
            "--repo", repo,
            "--out", output,
            "--id", "cli_command_patch"
        ];
        const outcome = warrant(cliArgs, { input: stdinPayload });
        assert.equal(outcome.status, 0, outcome.stderr);
        const { records } = JSON.parse(outcome.stdout);
        const firstOfSchema = (schema) => records.find((entry) => entry.schema === schema);
        const runResult = firstOfSchema("harness-run-result.v1");
        const candidate = firstOfSchema("harness-candidate-record.v1");
        const toolExecution = firstOfSchema("tool-execution-record.v1");
        assert.equal(runResult?.status, "succeeded");
        assert.equal(candidate?.status, "succeeded");
        assert.equal(toolExecution?.status, "succeeded");
        const patch = candidate?.artifacts?.find((item) => item.kind === "patch");
        const transcript = candidate?.artifacts?.find((item) => item.kind === "transcript");
        assert.ok(patch?.uri, "candidate must include a patch artifact");
        assert.ok(transcript?.uri, "candidate must include a transcript artifact");
        const transcriptText = readFileSync(fileURLToPath(transcript.uri), "utf8");
        assert.match(transcriptText, /PATCH_TEST_OK/);
    }
    finally {
        cleanup();
    }
});

// With every credential/base-URL variable blanked and an empty CODEX_HOME, the run is reported as skipped.
test("ensemble handoff returns structured skip records when codex credentials are absent", () => {
    const { repo, output, cleanup } = makeRepo();
    const emptyCodexHome = mkdtempSync(join(tmpdir(), "warrant-codex-empty-home-"));
    try {
        const stdinPayload = JSON.stringify({
            category: "coding",
            manifest_path: "/tmp/handoff-codex-task.json",
            task: benchmarkTask("handoff_codex_skip", "try the codex coding harness")
        });
        const credentialFreeEnv = {
            CODEX_API_KEY: "",
            OPENAI_API_KEY: "",
            CODEX_HOME: emptyCodexHome,
            WARRANT_CODEX_RESPONSES_BASE_URL: "",
            CODEX_RESPONSES_BASE_URL: "",
            WARRANT_CODEX_OPENAI_BASE_URL: "",
            OPENAI_BASE_URL: ""
        };
        const cliArgs = [
            "ensemble", "handoff",
            "--harness", "codex",
            "--repo", repo,
            "--out", output,
            "--id", "cli_codex_skip"
        ];
        const outcome = warrant(cliArgs, { input: stdinPayload, env: credentialFreeEnv });
        assert.equal(outcome.status, 0, outcome.stderr);
        const { records } = JSON.parse(outcome.stdout);
        const runResult = records.find((entry) => entry.schema === "harness-run-result.v1");
        assert.equal(runResult?.status, "skipped");
        assert.equal(runResult?.harness_kind, "codex");
        assert.ok(JSON.stringify(runResult?.errors).includes("Codex credentials are absent"));
    }
    finally {
        cleanup();
        rmSync(emptyCodexHome, { recursive: true, force: true });
    }
});
615
// Dashboard run: reports six records and writes dashboard.md plus per-scenario result files.
test("ensemble dashboard writes markdown and run-result records", () => {
    const { repo, output, cleanup } = makeRepo();
    try {
        const cliArgs = [
            "ensemble", "dashboard",
            "--repo", repo,
            "--out", output,
            "--timeout-ms", "1000"
        ];
        const outcome = warrant(cliArgs);
        assert.equal(outcome.status, 0, outcome.stderr);
        assert.match(outcome.stdout, /harness dashboard/);
        assert.match(outcome.stdout, /records: 6/);
        const dashboardPath = join(output, "dashboard.md");
        assert.ok(existsSync(dashboardPath));
        assert.ok(existsSync(join(output, "harness-run-results", "mock-success.json")));
        assert.ok(existsSync(join(output, "harness-run-results", "cursor-missing.json")));
        const markdown = readFileSync(dashboardPath, "utf8");
        assert.match(markdown, /Capability Matrix/);
        assert.match(markdown, /command-failure/);
    }
    finally {
        cleanup();
    }
});

// Passing --live-smoke alone (the helper strips the smoke env vars) must not add live entries.
test("ensemble dashboard live-smoke flag remains env-gated by default", () => {
    const { repo, output, cleanup } = makeRepo();
    try {
        const cliArgs = [
            "ensemble", "dashboard",
            "--repo", repo,
            "--out", output,
            "--timeout-ms", "1000",
            "--live-smoke", "claude-code",
            "--live-smoke", "codex"
        ];
        const outcome = warrant(cliArgs);
        assert.equal(outcome.status, 0, outcome.stderr);
        assert.match(outcome.stdout, /records: 6/);
        const markdown = readFileSync(join(output, "dashboard.md"), "utf8");
        assert.match(markdown, /live smoke not requested/);
        assert.equal(markdown.includes("claude-code-live"), false);
        assert.equal(markdown.includes("codex-live"), false);
    }
    finally {
        cleanup();
    }
});

// `cursor` is not an accepted --live-smoke target.
test("ensemble dashboard rejects unknown live-smoke targets", () => {
    const { repo, output, cleanup } = makeRepo();
    try {
        const cliArgs = [
            "ensemble", "dashboard",
            "--repo", repo,
            "--out", output,
            "--live-smoke", "cursor"
        ];
        const outcome = warrant(cliArgs);
        assert.equal(outcome.status, 1);
        assert.match(outcome.stderr, /--live-smoke must be/);
    }
    finally {
        cleanup();
    }
});

// The prompt can come from --task-file, and its text must not be echoed to stdout.
test("ensemble task-file input works without printing prompt contents", () => {
    const { repo, output, cleanup } = makeRepo();
    try {
        const taskFile = join(repo, "task.txt");
        writeFileSync(taskFile, "secret-ish task text that should not print");
        const cliArgs = [
            "ensemble", "run",
            "--harness", "mock",
            "--task-file", taskFile,
            "--repo", repo,
            "--out", output,
            "--id", "cli_file"
        ];
        const outcome = warrant(cliArgs);
        assert.equal(outcome.status, 0, outcome.stderr);
        assert.match(outcome.stdout, /ensemble cli_file \[succeeded\]/);
        assert.ok(!outcome.stdout.includes("secret-ish task text"));
        assert.ok(existsSync(join(output, "summary.json")));
    }
    finally {
        cleanup();
    }
});
718
// End-to-end run against an in-process fake backend. The async spawner is used
// because the backend is served from this same process.
test("ensemble e2e runs a FusionKit-backed command matrix and writes a report", async () => {
    const fixture = makeRepo();
    const backend = await startFusionBackend();
    try {
        addFusionCommandProbe(fixture.repo);
        const cliArgs = [
            "ensemble", "e2e",
            "--fusion-backend", backend.url,
            "--harness", "command",
            "--command", "node fusion-probe.js",
            "--model", "alpha=fusion-alpha",
            "--judge-model", "fusion-judge",
            "--repo", fixture.repo,
            "--out", fixture.output,
            "Run the FusionKit-backed command harness."
        ];
        const outcome = await warrantAsync(cliArgs);
        assert.equal(outcome.status, 0, outcome.stderr);
        assert.match(outcome.stdout, /unified e2e \[succeeded:1\]/);
        const reportFile = join(fixture.output, "unified-e2e-report.json");
        assert.ok(existsSync(reportFile));
        const reportText = readFileSync(reportFile, "utf8");
        assert.match(reportText, /"harness": "command"/);
        assert.match(reportText, /"judgeSynthesis": true/);
    }
    finally {
        await backend.close();
        fixture.cleanup();
    }
});

// `codex-config` prints a provider snippet that points at the given backend.
test("ensemble gateway codex-config prints a Responses provider snippet", () => {
    const cliArgs = [
        "ensemble", "gateway", "codex-config",
        "--fusion-backend", "http://127.0.0.1:8787"
    ];
    const outcome = warrant(cliArgs);
    assert.equal(outcome.status, 0, outcome.stderr);
    assert.match(outcome.stdout, /model_provider = "fusion-gateway"/);
    assert.match(outcome.stdout, /wire_api = "responses"/);
    assert.match(outcome.stdout, /base_url = "http:\/\/127\.0\.0\.1:8787\/v1"/);
});

// Front-door acceptance suite against a sentinel backend: four doors pass, two are reported blocked.
test("ensemble gateway test runs the unified front-door acceptance suite", async () => {
    const fixture = makeRepo();
    const backend = await startSentinelBackend("FUSION_OK");
    try {
        addFusionCommandProbe(fixture.repo);
        const reportPath = join(fixture.output, "front-door-report.json");
        const cliArgs = [
            "ensemble", "gateway", "test",
            "--fusion-backend", backend.url,
            "--harness", "command",
            "--command", "node fusion-probe.js",
            "--model", "alpha=fusion-alpha",
            "--judge-model", "fusion-judge",
            "--repo", fixture.repo,
            "--out", reportPath,
            "--sentinel", "FUSION_OK"
        ];
        const outcome = await warrantAsync(cliArgs);
        assert.equal(outcome.status, 0, outcome.stderr);
        assert.match(outcome.stdout, /front-door acceptance report/);
        assert.ok(existsSync(reportPath));
        const report = JSON.parse(readFileSync(reportPath, "utf8"));
        const expectedStatuses = [
            ["codex-responses", "passed"],
            ["claude-messages", "passed"],
            ["openai-chat", "passed"],
            ["generic-acp", "passed"],
            ["codex-acp", "blocked"],
            ["cursor-acp", "blocked"]
        ];
        for (const [doorId, expected] of expectedStatuses) {
            const actual = report.front_doors.find((door) => door.id === doorId)?.status;
            assert.equal(actual, expected);
        }
    }
    finally {
        await backend.close();
        fixture.cleanup();
    }
});
811
// Exercises the lifecycle CLI commands (init, runs, receipt, bundle, verify,
// export, pull) against a stack started in this process, using one run created
// through stack.client.
test("lifecycle commands read a real run from a live plane", async () => {
    const stack = await startStack({
        policy: (policy) => {
            policy.agents.allow = ["mock"];
        }
    });
    const repo = makeStackRepo({ files: { "README.md": "# cli lifecycle\n" } });
    // mkdtempSync reserves a unique path; the directory is removed again right
    // away so that `init` below runs against a path that does not exist yet.
    const liveHome = mkdtempSync(join(tmpdir(), "warrant-cli-live-"));
    rmSync(liveHome, { recursive: true, force: true });
    try {
        // The plane runs in this test process, so every CLI call must use the async
        // spawner: a synchronous spawn would block the event loop and deadlock the
        // in-process plane.
        const init = await warrantAsync(["init"], { dir: liveHome });
        assert.equal(init.status, 0, init.stderr);
        // Point the freshly initialized home at the in-process test stack.
        const configPath = join(liveHome, "config.json");
        const config = JSON.parse(readFileSync(configPath, "utf8"));
        config.planeUrl = stack.planeUrl;
        config.adminToken = stack.adminToken;
        writeFileSync(configPath, JSON.stringify(config, null, 2));
        // Create one completed run through the SDK so the CLI has something to read.
        const captured = await uploadWorkspace(stack.client, repo);
        const created = await stack.client.requestRun(mockRunRequest({
            prompt: "lifecycle probe",
            pool: stack.pool,
            workspace: captured.manifest
        }));
        if (created.status === "awaiting_approval") {
            await stack.client.approve(created.runId, { kind: "human", id: "cli-tester" });
        }
        assert.ok(await stack.runOnce());
        const runs = await warrantAsync(["runs"], { dir: liveHome });
        assert.equal(runs.status, 0, runs.stderr);
        // Literal substring check: building a RegExp from the raw id would treat
        // any regex metacharacter in it as pattern syntax.
        assert.ok(runs.stdout.includes(created.runId), `expected "runs" output to contain ${created.runId}:\n${runs.stdout}`);
        const receipt = await warrantAsync(["receipt", created.runId], { dir: liveHome });
        assert.equal(receipt.status, 0, receipt.stderr);
        const bundlePath = join(liveHome, "out.bundle.json");
        const bundle = await warrantAsync(["bundle", created.runId, "--out", bundlePath], {
            dir: liveHome
        });
        assert.equal(bundle.status, 0, bundle.stderr);
        assert.match(bundle.stdout, /bundle written to/);
        assert.ok(existsSync(bundlePath));
        // The CLI round-trips its own bundle through offline verification.
        const verify = await warrantAsync(["verify", bundlePath], { dir: liveHome });
        assert.equal(verify.status, 0, verify.stderr);
        assert.match(verify.stdout, /VERIFIED/);
        const exported = await warrantAsync(["export"], { dir: liveHome });
        assert.equal(exported.status, 0, exported.stderr);
        assert.ok(exported.stdout.includes(created.runId), `expected "export" output to contain ${created.runId}:\n${exported.stdout}`);
        const pull = await warrantAsync(["pull", created.runId, "--repo", repo], { dir: liveHome });
        assert.equal(pull.status, 0, pull.stderr);
        assert.match(pull.stdout, /applied|nothing to pull|branch/);
    }
    finally {
        // Same order as before (stop the stack, then delete the directories), but
        // the directories are now removed even when stack.stop() rejects.
        try {
            await stack.stop();
        }
        finally {
            rmSync(repo, { recursive: true, force: true });
            rmSync(liveHome, { recursive: true, force: true });
        }
    }
});