@fusionkit/ensemble 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cursorkit-path.d.ts +17 -0
- package/dist/cursorkit-path.js +21 -0
- package/dist/harness.d.ts +7 -0
- package/dist/harness.js +51 -1
- package/dist/index.d.ts +5 -8
- package/dist/index.js +3 -4
- package/dist/run.js +2 -1
- package/dist/test/ensemble.test.js +0 -227
- package/dist/unified.d.ts +27 -3
- package/dist/unified.js +31 -36
- package/package.json +8 -7
- package/dist/claude-code.d.ts +0 -25
- package/dist/claude-code.js +0 -398
- package/dist/codex.d.ts +0 -69
- package/dist/codex.js +0 -467
- package/dist/cursor.d.ts +0 -42
- package/dist/cursor.js +0 -440
- package/dist/dashboard.d.ts +0 -62
- package/dist/dashboard.js +0 -762
- package/dist/test/codex.test.d.ts +0 -1
- package/dist/test/codex.test.js +0 -237
- package/dist/test/cursor.test.d.ts +0 -1
- package/dist/test/cursor.test.js +0 -97
- package/dist/test/dashboard.test.d.ts +0 -1
- package/dist/test/dashboard.test.js +0 -214
package/dist/dashboard.js
DELETED
|
@@ -1,762 +0,0 @@
|
|
|
1
|
-
import { mkdirSync, writeFileSync } from "node:fs";
|
|
2
|
-
import { join, resolve } from "node:path";
|
|
3
|
-
import { assertHarnessRunResultV1, MODEL_FUSION_SCHEMA_BUNDLE_HASH } from "@fusionkit/protocol";
|
|
4
|
-
import { gitText } from "@fusionkit/workspace";
|
|
5
|
-
import { claudeCodeHarness, claudeCodeHarnessCredentialSkipReason } from "./claude-code.js";
|
|
6
|
-
import { createCommandHarness } from "./command.js";
|
|
7
|
-
import { codexHarness, codexHarnessCredentialSkipReason } from "./codex.js";
|
|
8
|
-
import { createCursorHarness, cursorHarnessUnavailableReason } from "./cursor.js";
|
|
9
|
-
import { createMockHarness } from "./mock.js";
|
|
10
|
-
import { runEnsemble } from "./run.js";
|
|
11
|
-
// Placeholder git SHA (forty zero digits) used when no real revision is known.
const ZERO_GIT_SHA = "0".repeat(40);

// Producer identity stamped into every record built by metadata().
const PRODUCER = "handoffkit-ensemble";
const PRODUCER_VERSION = "0.1.0";
const PRODUCER_GIT_SHA = "0".repeat(40);

// Defaults applied by the dashboard runner when the caller omits an option.
const DEFAULT_TIMEOUT_MS = 30_000;
const DEFAULT_PROMPT = "Run the CI-safe harness smoke task and report concise evidence.";
const DEFAULT_COMMAND_SUCCESS = "printf command-ok";
const DEFAULT_COMMAND_FAILURE = "exit 7";
const DEFAULT_OUTPUT_DIR = ".warrant/ensemble-dashboard";

// Environment variable names that opt in to live smoke runs.
const ALL_LIVE_SMOKE_ENV = "WARRANT_ENSEMBLE_LIVE_SMOKE";
const CLAUDE_LIVE_SMOKE_ENV = "WARRANT_CLAUDE_SMOKE";
const CODEX_LIVE_SMOKE_ENV = "WARRANT_CODEX_SMOKE";
const CURSOR_LIVE_SMOKE_ENV = "WARRANT_CURSOR_SMOKE";

// Harness ids considered for live smoke when the caller selects none explicitly.
const LIVE_SMOKE_TARGETS = ["claude-code", "codex", "cursor"];

// Prompts sent to each harness during a live smoke run.
const CLAUDE_LIVE_SMOKE_PROMPT = "Read README.md if present, then reply exactly CLAUDE_LIVE_SMOKE_OK. Do not modify files.";
const CODEX_LIVE_SMOKE_PROMPT = "Read README.md if present, then reply exactly CODEX_LIVE_SMOKE_OK. Do not modify files.";
const CURSOR_LIVE_SMOKE_PROMPT = "Read README.md if present, then reply exactly CURSOR_LIVE_SMOKE_OK. Do not modify files.";
|
|
28
|
-
/**
 * Build the envelope fields shared by the records this module emits.
 *
 * @param schema - Schema identifier stored under `schema`.
 * @param createdAt - Timestamp stored under `created_at`.
 * @returns A new object carrying schema, version, bundle hash, producer and creation fields.
 */
function metadata(schema, createdAt) {
    const envelope = { schema, schema_version: "v1" };
    envelope.schema_bundle_hash = MODEL_FUSION_SCHEMA_BUNDLE_HASH;
    envelope.producer = PRODUCER;
    envelope.producer_version = PRODUCER_VERSION;
    envelope.producer_git_sha = PRODUCER_GIT_SHA;
    envelope.created_at = createdAt;
    return envelope;
}
|
|
39
|
-
/**
 * Exhaustiveness guard for the harness-id switches in this module.
 *
 * @param value - The value no `case` handled.
 * @throws {Error} Always; the message embeds the stringified value.
 */
function assertNever(value) {
    const rendered = String(value);
    throw new Error("unhandled harness capability target: " + rendered);
}
|
|
42
|
-
/**
 * Replace every character outside `A-Z a-z 0-9 _ . : -` with an underscore.
 *
 * @param value - Raw name (used here for task ids).
 * @returns The sanitised name, same length in UTF-16 code units as the input.
 */
function safeFileName(value) {
    // Without the `u`/`i` flags, \w is exactly [A-Za-z0-9_].
    const disallowed = /[^\w.:-]/g;
    return value.replace(disallowed, () => "_");
}
|
|
45
|
-
/**
 * Make a string safe to embed in a single Markdown table cell.
 *
 * Pipes are backslash-escaped so they do not start a new column, and every
 * line break (LF, CRLF, or a lone CR) collapses to one space so the row
 * stays on one line. Previously only LF was handled, which left a stray
 * carriage return in the cell for CRLF input.
 *
 * @param value - Cell text.
 * @returns The escaped single-line text.
 */
function escapeMarkdownCell(value) {
    return value.replace(/\|/g, "\\|").replace(/\r\n|\r|\n/g, " ");
}
|
|
48
|
-
/**
 * Report whether an environment flag is switched on.
 *
 * @param env - Environment-style map of names to string values.
 * @param name - Variable name to inspect.
 * @returns `true` when the value is exactly "1" or case-insensitively "true".
 */
function isEnvEnabled(env, name) {
    const raw = env[name];
    if (raw === "1") {
        return true;
    }
    return raw?.toLowerCase() === "true";
}
|
|
51
|
-
/**
 * Map a live-smoke target to the environment variable that enables it.
 *
 * @param target - One of "claude-code", "codex" or "cursor".
 * @returns The per-target opt-in variable name.
 * @throws {Error} For any other target.
 */
function liveSmokeEnvName(target) {
    if (target === "claude-code") {
        return CLAUDE_LIVE_SMOKE_ENV;
    }
    if (target === "codex") {
        return CODEX_LIVE_SMOKE_ENV;
    }
    if (target === "cursor") {
        return CURSOR_LIVE_SMOKE_ENV;
    }
    throw new Error(`unsupported live smoke target: ${String(target)}`);
}
|
|
65
|
-
/**
 * Decide whether live smoke is enabled for one target.
 *
 * The global opt-in variable is checked first; the per-target variable is
 * consulted only when the global one is off.
 *
 * @param env - Environment-style map.
 * @param target - Live-smoke target id.
 * @returns `true` when either switch is on.
 */
function liveSmokeEnvEnabled(env, target) {
    if (isEnvEnabled(env, ALL_LIVE_SMOKE_ENV)) {
        return true;
    }
    return isEnvEnabled(env, liveSmokeEnvName(target));
}
|
|
68
|
-
/**
 * Work out which live-smoke targets should actually run.
 *
 * @param options - `{ env, liveSmoke }`; an absent or empty `liveSmoke` falls
 *   back to LIVE_SMOKE_TARGETS.
 * @returns The candidate targets whose opt-in variable (global or per-target) is on.
 */
function requestedLiveSmokeTargets(options) {
    const explicit = options.liveSmoke;
    const candidates = explicit?.length ? explicit : LIVE_SMOKE_TARGETS;
    return candidates.filter((candidate) => liveSmokeEnvEnabled(options.env, candidate));
}
|
|
72
|
-
/**
 * Translate a dashboard harness id into the `harness_kind` value used in records.
 *
 * @param target - "cursor", "claude-code", "codex", "command" or "mock".
 * @returns "cursor", "claude_code", "codex", or "generic" for command/mock.
 * @throws {Error} Via assertNever for any other id.
 */
function harnessKindFor(target) {
    if (target === "cursor") {
        return "cursor";
    }
    if (target === "claude-code") {
        return "claude_code";
    }
    if (target === "codex") {
        return "codex";
    }
    if (target === "command" || target === "mock") {
        return "generic";
    }
    return assertNever(target);
}
|
|
87
|
-
/**
 * Human-readable label for a harness id, as shown in the dashboard tables.
 *
 * @param target - "cursor", "claude-code", "codex", "command" or "mock".
 * @returns The display label.
 * @throws {Error} Via assertNever for any other id.
 */
function displayNameFor(target) {
    if (target === "cursor") {
        return "Cursor";
    }
    if (target === "claude-code") {
        return "Claude Code";
    }
    if (target === "codex") {
        return "Codex";
    }
    if (target === "command") {
        return "Command";
    }
    if (target === "mock") {
        return "Mock";
    }
    return assertNever(target);
}
|
|
103
|
-
/**
 * Capability levels the dashboard reports for each harness.
 *
 * Every harness starts from "supported" on all eight capabilities; each case
 * lists only the capabilities that deviate. A fresh object is returned on
 * every call, with keys always in the same order.
 *
 * @param target - "cursor", "claude-code", "codex", "command" or "mock".
 * @returns Map of capability name to "supported" | "degraded" | "unsupported".
 * @throws {Error} Via assertNever for any other id.
 */
function dashboardCapabilitiesFor(target) {
    const withOverrides = (overrides) => ({
        model_override: "supported",
        transcript_capture: "supported",
        diff_capture: "supported",
        tool_loop_capture: "supported",
        patch_apply_visibility: "supported",
        route_model_observation: "supported",
        verification_hint: "supported",
        replay_support: "supported",
        ...overrides
    });
    switch (target) {
        case "cursor":
            return withOverrides({ replay_support: "degraded" });
        case "claude-code":
            return withOverrides({
                route_model_observation: "degraded",
                replay_support: "degraded"
            });
        case "codex":
            return withOverrides({
                tool_loop_capture: "degraded",
                replay_support: "degraded"
            });
        case "command":
            return withOverrides({
                diff_capture: "unsupported",
                patch_apply_visibility: "unsupported",
                route_model_observation: "unsupported"
            });
        case "mock":
            return withOverrides({ route_model_observation: "degraded" });
        default:
            return assertNever(target);
    }
}
|
|
164
|
-
/**
 * Overlay the dashboard's capability levels on top of what an adapter reports.
 *
 * @param target - Harness id used to look up the dashboard levels.
 * @param capabilities - Capabilities reported by the adapter.
 * @returns A new object; dashboard values win wherever keys collide.
 */
function matrixCapabilities(target, capabilities) {
    const dashboardLevels = dashboardCapabilitiesFor(target);
    return Object.assign({}, capabilities, dashboardLevels);
}
|
|
170
|
-
/**
 * Build the fixed, read-only descriptor used solely to query an adapter's capabilities.
 *
 * @param harness - Harness instance placed in the descriptor.
 * @returns A descriptor with placeholder model, runtime, judge and policy values.
 */
function descriptorForCapabilities(harness) {
    const policy = {
        id: "capability-policy",
        allowedTools: ["read_file"],
        sideEffects: "read_only",
        timeoutMs: DEFAULT_TIMEOUT_MS
    };
    const models = [{ id: "capability", model: "capability-model" }];
    return {
        id: "capability_matrix",
        harness,
        models,
        runtime: { id: "local" },
        judge: { id: "none" },
        policy,
        prompt: DEFAULT_PROMPT,
        sourceRepo: "handoffkit",
        baseGitSha: ZERO_GIT_SHA
    };
}
|
|
188
|
-
/**
 * Ask a harness adapter for its capabilities using the placeholder descriptor.
 *
 * @param harness - Adapter exposing a `capabilities(descriptor)` method.
 * @returns Whatever the adapter's `capabilities` method returns.
 */
function adapterCapabilities(harness) {
    const probe = descriptorForCapabilities(harness);
    return harness.capabilities(probe);
}
|
|
191
|
-
/**
 * Assemble one capability-matrix row, deriving kind and display name from the id.
 *
 * @param input - `{ harnessId, availability, capabilities, notes }`.
 * @returns The row with `harnessKind` and `displayName` added.
 */
function matrixRow(input) {
    const { harnessId, availability, capabilities, notes } = input;
    return {
        harnessId,
        harnessKind: harnessKindFor(harnessId),
        displayName: displayNameFor(harnessId),
        availability,
        capabilities,
        notes
    };
}
|
|
201
|
-
/**
 * Build the capability matrix for the five known harnesses.
 *
 * Rows are produced in a fixed order: cursor, claude-code, codex, command, mock.
 * Each row merges the adapter-reported capabilities with the dashboard levels.
 *
 * @param options - Optional `{ env, commandSuccess, repo }`; `env` defaults to
 *   an empty object and `commandSuccess` to DEFAULT_COMMAND_SUCCESS.
 * @returns `{ capabilities, rows }` where `capabilities` is the sorted union
 *   of capability names across all rows.
 */
export function createHarnessCapabilityMatrix(options = {}) {
    const env = options.env ?? {};
    const describe = (harnessId, availability, harness, notes) => matrixRow({
        harnessId,
        availability,
        capabilities: matrixCapabilities(harnessId, adapterCapabilities(harness)),
        notes
    });
    const rows = [
        describe("cursor", "credential_gated", createCursorHarness({ env }), [
            "Credential-gated; requires a logged-in Cursor CLI and a built Cursorkit checkout (WARRANT_CURSORKIT_DIR)."
        ]),
        describe("claude-code", "credential_gated", claudeCodeHarness({ env }), [
            "Credential-gated; dashboard smoke uses an empty env skip path."
        ]),
        describe("codex", "credential_gated", codexHarness({ env, provider: { kind: "ambient" } }), [
            "Credential-gated; dashboard smoke uses an empty env skip path."
        ]),
        describe("command", "available", createCommandHarness({
            command: options.commandSuccess ?? DEFAULT_COMMAND_SUCCESS,
            cwd: options.repo
        }), ["Runs local shell commands through the command harness."]),
        describe("mock", "available", createMockHarness(), [
            "Pure synthetic fixture harness for CI."
        ])
    ];
    // Collect the union of capability names, then sort for a stable column order.
    const names = new Set();
    for (const row of rows) {
        for (const capabilityName of Object.keys(row.capabilities)) {
            names.add(capabilityName);
        }
    }
    return { capabilities: [...names].sort(), rows };
}
|
|
243
|
-
/**
 * Resolve the repository's HEAD commit, best-effort.
 *
 * @param repo - Repository path handed to `gitText`.
 * @returns The trimmed `git rev-parse HEAD` output, or ZERO_GIT_SHA when the
 *   call throws or yields an empty string.
 */
function currentGitSha(repo) {
    let head;
    try {
        head = gitText(repo, ["rev-parse", "HEAD"]).trim();
    }
    catch {
        // Deliberate fallback: any failure from gitText yields the placeholder SHA.
        return ZERO_GIT_SHA;
    }
    return head.length === 0 ? ZERO_GIT_SHA : head;
}
|
|
252
|
-
/**
 * Build the run descriptor for one smoke task.
 *
 * @param input - `{ id, harness, model, allowedTools, sideEffects, timeoutMs,
 *   prompt, repo, baseGitSha, outputRoot }`.
 * @returns A descriptor with a single model, a policy named `<id>_policy`,
 *   and an output directory of `<outputRoot>/runs/<id>`.
 */
function smokeDescriptor(input) {
    const { id, allowedTools, sideEffects, timeoutMs } = input;
    const policy = { id: `${id}_policy`, allowedTools, sideEffects, timeoutMs };
    const runDirectory = join(input.outputRoot, "runs", id);
    return {
        id,
        harness: input.harness,
        models: [input.model],
        runtime: { id: "local" },
        judge: { id: "none" },
        policy,
        prompt: input.prompt,
        sourceRepo: input.repo,
        baseGitSha: input.baseGitSha,
        outputRoot: runDirectory
    };
}
|
|
271
|
-
/**
 * Fabricate a failed harness-run result for a live smoke that never launched.
 *
 * Start and finish timestamps are both `createdAt`. The result is validated
 * with `assertHarnessRunResultV1` before being returned.
 *
 * @param input - `{ createdAt, taskId, harnessId, harness, reason }`.
 * @returns The validated result with status "failed" and one
 *   non-retryable "capability_missing" error carrying `reason`.
 */
function liveSmokePreflightFailureResult(input) {
    const { createdAt, taskId, harnessId, reason } = input;
    const preflightError = {
        kind: "capability_missing",
        message: reason,
        retryable: false
    };
    const failure = {
        ...metadata("harness-run-result.v1", createdAt),
        result_id: `ensemble_result_${taskId}`,
        request_id: `ensemble_req_${taskId}`,
        harness_kind: harnessKindFor(harnessId),
        status: "failed",
        candidate_ids: [],
        output_summary: `Explicit live smoke failed before launch: ${reason}`,
        capabilities: matrixCapabilities(harnessId, adapterCapabilities(input.harness)),
        started_at: createdAt,
        finished_at: createdAt,
        errors: [preflightError],
        metadata: {
            dashboard_outcome: "failure",
            harness_id: harnessId,
            live_smoke: true,
            preflight: "credentials"
        }
    };
    assertHarnessRunResultV1(failure);
    return failure;
}
|
|
300
|
-
/**
 * Promote a "skipped" live-smoke result to "failed".
 *
 * An explicitly requested live smoke must not pass by skipping. Results with
 * any other status are returned untouched (same object). The promoted copy is
 * validated with `assertHarnessRunResultV1`.
 *
 * @param result - Harness-run result from the adapter.
 * @param taskId - Task id quoted in the new summary.
 * @returns `result` itself, or a new failed copy with an extra error and
 *   metadata recording the original status.
 */
function failSkippedLiveSmokeResult(result, taskId) {
    if (result.status !== "skipped") {
        return result;
    }
    const priorSummary = result.output_summary ?? "";
    const priorErrors = result.errors ?? [];
    const promoted = {
        ...result,
        status: "failed",
        output_summary: `Explicit live smoke ${taskId} failed because the adapter returned skipped. ${priorSummary}`,
        errors: [
            ...priorErrors,
            {
                kind: "capability_missing",
                message: "Explicit live smoke was requested but the adapter returned skipped.",
                retryable: false
            }
        ],
        metadata: {
            ...(result.metadata ?? {}),
            dashboard_outcome: "failure",
            explicit_live_smoke: true,
            original_status: "skipped"
        }
    };
    assertHarnessRunResultV1(promoted);
    return promoted;
}
|
|
327
|
-
/**
 * Persist one harness-run result as pretty-printed JSON.
 *
 * The file is written to `<outputRoot>/harness-run-results/<safe task id>.json`.
 * The directory is created first, then the result is validated, then written.
 *
 * @param outputRoot - Dashboard output directory.
 * @param taskId - Task id; sanitised with `safeFileName` for the file name.
 * @param result - Result to validate and serialise.
 * @returns The path of the written file.
 */
function writeRunResult(outputRoot, taskId, result) {
    const resultsDir = join(outputRoot, "harness-run-results");
    mkdirSync(resultsDir, { recursive: true });
    const target = join(resultsDir, `${safeFileName(taskId)}.json`);
    assertHarnessRunResultV1(result);
    const serialised = JSON.stringify(result, null, 2) + "\n";
    writeFileSync(target, serialised);
    return target;
}
|
|
335
|
-
/**
 * Execute one smoke run and persist its result.
 *
 * When the run carries a `preflightFailureReason`, no harness is launched and
 * a synthetic failed result is recorded. Otherwise the run goes through
 * `runEnsemble`; for "live" runs a skipped result is promoted to failed.
 *
 * @param input - `{ run, repo, baseGitSha, outputRoot, timeoutMs, createdAt }`.
 * @returns The smoke record: task/harness ids, purpose, expected outcome,
 *   the result, and the path it was written to.
 */
async function runSmokeTask(input) {
    const { run, outputRoot } = input;
    let harnessRunResult;
    if (run.preflightFailureReason !== undefined) {
        harnessRunResult = liveSmokePreflightFailureResult({
            createdAt: input.createdAt,
            taskId: run.taskId,
            harnessId: run.harnessId,
            harness: run.harness,
            reason: run.preflightFailureReason
        });
    }
    else {
        const descriptor = smokeDescriptor({
            id: run.taskId,
            harness: run.harness,
            model: run.model,
            repo: input.repo,
            baseGitSha: input.baseGitSha,
            outputRoot,
            sideEffects: run.sideEffects,
            allowedTools: run.allowedTools,
            timeoutMs: input.timeoutMs,
            prompt: run.prompt ?? DEFAULT_PROMPT
        });
        const ensembleResult = await runEnsemble(descriptor);
        harnessRunResult = ensembleResult.harnessRunResult;
        if (run.purpose === "live") {
            harnessRunResult = failSkippedLiveSmokeResult(harnessRunResult, run.taskId);
        }
    }
    const resultPath = writeRunResult(outputRoot, run.taskId, harnessRunResult);
    return {
        taskId: run.taskId,
        harnessId: run.harnessId,
        purpose: run.purpose,
        outcome: run.outcome,
        result: harnessRunResult,
        resultPath
    };
}
|
|
380
|
-
/**
 * List the always-run smoke tasks.
 *
 * The list holds three contract runs (mock success, command success, command
 * failure) followed by three credential-skip runs whose harnesses are built
 * with an empty env.
 *
 * @param options - `{ commandSuccess, commandFailure }` shell commands for the
 *   two command-harness runs.
 * @returns Six run definitions in a fixed order.
 */
function smokeRuns(options) {
    const mockSuccess = {
        taskId: "mock-success",
        harnessId: "mock",
        purpose: "contract",
        outcome: "success",
        harness: createMockHarness(),
        model: { id: "mock", model: "synthetic-mock" },
        sideEffects: "read_only",
        allowedTools: ["read_file"]
    };
    const commandRun = (outcome, command) => ({
        taskId: `command-${outcome}`,
        harnessId: "command",
        purpose: "contract",
        outcome,
        harness: createCommandHarness({ command }),
        model: { id: "command", model: "local-shell" },
        sideEffects: "tool_execution",
        allowedTools: ["shell_command"]
    });
    const credentialSkipRun = (harnessId, harness, model, allowedTools) => ({
        taskId: `${harnessId}-skipped`,
        harnessId,
        purpose: "credential-skip",
        outcome: "skipped",
        harness,
        model,
        sideEffects: "writes_workspace",
        allowedTools
    });
    return [
        mockSuccess,
        commandRun("success", options.commandSuccess),
        commandRun("failure", options.commandFailure),
        credentialSkipRun("claude-code", claudeCodeHarness({ env: {} }), { id: "claude", model: "claude-sonnet-4-6" }, ["read_file", "write_file", "apply_patch"]),
        credentialSkipRun("codex", codexHarness({ env: {}, provider: { kind: "ambient" } }), { id: "codex", model: "gpt-5.5-codex" }, ["read_file", "apply_patch"]),
        credentialSkipRun("cursor", createCursorHarness({ env: {} }), { id: "cursor", model: "fusion-panel" }, ["read_file", "write_file", "apply_patch", "run_shell"])
    ];
}
|
|
444
|
-
/**
 * Build the live-smoke run definitions for the requested targets.
 *
 * Runs are emitted in the order claude-code, codex, cursor, regardless of the
 * order in `targets`. A caller-supplied harness in `options.harnesses` takes
 * precedence over the default adapter. Each run carries the adapter's
 * credential/unavailability reason as `preflightFailureReason`.
 *
 * @param options - `{ env, targets, harnesses? }`.
 * @returns Zero to three read-only run definitions with purpose "live".
 */
function liveSmokeRuns(options) {
    const liveRun = (target, harness, model, prompt, preflightFailureReason) => ({
        taskId: `${target}-live`,
        harnessId: target,
        purpose: "live",
        outcome: "success",
        harness,
        model,
        sideEffects: "read_only",
        allowedTools: ["read_file"],
        prompt,
        preflightFailureReason
    });
    const runs = [];
    if (options.targets.includes("claude-code")) {
        const claude = options.harnesses?.["claude-code"] ??
            claudeCodeHarness({ env: options.env, skipWhenUnavailable: false });
        const model = {
            id: "claude",
            model: options.env.WARRANT_CLAUDE_SMOKE_MODEL ?? "claude-sonnet-4-6"
        };
        runs.push(liveRun("claude-code", claude, model, CLAUDE_LIVE_SMOKE_PROMPT, claudeCodeHarnessCredentialSkipReason(options.env)));
    }
    if (options.targets.includes("codex")) {
        const codex = options.harnesses?.codex ?? codexHarness({ env: options.env });
        const model = {
            id: "codex",
            model: options.env.WARRANT_CODEX_SMOKE_MODEL ?? "gpt-5.5-codex"
        };
        runs.push(liveRun("codex", codex, model, CODEX_LIVE_SMOKE_PROMPT, codexHarnessCredentialSkipReason(options.env)));
    }
    if (options.targets.includes("cursor")) {
        const cursor = options.harnesses?.cursor ??
            createCursorHarness({ env: options.env, skipWhenUnavailable: false });
        const model = {
            id: "cursor",
            model: options.env.WARRANT_CURSOR_SMOKE_MODEL ?? "fusion-panel"
        };
        runs.push(liveRun("cursor", cursor, model, CURSOR_LIVE_SMOKE_PROMPT, cursorHarnessUnavailableReason(options.env)));
    }
    return runs;
}
|
|
504
|
-
/**
 * Look up one capability level for display.
 *
 * @param capabilities - Map of capability name to level.
 * @param capability - Capability name.
 * @returns The stored level, or "unknown" when it is null or undefined.
 */
function capabilityCell(capabilities, capability) {
    const level = capabilities[capability];
    if (level === undefined || level === null) {
        return "unknown";
    }
    return level;
}
|
|
507
|
-
/**
 * Render the capability matrix as a Markdown section.
 *
 * @param matrix - `{ capabilities, rows }` as produced by createHarnessCapabilityMatrix.
 * @returns Lines: heading, blank line, header row, separator row, then one
 *   row per harness with all cells escaped.
 */
function renderCapabilityMatrix(matrix) {
    const toTableRow = (cells) => `| ${cells.map((cell) => escapeMarkdownCell(cell)).join(" | ")} |`;
    const header = ["Harness", "Kind", "Availability"].concat(matrix.capabilities, ["Notes"]);
    const separator = `| ${header.map(() => "---").join(" | ")} |`;
    const bodyRows = matrix.rows.map((row) => toTableRow([
        row.displayName,
        row.harnessKind,
        row.availability,
        ...matrix.capabilities.map((name) => capabilityCell(row.capabilities, name)),
        row.notes.join(" ")
    ]));
    return ["## Capability Matrix", "", toTableRow(header), separator, ...bodyRows];
}
|
|
533
|
-
/**
 * Express `path` relative to the directory `from` when it lies inside it.
 *
 * The prefix must end on a path-separator boundary, so `/output/x` is not
 * treated as being inside `/out`. An empty `from` returns `path` unchanged,
 * and a `from` that already ends with a separator is handled without dropping
 * an extra character. Both `/` and `\` count as separators.
 *
 * @param path - Path to shorten.
 * @param from - Candidate ancestor directory.
 * @returns The remainder of `path` below `from` ("" when they are equal), or
 *   `path` unchanged when `from` is not an ancestor.
 */
function relativePath(path, from) {
    const isSeparator = (char) => char === "/" || char === "\\";
    if (from.length === 0 || !path.startsWith(from)) {
        return path;
    }
    if (path.length === from.length) {
        return "";
    }
    if (isSeparator(from[from.length - 1])) {
        // `from` already includes the trailing separator; nothing more to skip.
        return path.slice(from.length);
    }
    // Only strip the prefix when it ends exactly at a directory boundary.
    return isSeparator(path[from.length]) ? path.slice(from.length + 1) : path;
}
|
|
536
|
-
/**
 * Summarise artifacts as short references that are safe to publish.
 *
 * Artifacts whose `redaction_status` is "raw" are never listed; they are only
 * counted. At most five non-raw references (`kind:artifact_id:hash`) are
 * listed. The whole list is always scanned, so the withheld count is correct
 * even when raw artifacts come after the fifth listed reference. Stopping at
 * the fifth reference would under-report them.
 *
 * @param artifacts - Artifact records, or undefined/null.
 * @returns Up to five reference strings, followed by a
 *   "<n> raw artifact ref(s) withheld" note when any raw artifact was seen.
 */
function safeArtifactRefs(artifacts) {
    if (artifacts === undefined || artifacts === null || artifacts.length === 0) {
        return [];
    }
    const maxListedRefs = 5;
    const refs = [];
    let rawWithheld = 0;
    for (const artifact of artifacts) {
        if (artifact.redaction_status === "raw") {
            rawWithheld++;
            continue;
        }
        if (refs.length < maxListedRefs) {
            refs.push(`${artifact.kind}:${artifact.artifact_id}:${artifact.hash}`);
        }
    }
    if (rawWithheld > 0) {
        refs.push(`${rawWithheld} raw artifact ref(s) withheld`);
    }
    return refs;
}
|
|
554
|
-
/**
 * Describe the credential situation for a harness.
 *
 * @param harnessId - "claude-code", "codex", "cursor", "command" or "mock".
 * @param env - Environment passed to the adapter's skip/unavailable check.
 * @returns "not required" for command/mock; otherwise "credentials available"
 *   when the adapter check returns undefined, else "credentials missing/skipped".
 * @throws {Error} Via assertNever for any other id.
 */
function credentialStateFor(harnessId, env) {
    let blockingReason;
    switch (harnessId) {
        case "command":
        case "mock":
            return "not required";
        case "claude-code":
            blockingReason = claudeCodeHarnessCredentialSkipReason(env);
            break;
        case "codex":
            blockingReason = codexHarnessCredentialSkipReason(env);
            break;
        case "cursor":
            blockingReason = cursorHarnessUnavailableReason(env);
            break;
        default:
            return assertNever(harnessId);
    }
    return blockingReason === undefined
        ? "credentials available"
        : "credentials missing/skipped";
}
|
|
575
|
-
/**
 * Contract-readiness label for a harness.
 *
 * Every known harness currently yields the same label; the switch exists to
 * reject unknown ids.
 *
 * @param harnessId - "mock", "command", "claude-code", "codex" or "cursor".
 * @returns "contract/mock ready".
 * @throws {Error} Via assertNever for any other id.
 */
function contractReadinessFor(harnessId) {
    switch (harnessId) {
        case "mock":
        case "command":
        case "claude-code":
        case "codex":
        case "cursor":
            return "contract/mock ready";
        default:
            return assertNever(harnessId);
    }
}
|
|
588
|
-
/**
 * Summarise a live-smoke record as a dashboard label.
 *
 * @param record - Smoke record for the live run, or undefined when none ran.
 * @returns "live smoke not requested", "live smoke passed",
 *   "live smoke skipped", or "live smoke failed" (any non-terminal or
 *   unsuccessful status).
 * @throws {Error} When the result status is not a recognised value.
 */
function liveSmokeState(record) {
    if (record === undefined) {
        return "live smoke not requested";
    }
    const status = record.result.status;
    if (status === "succeeded") {
        return "live smoke passed";
    }
    if (status === "skipped") {
        return "live smoke skipped";
    }
    const countedAsFailure = ["failed", "canceled", "requires_action", "unsupported", "pending", "running"];
    if (countedAsFailure.includes(status)) {
        return "live smoke failed";
    }
    throw new Error(`unsupported live smoke status: ${String(status)}`);
}
|
|
609
|
-
/**
 * Derive one readiness entry per matrix row from the collected smoke records.
 *
 * For each harness the first "live" and first "credential-skip" record are
 * looked up. Evidence lists the credential-skip result path (only when
 * credentials are reported missing) and the live result path plus its status.
 *
 * @param input - `{ matrix, records, env, outputRoot }`.
 * @returns Readiness entries in matrix row order.
 */
function createAdapterReadiness(input) {
    const { records, env, outputRoot } = input;
    const firstRecord = (harnessId, purpose) => records.find((record) => record.harnessId === harnessId && record.purpose === purpose);
    return input.matrix.rows.map((row) => {
        const liveRecord = firstRecord(row.harnessId, "live");
        const credentialRecord = firstRecord(row.harnessId, "credential-skip");
        const credentialState = credentialStateFor(row.harnessId, env);
        const evidence = [];
        if (credentialRecord !== undefined && credentialState.includes("missing")) {
            evidence.push(`credential skip: ${relativePath(credentialRecord.resultPath, outputRoot)}`);
        }
        if (liveRecord !== undefined) {
            evidence.push(`live result: ${relativePath(liveRecord.resultPath, outputRoot)}`);
            evidence.push(`status=${liveRecord.result.status}`);
        }
        return {
            harnessId: row.harnessId,
            displayName: row.displayName,
            contractReadiness: contractReadinessFor(row.harnessId),
            credentialState,
            liveSmoke: liveSmokeState(liveRecord),
            evidence,
            artifactRefs: liveRecord === undefined ? [] : safeArtifactRefs(liveRecord.result.artifacts)
        };
    });
}
|
|
635
|
-
/**
 * Render the adapter-readiness entries as a Markdown section.
 *
 * @param readiness - Entries from createAdapterReadiness.
 * @returns Lines: heading, blank line, header row, separator row, then one
 *   escaped row per entry. Empty evidence/artifact lists render as "-".
 */
function renderAdapterReadiness(readiness) {
    const listOrDash = (items) => (items.length > 0 ? items.join("; ") : "-");
    const tableRows = readiness.map((entry) => {
        const cells = [
            entry.displayName,
            entry.contractReadiness,
            entry.credentialState,
            entry.liveSmoke,
            listOrDash(entry.evidence),
            listOrDash(entry.artifactRefs)
        ];
        return `| ${cells.map((cell) => escapeMarkdownCell(cell)).join(" | ")} |`;
    });
    return [
        "## Adapter Readiness",
        "",
        "| Adapter | Contract/Mock Readiness | Credentials | Live Smoke | Last Evidence | Safe Artifact Refs |",
        "| --- | --- | --- | --- | --- | --- |",
        ...tableRows
    ];
}
|
|
655
|
-
/**
 * Render the smoke records as a Markdown section.
 *
 * @param records - Smoke records from runSmokeTask.
 * @param outputRoot - Output directory used to shorten result paths.
 * @returns Lines: heading, blank line, header row, separator row, then one
 *   escaped row per record. A missing summary renders as an empty cell.
 */
function renderSmokeRecords(records, outputRoot) {
    const tableRows = records.map((record) => {
        const { result } = record;
        const cells = [
            record.taskId,
            displayNameFor(record.harnessId),
            record.purpose,
            record.outcome,
            result.status,
            result.harness_kind,
            relativePath(record.resultPath, outputRoot),
            result.output_summary ?? ""
        ];
        return `| ${cells.map((cell) => escapeMarkdownCell(cell)).join(" | ")} |`;
    });
    return [
        "## Smoke Records",
        "",
        "| Task | Harness | Purpose | Expected | Result Status | Harness Kind | Result Record | Summary |",
        "| --- | --- | --- | --- | --- | --- | --- | --- |",
        ...tableRows
    ];
}
|
|
677
|
-
/**
 * Render the complete dashboard document.
 *
 * The document is a title, generation timestamp, output root, a per-status
 * tally of results (sorted by status name), then the capability matrix,
 * adapter readiness and smoke record sections.
 *
 * @param input - `{ matrix, readiness, records, createdAt, outputRoot }`.
 * @returns The Markdown text, ending with a newline.
 */
function renderDashboard(input) {
    const tally = input.records.reduce((seen, record) => {
        const status = record.result.status;
        return seen.set(status, (seen.get(status) ?? 0) + 1);
    }, new Map());
    const countText = Array.from(tally)
        .sort((a, b) => a[0].localeCompare(b[0]))
        .map((pair) => `${pair[0]}:${pair[1]}`)
        .join(", ");
    const sections = [
        "# HandoffKit Harness Smoke Dashboard",
        "",
        `Generated: ${input.createdAt}`,
        `Output root: ${input.outputRoot}`,
        `Run-result status counts: ${countText}`,
        "",
        ...renderCapabilityMatrix(input.matrix),
        "",
        ...renderAdapterReadiness(input.readiness),
        "",
        ...renderSmokeRecords(input.records, input.outputRoot),
        ""
    ];
    return sections.join("\n");
}
|
|
701
|
-
/**
 * Run every smoke task and write the dashboard to disk.
 *
 * Steps, in order: resolve options and create the output directory; build the
 * capability matrix; read the repo's HEAD SHA; run the fixed smoke tasks plus
 * any enabled live smokes one after another; derive adapter readiness; write
 * `dashboard.md` into the output directory.
 *
 * @param options - Optional `{ repo, outputRoot, timeoutMs, createdAt,
 *   commandSuccess, commandFailure, env, liveSmoke, liveSmokeHarnesses }`.
 *   `repo` defaults to the current working directory, `outputRoot` to
 *   `<repo>/.warrant/ensemble-dashboard`, and `env` to `process.env`.
 * @returns `{ outputRoot, dashboardPath, matrix, records, readiness }`.
 */
export async function runHarnessSmokeDashboard(options = {}) {
    const repo = resolve(options.repo ?? process.cwd());
    const outputRoot = resolve(options.outputRoot ?? join(repo, DEFAULT_OUTPUT_DIR));
    const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    const createdAt = options.createdAt ?? new Date().toISOString();
    const commandSuccess = options.commandSuccess ?? DEFAULT_COMMAND_SUCCESS;
    const commandFailure = options.commandFailure ?? DEFAULT_COMMAND_FAILURE;
    const env = options.env ?? process.env;
    mkdirSync(outputRoot, { recursive: true });
    const matrix = createHarnessCapabilityMatrix({ ...options, repo, commandSuccess, commandFailure, env });
    const baseGitSha = currentGitSha(repo);
    const contractRuns = smokeRuns({ commandSuccess, commandFailure });
    const liveRuns = liveSmokeRuns({
        env,
        targets: requestedLiveSmokeTargets({ env, liveSmoke: options.liveSmoke }),
        harnesses: options.liveSmokeHarnesses
    });
    // Runs execute strictly one at a time, in list order.
    const records = [];
    for (const run of [...contractRuns, ...liveRuns]) {
        const record = await runSmokeTask({ run, repo, baseGitSha, outputRoot, timeoutMs, createdAt });
        records.push(record);
    }
    const readiness = createAdapterReadiness({ matrix, records, env, outputRoot });
    const dashboardPath = join(outputRoot, "dashboard.md");
    const markdown = renderDashboard({ matrix, readiness, records, createdAt, outputRoot });
    writeFileSync(dashboardPath, markdown);
    return { outputRoot, dashboardPath, matrix, records, readiness };
}
|
|
759
|
-
/**
 * Namespace-style export bundling the two public dashboard entry points:
 * `capabilities` builds the capability matrix and `run` executes the smoke
 * dashboard.
 */
export const harnessDashboard = {
    capabilities: createHarnessCapabilityMatrix,
    run: runHarnessSmokeDashboard,
};
|