@fusionkit/ensemble 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
/**
 * The pair of cursorkit CLI entrypoints produced by `resolveCursorkitCli()`.
 */
export type CursorkitCli = {
    /** Path (expected to be absolute) to the bridge entrypoint, i.e. `cursorkit serve`. */
    serveCli: string;
    /** Path (expected to be absolute) to the bundled test-harness CLI used for suite probes. */
    harnessCli: string;
};
7
/**
 * Locate the CLIs shipped inside the bundled `@velum-labs/cursorkit` package.
 *
 * The package's `"."` export maps to `dist/src/cli.js`. The harness CLI sits
 * beside it at `dist/src/testing/cli.js`, but the exports map does not expose
 * it, so its path is computed from the resolved `"."` entry instead of being
 * resolved on its own.
 *
 * Setting `FUSIONKIT_CURSORKIT_SERVE_CLI` replaces the resolved `serveCli`
 * entry, which lets a custom build or an integration test point the bridge at
 * a different entrypoint. The harness CLI path is still derived relative to
 * whichever `serveCli` is in effect.
 *
 * @returns The serve and harness CLI paths.
 */
export declare function resolveCursorkitCli(): CursorkitCli;
@@ -0,0 +1,21 @@
1
import { createRequire } from "node:module";
import { dirname, join, resolve } from "node:path";
3
+ const require = createRequire(import.meta.url);
4
/**
 * Resolve the bundled `@velum-labs/cursorkit` CLIs from node_modules. The
 * package exports `"."` -> `dist/src/cli.js`; the harness CLI lives next to it
 * at `dist/src/testing/cli.js` (not exposed via the exports map, so it is
 * derived from the resolved `"."` entry rather than resolved directly).
 *
 * `FUSIONKIT_CURSORKIT_SERVE_CLI` (when set and non-empty) overrides the
 * resolved `serveCli` entry. This lets a custom build (or an integration test)
 * point the bridge at an alternate entrypoint; the harness CLI is still
 * derived relative to it. A relative override is resolved against the current
 * working directory, so both returned paths are always absolute.
 *
 * @returns `{ serveCli, harnessCli }`, both absolute paths.
 * @throws Whatever `require.resolve` throws when no override is set and the
 *   `@velum-labs/cursorkit` package cannot be located.
 */
export function resolveCursorkitCli() {
    const override = process.env.FUSIONKIT_CURSORKIT_SERVE_CLI;
    const serveCli = override !== undefined && override.length > 0
        // Make the override absolute: these paths are handed to child processes
        // that run with a different working directory, where a relative script
        // path would point at the wrong location.
        ? resolve(override)
        : require.resolve("@velum-labs/cursorkit");
    const harnessCli = join(dirname(serveCli), "testing", "cli.js");
    return { serveCli, harnessCli };
}
package/dist/harness.d.ts CHANGED
@@ -190,6 +190,13 @@ export type CandidateHardeningMetadata = {
190
190
  leak_count: number;
191
191
  };
192
192
  };
193
/**
 * Convert candidate hardening metadata into a `JsonValue`.
 *
 * The metadata is JSON-compatible by construction, yet TypeScript cannot prove
 * that an object type with optional members satisfies the `JsonValue` index
 * signature. This typed mapper therefore performs the conversion explicitly,
 * leaving out optional members that are absent, rather than relying on an
 * unchecked cast.
 *
 * @param hardening - The hardening metadata to serialize.
 * @returns The same data as a `JsonValue`.
 */
export declare function hardeningToJson(hardening: CandidateHardeningMetadata): JsonValue;
193
200
  export type EnsembleRuntime = {
194
201
  id: string;
195
202
  environmentId?: string;
package/dist/harness.js CHANGED
@@ -1 +1,51 @@
1
- export {};
1
+ /**
2
+ * Serialize hardening metadata as a `JsonValue`. The shape is JSON-compatible by
3
+ * construction, but TypeScript cannot prove an object type with optional members
4
+ * satisfies the `JsonValue` index signature, so this typed mapper does the
5
+ * conversion explicitly (omitting absent optionals) instead of an unchecked cast.
6
+ */
7
+ export function hardeningToJson(hardening) {
8
+ return {
9
+ requested_isolation: hardening.requested_isolation,
10
+ actual_isolation: hardening.actual_isolation,
11
+ runtime: {
12
+ ...(hardening.runtime.image !== undefined ? { image: hardening.runtime.image } : {}),
13
+ ...(hardening.runtime.driver !== undefined ? { driver: hardening.runtime.driver } : {}),
14
+ ...(hardening.runtime.provider !== undefined ? { provider: hardening.runtime.provider } : {}),
15
+ ...(hardening.runtime.runtime !== undefined ? { runtime: hardening.runtime.runtime } : {}),
16
+ ...(hardening.runtime.snapshot_id !== undefined ? { snapshot_id: hardening.runtime.snapshot_id } : {}),
17
+ ...(hardening.runtime.sandbox_id !== undefined ? { sandbox_id: hardening.runtime.sandbox_id } : {}),
18
+ ...(hardening.runtime.image_digest !== undefined ? { image_digest: hardening.runtime.image_digest } : {}),
19
+ ...(hardening.runtime.runtime_digest !== undefined
20
+ ? { runtime_digest: hardening.runtime.runtime_digest }
21
+ : {}),
22
+ workdir: hardening.runtime.workdir
23
+ },
24
+ mount_policy: {
25
+ worktree_writable: hardening.mount_policy.worktree_writable,
26
+ read_only_caches: [...hardening.mount_policy.read_only_caches],
27
+ ignored_dirs: [...hardening.mount_policy.ignored_dirs]
28
+ },
29
+ network_policy: {
30
+ default_deny: hardening.network_policy.default_deny,
31
+ allow_hosts: [...hardening.network_policy.allow_hosts],
32
+ enforced: hardening.network_policy.enforced
33
+ },
34
+ cleanup: {
35
+ attempted: hardening.cleanup.attempted,
36
+ succeeded: hardening.cleanup.succeeded,
37
+ status: hardening.cleanup.status,
38
+ ...(hardening.cleanup.timed_out !== undefined ? { timed_out: hardening.cleanup.timed_out } : {}),
39
+ ...(hardening.cleanup.error !== undefined ? { error: hardening.cleanup.error } : {})
40
+ },
41
+ secret_absence: {
42
+ secret_names: [...hardening.secret_absence.secret_names],
43
+ secret_value_hashes: [...hardening.secret_absence.secret_value_hashes],
44
+ injected_env_names: [...hardening.secret_absence.injected_env_names],
45
+ scanned: hardening.secret_absence.scanned,
46
+ leaks_found: hardening.secret_absence.leaks_found,
47
+ scan_scope: [...hardening.secret_absence.scan_scope],
48
+ leak_count: hardening.secret_absence.leak_count
49
+ }
50
+ };
51
+ }
package/dist/index.d.ts CHANGED
@@ -1,18 +1,14 @@
1
1
  export { createCommandHarness } from "./command.js";
2
2
  export type { CommandHarnessOptions } from "./command.js";
3
- export { claudeCodeHarness, claudeCodeHarnessCredentialSkipReason, createClaudeCodeHarness } from "./claude-code.js";
4
- export type { ClaudeCodeHarnessEnv, ClaudeCodeHarnessOptions } from "./claude-code.js";
5
- export { codexConfigToml, codexHarnessCredentialSkipReason, codexHarness, createCodexHarness } from "./codex.js";
6
- export type { CodexAmbientProvider, CodexApprovalPolicy, CodexConfigTomlInput, CodexExecInput, CodexExecResult, CodexExecRunner, CodexHarnessEnv, CodexHarnessOptions, CodexOpenAiCompatibleProvider, CodexProvider, CodexResponsesProvider, CodexSandboxMode } from "./codex.js";
3
+ export { resolveCursorkitCli } from "./cursorkit-path.js";
4
+ export type { CursorkitCli } from "./cursorkit-path.js";
7
5
  export { createArtifactStore } from "./artifacts.js";
8
6
  export type { ArtifactStore } from "./artifacts.js";
9
- export { createHarnessCapabilityMatrix, harnessDashboard, runHarnessSmokeDashboard } from "./dashboard.js";
10
- export type { HarnessAdapterReadiness, HarnessAvailability, HarnessCapabilityMatrix, HarnessCapabilityMatrixRow, HarnessCapabilityTarget, HarnessLiveSmokeTarget, HarnessSmokeDashboard, HarnessSmokeDashboardOptions, HarnessSmokeOutcome, HarnessSmokePurpose, HarnessSmokeRecord } from "./dashboard.js";
11
7
  export { createMockJudgeSynthesizer } from "./judge.js";
12
8
  export type { JudgeCandidateEvidence, JudgeInput, JudgePatch, JudgeRepairInput, JudgeSynthesizer, JudgeSynthesisOutput, JudgeVerificationInput, MockJudgeSynthesizerOptions, SynthesisFailureSummary, SynthesisRepairAttempt, SynthesisVerificationResult } from "./judge.js";
13
9
  export { ensemble, runEnsemble } from "./run.js";
14
- export { createFusionKitJudgeSynthesizer, runFusionPanels, runUnifiedHarnessE2E } from "./unified.js";
15
- export type { CursorHarnessRunnerInput, CursorHarnessRunnerResult, FusionPanelOptions, UnifiedHarnessE2EOptions, UnifiedHarnessE2EResult, UnifiedHarnessKind, UnifiedHarnessMatrixResult } from "./unified.js";
10
+ export { createFusionKitJudgeSynthesizer, runFusionPanels, runUnifiedHarnessE2E, setToolHarnessProvider } from "./unified.js";
11
+ export type { CursorHarnessRunnerInput, CursorHarnessRunnerResult, FusionPanelOptions, ToolHarnessProvider, ToolHarnessResolveOptions, UnifiedHarnessE2EOptions, UnifiedHarnessE2EResult, UnifiedHarnessKind, UnifiedHarnessMatrixResult } from "./unified.js";
16
12
  export { ambientTraceId, emitTrace, getTraceEmitter, newSpanId, newTraceId, TRACE_CANDIDATE_HEADER, TRACE_ID_HEADER, TRACE_PARENT_SPAN_HEADER, TRACE_SPAN_HEADER, TraceEmitter } from "./trace.js";
17
13
  export type { EmitInput, FusionTraceComponent, FusionTraceEvent, FusionTraceEventType } from "./trace.js";
18
14
  export { runJudgeSynthesis } from "./synthesis.js";
@@ -27,4 +23,5 @@ export type { FusionKitToolExecutionBatch, FusionKitToolExecutionRequest, Fusion
27
23
  export type { CandidateCommandIsolationInput, CandidateCommandIsolationResult } from "./isolation.js";
28
24
  export { cleanupCandidateWorktree, cleanupWorktreePlan, createWorktreePlan, defaultOutputRoot, diffCandidateWorktree, sealCandidateWorktree } from "./worktree.js";
29
25
  export type { CandidateWorktree, WorktreePlan } from "./worktree.js";
26
+ export { hardeningToJson } from "./harness.js";
30
27
  export type { EnsembleCandidateSummary, EnsembleDescriptor, EnsembleJudge, EnsembleModel, EnsemblePolicy, EnsembleRunResult, EnsembleRuntime, CandidateContainerDriver, CandidateContainerDriverInput, CandidateContainerDriverResult, CandidateHardeningMetadata, CandidateIsolationConfig, CandidateIsolationKind, CandidateIsolationMountPolicy, CandidateIsolationNetworkPolicy, CandidateIsolationSecretPolicy, HarnessAdapter, HarnessArtifact, HarnessCapabilities, HarnessCandidateOutput, HarnessCollectInput, HarnessPrepareInput, HarnessRunInput, HarnessToolRecord, ReviewEvidence, EnsembleRunSummary, VerificationProfile } from "./harness.js";
package/dist/index.js CHANGED
@@ -1,11 +1,9 @@
1
1
  export { createCommandHarness } from "./command.js";
2
- export { claudeCodeHarness, claudeCodeHarnessCredentialSkipReason, createClaudeCodeHarness } from "./claude-code.js";
3
- export { codexConfigToml, codexHarnessCredentialSkipReason, codexHarness, createCodexHarness } from "./codex.js";
2
+ export { resolveCursorkitCli } from "./cursorkit-path.js";
4
3
  export { createArtifactStore } from "./artifacts.js";
5
- export { createHarnessCapabilityMatrix, harnessDashboard, runHarnessSmokeDashboard } from "./dashboard.js";
6
4
  export { createMockJudgeSynthesizer } from "./judge.js";
7
5
  export { ensemble, runEnsemble } from "./run.js";
8
- export { createFusionKitJudgeSynthesizer, runFusionPanels, runUnifiedHarnessE2E } from "./unified.js";
6
+ export { createFusionKitJudgeSynthesizer, runFusionPanels, runUnifiedHarnessE2E, setToolHarnessProvider } from "./unified.js";
9
7
  export { ambientTraceId, emitTrace, getTraceEmitter, newSpanId, newTraceId, TRACE_CANDIDATE_HEADER, TRACE_ID_HEADER, TRACE_PARENT_SPAN_HEADER, TRACE_SPAN_HEADER, TraceEmitter } from "./trace.js";
10
8
  export { runJudgeSynthesis } from "./synthesis.js";
11
9
  export { createMockHarness } from "./mock.js";
@@ -13,3 +11,4 @@ export { createToolExecutor, registerDemoTools, sideEffectsForTool } from "./too
13
11
  export { executeFusionKitToolBatch, FusionKitToolExecutorClient, FusionKitToolExecutorClientError, FusionKitToolExecutorError, startFusionKitToolExecutorServer } from "./external-executor.js";
14
12
  export { createCliContainerDriver, runCandidateCommandWithIsolation, secretAbsenceMetadata, secretValueHash } from "./isolation.js";
15
13
  export { cleanupCandidateWorktree, cleanupWorktreePlan, createWorktreePlan, defaultOutputRoot, diffCandidateWorktree, sealCandidateWorktree } from "./worktree.js";
14
+ export { hardeningToJson } from "./harness.js";
package/dist/run.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { assertHarnessCandidateRecordV1, assertHarnessRunRequestV1, assertHarnessRunResultV1, MODEL_FUSION_SCHEMA_BUNDLE_HASH, requestHash } from "@fusionkit/protocol";
2
2
  import { createArtifactStore } from "./artifacts.js";
3
+ import { hardeningToJson } from "./harness.js";
3
4
  import { runJudgeSynthesis } from "./synthesis.js";
4
5
  import { cleanupWorktreePlan, createWorktreePlan, defaultOutputRoot, diffCandidateWorktree, sealCandidateWorktree } from "./worktree.js";
5
6
  const PRODUCER_GIT_SHA = "0".repeat(40);
@@ -99,7 +100,7 @@ function candidateMetadata(output, descriptor, worktree) {
99
100
  }
100
101
  Object.assign(metadata, output.metadata ?? {});
101
102
  if (metadata.hardening === undefined) {
102
- metadata.hardening = fallbackCandidateHardening(descriptor);
103
+ metadata.hardening = hardeningToJson(fallbackCandidateHardening(descriptor));
103
104
  }
104
105
  if (descriptor.reviewEvidence !== undefined) {
105
106
  metadata.review_evidence_attached = true;
@@ -5,9 +5,7 @@ import { join } from "node:path";
5
5
  import { test } from "node:test";
6
6
  import { assertJudgeSynthesisRecordV1, assertHarnessCandidateRecordV1, assertHarnessRunRequestV1, assertHarnessRunResultV1, MODEL_FUSION_SCHEMA_BUNDLE_HASH, requestHash, responseHash } from "@fusionkit/protocol";
7
7
  import { gitText } from "@fusionkit/workspace";
8
- import { claudeCodeHarness, claudeCodeHarnessCredentialSkipReason } from "../claude-code.js";
9
8
  import { createCommandHarness } from "../command.js";
10
- import { codexConfigToml, codexHarness, codexHarnessCredentialSkipReason } from "../codex.js";
11
9
  import { createMockJudgeSynthesizer } from "../judge.js";
12
10
  import { createMockHarness } from "../mock.js";
13
11
  import { runEnsemble } from "../run.js";
@@ -88,10 +86,6 @@ function addFilePatch(path, content) {
88
86
  ""
89
87
  ].join("\n");
90
88
  }
91
- function emptyCodexHome() {
92
- const path = mkdtempSync(join(tmpdir(), "ensemble-codex-empty-home-"));
93
- return { path, cleanup: () => rmSync(path, { recursive: true, force: true }) };
94
- }
95
89
  test("mock adapter runs N candidates and emits valid model-fusion records", async () => {
96
90
  const result = await runEnsemble(descriptor({
97
91
  harness: createMockHarness({
@@ -128,227 +122,6 @@ test("command adapter records command output, artifact, tool record, and verific
128
122
  "exec_ensemble_test_command_0"
129
123
  ]);
130
124
  });
131
- test("claude-code adapter can replace mock and skip clearly without credentials", async () => {
132
- const result = await runEnsemble(descriptor({
133
- models: [{ id: "claude", model: "claude-sonnet-4-6" }],
134
- harness: claudeCodeHarness({ env: {} })
135
- }));
136
- assert.equal(result.candidates.length, 1);
137
- assert.equal(result.harnessRunResult.status, "skipped");
138
- assert.equal(result.candidates[0]?.status, "skipped");
139
- assert.equal(result.candidates[0]?.error?.kind, "capability_missing");
140
- assert.match(result.candidates[0]?.error?.message ?? "", /missing Claude Code credential/);
141
- assert.match(result.summary?.candidates[0]?.verification?.evidence[0] ?? "", /missing Claude/);
142
- });
143
- test("claude-code adapter delegates through a session backend from a generic descriptor", async () => {
144
- const repo = makeRepo();
145
- const seen = {};
146
- const backend = {
147
- isolation: "vercel-sandbox",
148
- supports: () => true,
149
- execute: async (input) => {
150
- seen.agentKind = input.contract.agent.kind;
151
- seen.env = input.execution.env;
152
- seen.repoDir = input.repoDir;
153
- assert.equal(input.contract.isolation, "vercel-sandbox");
154
- assert.equal(input.contract.execution?.kind, "agent");
155
- assert.equal(input.secrets.length, 0);
156
- writeFileSync(join(input.repoDir, "CLAUDE_RESULT.md"), "fake claude result\n");
157
- input.emit({
158
- type: "command.executed",
159
- argvHash: requestHash({ adapter: "claude-code" }),
160
- exitCode: 0
161
- });
162
- return { exitCode: 0, log: Buffer.from("fake claude transcript") };
163
- }
164
- };
165
- try {
166
- const result = await runEnsemble(descriptor({
167
- models: [{ id: "claude", model: "claude-sonnet-4-6" }],
168
- harness: claudeCodeHarness({
169
- env: {
170
- ANTHROPIC_API_KEY: "sk-ant-test",
171
- VERCEL_TOKEN: "vercel-test"
172
- },
173
- backend
174
- }),
175
- workspace: repo.repo,
176
- baseGitSha: repo.head,
177
- outputRoot: repo.outputRoot,
178
- cleanupWorktrees: true
179
- }));
180
- assert.equal(result.harnessRunResult.status, "succeeded");
181
- assert.equal(result.candidates[0]?.status, "succeeded");
182
- assert.equal(seen.agentKind, "claude-code");
183
- assert.equal(seen.env?.ANTHROPIC_API_KEY, "sk-ant-test");
184
- assert.equal(Object.hasOwn(seen.env ?? {}, "VERCEL_TOKEN"), false);
185
- assert.notEqual(seen.repoDir, repo.repo);
186
- assert.ok(result.artifacts.some((artifact) => artifact.kind === "patch"));
187
- assert.match(result.candidates[0]?.metadata?.adapter, /claude-code/);
188
- }
189
- finally {
190
- repo.cleanup();
191
- }
192
- });
193
- test("smoke: claude-code adapter runs live when credentials are available", { skip: liveClaudeSmokeSkipReason() }, async () => {
194
- const repo = makeRepo();
195
- try {
196
- const result = await runEnsemble(descriptor({
197
- id: "claude_smoke",
198
- models: [{ id: "claude", model: "claude-sonnet-4-6" }],
199
- harness: claudeCodeHarness(),
200
- runtime: {
201
- id: "vercel-sandbox",
202
- isolation: {
203
- kind: "microvm",
204
- networkPolicy: {
205
- defaultDeny: true,
206
- allowHosts: [
207
- "registry.npmjs.org",
208
- "api.anthropic.com",
209
- "ai-gateway.vercel.sh"
210
- ]
211
- }
212
- }
213
- },
214
- policy: {
215
- id: "claude-smoke-policy",
216
- allowedTools: ["read_file"],
217
- sideEffects: "read_only",
218
- timeoutMs: 180_000
219
- },
220
- prompt: "Read README.md if present, then reply exactly CLAUDE_LIVE_SMOKE_OK. Do not modify files.",
221
- workspace: repo.repo,
222
- baseGitSha: repo.head,
223
- outputRoot: repo.outputRoot,
224
- cleanupWorktrees: true
225
- }));
226
- assertHarnessRunResultV1(result.harnessRunResult);
227
- assert.equal(result.harnessRunResult.status, "succeeded");
228
- assert.equal(result.candidates[0]?.status, "succeeded");
229
- }
230
- finally {
231
- repo.cleanup();
232
- }
233
- });
234
- test("codex config declares a Responses provider without touching Cursor records", () => {
235
- const toml = codexConfigToml({
236
- model: "gpt-5.5-codex",
237
- sandboxMode: "workspace-write",
238
- approvalPolicy: "never",
239
- provider: {
240
- providerId: "warrant-local",
241
- name: "Warrant Local",
242
- baseUrl: "https://gateway.example.com/v1/responses",
243
- apiKeyEnvName: "WARRANT_CODEX_API_KEY",
244
- requiresOpenAiAuth: true
245
- }
246
- });
247
- assert.match(toml, /model = "gpt-5\.5-codex"/);
248
- assert.match(toml, /model_provider = "warrant-local"/);
249
- assert.match(toml, /\[model_providers\.warrant-local\]/);
250
- assert.match(toml, /base_url = "https:\/\/gateway\.example\.com\/v1"/);
251
- assert.match(toml, /wire_api = "responses"/);
252
- assert.match(toml, /env_key = "WARRANT_CODEX_API_KEY"/);
253
- assert.equal(toml.includes("cursor"), false);
254
- });
255
- test("codex adapter emits schema-valid skipped output without credentials", async () => {
256
- const codexHome = emptyCodexHome();
257
- try {
258
- const result = await runEnsemble(descriptor({
259
- models: [{ id: "codex", model: "gpt-5.5-codex" }],
260
- harness: codexHarness({
261
- env: { CODEX_HOME: codexHome.path },
262
- provider: { kind: "ambient" }
263
- })
264
- }));
265
- assertHarnessRunResultV1(result.harnessRunResult);
266
- assert.equal(result.harnessRunResult.status, "skipped");
267
- assert.equal(result.candidates[0]?.status, "skipped");
268
- assert.equal(result.candidates[0]?.error?.kind, "capability_missing");
269
- assert.match(result.candidates[0]?.error?.message ?? "", /Codex credentials are absent/);
270
- assert.match(result.summary?.candidates[0]?.verification?.evidence[0] ?? "", /Codex credentials/);
271
- }
272
- finally {
273
- codexHome.cleanup();
274
- }
275
- });
276
- test("codex adapter runs through an injected Responses runner and records evidence", async () => {
277
- const calls = [];
278
- const result = await runEnsemble(descriptor({
279
- models: [{ id: "codex", model: "gpt-5.5-codex" }],
280
- harness: codexHarness({
281
- env: {},
282
- provider: {
283
- kind: "responses",
284
- baseUrl: "http://127.0.0.1:8787/v1/responses",
285
- apiKey: "inline-test-key",
286
- requiresOpenAiAuth: true,
287
- providerId: "local-responses"
288
- },
289
- runner: (input) => {
290
- calls.push(input);
291
- const codexHome = input.env.CODEX_HOME;
292
- assert.ok(codexHome);
293
- const config = readFileSync(join(codexHome, "config.toml"), "utf8");
294
- assert.match(config, /model_provider = "local-responses"/);
295
- assert.match(config, /base_url = "http:\/\/127\.0\.0\.1:8787\/v1"/);
296
- assert.match(config, /wire_api = "responses"/);
297
- assert.match(config, /env_key = "WARRANT_CODEX_PROVIDER_API_KEY"/);
298
- assert.equal(input.env.WARRANT_CODEX_PROVIDER_API_KEY, "inline-test-key");
299
- return {
300
- stdout: '{"type":"message","content":"codex-ok"}\n',
301
- stderr: "",
302
- exitCode: 0
303
- };
304
- }
305
- })
306
- }));
307
- assert.equal(calls.length, 1);
308
- assert.deepEqual(calls[0]?.args.slice(0, 3), [
309
- "exec",
310
- "--json",
311
- "--skip-git-repo-check"
312
- ]);
313
- assert.equal(result.harnessRunResult.status, "succeeded");
314
- assert.equal(result.candidates[0]?.status, "succeeded");
315
- assert.equal(result.toolRecords[0]?.status, "succeeded");
316
- assert.match(result.candidates[0]?.metadata?.adapter, /codex/);
317
- });
318
- function liveClaudeSmokeSkipReason() {
319
- if (process.env.WARRANT_CLAUDE_SMOKE !== "1") {
320
- return "set WARRANT_CLAUDE_SMOKE=1 plus Claude Code credentials to run the live Claude Code smoke";
321
- }
322
- return claudeCodeHarnessCredentialSkipReason() ?? false;
323
- }
324
- function liveCodexSmokeSkipReason() {
325
- if (process.env.WARRANT_CODEX_SMOKE !== "1") {
326
- return "set WARRANT_CODEX_SMOKE=1 plus Codex credentials to run the live Codex smoke";
327
- }
328
- return codexHarnessCredentialSkipReason() ?? false;
329
- }
330
- test("codex adapter live smoke is credential-gated", { skip: liveCodexSmokeSkipReason() }, async () => {
331
- const repo = makeRepo();
332
- try {
333
- const result = await runEnsemble(descriptor({
334
- prompt: "Read README.md if present, then reply exactly CODEX_LIVE_SMOKE_OK. Do not modify files.",
335
- models: [{ id: "codex", model: process.env.WARRANT_CODEX_SMOKE_MODEL ?? "gpt-5.5-codex" }],
336
- harness: codexHarness({
337
- timeoutMs: 60_000,
338
- sandboxMode: "read-only",
339
- approvalPolicy: "never"
340
- }),
341
- workspace: repo.repo,
342
- baseGitSha: repo.head,
343
- outputRoot: repo.outputRoot
344
- }));
345
- assertHarnessRunResultV1(result.harnessRunResult);
346
- assert.notEqual(result.harnessRunResult.status, "skipped");
347
- }
348
- finally {
349
- repo.cleanup();
350
- }
351
- });
352
125
  test("command adapter records optional container hardening metadata", async () => {
353
126
  const driver = {
354
127
  id: "fake-ensemble-container",
package/dist/unified.d.ts CHANGED
@@ -1,7 +1,33 @@
1
1
  import type { JsonValue, ModelFusionStatus } from "@fusionkit/protocol";
2
- import type { EnsembleModel, EnsembleRunResult } from "./harness.js";
2
+ import type { EnsembleDescriptor, EnsembleModel, EnsembleRunResult, HarnessAdapter } from "./harness.js";
3
3
  import type { JudgeSynthesizer } from "./judge.js";
4
4
  export type UnifiedHarnessKind = "mock" | "command" | "agent" | "codex" | "claude-code" | "cursor-acp" | "cursor-desktop";
5
/**
 * What the unified runner hands to a tool's harness factory. Each per-tool
 * package translates these into its own harness options (for example the
 * provider base URL).
 */
export type ToolHarnessResolveOptions = {
    /** Base URL of the fusion backend the tool harness should talk to. */
    fusionBackendUrl: string;
    /** API key for the fusion backend, when one is configured. */
    fusionApiKey?: string;
    /** Timeout in milliseconds, when one is configured. */
    timeoutMs?: number;
};
14
/**
 * Everything ensemble needs to know about a tool-backed harness kind (codex,
 * claude-code, cursor-*), supplied from outside so ensemble does not depend on
 * any per-tool package. The fusionkit CLI builds one from its tool registry and
 * registers it through {@link setToolHarnessProvider}. If none is registered,
 * asking for a tool harness kind throws a clear error.
 */
export type ToolHarnessProvider = {
    /** Create the harness adapter for `kind` using the resolved options. */
    adapter(kind: UnifiedHarnessKind, options: ToolHarnessResolveOptions): HarnessAdapter;
    /** Report the side-effect policy value that applies to `kind`. */
    sideEffects(kind: UnifiedHarnessKind): EnsembleDescriptor["policy"]["sideEffects"];
    /** Return the response-shape instruction text for `kind`. */
    responseShape(kind: UnifiedHarnessKind): string;
};
26
/**
 * Install the provider used to resolve tool-backed harness kinds. The fusionkit
 * CLI calls this at startup with a provider built from its tool registry.
 *
 * @param provider - The provider to register, or `undefined` to clear it.
 */
export declare function setToolHarnessProvider(provider: ToolHarnessProvider | undefined): void;
5
31
  export type UnifiedHarnessMatrixResult = {
6
32
  harness: UnifiedHarnessKind;
7
33
  modelIds: string[];
@@ -26,7 +52,6 @@ export type CursorHarnessRunnerInput = {
26
52
  repo: string;
27
53
  outDir: string;
28
54
  timeoutMs?: number;
29
- cursorKitDir?: string;
30
55
  };
31
56
  export type CursorHarnessRunnerResult = {
32
57
  status: ModelFusionStatus;
@@ -46,7 +71,6 @@ export type UnifiedHarnessE2EOptions = {
46
71
  command?: string;
47
72
  timeoutMs?: number;
48
73
  judgeModel?: string;
49
- cursorKitDir?: string;
50
74
  cursorRunner?: (input: CursorHarnessRunnerInput) => Promise<CursorHarnessRunnerResult>;
51
75
  /**
52
76
  * Per-candidate model backend URLs keyed by `EnsembleModel.id`. When a
package/dist/unified.js CHANGED
@@ -4,12 +4,32 @@ import { join, resolve } from "node:path";
4
4
  import { newSpanId, TRACE_ID_HEADER, TRACE_SPAN_HEADER } from "@fusionkit/protocol";
5
5
  import { gitText } from "@fusionkit/workspace";
6
6
  import { createAgentHarness } from "./agent.js";
7
- import { claudeCodeHarness } from "./claude-code.js";
8
7
  import { createCommandHarness } from "./command.js";
9
- import { codexHarness } from "./codex.js";
10
- import { createCursorHarness } from "./cursor.js";
8
+ import { resolveCursorkitCli } from "./cursorkit-path.js";
11
9
  import { createMockHarness } from "./mock.js";
12
10
  import { runEnsemble } from "./run.js";
11
+ let toolHarnessProvider;
12
+ /**
13
+ * Register the provider that resolves tool-backed harness kinds. The fusionkit
14
+ * CLI wires this at startup from its tool registry.
15
+ */
16
+ export function setToolHarnessProvider(provider) {
17
+ toolHarnessProvider = provider;
18
+ }
19
+ function requireToolHarnessProvider(kind) {
20
+ if (toolHarnessProvider === undefined) {
21
+ throw new Error(`no tool harness provider registered for harness kind "${kind}"; ` +
22
+ "the fusionkit CLI wires this via setToolHarnessProvider (build the tool registry first).");
23
+ }
24
+ return toolHarnessProvider;
25
+ }
26
/**
 * Ask the registered tool harness provider for the adapter of `kind`.
 *
 * The backend URL is passed through `normalizeFusionBackendUrl`, and the
 * optional `fusionApiKey` / `timeoutMs` are forwarded only when they are not
 * `undefined`. Throws (via `requireToolHarnessProvider`) when no provider is
 * registered.
 */
function resolveToolAdapter(kind, options) {
    // Look the provider up first so a missing registration is reported before
    // any option is processed.
    const provider = requireToolHarnessProvider(kind);
    const backendUrl = normalizeFusionBackendUrl(options.fusionBackendUrl);
    const apiKeyPart = options.fusionApiKey !== undefined ? { fusionApiKey: options.fusionApiKey } : {};
    const timeoutPart = options.timeoutMs !== undefined ? { timeoutMs: options.timeoutMs } : {};
    return provider.adapter(kind, {
        fusionBackendUrl: backendUrl,
        ...apiKeyPart,
        ...timeoutPart
    });
}
13
33
  function normalizeFusionBackendUrl(value) {
14
34
  return value.replace(/\/+$/, "");
15
35
  }
@@ -31,7 +51,7 @@ function sideEffectsForHarness(kind) {
31
51
  case "claude-code":
32
52
  case "cursor-acp":
33
53
  case "cursor-desktop":
34
- return "writes_workspace";
54
+ return requireToolHarnessProvider(kind).sideEffects(kind);
35
55
  default: {
36
56
  const exhausted = kind;
37
57
  throw new Error(`unsupported unified harness: ${String(exhausted)}`);
@@ -73,25 +93,10 @@ function harnessAdapter(kind, options) {
73
93
  });
74
94
  }
75
95
  case "codex":
76
- return codexHarness({
77
- timeoutMs: options.timeoutMs,
78
- provider: {
79
- kind: "openai-compatible",
80
- baseUrl: normalizeFusionBackendUrl(options.fusionBackendUrl),
81
- ...(options.fusionApiKey ? { apiKey: options.fusionApiKey } : {})
82
- }
83
- });
84
96
  case "claude-code":
85
- return claudeCodeHarness({ timeoutMs: options.timeoutMs });
86
97
  case "cursor-acp":
87
98
  case "cursor-desktop":
88
- return createCursorHarness({
89
- id: kind,
90
- fusionBackendUrl: normalizeFusionBackendUrl(options.fusionBackendUrl),
91
- ...(options.fusionApiKey ? { apiKey: options.fusionApiKey } : {}),
92
- ...(options.cursorKitDir !== undefined ? { cursorKitDir: options.cursorKitDir } : {}),
93
- ...(options.timeoutMs !== undefined ? { timeoutMs: options.timeoutMs } : {})
94
- });
99
+ return resolveToolAdapter(kind, options);
95
100
  default: {
96
101
  const exhausted = kind;
97
102
  throw new Error(`unsupported unified harness: ${String(exhausted)}`);
@@ -107,12 +112,10 @@ function responseShapeFor(kind) {
107
112
  return ("Respond to the user in the natural shape the request calls for: a direct answer, " +
108
113
  "a plan, or the concrete code change. Reply in first person as the assistant.");
109
114
  case "codex":
110
- return "Return a Codex-style result summary with patch and verification evidence.";
111
115
  case "claude-code":
112
- return "Return a Claude Code-style transcript summary with patch/worktree evidence.";
113
116
  case "cursor-acp":
114
117
  case "cursor-desktop":
115
- return "Return text suitable for Cursor ACP session/update plus route evidence notes.";
118
+ return requireToolHarnessProvider(kind).responseShape(kind);
116
119
  default: {
117
120
  const exhausted = kind;
118
121
  throw new Error(`unsupported unified harness: ${String(exhausted)}`);
@@ -311,17 +314,10 @@ function statusForResult(result) {
311
314
  return result.failureSummary ? "failed" : result.harnessRunResult.status;
312
315
  }
313
316
  async function defaultCursorRunner(input) {
314
- if (!input.cursorKitDir) {
315
- return {
316
- status: "skipped",
317
- message: "Cursorkit directory not configured",
318
- details: { reason: "cursor_kit_dir_missing" }
319
- };
320
- }
317
+ const { harnessCli } = resolveCursorkitCli();
321
318
  const suite = input.kind === "cursor-acp" ? "acp" : "desktop-route";
322
319
  const args = [
323
- "test:harness",
324
- "--",
320
+ harnessCli,
325
321
  "--suite",
326
322
  suite,
327
323
  "--base-url",
@@ -335,8 +331,8 @@ async function defaultCursorRunner(input) {
335
331
  ];
336
332
  mkdirSync(input.outDir, { recursive: true });
337
333
  return await new Promise((resolveResult) => {
338
- const child = spawn("pnpm", args, {
339
- cwd: input.cursorKitDir,
334
+ const child = spawn(process.execPath, args, {
335
+ cwd: input.outDir,
340
336
  stdio: ["ignore", "pipe", "pipe"]
341
337
  });
342
338
  let stdout = "";
@@ -367,8 +363,7 @@ async function runCursorHarness(kind, options) {
367
363
  fusionBackendUrl: options.fusionBackendUrl,
368
364
  repo: options.repo,
369
365
  outDir: join(options.outputRoot, `${kind}-${model.id}`),
370
- timeoutMs: options.timeoutMs,
371
- cursorKitDir: options.cursorKitDir
366
+ timeoutMs: options.timeoutMs
372
367
  })));
373
368
  const failed = perModel.find((result) => result.status === "failed");
374
369
  const skipped = perModel.every((result) => result.status === "skipped");
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@fusionkit/ensemble",
3
3
  "private": false,
4
- "version": "0.1.5",
4
+ "version": "0.1.6",
5
5
  "repository": {
6
6
  "type": "git",
7
7
  "url": "git+https://github.com/velum-labs/handoffkit.git",
@@ -25,11 +25,12 @@
25
25
  "provenance": true
26
26
  },
27
27
  "dependencies": {
28
- "@fusionkit/adapter-ai-sdk": "0.1.5",
29
- "@fusionkit/model-gateway": "0.1.5",
30
- "@fusionkit/runner": "0.1.5",
31
- "@fusionkit/session-harness": "0.1.5",
32
- "@fusionkit/workspace": "0.1.5",
33
- "@fusionkit/protocol": "0.1.5"
28
+ "@velum-labs/cursorkit": "0.1.2",
29
+ "@fusionkit/adapter-ai-sdk": "0.1.6",
30
+ "@fusionkit/model-gateway": "0.1.6",
31
+ "@fusionkit/protocol": "0.1.6",
32
+ "@fusionkit/session-harness": "0.1.6",
33
+ "@fusionkit/runner": "0.1.6",
34
+ "@fusionkit/workspace": "0.1.6"
34
35
  }
35
36
  }
@@ -1,25 +0,0 @@
1
- import type { NetworkPolicy } from "@fusionkit/protocol";
2
- import type { SessionBackend } from "@fusionkit/runner";
3
- import type { ClaudeCodeBindingOptions } from "@fusionkit/session-harness";
4
- import type { HarnessAdapter } from "./harness.js";
5
- export type ClaudeCodeHarnessEnv = Record<string, string | undefined>;
6
- export type ClaudeCodeHarnessOptions = ClaudeCodeBindingOptions & {
7
- id?: string;
8
- /** Defaults to `process.env`; tests can pass `{}` for deterministic skips. */
9
- env?: ClaudeCodeHarnessEnv;
10
- /** Already-released secret values forwarded through the session backend seam. */
11
- secrets?: {
12
- name: string;
13
- value: string;
14
- }[];
15
- /** Test/extension seam. Defaults to `aiSdkHarnessBackend(...)`. */
16
- backend?: SessionBackend;
17
- pool?: string;
18
- network?: NetworkPolicy;
19
- timeoutMs?: number;
20
- logMaxBytes?: number;
21
- skipWhenUnavailable?: boolean;
22
- };
23
- export declare function claudeCodeHarnessCredentialSkipReason(env?: ClaudeCodeHarnessEnv, options?: ClaudeCodeHarnessOptions): string | undefined;
24
- export declare function createClaudeCodeHarness(options?: ClaudeCodeHarnessOptions): HarnessAdapter;
25
- export declare function claudeCodeHarness(options?: ClaudeCodeHarnessOptions): HarnessAdapter;