@fusionkit/ensemble 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ export type CursorkitCli = {
2
+ /** Absolute path to the bridge entrypoint (`cursorkit serve`). */
3
+ serveCli: string;
4
+ /** Absolute path to the bundled test-harness CLI (suite probes). */
5
+ harnessCli: string;
6
+ };
7
+ /**
8
+ * Resolve the bundled `@velum-labs/cursorkit` CLIs from node_modules. The
9
+ * package exports `"."` -> `dist/src/cli.js`; the harness CLI lives next to it
10
+ * at `dist/src/testing/cli.js` (not exposed via the exports map, so it is
11
+ * derived from the resolved `"."` entry rather than resolved directly).
12
+ *
13
+ * `FUSIONKIT_CURSORKIT_SERVE_CLI` overrides the resolved `serveCli` entry. This
14
+ * lets a custom build (or an integration test) point the bridge at an alternate
15
+ * entrypoint; the harness CLI is still derived relative to it.
16
+ */
17
+ export declare function resolveCursorkitCli(): CursorkitCli;
@@ -0,0 +1,21 @@
1
+ import { createRequire } from "node:module";
2
+ import { dirname, join } from "node:path";
3
+ const require = createRequire(import.meta.url);
4
+ /**
5
+ * Resolve the bundled `@velum-labs/cursorkit` CLIs from node_modules. The
6
+ * package exports `"."` -> `dist/src/cli.js`; the harness CLI lives next to it
7
+ * at `dist/src/testing/cli.js` (not exposed via the exports map, so it is
8
+ * derived from the resolved `"."` entry rather than resolved directly).
9
+ *
10
+ * `FUSIONKIT_CURSORKIT_SERVE_CLI` overrides the resolved `serveCli` entry. This
11
+ * lets a custom build (or an integration test) point the bridge at an alternate
12
+ * entrypoint; the harness CLI is still derived relative to it.
13
+ */
14
+ export function resolveCursorkitCli() {
15
+ const override = process.env.FUSIONKIT_CURSORKIT_SERVE_CLI;
16
+ const serveCli = override !== undefined && override.length > 0
17
+ ? override
18
+ : require.resolve("@velum-labs/cursorkit");
19
+ const harnessCli = join(dirname(serveCli), "testing", "cli.js");
20
+ return { serveCli, harnessCli };
21
+ }
package/dist/harness.d.ts CHANGED
@@ -190,6 +190,13 @@ export type CandidateHardeningMetadata = {
190
190
  leak_count: number;
191
191
  };
192
192
  };
193
+ /**
194
+ * Serialize hardening metadata as a `JsonValue`. The shape is JSON-compatible by
195
+ * construction, but TypeScript cannot prove an object type with optional members
196
+ * satisfies the `JsonValue` index signature, so this typed mapper does the
197
+ * conversion explicitly (omitting absent optionals) instead of an unchecked cast.
198
+ */
199
+ export declare function hardeningToJson(hardening: CandidateHardeningMetadata): JsonValue;
193
200
  export type EnsembleRuntime = {
194
201
  id: string;
195
202
  environmentId?: string;
package/dist/harness.js CHANGED
@@ -1 +1,51 @@
1
- export {};
1
+ /**
2
+ * Serialize hardening metadata as a `JsonValue`. The shape is JSON-compatible by
3
+ * construction, but TypeScript cannot prove an object type with optional members
4
+ * satisfies the `JsonValue` index signature, so this typed mapper does the
5
+ * conversion explicitly (omitting absent optionals) instead of an unchecked cast.
6
+ */
7
+ export function hardeningToJson(hardening) {
8
+ return {
9
+ requested_isolation: hardening.requested_isolation,
10
+ actual_isolation: hardening.actual_isolation,
11
+ runtime: {
12
+ ...(hardening.runtime.image !== undefined ? { image: hardening.runtime.image } : {}),
13
+ ...(hardening.runtime.driver !== undefined ? { driver: hardening.runtime.driver } : {}),
14
+ ...(hardening.runtime.provider !== undefined ? { provider: hardening.runtime.provider } : {}),
15
+ ...(hardening.runtime.runtime !== undefined ? { runtime: hardening.runtime.runtime } : {}),
16
+ ...(hardening.runtime.snapshot_id !== undefined ? { snapshot_id: hardening.runtime.snapshot_id } : {}),
17
+ ...(hardening.runtime.sandbox_id !== undefined ? { sandbox_id: hardening.runtime.sandbox_id } : {}),
18
+ ...(hardening.runtime.image_digest !== undefined ? { image_digest: hardening.runtime.image_digest } : {}),
19
+ ...(hardening.runtime.runtime_digest !== undefined
20
+ ? { runtime_digest: hardening.runtime.runtime_digest }
21
+ : {}),
22
+ workdir: hardening.runtime.workdir
23
+ },
24
+ mount_policy: {
25
+ worktree_writable: hardening.mount_policy.worktree_writable,
26
+ read_only_caches: [...hardening.mount_policy.read_only_caches],
27
+ ignored_dirs: [...hardening.mount_policy.ignored_dirs]
28
+ },
29
+ network_policy: {
30
+ default_deny: hardening.network_policy.default_deny,
31
+ allow_hosts: [...hardening.network_policy.allow_hosts],
32
+ enforced: hardening.network_policy.enforced
33
+ },
34
+ cleanup: {
35
+ attempted: hardening.cleanup.attempted,
36
+ succeeded: hardening.cleanup.succeeded,
37
+ status: hardening.cleanup.status,
38
+ ...(hardening.cleanup.timed_out !== undefined ? { timed_out: hardening.cleanup.timed_out } : {}),
39
+ ...(hardening.cleanup.error !== undefined ? { error: hardening.cleanup.error } : {})
40
+ },
41
+ secret_absence: {
42
+ secret_names: [...hardening.secret_absence.secret_names],
43
+ secret_value_hashes: [...hardening.secret_absence.secret_value_hashes],
44
+ injected_env_names: [...hardening.secret_absence.injected_env_names],
45
+ scanned: hardening.secret_absence.scanned,
46
+ leaks_found: hardening.secret_absence.leaks_found,
47
+ scan_scope: [...hardening.secret_absence.scan_scope],
48
+ leak_count: hardening.secret_absence.leak_count
49
+ }
50
+ };
51
+ }
package/dist/index.d.ts CHANGED
@@ -1,18 +1,14 @@
1
1
  export { createCommandHarness } from "./command.js";
2
2
  export type { CommandHarnessOptions } from "./command.js";
3
- export { claudeCodeHarness, claudeCodeHarnessCredentialSkipReason, createClaudeCodeHarness } from "./claude-code.js";
4
- export type { ClaudeCodeHarnessEnv, ClaudeCodeHarnessOptions } from "./claude-code.js";
5
- export { codexConfigToml, codexHarnessCredentialSkipReason, codexHarness, createCodexHarness } from "./codex.js";
6
- export type { CodexAmbientProvider, CodexApprovalPolicy, CodexConfigTomlInput, CodexExecInput, CodexExecResult, CodexExecRunner, CodexHarnessEnv, CodexHarnessOptions, CodexOpenAiCompatibleProvider, CodexProvider, CodexResponsesProvider, CodexSandboxMode } from "./codex.js";
3
+ export { resolveCursorkitCli } from "./cursorkit-path.js";
4
+ export type { CursorkitCli } from "./cursorkit-path.js";
7
5
  export { createArtifactStore } from "./artifacts.js";
8
6
  export type { ArtifactStore } from "./artifacts.js";
9
- export { createHarnessCapabilityMatrix, harnessDashboard, runHarnessSmokeDashboard } from "./dashboard.js";
10
- export type { HarnessAdapterReadiness, HarnessAvailability, HarnessCapabilityMatrix, HarnessCapabilityMatrixRow, HarnessCapabilityTarget, HarnessLiveSmokeTarget, HarnessSmokeDashboard, HarnessSmokeDashboardOptions, HarnessSmokeOutcome, HarnessSmokePurpose, HarnessSmokeRecord } from "./dashboard.js";
11
7
  export { createMockJudgeSynthesizer } from "./judge.js";
12
8
  export type { JudgeCandidateEvidence, JudgeInput, JudgePatch, JudgeRepairInput, JudgeSynthesizer, JudgeSynthesisOutput, JudgeVerificationInput, MockJudgeSynthesizerOptions, SynthesisFailureSummary, SynthesisRepairAttempt, SynthesisVerificationResult } from "./judge.js";
13
9
  export { ensemble, runEnsemble } from "./run.js";
14
- export { createFusionKitJudgeSynthesizer, runFusionPanels, runUnifiedHarnessE2E } from "./unified.js";
15
- export type { CursorHarnessRunnerInput, CursorHarnessRunnerResult, FusionPanelOptions, UnifiedHarnessE2EOptions, UnifiedHarnessE2EResult, UnifiedHarnessKind, UnifiedHarnessMatrixResult } from "./unified.js";
10
+ export { createFusionKitJudgeSynthesizer, runFusionPanels, runUnifiedHarnessE2E, setToolHarnessProvider } from "./unified.js";
11
+ export type { CursorHarnessRunnerInput, CursorHarnessRunnerResult, FusionPanelOptions, ToolHarnessProvider, ToolHarnessResolveOptions, UnifiedHarnessE2EOptions, UnifiedHarnessE2EResult, UnifiedHarnessKind, UnifiedHarnessMatrixResult } from "./unified.js";
16
12
  export { ambientTraceId, emitTrace, getTraceEmitter, newSpanId, newTraceId, TRACE_CANDIDATE_HEADER, TRACE_ID_HEADER, TRACE_PARENT_SPAN_HEADER, TRACE_SPAN_HEADER, TraceEmitter } from "./trace.js";
17
13
  export type { EmitInput, FusionTraceComponent, FusionTraceEvent, FusionTraceEventType } from "./trace.js";
18
14
  export { runJudgeSynthesis } from "./synthesis.js";
@@ -27,4 +23,5 @@ export type { FusionKitToolExecutionBatch, FusionKitToolExecutionRequest, Fusion
27
23
  export type { CandidateCommandIsolationInput, CandidateCommandIsolationResult } from "./isolation.js";
28
24
  export { cleanupCandidateWorktree, cleanupWorktreePlan, createWorktreePlan, defaultOutputRoot, diffCandidateWorktree, sealCandidateWorktree } from "./worktree.js";
29
25
  export type { CandidateWorktree, WorktreePlan } from "./worktree.js";
26
+ export { hardeningToJson } from "./harness.js";
30
27
  export type { EnsembleCandidateSummary, EnsembleDescriptor, EnsembleJudge, EnsembleModel, EnsemblePolicy, EnsembleRunResult, EnsembleRuntime, CandidateContainerDriver, CandidateContainerDriverInput, CandidateContainerDriverResult, CandidateHardeningMetadata, CandidateIsolationConfig, CandidateIsolationKind, CandidateIsolationMountPolicy, CandidateIsolationNetworkPolicy, CandidateIsolationSecretPolicy, HarnessAdapter, HarnessArtifact, HarnessCapabilities, HarnessCandidateOutput, HarnessCollectInput, HarnessPrepareInput, HarnessRunInput, HarnessToolRecord, ReviewEvidence, EnsembleRunSummary, VerificationProfile } from "./harness.js";
package/dist/index.js CHANGED
@@ -1,11 +1,9 @@
1
1
  export { createCommandHarness } from "./command.js";
2
- export { claudeCodeHarness, claudeCodeHarnessCredentialSkipReason, createClaudeCodeHarness } from "./claude-code.js";
3
- export { codexConfigToml, codexHarnessCredentialSkipReason, codexHarness, createCodexHarness } from "./codex.js";
2
+ export { resolveCursorkitCli } from "./cursorkit-path.js";
4
3
  export { createArtifactStore } from "./artifacts.js";
5
- export { createHarnessCapabilityMatrix, harnessDashboard, runHarnessSmokeDashboard } from "./dashboard.js";
6
4
  export { createMockJudgeSynthesizer } from "./judge.js";
7
5
  export { ensemble, runEnsemble } from "./run.js";
8
- export { createFusionKitJudgeSynthesizer, runFusionPanels, runUnifiedHarnessE2E } from "./unified.js";
6
+ export { createFusionKitJudgeSynthesizer, runFusionPanels, runUnifiedHarnessE2E, setToolHarnessProvider } from "./unified.js";
9
7
  export { ambientTraceId, emitTrace, getTraceEmitter, newSpanId, newTraceId, TRACE_CANDIDATE_HEADER, TRACE_ID_HEADER, TRACE_PARENT_SPAN_HEADER, TRACE_SPAN_HEADER, TraceEmitter } from "./trace.js";
10
8
  export { runJudgeSynthesis } from "./synthesis.js";
11
9
  export { createMockHarness } from "./mock.js";
@@ -13,3 +11,4 @@ export { createToolExecutor, registerDemoTools, sideEffectsForTool } from "./too
13
11
  export { executeFusionKitToolBatch, FusionKitToolExecutorClient, FusionKitToolExecutorClientError, FusionKitToolExecutorError, startFusionKitToolExecutorServer } from "./external-executor.js";
14
12
  export { createCliContainerDriver, runCandidateCommandWithIsolation, secretAbsenceMetadata, secretValueHash } from "./isolation.js";
15
13
  export { cleanupCandidateWorktree, cleanupWorktreePlan, createWorktreePlan, defaultOutputRoot, diffCandidateWorktree, sealCandidateWorktree } from "./worktree.js";
14
+ export { hardeningToJson } from "./harness.js";
package/dist/run.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { assertHarnessCandidateRecordV1, assertHarnessRunRequestV1, assertHarnessRunResultV1, MODEL_FUSION_SCHEMA_BUNDLE_HASH, requestHash } from "@fusionkit/protocol";
2
2
  import { createArtifactStore } from "./artifacts.js";
3
+ import { hardeningToJson } from "./harness.js";
3
4
  import { runJudgeSynthesis } from "./synthesis.js";
4
5
  import { cleanupWorktreePlan, createWorktreePlan, defaultOutputRoot, diffCandidateWorktree, sealCandidateWorktree } from "./worktree.js";
5
6
  const PRODUCER_GIT_SHA = "0".repeat(40);
@@ -99,7 +100,7 @@ function candidateMetadata(output, descriptor, worktree) {
99
100
  }
100
101
  Object.assign(metadata, output.metadata ?? {});
101
102
  if (metadata.hardening === undefined) {
102
- metadata.hardening = fallbackCandidateHardening(descriptor);
103
+ metadata.hardening = hardeningToJson(fallbackCandidateHardening(descriptor));
103
104
  }
104
105
  if (descriptor.reviewEvidence !== undefined) {
105
106
  metadata.review_evidence_attached = true;
@@ -5,9 +5,7 @@ import { join } from "node:path";
5
5
  import { test } from "node:test";
6
6
  import { assertJudgeSynthesisRecordV1, assertHarnessCandidateRecordV1, assertHarnessRunRequestV1, assertHarnessRunResultV1, MODEL_FUSION_SCHEMA_BUNDLE_HASH, requestHash, responseHash } from "@fusionkit/protocol";
7
7
  import { gitText } from "@fusionkit/workspace";
8
- import { claudeCodeHarness, claudeCodeHarnessCredentialSkipReason } from "../claude-code.js";
9
8
  import { createCommandHarness } from "../command.js";
10
- import { codexConfigToml, codexHarness, codexHarnessCredentialSkipReason } from "../codex.js";
11
9
  import { createMockJudgeSynthesizer } from "../judge.js";
12
10
  import { createMockHarness } from "../mock.js";
13
11
  import { runEnsemble } from "../run.js";
@@ -88,10 +86,6 @@ function addFilePatch(path, content) {
88
86
  ""
89
87
  ].join("\n");
90
88
  }
91
- function emptyCodexHome() {
92
- const path = mkdtempSync(join(tmpdir(), "ensemble-codex-empty-home-"));
93
- return { path, cleanup: () => rmSync(path, { recursive: true, force: true }) };
94
- }
95
89
  test("mock adapter runs N candidates and emits valid model-fusion records", async () => {
96
90
  const result = await runEnsemble(descriptor({
97
91
  harness: createMockHarness({
@@ -128,227 +122,6 @@ test("command adapter records command output, artifact, tool record, and verific
128
122
  "exec_ensemble_test_command_0"
129
123
  ]);
130
124
  });
131
- test("claude-code adapter can replace mock and skip clearly without credentials", async () => {
132
- const result = await runEnsemble(descriptor({
133
- models: [{ id: "claude", model: "claude-sonnet-4-6" }],
134
- harness: claudeCodeHarness({ env: {} })
135
- }));
136
- assert.equal(result.candidates.length, 1);
137
- assert.equal(result.harnessRunResult.status, "skipped");
138
- assert.equal(result.candidates[0]?.status, "skipped");
139
- assert.equal(result.candidates[0]?.error?.kind, "capability_missing");
140
- assert.match(result.candidates[0]?.error?.message ?? "", /missing Claude Code credential/);
141
- assert.match(result.summary?.candidates[0]?.verification?.evidence[0] ?? "", /missing Claude/);
142
- });
143
- test("claude-code adapter delegates through a session backend from a generic descriptor", async () => {
144
- const repo = makeRepo();
145
- const seen = {};
146
- const backend = {
147
- isolation: "vercel-sandbox",
148
- supports: () => true,
149
- execute: async (input) => {
150
- seen.agentKind = input.contract.agent.kind;
151
- seen.env = input.execution.env;
152
- seen.repoDir = input.repoDir;
153
- assert.equal(input.contract.isolation, "vercel-sandbox");
154
- assert.equal(input.contract.execution?.kind, "agent");
155
- assert.equal(input.secrets.length, 0);
156
- writeFileSync(join(input.repoDir, "CLAUDE_RESULT.md"), "fake claude result\n");
157
- input.emit({
158
- type: "command.executed",
159
- argvHash: requestHash({ adapter: "claude-code" }),
160
- exitCode: 0
161
- });
162
- return { exitCode: 0, log: Buffer.from("fake claude transcript") };
163
- }
164
- };
165
- try {
166
- const result = await runEnsemble(descriptor({
167
- models: [{ id: "claude", model: "claude-sonnet-4-6" }],
168
- harness: claudeCodeHarness({
169
- env: {
170
- ANTHROPIC_API_KEY: "sk-ant-test",
171
- VERCEL_TOKEN: "vercel-test"
172
- },
173
- backend
174
- }),
175
- workspace: repo.repo,
176
- baseGitSha: repo.head,
177
- outputRoot: repo.outputRoot,
178
- cleanupWorktrees: true
179
- }));
180
- assert.equal(result.harnessRunResult.status, "succeeded");
181
- assert.equal(result.candidates[0]?.status, "succeeded");
182
- assert.equal(seen.agentKind, "claude-code");
183
- assert.equal(seen.env?.ANTHROPIC_API_KEY, "sk-ant-test");
184
- assert.equal(Object.hasOwn(seen.env ?? {}, "VERCEL_TOKEN"), false);
185
- assert.notEqual(seen.repoDir, repo.repo);
186
- assert.ok(result.artifacts.some((artifact) => artifact.kind === "patch"));
187
- assert.match(result.candidates[0]?.metadata?.adapter, /claude-code/);
188
- }
189
- finally {
190
- repo.cleanup();
191
- }
192
- });
193
- test("smoke: claude-code adapter runs live when credentials are available", { skip: liveClaudeSmokeSkipReason() }, async () => {
194
- const repo = makeRepo();
195
- try {
196
- const result = await runEnsemble(descriptor({
197
- id: "claude_smoke",
198
- models: [{ id: "claude", model: "claude-sonnet-4-6" }],
199
- harness: claudeCodeHarness(),
200
- runtime: {
201
- id: "vercel-sandbox",
202
- isolation: {
203
- kind: "microvm",
204
- networkPolicy: {
205
- defaultDeny: true,
206
- allowHosts: [
207
- "registry.npmjs.org",
208
- "api.anthropic.com",
209
- "ai-gateway.vercel.sh"
210
- ]
211
- }
212
- }
213
- },
214
- policy: {
215
- id: "claude-smoke-policy",
216
- allowedTools: ["read_file"],
217
- sideEffects: "read_only",
218
- timeoutMs: 180_000
219
- },
220
- prompt: "Read README.md if present, then reply exactly CLAUDE_LIVE_SMOKE_OK. Do not modify files.",
221
- workspace: repo.repo,
222
- baseGitSha: repo.head,
223
- outputRoot: repo.outputRoot,
224
- cleanupWorktrees: true
225
- }));
226
- assertHarnessRunResultV1(result.harnessRunResult);
227
- assert.equal(result.harnessRunResult.status, "succeeded");
228
- assert.equal(result.candidates[0]?.status, "succeeded");
229
- }
230
- finally {
231
- repo.cleanup();
232
- }
233
- });
234
- test("codex config declares a Responses provider without touching Cursor records", () => {
235
- const toml = codexConfigToml({
236
- model: "gpt-5.5-codex",
237
- sandboxMode: "workspace-write",
238
- approvalPolicy: "never",
239
- provider: {
240
- providerId: "warrant-local",
241
- name: "Warrant Local",
242
- baseUrl: "https://gateway.example.com/v1/responses",
243
- apiKeyEnvName: "WARRANT_CODEX_API_KEY",
244
- requiresOpenAiAuth: true
245
- }
246
- });
247
- assert.match(toml, /model = "gpt-5\.5-codex"/);
248
- assert.match(toml, /model_provider = "warrant-local"/);
249
- assert.match(toml, /\[model_providers\.warrant-local\]/);
250
- assert.match(toml, /base_url = "https:\/\/gateway\.example\.com\/v1"/);
251
- assert.match(toml, /wire_api = "responses"/);
252
- assert.match(toml, /env_key = "WARRANT_CODEX_API_KEY"/);
253
- assert.equal(toml.includes("cursor"), false);
254
- });
255
- test("codex adapter emits schema-valid skipped output without credentials", async () => {
256
- const codexHome = emptyCodexHome();
257
- try {
258
- const result = await runEnsemble(descriptor({
259
- models: [{ id: "codex", model: "gpt-5.5-codex" }],
260
- harness: codexHarness({
261
- env: { CODEX_HOME: codexHome.path },
262
- provider: { kind: "ambient" }
263
- })
264
- }));
265
- assertHarnessRunResultV1(result.harnessRunResult);
266
- assert.equal(result.harnessRunResult.status, "skipped");
267
- assert.equal(result.candidates[0]?.status, "skipped");
268
- assert.equal(result.candidates[0]?.error?.kind, "capability_missing");
269
- assert.match(result.candidates[0]?.error?.message ?? "", /Codex credentials are absent/);
270
- assert.match(result.summary?.candidates[0]?.verification?.evidence[0] ?? "", /Codex credentials/);
271
- }
272
- finally {
273
- codexHome.cleanup();
274
- }
275
- });
276
- test("codex adapter runs through an injected Responses runner and records evidence", async () => {
277
- const calls = [];
278
- const result = await runEnsemble(descriptor({
279
- models: [{ id: "codex", model: "gpt-5.5-codex" }],
280
- harness: codexHarness({
281
- env: {},
282
- provider: {
283
- kind: "responses",
284
- baseUrl: "http://127.0.0.1:8787/v1/responses",
285
- apiKey: "inline-test-key",
286
- requiresOpenAiAuth: true,
287
- providerId: "local-responses"
288
- },
289
- runner: (input) => {
290
- calls.push(input);
291
- const codexHome = input.env.CODEX_HOME;
292
- assert.ok(codexHome);
293
- const config = readFileSync(join(codexHome, "config.toml"), "utf8");
294
- assert.match(config, /model_provider = "local-responses"/);
295
- assert.match(config, /base_url = "http:\/\/127\.0\.0\.1:8787\/v1"/);
296
- assert.match(config, /wire_api = "responses"/);
297
- assert.match(config, /env_key = "WARRANT_CODEX_PROVIDER_API_KEY"/);
298
- assert.equal(input.env.WARRANT_CODEX_PROVIDER_API_KEY, "inline-test-key");
299
- return {
300
- stdout: '{"type":"message","content":"codex-ok"}\n',
301
- stderr: "",
302
- exitCode: 0
303
- };
304
- }
305
- })
306
- }));
307
- assert.equal(calls.length, 1);
308
- assert.deepEqual(calls[0]?.args.slice(0, 3), [
309
- "exec",
310
- "--json",
311
- "--skip-git-repo-check"
312
- ]);
313
- assert.equal(result.harnessRunResult.status, "succeeded");
314
- assert.equal(result.candidates[0]?.status, "succeeded");
315
- assert.equal(result.toolRecords[0]?.status, "succeeded");
316
- assert.match(result.candidates[0]?.metadata?.adapter, /codex/);
317
- });
318
- function liveClaudeSmokeSkipReason() {
319
- if (process.env.WARRANT_CLAUDE_SMOKE !== "1") {
320
- return "set WARRANT_CLAUDE_SMOKE=1 plus Claude Code credentials to run the live Claude Code smoke";
321
- }
322
- return claudeCodeHarnessCredentialSkipReason() ?? false;
323
- }
324
- function liveCodexSmokeSkipReason() {
325
- if (process.env.WARRANT_CODEX_SMOKE !== "1") {
326
- return "set WARRANT_CODEX_SMOKE=1 plus Codex credentials to run the live Codex smoke";
327
- }
328
- return codexHarnessCredentialSkipReason() ?? false;
329
- }
330
- test("codex adapter live smoke is credential-gated", { skip: liveCodexSmokeSkipReason() }, async () => {
331
- const repo = makeRepo();
332
- try {
333
- const result = await runEnsemble(descriptor({
334
- prompt: "Read README.md if present, then reply exactly CODEX_LIVE_SMOKE_OK. Do not modify files.",
335
- models: [{ id: "codex", model: process.env.WARRANT_CODEX_SMOKE_MODEL ?? "gpt-5.5-codex" }],
336
- harness: codexHarness({
337
- timeoutMs: 60_000,
338
- sandboxMode: "read-only",
339
- approvalPolicy: "never"
340
- }),
341
- workspace: repo.repo,
342
- baseGitSha: repo.head,
343
- outputRoot: repo.outputRoot
344
- }));
345
- assertHarnessRunResultV1(result.harnessRunResult);
346
- assert.notEqual(result.harnessRunResult.status, "skipped");
347
- }
348
- finally {
349
- repo.cleanup();
350
- }
351
- });
352
125
  test("command adapter records optional container hardening metadata", async () => {
353
126
  const driver = {
354
127
  id: "fake-ensemble-container",
package/dist/unified.d.ts CHANGED
@@ -1,7 +1,33 @@
1
1
  import type { JsonValue, ModelFusionStatus } from "@fusionkit/protocol";
2
- import type { EnsembleModel, EnsembleRunResult } from "./harness.js";
2
+ import type { EnsembleDescriptor, EnsembleModel, EnsembleRunResult, HarnessAdapter } from "./harness.js";
3
3
  import type { JudgeSynthesizer } from "./judge.js";
4
4
  export type UnifiedHarnessKind = "mock" | "command" | "agent" | "codex" | "claude-code" | "cursor-acp" | "cursor-desktop";
5
+ /**
6
+ * Options the unified runner passes to a tool's harness factory. The per-tool
7
+ * packages map these onto their own harness options (provider base URL, etc.).
8
+ */
9
+ export type ToolHarnessResolveOptions = {
10
+ fusionBackendUrl: string;
11
+ fusionApiKey?: string;
12
+ timeoutMs?: number;
13
+ };
14
+ /**
15
+ * Provides everything ensemble needs about a tool-backed harness kind (codex,
16
+ * claude-code, cursor-*) without ensemble depending on any per-tool package. The
17
+ * fusionkit CLI registers one (built from its tool registry) via
18
+ * {@link setToolHarnessProvider}; without it, requesting a tool harness kind
19
+ * throws a clear error.
20
+ */
21
+ export type ToolHarnessProvider = {
22
+ adapter(kind: UnifiedHarnessKind, options: ToolHarnessResolveOptions): HarnessAdapter;
23
+ sideEffects(kind: UnifiedHarnessKind): EnsembleDescriptor["policy"]["sideEffects"];
24
+ responseShape(kind: UnifiedHarnessKind): string;
25
+ };
26
+ /**
27
+ * Register the provider that resolves tool-backed harness kinds. The fusionkit
28
+ * CLI wires this at startup from its tool registry.
29
+ */
30
+ export declare function setToolHarnessProvider(provider: ToolHarnessProvider | undefined): void;
5
31
  export type UnifiedHarnessMatrixResult = {
6
32
  harness: UnifiedHarnessKind;
7
33
  modelIds: string[];
@@ -26,7 +52,6 @@ export type CursorHarnessRunnerInput = {
26
52
  repo: string;
27
53
  outDir: string;
28
54
  timeoutMs?: number;
29
- cursorKitDir?: string;
30
55
  };
31
56
  export type CursorHarnessRunnerResult = {
32
57
  status: ModelFusionStatus;
@@ -46,7 +71,6 @@ export type UnifiedHarnessE2EOptions = {
46
71
  command?: string;
47
72
  timeoutMs?: number;
48
73
  judgeModel?: string;
49
- cursorKitDir?: string;
50
74
  cursorRunner?: (input: CursorHarnessRunnerInput) => Promise<CursorHarnessRunnerResult>;
51
75
  /**
52
76
  * Per-candidate model backend URLs keyed by `EnsembleModel.id`. When a
package/dist/unified.js CHANGED
@@ -4,11 +4,32 @@ import { join, resolve } from "node:path";
4
4
  import { newSpanId, TRACE_ID_HEADER, TRACE_SPAN_HEADER } from "@fusionkit/protocol";
5
5
  import { gitText } from "@fusionkit/workspace";
6
6
  import { createAgentHarness } from "./agent.js";
7
- import { claudeCodeHarness } from "./claude-code.js";
8
7
  import { createCommandHarness } from "./command.js";
9
- import { codexHarness } from "./codex.js";
8
+ import { resolveCursorkitCli } from "./cursorkit-path.js";
10
9
  import { createMockHarness } from "./mock.js";
11
10
  import { runEnsemble } from "./run.js";
11
+ let toolHarnessProvider;
12
+ /**
13
+ * Register the provider that resolves tool-backed harness kinds. The fusionkit
14
+ * CLI wires this at startup from its tool registry.
15
+ */
16
+ export function setToolHarnessProvider(provider) {
17
+ toolHarnessProvider = provider;
18
+ }
19
+ function requireToolHarnessProvider(kind) {
20
+ if (toolHarnessProvider === undefined) {
21
+ throw new Error(`no tool harness provider registered for harness kind "${kind}"; ` +
22
+ "the fusionkit CLI wires this via setToolHarnessProvider (build the tool registry first).");
23
+ }
24
+ return toolHarnessProvider;
25
+ }
26
+ function resolveToolAdapter(kind, options) {
27
+ return requireToolHarnessProvider(kind).adapter(kind, {
28
+ fusionBackendUrl: normalizeFusionBackendUrl(options.fusionBackendUrl),
29
+ ...(options.fusionApiKey !== undefined ? { fusionApiKey: options.fusionApiKey } : {}),
30
+ ...(options.timeoutMs !== undefined ? { timeoutMs: options.timeoutMs } : {})
31
+ });
32
+ }
12
33
  function normalizeFusionBackendUrl(value) {
13
34
  return value.replace(/\/+$/, "");
14
35
  }
@@ -30,7 +51,7 @@ function sideEffectsForHarness(kind) {
30
51
  case "claude-code":
31
52
  case "cursor-acp":
32
53
  case "cursor-desktop":
33
- return "writes_workspace";
54
+ return requireToolHarnessProvider(kind).sideEffects(kind);
34
55
  default: {
35
56
  const exhausted = kind;
36
57
  throw new Error(`unsupported unified harness: ${String(exhausted)}`);
@@ -72,19 +93,10 @@ function harnessAdapter(kind, options) {
72
93
  });
73
94
  }
74
95
  case "codex":
75
- return codexHarness({
76
- timeoutMs: options.timeoutMs,
77
- provider: {
78
- kind: "openai-compatible",
79
- baseUrl: normalizeFusionBackendUrl(options.fusionBackendUrl),
80
- ...(options.fusionApiKey ? { apiKey: options.fusionApiKey } : {})
81
- }
82
- });
83
96
  case "claude-code":
84
- return claudeCodeHarness({ timeoutMs: options.timeoutMs });
85
97
  case "cursor-acp":
86
98
  case "cursor-desktop":
87
- throw new Error(`${kind} runs through the Cursor harness adapter path`);
99
+ return resolveToolAdapter(kind, options);
88
100
  default: {
89
101
  const exhausted = kind;
90
102
  throw new Error(`unsupported unified harness: ${String(exhausted)}`);
@@ -100,12 +112,10 @@ function responseShapeFor(kind) {
100
112
  return ("Respond to the user in the natural shape the request calls for: a direct answer, " +
101
113
  "a plan, or the concrete code change. Reply in first person as the assistant.");
102
114
  case "codex":
103
- return "Return a Codex-style result summary with patch and verification evidence.";
104
115
  case "claude-code":
105
- return "Return a Claude Code-style transcript summary with patch/worktree evidence.";
106
116
  case "cursor-acp":
107
117
  case "cursor-desktop":
108
- return "Return text suitable for Cursor ACP session/update plus route evidence notes.";
118
+ return requireToolHarnessProvider(kind).responseShape(kind);
109
119
  default: {
110
120
  const exhausted = kind;
111
121
  throw new Error(`unsupported unified harness: ${String(exhausted)}`);
@@ -304,17 +314,10 @@ function statusForResult(result) {
304
314
  return result.failureSummary ? "failed" : result.harnessRunResult.status;
305
315
  }
306
316
  async function defaultCursorRunner(input) {
307
- if (!input.cursorKitDir) {
308
- return {
309
- status: "skipped",
310
- message: "Cursorkit directory not configured",
311
- details: { reason: "cursor_kit_dir_missing" }
312
- };
313
- }
317
+ const { harnessCli } = resolveCursorkitCli();
314
318
  const suite = input.kind === "cursor-acp" ? "acp" : "desktop-route";
315
319
  const args = [
316
- "test:harness",
317
- "--",
320
+ harnessCli,
318
321
  "--suite",
319
322
  suite,
320
323
  "--base-url",
@@ -328,8 +331,8 @@ async function defaultCursorRunner(input) {
328
331
  ];
329
332
  mkdirSync(input.outDir, { recursive: true });
330
333
  return await new Promise((resolveResult) => {
331
- const child = spawn("pnpm", args, {
332
- cwd: input.cursorKitDir,
334
+ const child = spawn(process.execPath, args, {
335
+ cwd: input.outDir,
333
336
  stdio: ["ignore", "pipe", "pipe"]
334
337
  });
335
338
  let stdout = "";
@@ -360,8 +363,7 @@ async function runCursorHarness(kind, options) {
360
363
  fusionBackendUrl: options.fusionBackendUrl,
361
364
  repo: options.repo,
362
365
  outDir: join(options.outputRoot, `${kind}-${model.id}`),
363
- timeoutMs: options.timeoutMs,
364
- cursorKitDir: options.cursorKitDir
366
+ timeoutMs: options.timeoutMs
365
367
  })));
366
368
  const failed = perModel.find((result) => result.status === "failed");
367
369
  const skipped = perModel.every((result) => result.status === "skipped");
@@ -385,7 +387,10 @@ export async function runUnifiedHarnessE2E(options) {
385
387
  mkdirSync(outputRoot, { recursive: true });
386
388
  const results = [];
387
389
  for (const kind of options.harnesses) {
388
- if (kind === "cursor-acp" || kind === "cursor-desktop") {
390
+ if ((kind === "cursor-acp" || kind === "cursor-desktop") &&
391
+ options.cursorRunner !== undefined) {
392
+ // Explicit probe runner: drive the Cursorkit harness suite and record a
393
+ // route/transcript probe instead of producing real ensemble candidates.
389
394
  results.push(await runCursorHarness(kind, options));
390
395
  continue;
391
396
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@fusionkit/ensemble",
3
3
  "private": false,
4
- "version": "0.1.4",
4
+ "version": "0.1.6",
5
5
  "repository": {
6
6
  "type": "git",
7
7
  "url": "git+https://github.com/velum-labs/handoffkit.git",
@@ -25,11 +25,12 @@
25
25
  "provenance": true
26
26
  },
27
27
  "dependencies": {
28
- "@fusionkit/adapter-ai-sdk": "0.1.4",
29
- "@fusionkit/model-gateway": "0.1.4",
30
- "@fusionkit/protocol": "0.1.4",
31
- "@fusionkit/runner": "0.1.4",
32
- "@fusionkit/session-harness": "0.1.4",
33
- "@fusionkit/workspace": "0.1.4"
28
+ "@velum-labs/cursorkit": "0.1.2",
29
+ "@fusionkit/adapter-ai-sdk": "0.1.6",
30
+ "@fusionkit/model-gateway": "0.1.6",
31
+ "@fusionkit/protocol": "0.1.6",
32
+ "@fusionkit/session-harness": "0.1.6",
33
+ "@fusionkit/runner": "0.1.6",
34
+ "@fusionkit/workspace": "0.1.6"
34
35
  }
35
36
  }
@@ -1,25 +0,0 @@
1
- import type { NetworkPolicy } from "@fusionkit/protocol";
2
- import type { SessionBackend } from "@fusionkit/runner";
3
- import type { ClaudeCodeBindingOptions } from "@fusionkit/session-harness";
4
- import type { HarnessAdapter } from "./harness.js";
5
- export type ClaudeCodeHarnessEnv = Record<string, string | undefined>;
6
- export type ClaudeCodeHarnessOptions = ClaudeCodeBindingOptions & {
7
- id?: string;
8
- /** Defaults to `process.env`; tests can pass `{}` for deterministic skips. */
9
- env?: ClaudeCodeHarnessEnv;
10
- /** Already-released secret values forwarded through the session backend seam. */
11
- secrets?: {
12
- name: string;
13
- value: string;
14
- }[];
15
- /** Test/extension seam. Defaults to `aiSdkHarnessBackend(...)`. */
16
- backend?: SessionBackend;
17
- pool?: string;
18
- network?: NetworkPolicy;
19
- timeoutMs?: number;
20
- logMaxBytes?: number;
21
- skipWhenUnavailable?: boolean;
22
- };
23
- export declare function claudeCodeHarnessCredentialSkipReason(env?: ClaudeCodeHarnessEnv, options?: ClaudeCodeHarnessOptions): string | undefined;
24
- export declare function createClaudeCodeHarness(options?: ClaudeCodeHarnessOptions): HarnessAdapter;
25
- export declare function claudeCodeHarness(options?: ClaudeCodeHarnessOptions): HarnessAdapter;