@fusionkit/ensemble 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cursorkit-path.d.ts +17 -0
- package/dist/cursorkit-path.js +21 -0
- package/dist/harness.d.ts +7 -0
- package/dist/harness.js +51 -1
- package/dist/index.d.ts +5 -8
- package/dist/index.js +3 -4
- package/dist/run.js +2 -1
- package/dist/test/ensemble.test.js +0 -227
- package/dist/unified.d.ts +27 -3
- package/dist/unified.js +31 -36
- package/package.json +8 -7
- package/dist/claude-code.d.ts +0 -25
- package/dist/claude-code.js +0 -398
- package/dist/codex.d.ts +0 -69
- package/dist/codex.js +0 -467
- package/dist/cursor.d.ts +0 -42
- package/dist/cursor.js +0 -440
- package/dist/dashboard.d.ts +0 -62
- package/dist/dashboard.js +0 -762
- package/dist/test/codex.test.d.ts +0 -1
- package/dist/test/codex.test.js +0 -237
- package/dist/test/cursor.test.d.ts +0 -1
- package/dist/test/cursor.test.js +0 -97
- package/dist/test/dashboard.test.d.ts +0 -1
- package/dist/test/dashboard.test.js +0 -214
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
export type CursorkitCli = {
|
|
2
|
+
/** Absolute path to the bridge entrypoint (`cursorkit serve`). */
|
|
3
|
+
serveCli: string;
|
|
4
|
+
/** Absolute path to the bundled test-harness CLI (suite probes). */
|
|
5
|
+
harnessCli: string;
|
|
6
|
+
};
|
|
7
|
+
/**
|
|
8
|
+
* Resolve the bundled `@velum-labs/cursorkit` CLIs from node_modules. The
|
|
9
|
+
* package exports `"."` -> `dist/src/cli.js`; the harness CLI lives next to it
|
|
10
|
+
* at `dist/src/testing/cli.js` (not exposed via the exports map, so it is
|
|
11
|
+
* derived from the resolved `"."` entry rather than resolved directly).
|
|
12
|
+
*
|
|
13
|
+
* `FUSIONKIT_CURSORKIT_SERVE_CLI` overrides the resolved `serveCli` entry. This
|
|
14
|
+
* lets a custom build (or an integration test) point the bridge at an alternate
|
|
15
|
+
* entrypoint; the harness CLI is still derived relative to it.
|
|
16
|
+
*/
|
|
17
|
+
export declare function resolveCursorkitCli(): CursorkitCli;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { createRequire } from "node:module";
|
|
2
|
+
import { dirname, join } from "node:path";
|
|
3
|
+
const require = createRequire(import.meta.url);
|
|
4
|
+
/**
|
|
5
|
+
* Resolve the bundled `@velum-labs/cursorkit` CLIs from node_modules. The
|
|
6
|
+
* package exports `"."` -> `dist/src/cli.js`; the harness CLI lives next to it
|
|
7
|
+
* at `dist/src/testing/cli.js` (not exposed via the exports map, so it is
|
|
8
|
+
* derived from the resolved `"."` entry rather than resolved directly).
|
|
9
|
+
*
|
|
10
|
+
* `FUSIONKIT_CURSORKIT_SERVE_CLI` overrides the resolved `serveCli` entry. This
|
|
11
|
+
* lets a custom build (or an integration test) point the bridge at an alternate
|
|
12
|
+
* entrypoint; the harness CLI is still derived relative to it.
|
|
13
|
+
*/
|
|
14
|
+
export function resolveCursorkitCli() {
|
|
15
|
+
const override = process.env.FUSIONKIT_CURSORKIT_SERVE_CLI;
|
|
16
|
+
const serveCli = override !== undefined && override.length > 0
|
|
17
|
+
? override
|
|
18
|
+
: require.resolve("@velum-labs/cursorkit");
|
|
19
|
+
const harnessCli = join(dirname(serveCli), "testing", "cli.js");
|
|
20
|
+
return { serveCli, harnessCli };
|
|
21
|
+
}
|
package/dist/harness.d.ts
CHANGED
|
@@ -190,6 +190,13 @@ export type CandidateHardeningMetadata = {
|
|
|
190
190
|
leak_count: number;
|
|
191
191
|
};
|
|
192
192
|
};
|
|
193
|
+
/**
|
|
194
|
+
* Serialize hardening metadata as a `JsonValue`. The shape is JSON-compatible by
|
|
195
|
+
* construction, but TypeScript cannot prove an object type with optional members
|
|
196
|
+
* satisfies the `JsonValue` index signature, so this typed mapper does the
|
|
197
|
+
* conversion explicitly (omitting absent optionals) instead of an unchecked cast.
|
|
198
|
+
*/
|
|
199
|
+
export declare function hardeningToJson(hardening: CandidateHardeningMetadata): JsonValue;
|
|
193
200
|
export type EnsembleRuntime = {
|
|
194
201
|
id: string;
|
|
195
202
|
environmentId?: string;
|
package/dist/harness.js
CHANGED
|
@@ -1 +1,51 @@
|
|
|
1
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Serialize hardening metadata as a `JsonValue`. The shape is JSON-compatible by
|
|
3
|
+
* construction, but TypeScript cannot prove an object type with optional members
|
|
4
|
+
* satisfies the `JsonValue` index signature, so this typed mapper does the
|
|
5
|
+
* conversion explicitly (omitting absent optionals) instead of an unchecked cast.
|
|
6
|
+
*/
|
|
7
|
+
export function hardeningToJson(hardening) {
|
|
8
|
+
return {
|
|
9
|
+
requested_isolation: hardening.requested_isolation,
|
|
10
|
+
actual_isolation: hardening.actual_isolation,
|
|
11
|
+
runtime: {
|
|
12
|
+
...(hardening.runtime.image !== undefined ? { image: hardening.runtime.image } : {}),
|
|
13
|
+
...(hardening.runtime.driver !== undefined ? { driver: hardening.runtime.driver } : {}),
|
|
14
|
+
...(hardening.runtime.provider !== undefined ? { provider: hardening.runtime.provider } : {}),
|
|
15
|
+
...(hardening.runtime.runtime !== undefined ? { runtime: hardening.runtime.runtime } : {}),
|
|
16
|
+
...(hardening.runtime.snapshot_id !== undefined ? { snapshot_id: hardening.runtime.snapshot_id } : {}),
|
|
17
|
+
...(hardening.runtime.sandbox_id !== undefined ? { sandbox_id: hardening.runtime.sandbox_id } : {}),
|
|
18
|
+
...(hardening.runtime.image_digest !== undefined ? { image_digest: hardening.runtime.image_digest } : {}),
|
|
19
|
+
...(hardening.runtime.runtime_digest !== undefined
|
|
20
|
+
? { runtime_digest: hardening.runtime.runtime_digest }
|
|
21
|
+
: {}),
|
|
22
|
+
workdir: hardening.runtime.workdir
|
|
23
|
+
},
|
|
24
|
+
mount_policy: {
|
|
25
|
+
worktree_writable: hardening.mount_policy.worktree_writable,
|
|
26
|
+
read_only_caches: [...hardening.mount_policy.read_only_caches],
|
|
27
|
+
ignored_dirs: [...hardening.mount_policy.ignored_dirs]
|
|
28
|
+
},
|
|
29
|
+
network_policy: {
|
|
30
|
+
default_deny: hardening.network_policy.default_deny,
|
|
31
|
+
allow_hosts: [...hardening.network_policy.allow_hosts],
|
|
32
|
+
enforced: hardening.network_policy.enforced
|
|
33
|
+
},
|
|
34
|
+
cleanup: {
|
|
35
|
+
attempted: hardening.cleanup.attempted,
|
|
36
|
+
succeeded: hardening.cleanup.succeeded,
|
|
37
|
+
status: hardening.cleanup.status,
|
|
38
|
+
...(hardening.cleanup.timed_out !== undefined ? { timed_out: hardening.cleanup.timed_out } : {}),
|
|
39
|
+
...(hardening.cleanup.error !== undefined ? { error: hardening.cleanup.error } : {})
|
|
40
|
+
},
|
|
41
|
+
secret_absence: {
|
|
42
|
+
secret_names: [...hardening.secret_absence.secret_names],
|
|
43
|
+
secret_value_hashes: [...hardening.secret_absence.secret_value_hashes],
|
|
44
|
+
injected_env_names: [...hardening.secret_absence.injected_env_names],
|
|
45
|
+
scanned: hardening.secret_absence.scanned,
|
|
46
|
+
leaks_found: hardening.secret_absence.leaks_found,
|
|
47
|
+
scan_scope: [...hardening.secret_absence.scan_scope],
|
|
48
|
+
leak_count: hardening.secret_absence.leak_count
|
|
49
|
+
}
|
|
50
|
+
};
|
|
51
|
+
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,18 +1,14 @@
|
|
|
1
1
|
export { createCommandHarness } from "./command.js";
|
|
2
2
|
export type { CommandHarnessOptions } from "./command.js";
|
|
3
|
-
export {
|
|
4
|
-
export type {
|
|
5
|
-
export { codexConfigToml, codexHarnessCredentialSkipReason, codexHarness, createCodexHarness } from "./codex.js";
|
|
6
|
-
export type { CodexAmbientProvider, CodexApprovalPolicy, CodexConfigTomlInput, CodexExecInput, CodexExecResult, CodexExecRunner, CodexHarnessEnv, CodexHarnessOptions, CodexOpenAiCompatibleProvider, CodexProvider, CodexResponsesProvider, CodexSandboxMode } from "./codex.js";
|
|
3
|
+
export { resolveCursorkitCli } from "./cursorkit-path.js";
|
|
4
|
+
export type { CursorkitCli } from "./cursorkit-path.js";
|
|
7
5
|
export { createArtifactStore } from "./artifacts.js";
|
|
8
6
|
export type { ArtifactStore } from "./artifacts.js";
|
|
9
|
-
export { createHarnessCapabilityMatrix, harnessDashboard, runHarnessSmokeDashboard } from "./dashboard.js";
|
|
10
|
-
export type { HarnessAdapterReadiness, HarnessAvailability, HarnessCapabilityMatrix, HarnessCapabilityMatrixRow, HarnessCapabilityTarget, HarnessLiveSmokeTarget, HarnessSmokeDashboard, HarnessSmokeDashboardOptions, HarnessSmokeOutcome, HarnessSmokePurpose, HarnessSmokeRecord } from "./dashboard.js";
|
|
11
7
|
export { createMockJudgeSynthesizer } from "./judge.js";
|
|
12
8
|
export type { JudgeCandidateEvidence, JudgeInput, JudgePatch, JudgeRepairInput, JudgeSynthesizer, JudgeSynthesisOutput, JudgeVerificationInput, MockJudgeSynthesizerOptions, SynthesisFailureSummary, SynthesisRepairAttempt, SynthesisVerificationResult } from "./judge.js";
|
|
13
9
|
export { ensemble, runEnsemble } from "./run.js";
|
|
14
|
-
export { createFusionKitJudgeSynthesizer, runFusionPanels, runUnifiedHarnessE2E } from "./unified.js";
|
|
15
|
-
export type { CursorHarnessRunnerInput, CursorHarnessRunnerResult, FusionPanelOptions, UnifiedHarnessE2EOptions, UnifiedHarnessE2EResult, UnifiedHarnessKind, UnifiedHarnessMatrixResult } from "./unified.js";
|
|
10
|
+
export { createFusionKitJudgeSynthesizer, runFusionPanels, runUnifiedHarnessE2E, setToolHarnessProvider } from "./unified.js";
|
|
11
|
+
export type { CursorHarnessRunnerInput, CursorHarnessRunnerResult, FusionPanelOptions, ToolHarnessProvider, ToolHarnessResolveOptions, UnifiedHarnessE2EOptions, UnifiedHarnessE2EResult, UnifiedHarnessKind, UnifiedHarnessMatrixResult } from "./unified.js";
|
|
16
12
|
export { ambientTraceId, emitTrace, getTraceEmitter, newSpanId, newTraceId, TRACE_CANDIDATE_HEADER, TRACE_ID_HEADER, TRACE_PARENT_SPAN_HEADER, TRACE_SPAN_HEADER, TraceEmitter } from "./trace.js";
|
|
17
13
|
export type { EmitInput, FusionTraceComponent, FusionTraceEvent, FusionTraceEventType } from "./trace.js";
|
|
18
14
|
export { runJudgeSynthesis } from "./synthesis.js";
|
|
@@ -27,4 +23,5 @@ export type { FusionKitToolExecutionBatch, FusionKitToolExecutionRequest, Fusion
|
|
|
27
23
|
export type { CandidateCommandIsolationInput, CandidateCommandIsolationResult } from "./isolation.js";
|
|
28
24
|
export { cleanupCandidateWorktree, cleanupWorktreePlan, createWorktreePlan, defaultOutputRoot, diffCandidateWorktree, sealCandidateWorktree } from "./worktree.js";
|
|
29
25
|
export type { CandidateWorktree, WorktreePlan } from "./worktree.js";
|
|
26
|
+
export { hardeningToJson } from "./harness.js";
|
|
30
27
|
export type { EnsembleCandidateSummary, EnsembleDescriptor, EnsembleJudge, EnsembleModel, EnsemblePolicy, EnsembleRunResult, EnsembleRuntime, CandidateContainerDriver, CandidateContainerDriverInput, CandidateContainerDriverResult, CandidateHardeningMetadata, CandidateIsolationConfig, CandidateIsolationKind, CandidateIsolationMountPolicy, CandidateIsolationNetworkPolicy, CandidateIsolationSecretPolicy, HarnessAdapter, HarnessArtifact, HarnessCapabilities, HarnessCandidateOutput, HarnessCollectInput, HarnessPrepareInput, HarnessRunInput, HarnessToolRecord, ReviewEvidence, EnsembleRunSummary, VerificationProfile } from "./harness.js";
|
package/dist/index.js
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
export { createCommandHarness } from "./command.js";
|
|
2
|
-
export {
|
|
3
|
-
export { codexConfigToml, codexHarnessCredentialSkipReason, codexHarness, createCodexHarness } from "./codex.js";
|
|
2
|
+
export { resolveCursorkitCli } from "./cursorkit-path.js";
|
|
4
3
|
export { createArtifactStore } from "./artifacts.js";
|
|
5
|
-
export { createHarnessCapabilityMatrix, harnessDashboard, runHarnessSmokeDashboard } from "./dashboard.js";
|
|
6
4
|
export { createMockJudgeSynthesizer } from "./judge.js";
|
|
7
5
|
export { ensemble, runEnsemble } from "./run.js";
|
|
8
|
-
export { createFusionKitJudgeSynthesizer, runFusionPanels, runUnifiedHarnessE2E } from "./unified.js";
|
|
6
|
+
export { createFusionKitJudgeSynthesizer, runFusionPanels, runUnifiedHarnessE2E, setToolHarnessProvider } from "./unified.js";
|
|
9
7
|
export { ambientTraceId, emitTrace, getTraceEmitter, newSpanId, newTraceId, TRACE_CANDIDATE_HEADER, TRACE_ID_HEADER, TRACE_PARENT_SPAN_HEADER, TRACE_SPAN_HEADER, TraceEmitter } from "./trace.js";
|
|
10
8
|
export { runJudgeSynthesis } from "./synthesis.js";
|
|
11
9
|
export { createMockHarness } from "./mock.js";
|
|
@@ -13,3 +11,4 @@ export { createToolExecutor, registerDemoTools, sideEffectsForTool } from "./too
|
|
|
13
11
|
export { executeFusionKitToolBatch, FusionKitToolExecutorClient, FusionKitToolExecutorClientError, FusionKitToolExecutorError, startFusionKitToolExecutorServer } from "./external-executor.js";
|
|
14
12
|
export { createCliContainerDriver, runCandidateCommandWithIsolation, secretAbsenceMetadata, secretValueHash } from "./isolation.js";
|
|
15
13
|
export { cleanupCandidateWorktree, cleanupWorktreePlan, createWorktreePlan, defaultOutputRoot, diffCandidateWorktree, sealCandidateWorktree } from "./worktree.js";
|
|
14
|
+
export { hardeningToJson } from "./harness.js";
|
package/dist/run.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { assertHarnessCandidateRecordV1, assertHarnessRunRequestV1, assertHarnessRunResultV1, MODEL_FUSION_SCHEMA_BUNDLE_HASH, requestHash } from "@fusionkit/protocol";
|
|
2
2
|
import { createArtifactStore } from "./artifacts.js";
|
|
3
|
+
import { hardeningToJson } from "./harness.js";
|
|
3
4
|
import { runJudgeSynthesis } from "./synthesis.js";
|
|
4
5
|
import { cleanupWorktreePlan, createWorktreePlan, defaultOutputRoot, diffCandidateWorktree, sealCandidateWorktree } from "./worktree.js";
|
|
5
6
|
const PRODUCER_GIT_SHA = "0".repeat(40);
|
|
@@ -99,7 +100,7 @@ function candidateMetadata(output, descriptor, worktree) {
|
|
|
99
100
|
}
|
|
100
101
|
Object.assign(metadata, output.metadata ?? {});
|
|
101
102
|
if (metadata.hardening === undefined) {
|
|
102
|
-
metadata.hardening = fallbackCandidateHardening(descriptor);
|
|
103
|
+
metadata.hardening = hardeningToJson(fallbackCandidateHardening(descriptor));
|
|
103
104
|
}
|
|
104
105
|
if (descriptor.reviewEvidence !== undefined) {
|
|
105
106
|
metadata.review_evidence_attached = true;
|
|
@@ -5,9 +5,7 @@ import { join } from "node:path";
|
|
|
5
5
|
import { test } from "node:test";
|
|
6
6
|
import { assertJudgeSynthesisRecordV1, assertHarnessCandidateRecordV1, assertHarnessRunRequestV1, assertHarnessRunResultV1, MODEL_FUSION_SCHEMA_BUNDLE_HASH, requestHash, responseHash } from "@fusionkit/protocol";
|
|
7
7
|
import { gitText } from "@fusionkit/workspace";
|
|
8
|
-
import { claudeCodeHarness, claudeCodeHarnessCredentialSkipReason } from "../claude-code.js";
|
|
9
8
|
import { createCommandHarness } from "../command.js";
|
|
10
|
-
import { codexConfigToml, codexHarness, codexHarnessCredentialSkipReason } from "../codex.js";
|
|
11
9
|
import { createMockJudgeSynthesizer } from "../judge.js";
|
|
12
10
|
import { createMockHarness } from "../mock.js";
|
|
13
11
|
import { runEnsemble } from "../run.js";
|
|
@@ -88,10 +86,6 @@ function addFilePatch(path, content) {
|
|
|
88
86
|
""
|
|
89
87
|
].join("\n");
|
|
90
88
|
}
|
|
91
|
-
function emptyCodexHome() {
|
|
92
|
-
const path = mkdtempSync(join(tmpdir(), "ensemble-codex-empty-home-"));
|
|
93
|
-
return { path, cleanup: () => rmSync(path, { recursive: true, force: true }) };
|
|
94
|
-
}
|
|
95
89
|
test("mock adapter runs N candidates and emits valid model-fusion records", async () => {
|
|
96
90
|
const result = await runEnsemble(descriptor({
|
|
97
91
|
harness: createMockHarness({
|
|
@@ -128,227 +122,6 @@ test("command adapter records command output, artifact, tool record, and verific
|
|
|
128
122
|
"exec_ensemble_test_command_0"
|
|
129
123
|
]);
|
|
130
124
|
});
|
|
131
|
-
test("claude-code adapter can replace mock and skip clearly without credentials", async () => {
|
|
132
|
-
const result = await runEnsemble(descriptor({
|
|
133
|
-
models: [{ id: "claude", model: "claude-sonnet-4-6" }],
|
|
134
|
-
harness: claudeCodeHarness({ env: {} })
|
|
135
|
-
}));
|
|
136
|
-
assert.equal(result.candidates.length, 1);
|
|
137
|
-
assert.equal(result.harnessRunResult.status, "skipped");
|
|
138
|
-
assert.equal(result.candidates[0]?.status, "skipped");
|
|
139
|
-
assert.equal(result.candidates[0]?.error?.kind, "capability_missing");
|
|
140
|
-
assert.match(result.candidates[0]?.error?.message ?? "", /missing Claude Code credential/);
|
|
141
|
-
assert.match(result.summary?.candidates[0]?.verification?.evidence[0] ?? "", /missing Claude/);
|
|
142
|
-
});
|
|
143
|
-
test("claude-code adapter delegates through a session backend from a generic descriptor", async () => {
|
|
144
|
-
const repo = makeRepo();
|
|
145
|
-
const seen = {};
|
|
146
|
-
const backend = {
|
|
147
|
-
isolation: "vercel-sandbox",
|
|
148
|
-
supports: () => true,
|
|
149
|
-
execute: async (input) => {
|
|
150
|
-
seen.agentKind = input.contract.agent.kind;
|
|
151
|
-
seen.env = input.execution.env;
|
|
152
|
-
seen.repoDir = input.repoDir;
|
|
153
|
-
assert.equal(input.contract.isolation, "vercel-sandbox");
|
|
154
|
-
assert.equal(input.contract.execution?.kind, "agent");
|
|
155
|
-
assert.equal(input.secrets.length, 0);
|
|
156
|
-
writeFileSync(join(input.repoDir, "CLAUDE_RESULT.md"), "fake claude result\n");
|
|
157
|
-
input.emit({
|
|
158
|
-
type: "command.executed",
|
|
159
|
-
argvHash: requestHash({ adapter: "claude-code" }),
|
|
160
|
-
exitCode: 0
|
|
161
|
-
});
|
|
162
|
-
return { exitCode: 0, log: Buffer.from("fake claude transcript") };
|
|
163
|
-
}
|
|
164
|
-
};
|
|
165
|
-
try {
|
|
166
|
-
const result = await runEnsemble(descriptor({
|
|
167
|
-
models: [{ id: "claude", model: "claude-sonnet-4-6" }],
|
|
168
|
-
harness: claudeCodeHarness({
|
|
169
|
-
env: {
|
|
170
|
-
ANTHROPIC_API_KEY: "sk-ant-test",
|
|
171
|
-
VERCEL_TOKEN: "vercel-test"
|
|
172
|
-
},
|
|
173
|
-
backend
|
|
174
|
-
}),
|
|
175
|
-
workspace: repo.repo,
|
|
176
|
-
baseGitSha: repo.head,
|
|
177
|
-
outputRoot: repo.outputRoot,
|
|
178
|
-
cleanupWorktrees: true
|
|
179
|
-
}));
|
|
180
|
-
assert.equal(result.harnessRunResult.status, "succeeded");
|
|
181
|
-
assert.equal(result.candidates[0]?.status, "succeeded");
|
|
182
|
-
assert.equal(seen.agentKind, "claude-code");
|
|
183
|
-
assert.equal(seen.env?.ANTHROPIC_API_KEY, "sk-ant-test");
|
|
184
|
-
assert.equal(Object.hasOwn(seen.env ?? {}, "VERCEL_TOKEN"), false);
|
|
185
|
-
assert.notEqual(seen.repoDir, repo.repo);
|
|
186
|
-
assert.ok(result.artifacts.some((artifact) => artifact.kind === "patch"));
|
|
187
|
-
assert.match(result.candidates[0]?.metadata?.adapter, /claude-code/);
|
|
188
|
-
}
|
|
189
|
-
finally {
|
|
190
|
-
repo.cleanup();
|
|
191
|
-
}
|
|
192
|
-
});
|
|
193
|
-
test("smoke: claude-code adapter runs live when credentials are available", { skip: liveClaudeSmokeSkipReason() }, async () => {
|
|
194
|
-
const repo = makeRepo();
|
|
195
|
-
try {
|
|
196
|
-
const result = await runEnsemble(descriptor({
|
|
197
|
-
id: "claude_smoke",
|
|
198
|
-
models: [{ id: "claude", model: "claude-sonnet-4-6" }],
|
|
199
|
-
harness: claudeCodeHarness(),
|
|
200
|
-
runtime: {
|
|
201
|
-
id: "vercel-sandbox",
|
|
202
|
-
isolation: {
|
|
203
|
-
kind: "microvm",
|
|
204
|
-
networkPolicy: {
|
|
205
|
-
defaultDeny: true,
|
|
206
|
-
allowHosts: [
|
|
207
|
-
"registry.npmjs.org",
|
|
208
|
-
"api.anthropic.com",
|
|
209
|
-
"ai-gateway.vercel.sh"
|
|
210
|
-
]
|
|
211
|
-
}
|
|
212
|
-
}
|
|
213
|
-
},
|
|
214
|
-
policy: {
|
|
215
|
-
id: "claude-smoke-policy",
|
|
216
|
-
allowedTools: ["read_file"],
|
|
217
|
-
sideEffects: "read_only",
|
|
218
|
-
timeoutMs: 180_000
|
|
219
|
-
},
|
|
220
|
-
prompt: "Read README.md if present, then reply exactly CLAUDE_LIVE_SMOKE_OK. Do not modify files.",
|
|
221
|
-
workspace: repo.repo,
|
|
222
|
-
baseGitSha: repo.head,
|
|
223
|
-
outputRoot: repo.outputRoot,
|
|
224
|
-
cleanupWorktrees: true
|
|
225
|
-
}));
|
|
226
|
-
assertHarnessRunResultV1(result.harnessRunResult);
|
|
227
|
-
assert.equal(result.harnessRunResult.status, "succeeded");
|
|
228
|
-
assert.equal(result.candidates[0]?.status, "succeeded");
|
|
229
|
-
}
|
|
230
|
-
finally {
|
|
231
|
-
repo.cleanup();
|
|
232
|
-
}
|
|
233
|
-
});
|
|
234
|
-
test("codex config declares a Responses provider without touching Cursor records", () => {
|
|
235
|
-
const toml = codexConfigToml({
|
|
236
|
-
model: "gpt-5.5-codex",
|
|
237
|
-
sandboxMode: "workspace-write",
|
|
238
|
-
approvalPolicy: "never",
|
|
239
|
-
provider: {
|
|
240
|
-
providerId: "warrant-local",
|
|
241
|
-
name: "Warrant Local",
|
|
242
|
-
baseUrl: "https://gateway.example.com/v1/responses",
|
|
243
|
-
apiKeyEnvName: "WARRANT_CODEX_API_KEY",
|
|
244
|
-
requiresOpenAiAuth: true
|
|
245
|
-
}
|
|
246
|
-
});
|
|
247
|
-
assert.match(toml, /model = "gpt-5\.5-codex"/);
|
|
248
|
-
assert.match(toml, /model_provider = "warrant-local"/);
|
|
249
|
-
assert.match(toml, /\[model_providers\.warrant-local\]/);
|
|
250
|
-
assert.match(toml, /base_url = "https:\/\/gateway\.example\.com\/v1"/);
|
|
251
|
-
assert.match(toml, /wire_api = "responses"/);
|
|
252
|
-
assert.match(toml, /env_key = "WARRANT_CODEX_API_KEY"/);
|
|
253
|
-
assert.equal(toml.includes("cursor"), false);
|
|
254
|
-
});
|
|
255
|
-
test("codex adapter emits schema-valid skipped output without credentials", async () => {
|
|
256
|
-
const codexHome = emptyCodexHome();
|
|
257
|
-
try {
|
|
258
|
-
const result = await runEnsemble(descriptor({
|
|
259
|
-
models: [{ id: "codex", model: "gpt-5.5-codex" }],
|
|
260
|
-
harness: codexHarness({
|
|
261
|
-
env: { CODEX_HOME: codexHome.path },
|
|
262
|
-
provider: { kind: "ambient" }
|
|
263
|
-
})
|
|
264
|
-
}));
|
|
265
|
-
assertHarnessRunResultV1(result.harnessRunResult);
|
|
266
|
-
assert.equal(result.harnessRunResult.status, "skipped");
|
|
267
|
-
assert.equal(result.candidates[0]?.status, "skipped");
|
|
268
|
-
assert.equal(result.candidates[0]?.error?.kind, "capability_missing");
|
|
269
|
-
assert.match(result.candidates[0]?.error?.message ?? "", /Codex credentials are absent/);
|
|
270
|
-
assert.match(result.summary?.candidates[0]?.verification?.evidence[0] ?? "", /Codex credentials/);
|
|
271
|
-
}
|
|
272
|
-
finally {
|
|
273
|
-
codexHome.cleanup();
|
|
274
|
-
}
|
|
275
|
-
});
|
|
276
|
-
test("codex adapter runs through an injected Responses runner and records evidence", async () => {
|
|
277
|
-
const calls = [];
|
|
278
|
-
const result = await runEnsemble(descriptor({
|
|
279
|
-
models: [{ id: "codex", model: "gpt-5.5-codex" }],
|
|
280
|
-
harness: codexHarness({
|
|
281
|
-
env: {},
|
|
282
|
-
provider: {
|
|
283
|
-
kind: "responses",
|
|
284
|
-
baseUrl: "http://127.0.0.1:8787/v1/responses",
|
|
285
|
-
apiKey: "inline-test-key",
|
|
286
|
-
requiresOpenAiAuth: true,
|
|
287
|
-
providerId: "local-responses"
|
|
288
|
-
},
|
|
289
|
-
runner: (input) => {
|
|
290
|
-
calls.push(input);
|
|
291
|
-
const codexHome = input.env.CODEX_HOME;
|
|
292
|
-
assert.ok(codexHome);
|
|
293
|
-
const config = readFileSync(join(codexHome, "config.toml"), "utf8");
|
|
294
|
-
assert.match(config, /model_provider = "local-responses"/);
|
|
295
|
-
assert.match(config, /base_url = "http:\/\/127\.0\.0\.1:8787\/v1"/);
|
|
296
|
-
assert.match(config, /wire_api = "responses"/);
|
|
297
|
-
assert.match(config, /env_key = "WARRANT_CODEX_PROVIDER_API_KEY"/);
|
|
298
|
-
assert.equal(input.env.WARRANT_CODEX_PROVIDER_API_KEY, "inline-test-key");
|
|
299
|
-
return {
|
|
300
|
-
stdout: '{"type":"message","content":"codex-ok"}\n',
|
|
301
|
-
stderr: "",
|
|
302
|
-
exitCode: 0
|
|
303
|
-
};
|
|
304
|
-
}
|
|
305
|
-
})
|
|
306
|
-
}));
|
|
307
|
-
assert.equal(calls.length, 1);
|
|
308
|
-
assert.deepEqual(calls[0]?.args.slice(0, 3), [
|
|
309
|
-
"exec",
|
|
310
|
-
"--json",
|
|
311
|
-
"--skip-git-repo-check"
|
|
312
|
-
]);
|
|
313
|
-
assert.equal(result.harnessRunResult.status, "succeeded");
|
|
314
|
-
assert.equal(result.candidates[0]?.status, "succeeded");
|
|
315
|
-
assert.equal(result.toolRecords[0]?.status, "succeeded");
|
|
316
|
-
assert.match(result.candidates[0]?.metadata?.adapter, /codex/);
|
|
317
|
-
});
|
|
318
|
-
function liveClaudeSmokeSkipReason() {
|
|
319
|
-
if (process.env.WARRANT_CLAUDE_SMOKE !== "1") {
|
|
320
|
-
return "set WARRANT_CLAUDE_SMOKE=1 plus Claude Code credentials to run the live Claude Code smoke";
|
|
321
|
-
}
|
|
322
|
-
return claudeCodeHarnessCredentialSkipReason() ?? false;
|
|
323
|
-
}
|
|
324
|
-
function liveCodexSmokeSkipReason() {
|
|
325
|
-
if (process.env.WARRANT_CODEX_SMOKE !== "1") {
|
|
326
|
-
return "set WARRANT_CODEX_SMOKE=1 plus Codex credentials to run the live Codex smoke";
|
|
327
|
-
}
|
|
328
|
-
return codexHarnessCredentialSkipReason() ?? false;
|
|
329
|
-
}
|
|
330
|
-
test("codex adapter live smoke is credential-gated", { skip: liveCodexSmokeSkipReason() }, async () => {
|
|
331
|
-
const repo = makeRepo();
|
|
332
|
-
try {
|
|
333
|
-
const result = await runEnsemble(descriptor({
|
|
334
|
-
prompt: "Read README.md if present, then reply exactly CODEX_LIVE_SMOKE_OK. Do not modify files.",
|
|
335
|
-
models: [{ id: "codex", model: process.env.WARRANT_CODEX_SMOKE_MODEL ?? "gpt-5.5-codex" }],
|
|
336
|
-
harness: codexHarness({
|
|
337
|
-
timeoutMs: 60_000,
|
|
338
|
-
sandboxMode: "read-only",
|
|
339
|
-
approvalPolicy: "never"
|
|
340
|
-
}),
|
|
341
|
-
workspace: repo.repo,
|
|
342
|
-
baseGitSha: repo.head,
|
|
343
|
-
outputRoot: repo.outputRoot
|
|
344
|
-
}));
|
|
345
|
-
assertHarnessRunResultV1(result.harnessRunResult);
|
|
346
|
-
assert.notEqual(result.harnessRunResult.status, "skipped");
|
|
347
|
-
}
|
|
348
|
-
finally {
|
|
349
|
-
repo.cleanup();
|
|
350
|
-
}
|
|
351
|
-
});
|
|
352
125
|
test("command adapter records optional container hardening metadata", async () => {
|
|
353
126
|
const driver = {
|
|
354
127
|
id: "fake-ensemble-container",
|
package/dist/unified.d.ts
CHANGED
|
@@ -1,7 +1,33 @@
|
|
|
1
1
|
import type { JsonValue, ModelFusionStatus } from "@fusionkit/protocol";
|
|
2
|
-
import type { EnsembleModel, EnsembleRunResult } from "./harness.js";
|
|
2
|
+
import type { EnsembleDescriptor, EnsembleModel, EnsembleRunResult, HarnessAdapter } from "./harness.js";
|
|
3
3
|
import type { JudgeSynthesizer } from "./judge.js";
|
|
4
4
|
export type UnifiedHarnessKind = "mock" | "command" | "agent" | "codex" | "claude-code" | "cursor-acp" | "cursor-desktop";
|
|
5
|
+
/**
|
|
6
|
+
* Options the unified runner passes to a tool's harness factory. The per-tool
|
|
7
|
+
* packages map these onto their own harness options (provider base URL, etc.).
|
|
8
|
+
*/
|
|
9
|
+
export type ToolHarnessResolveOptions = {
|
|
10
|
+
fusionBackendUrl: string;
|
|
11
|
+
fusionApiKey?: string;
|
|
12
|
+
timeoutMs?: number;
|
|
13
|
+
};
|
|
14
|
+
/**
|
|
15
|
+
* Provides everything ensemble needs about a tool-backed harness kind (codex,
|
|
16
|
+
* claude-code, cursor-*) without ensemble depending on any per-tool package. The
|
|
17
|
+
* fusionkit CLI registers one (built from its tool registry) via
|
|
18
|
+
* {@link setToolHarnessProvider}; without it, requesting a tool harness kind
|
|
19
|
+
* throws a clear error.
|
|
20
|
+
*/
|
|
21
|
+
export type ToolHarnessProvider = {
|
|
22
|
+
adapter(kind: UnifiedHarnessKind, options: ToolHarnessResolveOptions): HarnessAdapter;
|
|
23
|
+
sideEffects(kind: UnifiedHarnessKind): EnsembleDescriptor["policy"]["sideEffects"];
|
|
24
|
+
responseShape(kind: UnifiedHarnessKind): string;
|
|
25
|
+
};
|
|
26
|
+
/**
|
|
27
|
+
* Register the provider that resolves tool-backed harness kinds. The fusionkit
|
|
28
|
+
* CLI wires this at startup from its tool registry.
|
|
29
|
+
*/
|
|
30
|
+
export declare function setToolHarnessProvider(provider: ToolHarnessProvider | undefined): void;
|
|
5
31
|
export type UnifiedHarnessMatrixResult = {
|
|
6
32
|
harness: UnifiedHarnessKind;
|
|
7
33
|
modelIds: string[];
|
|
@@ -26,7 +52,6 @@ export type CursorHarnessRunnerInput = {
|
|
|
26
52
|
repo: string;
|
|
27
53
|
outDir: string;
|
|
28
54
|
timeoutMs?: number;
|
|
29
|
-
cursorKitDir?: string;
|
|
30
55
|
};
|
|
31
56
|
export type CursorHarnessRunnerResult = {
|
|
32
57
|
status: ModelFusionStatus;
|
|
@@ -46,7 +71,6 @@ export type UnifiedHarnessE2EOptions = {
|
|
|
46
71
|
command?: string;
|
|
47
72
|
timeoutMs?: number;
|
|
48
73
|
judgeModel?: string;
|
|
49
|
-
cursorKitDir?: string;
|
|
50
74
|
cursorRunner?: (input: CursorHarnessRunnerInput) => Promise<CursorHarnessRunnerResult>;
|
|
51
75
|
/**
|
|
52
76
|
* Per-candidate model backend URLs keyed by `EnsembleModel.id`. When a
|
package/dist/unified.js
CHANGED
|
@@ -4,12 +4,32 @@ import { join, resolve } from "node:path";
|
|
|
4
4
|
import { newSpanId, TRACE_ID_HEADER, TRACE_SPAN_HEADER } from "@fusionkit/protocol";
|
|
5
5
|
import { gitText } from "@fusionkit/workspace";
|
|
6
6
|
import { createAgentHarness } from "./agent.js";
|
|
7
|
-
import { claudeCodeHarness } from "./claude-code.js";
|
|
8
7
|
import { createCommandHarness } from "./command.js";
|
|
9
|
-
import {
|
|
10
|
-
import { createCursorHarness } from "./cursor.js";
|
|
8
|
+
import { resolveCursorkitCli } from "./cursorkit-path.js";
|
|
11
9
|
import { createMockHarness } from "./mock.js";
|
|
12
10
|
import { runEnsemble } from "./run.js";
|
|
11
|
+
let toolHarnessProvider;
|
|
12
|
+
/**
|
|
13
|
+
* Register the provider that resolves tool-backed harness kinds. The fusionkit
|
|
14
|
+
* CLI wires this at startup from its tool registry.
|
|
15
|
+
*/
|
|
16
|
+
export function setToolHarnessProvider(provider) {
|
|
17
|
+
toolHarnessProvider = provider;
|
|
18
|
+
}
|
|
19
|
+
function requireToolHarnessProvider(kind) {
|
|
20
|
+
if (toolHarnessProvider === undefined) {
|
|
21
|
+
throw new Error(`no tool harness provider registered for harness kind "${kind}"; ` +
|
|
22
|
+
"the fusionkit CLI wires this via setToolHarnessProvider (build the tool registry first).");
|
|
23
|
+
}
|
|
24
|
+
return toolHarnessProvider;
|
|
25
|
+
}
|
|
26
|
+
function resolveToolAdapter(kind, options) {
|
|
27
|
+
return requireToolHarnessProvider(kind).adapter(kind, {
|
|
28
|
+
fusionBackendUrl: normalizeFusionBackendUrl(options.fusionBackendUrl),
|
|
29
|
+
...(options.fusionApiKey !== undefined ? { fusionApiKey: options.fusionApiKey } : {}),
|
|
30
|
+
...(options.timeoutMs !== undefined ? { timeoutMs: options.timeoutMs } : {})
|
|
31
|
+
});
|
|
32
|
+
}
|
|
13
33
|
function normalizeFusionBackendUrl(value) {
|
|
14
34
|
return value.replace(/\/+$/, "");
|
|
15
35
|
}
|
|
@@ -31,7 +51,7 @@ function sideEffectsForHarness(kind) {
|
|
|
31
51
|
case "claude-code":
|
|
32
52
|
case "cursor-acp":
|
|
33
53
|
case "cursor-desktop":
|
|
34
|
-
return
|
|
54
|
+
return requireToolHarnessProvider(kind).sideEffects(kind);
|
|
35
55
|
default: {
|
|
36
56
|
const exhausted = kind;
|
|
37
57
|
throw new Error(`unsupported unified harness: ${String(exhausted)}`);
|
|
@@ -73,25 +93,10 @@ function harnessAdapter(kind, options) {
|
|
|
73
93
|
});
|
|
74
94
|
}
|
|
75
95
|
case "codex":
|
|
76
|
-
return codexHarness({
|
|
77
|
-
timeoutMs: options.timeoutMs,
|
|
78
|
-
provider: {
|
|
79
|
-
kind: "openai-compatible",
|
|
80
|
-
baseUrl: normalizeFusionBackendUrl(options.fusionBackendUrl),
|
|
81
|
-
...(options.fusionApiKey ? { apiKey: options.fusionApiKey } : {})
|
|
82
|
-
}
|
|
83
|
-
});
|
|
84
96
|
case "claude-code":
|
|
85
|
-
return claudeCodeHarness({ timeoutMs: options.timeoutMs });
|
|
86
97
|
case "cursor-acp":
|
|
87
98
|
case "cursor-desktop":
|
|
88
|
-
return
|
|
89
|
-
id: kind,
|
|
90
|
-
fusionBackendUrl: normalizeFusionBackendUrl(options.fusionBackendUrl),
|
|
91
|
-
...(options.fusionApiKey ? { apiKey: options.fusionApiKey } : {}),
|
|
92
|
-
...(options.cursorKitDir !== undefined ? { cursorKitDir: options.cursorKitDir } : {}),
|
|
93
|
-
...(options.timeoutMs !== undefined ? { timeoutMs: options.timeoutMs } : {})
|
|
94
|
-
});
|
|
99
|
+
return resolveToolAdapter(kind, options);
|
|
95
100
|
default: {
|
|
96
101
|
const exhausted = kind;
|
|
97
102
|
throw new Error(`unsupported unified harness: ${String(exhausted)}`);
|
|
@@ -107,12 +112,10 @@ function responseShapeFor(kind) {
|
|
|
107
112
|
return ("Respond to the user in the natural shape the request calls for: a direct answer, " +
|
|
108
113
|
"a plan, or the concrete code change. Reply in first person as the assistant.");
|
|
109
114
|
case "codex":
|
|
110
|
-
return "Return a Codex-style result summary with patch and verification evidence.";
|
|
111
115
|
case "claude-code":
|
|
112
|
-
return "Return a Claude Code-style transcript summary with patch/worktree evidence.";
|
|
113
116
|
case "cursor-acp":
|
|
114
117
|
case "cursor-desktop":
|
|
115
|
-
return
|
|
118
|
+
return requireToolHarnessProvider(kind).responseShape(kind);
|
|
116
119
|
default: {
|
|
117
120
|
const exhausted = kind;
|
|
118
121
|
throw new Error(`unsupported unified harness: ${String(exhausted)}`);
|
|
@@ -311,17 +314,10 @@ function statusForResult(result) {
|
|
|
311
314
|
return result.failureSummary ? "failed" : result.harnessRunResult.status;
|
|
312
315
|
}
|
|
313
316
|
async function defaultCursorRunner(input) {
|
|
314
|
-
|
|
315
|
-
return {
|
|
316
|
-
status: "skipped",
|
|
317
|
-
message: "Cursorkit directory not configured",
|
|
318
|
-
details: { reason: "cursor_kit_dir_missing" }
|
|
319
|
-
};
|
|
320
|
-
}
|
|
317
|
+
const { harnessCli } = resolveCursorkitCli();
|
|
321
318
|
const suite = input.kind === "cursor-acp" ? "acp" : "desktop-route";
|
|
322
319
|
const args = [
|
|
323
|
-
|
|
324
|
-
"--",
|
|
320
|
+
harnessCli,
|
|
325
321
|
"--suite",
|
|
326
322
|
suite,
|
|
327
323
|
"--base-url",
|
|
@@ -335,8 +331,8 @@ async function defaultCursorRunner(input) {
|
|
|
335
331
|
];
|
|
336
332
|
mkdirSync(input.outDir, { recursive: true });
|
|
337
333
|
return await new Promise((resolveResult) => {
|
|
338
|
-
const child = spawn(
|
|
339
|
-
cwd: input.
|
|
334
|
+
const child = spawn(process.execPath, args, {
|
|
335
|
+
cwd: input.outDir,
|
|
340
336
|
stdio: ["ignore", "pipe", "pipe"]
|
|
341
337
|
});
|
|
342
338
|
let stdout = "";
|
|
@@ -367,8 +363,7 @@ async function runCursorHarness(kind, options) {
|
|
|
367
363
|
fusionBackendUrl: options.fusionBackendUrl,
|
|
368
364
|
repo: options.repo,
|
|
369
365
|
outDir: join(options.outputRoot, `${kind}-${model.id}`),
|
|
370
|
-
timeoutMs: options.timeoutMs,
|
|
371
|
-
cursorKitDir: options.cursorKitDir
|
|
366
|
+
timeoutMs: options.timeoutMs
|
|
372
367
|
})));
|
|
373
368
|
const failed = perModel.find((result) => result.status === "failed");
|
|
374
369
|
const skipped = perModel.every((result) => result.status === "skipped");
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fusionkit/ensemble",
|
|
3
3
|
"private": false,
|
|
4
|
-
"version": "0.1.5",
|
|
4
|
+
"version": "0.1.6",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
7
7
|
"url": "git+https://github.com/velum-labs/handoffkit.git",
|
|
@@ -25,11 +25,12 @@
|
|
|
25
25
|
"provenance": true
|
|
26
26
|
},
|
|
27
27
|
"dependencies": {
|
|
28
|
-
"@
|
|
29
|
-
"@fusionkit/
|
|
30
|
-
"@fusionkit/
|
|
31
|
-
"@fusionkit/
|
|
32
|
-
"@fusionkit/
|
|
33
|
-
"@fusionkit/
|
|
28
|
+
"@velum-labs/cursorkit": "0.1.2",
|
|
29
|
+
"@fusionkit/adapter-ai-sdk": "0.1.6",
|
|
30
|
+
"@fusionkit/model-gateway": "0.1.6",
|
|
31
|
+
"@fusionkit/protocol": "0.1.6",
|
|
32
|
+
"@fusionkit/session-harness": "0.1.6",
|
|
33
|
+
"@fusionkit/runner": "0.1.6",
|
|
34
|
+
"@fusionkit/workspace": "0.1.6"
|
|
34
35
|
}
|
|
35
36
|
}
|
package/dist/claude-code.d.ts
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import type { NetworkPolicy } from "@fusionkit/protocol";
|
|
2
|
-
import type { SessionBackend } from "@fusionkit/runner";
|
|
3
|
-
import type { ClaudeCodeBindingOptions } from "@fusionkit/session-harness";
|
|
4
|
-
import type { HarnessAdapter } from "./harness.js";
|
|
5
|
-
export type ClaudeCodeHarnessEnv = Record<string, string | undefined>;
|
|
6
|
-
export type ClaudeCodeHarnessOptions = ClaudeCodeBindingOptions & {
|
|
7
|
-
id?: string;
|
|
8
|
-
/** Defaults to `process.env`; tests can pass `{}` for deterministic skips. */
|
|
9
|
-
env?: ClaudeCodeHarnessEnv;
|
|
10
|
-
/** Already-released secret values forwarded through the session backend seam. */
|
|
11
|
-
secrets?: {
|
|
12
|
-
name: string;
|
|
13
|
-
value: string;
|
|
14
|
-
}[];
|
|
15
|
-
/** Test/extension seam. Defaults to `aiSdkHarnessBackend(...)`. */
|
|
16
|
-
backend?: SessionBackend;
|
|
17
|
-
pool?: string;
|
|
18
|
-
network?: NetworkPolicy;
|
|
19
|
-
timeoutMs?: number;
|
|
20
|
-
logMaxBytes?: number;
|
|
21
|
-
skipWhenUnavailable?: boolean;
|
|
22
|
-
};
|
|
23
|
-
export declare function claudeCodeHarnessCredentialSkipReason(env?: ClaudeCodeHarnessEnv, options?: ClaudeCodeHarnessOptions): string | undefined;
|
|
24
|
-
export declare function createClaudeCodeHarness(options?: ClaudeCodeHarnessOptions): HarnessAdapter;
|
|
25
|
-
export declare function claudeCodeHarness(options?: ClaudeCodeHarnessOptions): HarnessAdapter;
|