@fusionkit/ensemble 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cursorkit-path.d.ts +17 -0
- package/dist/cursorkit-path.js +21 -0
- package/dist/harness.d.ts +7 -0
- package/dist/harness.js +51 -1
- package/dist/index.d.ts +5 -8
- package/dist/index.js +3 -4
- package/dist/run.js +2 -1
- package/dist/test/ensemble.test.js +0 -227
- package/dist/unified.d.ts +27 -3
- package/dist/unified.js +35 -30
- package/package.json +8 -7
- package/dist/claude-code.d.ts +0 -25
- package/dist/claude-code.js +0 -398
- package/dist/codex.d.ts +0 -69
- package/dist/codex.js +0 -467
- package/dist/dashboard.d.ts +0 -62
- package/dist/dashboard.js +0 -788
- package/dist/test/codex.test.d.ts +0 -1
- package/dist/test/codex.test.js +0 -237
- package/dist/test/dashboard.test.d.ts +0 -1
- package/dist/test/dashboard.test.js +0 -214
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
export type CursorkitCli = {
|
|
2
|
+
/** Absolute path to the bridge entrypoint (`cursorkit serve`). */
|
|
3
|
+
serveCli: string;
|
|
4
|
+
/** Absolute path to the bundled test-harness CLI (suite probes). */
|
|
5
|
+
harnessCli: string;
|
|
6
|
+
};
|
|
7
|
+
/**
|
|
8
|
+
* Resolve the bundled `@velum-labs/cursorkit` CLIs from node_modules. The
|
|
9
|
+
* package exports `"."` -> `dist/src/cli.js`; the harness CLI lives next to it
|
|
10
|
+
* at `dist/src/testing/cli.js` (not exposed via the exports map, so it is
|
|
11
|
+
* derived from the resolved `"."` entry rather than resolved directly).
|
|
12
|
+
*
|
|
13
|
+
* `FUSIONKIT_CURSORKIT_SERVE_CLI` overrides the resolved `serveCli` entry. This
|
|
14
|
+
* lets a custom build (or an integration test) point the bridge at an alternate
|
|
15
|
+
* entrypoint; the harness CLI is still derived relative to it.
|
|
16
|
+
*/
|
|
17
|
+
export declare function resolveCursorkitCli(): CursorkitCli;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { createRequire } from "node:module";
|
|
2
|
+
import { dirname, join } from "node:path";
|
|
3
|
+
const require = createRequire(import.meta.url);
|
|
4
|
+
/**
|
|
5
|
+
* Resolve the bundled `@velum-labs/cursorkit` CLIs from node_modules. The
|
|
6
|
+
* package exports `"."` -> `dist/src/cli.js`; the harness CLI lives next to it
|
|
7
|
+
* at `dist/src/testing/cli.js` (not exposed via the exports map, so it is
|
|
8
|
+
* derived from the resolved `"."` entry rather than resolved directly).
|
|
9
|
+
*
|
|
10
|
+
* `FUSIONKIT_CURSORKIT_SERVE_CLI` overrides the resolved `serveCli` entry. This
|
|
11
|
+
* lets a custom build (or an integration test) point the bridge at an alternate
|
|
12
|
+
* entrypoint; the harness CLI is still derived relative to it.
|
|
13
|
+
*/
|
|
14
|
+
export function resolveCursorkitCli() {
|
|
15
|
+
const override = process.env.FUSIONKIT_CURSORKIT_SERVE_CLI;
|
|
16
|
+
const serveCli = override !== undefined && override.length > 0
|
|
17
|
+
? override
|
|
18
|
+
: require.resolve("@velum-labs/cursorkit");
|
|
19
|
+
const harnessCli = join(dirname(serveCli), "testing", "cli.js");
|
|
20
|
+
return { serveCli, harnessCli };
|
|
21
|
+
}
|
package/dist/harness.d.ts
CHANGED
|
@@ -190,6 +190,13 @@ export type CandidateHardeningMetadata = {
|
|
|
190
190
|
leak_count: number;
|
|
191
191
|
};
|
|
192
192
|
};
|
|
193
|
+
/**
|
|
194
|
+
* Serialize hardening metadata as a `JsonValue`. The shape is JSON-compatible by
|
|
195
|
+
* construction, but TypeScript cannot prove an object type with optional members
|
|
196
|
+
* satisfies the `JsonValue` index signature, so this typed mapper does the
|
|
197
|
+
* conversion explicitly (omitting absent optionals) instead of an unchecked cast.
|
|
198
|
+
*/
|
|
199
|
+
export declare function hardeningToJson(hardening: CandidateHardeningMetadata): JsonValue;
|
|
193
200
|
export type EnsembleRuntime = {
|
|
194
201
|
id: string;
|
|
195
202
|
environmentId?: string;
|
package/dist/harness.js
CHANGED
|
@@ -1 +1,51 @@
|
|
|
1
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Serialize hardening metadata as a `JsonValue`. The shape is JSON-compatible by
|
|
3
|
+
* construction, but TypeScript cannot prove an object type with optional members
|
|
4
|
+
* satisfies the `JsonValue` index signature, so this typed mapper does the
|
|
5
|
+
* conversion explicitly (omitting absent optionals) instead of an unchecked cast.
|
|
6
|
+
*/
|
|
7
|
+
export function hardeningToJson(hardening) {
|
|
8
|
+
return {
|
|
9
|
+
requested_isolation: hardening.requested_isolation,
|
|
10
|
+
actual_isolation: hardening.actual_isolation,
|
|
11
|
+
runtime: {
|
|
12
|
+
...(hardening.runtime.image !== undefined ? { image: hardening.runtime.image } : {}),
|
|
13
|
+
...(hardening.runtime.driver !== undefined ? { driver: hardening.runtime.driver } : {}),
|
|
14
|
+
...(hardening.runtime.provider !== undefined ? { provider: hardening.runtime.provider } : {}),
|
|
15
|
+
...(hardening.runtime.runtime !== undefined ? { runtime: hardening.runtime.runtime } : {}),
|
|
16
|
+
...(hardening.runtime.snapshot_id !== undefined ? { snapshot_id: hardening.runtime.snapshot_id } : {}),
|
|
17
|
+
...(hardening.runtime.sandbox_id !== undefined ? { sandbox_id: hardening.runtime.sandbox_id } : {}),
|
|
18
|
+
...(hardening.runtime.image_digest !== undefined ? { image_digest: hardening.runtime.image_digest } : {}),
|
|
19
|
+
...(hardening.runtime.runtime_digest !== undefined
|
|
20
|
+
? { runtime_digest: hardening.runtime.runtime_digest }
|
|
21
|
+
: {}),
|
|
22
|
+
workdir: hardening.runtime.workdir
|
|
23
|
+
},
|
|
24
|
+
mount_policy: {
|
|
25
|
+
worktree_writable: hardening.mount_policy.worktree_writable,
|
|
26
|
+
read_only_caches: [...hardening.mount_policy.read_only_caches],
|
|
27
|
+
ignored_dirs: [...hardening.mount_policy.ignored_dirs]
|
|
28
|
+
},
|
|
29
|
+
network_policy: {
|
|
30
|
+
default_deny: hardening.network_policy.default_deny,
|
|
31
|
+
allow_hosts: [...hardening.network_policy.allow_hosts],
|
|
32
|
+
enforced: hardening.network_policy.enforced
|
|
33
|
+
},
|
|
34
|
+
cleanup: {
|
|
35
|
+
attempted: hardening.cleanup.attempted,
|
|
36
|
+
succeeded: hardening.cleanup.succeeded,
|
|
37
|
+
status: hardening.cleanup.status,
|
|
38
|
+
...(hardening.cleanup.timed_out !== undefined ? { timed_out: hardening.cleanup.timed_out } : {}),
|
|
39
|
+
...(hardening.cleanup.error !== undefined ? { error: hardening.cleanup.error } : {})
|
|
40
|
+
},
|
|
41
|
+
secret_absence: {
|
|
42
|
+
secret_names: [...hardening.secret_absence.secret_names],
|
|
43
|
+
secret_value_hashes: [...hardening.secret_absence.secret_value_hashes],
|
|
44
|
+
injected_env_names: [...hardening.secret_absence.injected_env_names],
|
|
45
|
+
scanned: hardening.secret_absence.scanned,
|
|
46
|
+
leaks_found: hardening.secret_absence.leaks_found,
|
|
47
|
+
scan_scope: [...hardening.secret_absence.scan_scope],
|
|
48
|
+
leak_count: hardening.secret_absence.leak_count
|
|
49
|
+
}
|
|
50
|
+
};
|
|
51
|
+
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,18 +1,14 @@
|
|
|
1
1
|
export { createCommandHarness } from "./command.js";
|
|
2
2
|
export type { CommandHarnessOptions } from "./command.js";
|
|
3
|
-
export {
|
|
4
|
-
export type {
|
|
5
|
-
export { codexConfigToml, codexHarnessCredentialSkipReason, codexHarness, createCodexHarness } from "./codex.js";
|
|
6
|
-
export type { CodexAmbientProvider, CodexApprovalPolicy, CodexConfigTomlInput, CodexExecInput, CodexExecResult, CodexExecRunner, CodexHarnessEnv, CodexHarnessOptions, CodexOpenAiCompatibleProvider, CodexProvider, CodexResponsesProvider, CodexSandboxMode } from "./codex.js";
|
|
3
|
+
export { resolveCursorkitCli } from "./cursorkit-path.js";
|
|
4
|
+
export type { CursorkitCli } from "./cursorkit-path.js";
|
|
7
5
|
export { createArtifactStore } from "./artifacts.js";
|
|
8
6
|
export type { ArtifactStore } from "./artifacts.js";
|
|
9
|
-
export { createHarnessCapabilityMatrix, harnessDashboard, runHarnessSmokeDashboard } from "./dashboard.js";
|
|
10
|
-
export type { HarnessAdapterReadiness, HarnessAvailability, HarnessCapabilityMatrix, HarnessCapabilityMatrixRow, HarnessCapabilityTarget, HarnessLiveSmokeTarget, HarnessSmokeDashboard, HarnessSmokeDashboardOptions, HarnessSmokeOutcome, HarnessSmokePurpose, HarnessSmokeRecord } from "./dashboard.js";
|
|
11
7
|
export { createMockJudgeSynthesizer } from "./judge.js";
|
|
12
8
|
export type { JudgeCandidateEvidence, JudgeInput, JudgePatch, JudgeRepairInput, JudgeSynthesizer, JudgeSynthesisOutput, JudgeVerificationInput, MockJudgeSynthesizerOptions, SynthesisFailureSummary, SynthesisRepairAttempt, SynthesisVerificationResult } from "./judge.js";
|
|
13
9
|
export { ensemble, runEnsemble } from "./run.js";
|
|
14
|
-
export { createFusionKitJudgeSynthesizer, runFusionPanels, runUnifiedHarnessE2E } from "./unified.js";
|
|
15
|
-
export type { CursorHarnessRunnerInput, CursorHarnessRunnerResult, FusionPanelOptions, UnifiedHarnessE2EOptions, UnifiedHarnessE2EResult, UnifiedHarnessKind, UnifiedHarnessMatrixResult } from "./unified.js";
|
|
10
|
+
export { createFusionKitJudgeSynthesizer, runFusionPanels, runUnifiedHarnessE2E, setToolHarnessProvider } from "./unified.js";
|
|
11
|
+
export type { CursorHarnessRunnerInput, CursorHarnessRunnerResult, FusionPanelOptions, ToolHarnessProvider, ToolHarnessResolveOptions, UnifiedHarnessE2EOptions, UnifiedHarnessE2EResult, UnifiedHarnessKind, UnifiedHarnessMatrixResult } from "./unified.js";
|
|
16
12
|
export { ambientTraceId, emitTrace, getTraceEmitter, newSpanId, newTraceId, TRACE_CANDIDATE_HEADER, TRACE_ID_HEADER, TRACE_PARENT_SPAN_HEADER, TRACE_SPAN_HEADER, TraceEmitter } from "./trace.js";
|
|
17
13
|
export type { EmitInput, FusionTraceComponent, FusionTraceEvent, FusionTraceEventType } from "./trace.js";
|
|
18
14
|
export { runJudgeSynthesis } from "./synthesis.js";
|
|
@@ -27,4 +23,5 @@ export type { FusionKitToolExecutionBatch, FusionKitToolExecutionRequest, Fusion
|
|
|
27
23
|
export type { CandidateCommandIsolationInput, CandidateCommandIsolationResult } from "./isolation.js";
|
|
28
24
|
export { cleanupCandidateWorktree, cleanupWorktreePlan, createWorktreePlan, defaultOutputRoot, diffCandidateWorktree, sealCandidateWorktree } from "./worktree.js";
|
|
29
25
|
export type { CandidateWorktree, WorktreePlan } from "./worktree.js";
|
|
26
|
+
export { hardeningToJson } from "./harness.js";
|
|
30
27
|
export type { EnsembleCandidateSummary, EnsembleDescriptor, EnsembleJudge, EnsembleModel, EnsemblePolicy, EnsembleRunResult, EnsembleRuntime, CandidateContainerDriver, CandidateContainerDriverInput, CandidateContainerDriverResult, CandidateHardeningMetadata, CandidateIsolationConfig, CandidateIsolationKind, CandidateIsolationMountPolicy, CandidateIsolationNetworkPolicy, CandidateIsolationSecretPolicy, HarnessAdapter, HarnessArtifact, HarnessCapabilities, HarnessCandidateOutput, HarnessCollectInput, HarnessPrepareInput, HarnessRunInput, HarnessToolRecord, ReviewEvidence, EnsembleRunSummary, VerificationProfile } from "./harness.js";
|
package/dist/index.js
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
export { createCommandHarness } from "./command.js";
|
|
2
|
-
export {
|
|
3
|
-
export { codexConfigToml, codexHarnessCredentialSkipReason, codexHarness, createCodexHarness } from "./codex.js";
|
|
2
|
+
export { resolveCursorkitCli } from "./cursorkit-path.js";
|
|
4
3
|
export { createArtifactStore } from "./artifacts.js";
|
|
5
|
-
export { createHarnessCapabilityMatrix, harnessDashboard, runHarnessSmokeDashboard } from "./dashboard.js";
|
|
6
4
|
export { createMockJudgeSynthesizer } from "./judge.js";
|
|
7
5
|
export { ensemble, runEnsemble } from "./run.js";
|
|
8
|
-
export { createFusionKitJudgeSynthesizer, runFusionPanels, runUnifiedHarnessE2E } from "./unified.js";
|
|
6
|
+
export { createFusionKitJudgeSynthesizer, runFusionPanels, runUnifiedHarnessE2E, setToolHarnessProvider } from "./unified.js";
|
|
9
7
|
export { ambientTraceId, emitTrace, getTraceEmitter, newSpanId, newTraceId, TRACE_CANDIDATE_HEADER, TRACE_ID_HEADER, TRACE_PARENT_SPAN_HEADER, TRACE_SPAN_HEADER, TraceEmitter } from "./trace.js";
|
|
10
8
|
export { runJudgeSynthesis } from "./synthesis.js";
|
|
11
9
|
export { createMockHarness } from "./mock.js";
|
|
@@ -13,3 +11,4 @@ export { createToolExecutor, registerDemoTools, sideEffectsForTool } from "./too
|
|
|
13
11
|
export { executeFusionKitToolBatch, FusionKitToolExecutorClient, FusionKitToolExecutorClientError, FusionKitToolExecutorError, startFusionKitToolExecutorServer } from "./external-executor.js";
|
|
14
12
|
export { createCliContainerDriver, runCandidateCommandWithIsolation, secretAbsenceMetadata, secretValueHash } from "./isolation.js";
|
|
15
13
|
export { cleanupCandidateWorktree, cleanupWorktreePlan, createWorktreePlan, defaultOutputRoot, diffCandidateWorktree, sealCandidateWorktree } from "./worktree.js";
|
|
14
|
+
export { hardeningToJson } from "./harness.js";
|
package/dist/run.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { assertHarnessCandidateRecordV1, assertHarnessRunRequestV1, assertHarnessRunResultV1, MODEL_FUSION_SCHEMA_BUNDLE_HASH, requestHash } from "@fusionkit/protocol";
|
|
2
2
|
import { createArtifactStore } from "./artifacts.js";
|
|
3
|
+
import { hardeningToJson } from "./harness.js";
|
|
3
4
|
import { runJudgeSynthesis } from "./synthesis.js";
|
|
4
5
|
import { cleanupWorktreePlan, createWorktreePlan, defaultOutputRoot, diffCandidateWorktree, sealCandidateWorktree } from "./worktree.js";
|
|
5
6
|
const PRODUCER_GIT_SHA = "0".repeat(40);
|
|
@@ -99,7 +100,7 @@ function candidateMetadata(output, descriptor, worktree) {
|
|
|
99
100
|
}
|
|
100
101
|
Object.assign(metadata, output.metadata ?? {});
|
|
101
102
|
if (metadata.hardening === undefined) {
|
|
102
|
-
metadata.hardening = fallbackCandidateHardening(descriptor);
|
|
103
|
+
metadata.hardening = hardeningToJson(fallbackCandidateHardening(descriptor));
|
|
103
104
|
}
|
|
104
105
|
if (descriptor.reviewEvidence !== undefined) {
|
|
105
106
|
metadata.review_evidence_attached = true;
|
|
@@ -5,9 +5,7 @@ import { join } from "node:path";
|
|
|
5
5
|
import { test } from "node:test";
|
|
6
6
|
import { assertJudgeSynthesisRecordV1, assertHarnessCandidateRecordV1, assertHarnessRunRequestV1, assertHarnessRunResultV1, MODEL_FUSION_SCHEMA_BUNDLE_HASH, requestHash, responseHash } from "@fusionkit/protocol";
|
|
7
7
|
import { gitText } from "@fusionkit/workspace";
|
|
8
|
-
import { claudeCodeHarness, claudeCodeHarnessCredentialSkipReason } from "../claude-code.js";
|
|
9
8
|
import { createCommandHarness } from "../command.js";
|
|
10
|
-
import { codexConfigToml, codexHarness, codexHarnessCredentialSkipReason } from "../codex.js";
|
|
11
9
|
import { createMockJudgeSynthesizer } from "../judge.js";
|
|
12
10
|
import { createMockHarness } from "../mock.js";
|
|
13
11
|
import { runEnsemble } from "../run.js";
|
|
@@ -88,10 +86,6 @@ function addFilePatch(path, content) {
|
|
|
88
86
|
""
|
|
89
87
|
].join("\n");
|
|
90
88
|
}
|
|
91
|
-
function emptyCodexHome() {
|
|
92
|
-
const path = mkdtempSync(join(tmpdir(), "ensemble-codex-empty-home-"));
|
|
93
|
-
return { path, cleanup: () => rmSync(path, { recursive: true, force: true }) };
|
|
94
|
-
}
|
|
95
89
|
test("mock adapter runs N candidates and emits valid model-fusion records", async () => {
|
|
96
90
|
const result = await runEnsemble(descriptor({
|
|
97
91
|
harness: createMockHarness({
|
|
@@ -128,227 +122,6 @@ test("command adapter records command output, artifact, tool record, and verific
|
|
|
128
122
|
"exec_ensemble_test_command_0"
|
|
129
123
|
]);
|
|
130
124
|
});
|
|
131
|
-
test("claude-code adapter can replace mock and skip clearly without credentials", async () => {
|
|
132
|
-
const result = await runEnsemble(descriptor({
|
|
133
|
-
models: [{ id: "claude", model: "claude-sonnet-4-6" }],
|
|
134
|
-
harness: claudeCodeHarness({ env: {} })
|
|
135
|
-
}));
|
|
136
|
-
assert.equal(result.candidates.length, 1);
|
|
137
|
-
assert.equal(result.harnessRunResult.status, "skipped");
|
|
138
|
-
assert.equal(result.candidates[0]?.status, "skipped");
|
|
139
|
-
assert.equal(result.candidates[0]?.error?.kind, "capability_missing");
|
|
140
|
-
assert.match(result.candidates[0]?.error?.message ?? "", /missing Claude Code credential/);
|
|
141
|
-
assert.match(result.summary?.candidates[0]?.verification?.evidence[0] ?? "", /missing Claude/);
|
|
142
|
-
});
|
|
143
|
-
test("claude-code adapter delegates through a session backend from a generic descriptor", async () => {
|
|
144
|
-
const repo = makeRepo();
|
|
145
|
-
const seen = {};
|
|
146
|
-
const backend = {
|
|
147
|
-
isolation: "vercel-sandbox",
|
|
148
|
-
supports: () => true,
|
|
149
|
-
execute: async (input) => {
|
|
150
|
-
seen.agentKind = input.contract.agent.kind;
|
|
151
|
-
seen.env = input.execution.env;
|
|
152
|
-
seen.repoDir = input.repoDir;
|
|
153
|
-
assert.equal(input.contract.isolation, "vercel-sandbox");
|
|
154
|
-
assert.equal(input.contract.execution?.kind, "agent");
|
|
155
|
-
assert.equal(input.secrets.length, 0);
|
|
156
|
-
writeFileSync(join(input.repoDir, "CLAUDE_RESULT.md"), "fake claude result\n");
|
|
157
|
-
input.emit({
|
|
158
|
-
type: "command.executed",
|
|
159
|
-
argvHash: requestHash({ adapter: "claude-code" }),
|
|
160
|
-
exitCode: 0
|
|
161
|
-
});
|
|
162
|
-
return { exitCode: 0, log: Buffer.from("fake claude transcript") };
|
|
163
|
-
}
|
|
164
|
-
};
|
|
165
|
-
try {
|
|
166
|
-
const result = await runEnsemble(descriptor({
|
|
167
|
-
models: [{ id: "claude", model: "claude-sonnet-4-6" }],
|
|
168
|
-
harness: claudeCodeHarness({
|
|
169
|
-
env: {
|
|
170
|
-
ANTHROPIC_API_KEY: "sk-ant-test",
|
|
171
|
-
VERCEL_TOKEN: "vercel-test"
|
|
172
|
-
},
|
|
173
|
-
backend
|
|
174
|
-
}),
|
|
175
|
-
workspace: repo.repo,
|
|
176
|
-
baseGitSha: repo.head,
|
|
177
|
-
outputRoot: repo.outputRoot,
|
|
178
|
-
cleanupWorktrees: true
|
|
179
|
-
}));
|
|
180
|
-
assert.equal(result.harnessRunResult.status, "succeeded");
|
|
181
|
-
assert.equal(result.candidates[0]?.status, "succeeded");
|
|
182
|
-
assert.equal(seen.agentKind, "claude-code");
|
|
183
|
-
assert.equal(seen.env?.ANTHROPIC_API_KEY, "sk-ant-test");
|
|
184
|
-
assert.equal(Object.hasOwn(seen.env ?? {}, "VERCEL_TOKEN"), false);
|
|
185
|
-
assert.notEqual(seen.repoDir, repo.repo);
|
|
186
|
-
assert.ok(result.artifacts.some((artifact) => artifact.kind === "patch"));
|
|
187
|
-
assert.match(result.candidates[0]?.metadata?.adapter, /claude-code/);
|
|
188
|
-
}
|
|
189
|
-
finally {
|
|
190
|
-
repo.cleanup();
|
|
191
|
-
}
|
|
192
|
-
});
|
|
193
|
-
test("smoke: claude-code adapter runs live when credentials are available", { skip: liveClaudeSmokeSkipReason() }, async () => {
|
|
194
|
-
const repo = makeRepo();
|
|
195
|
-
try {
|
|
196
|
-
const result = await runEnsemble(descriptor({
|
|
197
|
-
id: "claude_smoke",
|
|
198
|
-
models: [{ id: "claude", model: "claude-sonnet-4-6" }],
|
|
199
|
-
harness: claudeCodeHarness(),
|
|
200
|
-
runtime: {
|
|
201
|
-
id: "vercel-sandbox",
|
|
202
|
-
isolation: {
|
|
203
|
-
kind: "microvm",
|
|
204
|
-
networkPolicy: {
|
|
205
|
-
defaultDeny: true,
|
|
206
|
-
allowHosts: [
|
|
207
|
-
"registry.npmjs.org",
|
|
208
|
-
"api.anthropic.com",
|
|
209
|
-
"ai-gateway.vercel.sh"
|
|
210
|
-
]
|
|
211
|
-
}
|
|
212
|
-
}
|
|
213
|
-
},
|
|
214
|
-
policy: {
|
|
215
|
-
id: "claude-smoke-policy",
|
|
216
|
-
allowedTools: ["read_file"],
|
|
217
|
-
sideEffects: "read_only",
|
|
218
|
-
timeoutMs: 180_000
|
|
219
|
-
},
|
|
220
|
-
prompt: "Read README.md if present, then reply exactly CLAUDE_LIVE_SMOKE_OK. Do not modify files.",
|
|
221
|
-
workspace: repo.repo,
|
|
222
|
-
baseGitSha: repo.head,
|
|
223
|
-
outputRoot: repo.outputRoot,
|
|
224
|
-
cleanupWorktrees: true
|
|
225
|
-
}));
|
|
226
|
-
assertHarnessRunResultV1(result.harnessRunResult);
|
|
227
|
-
assert.equal(result.harnessRunResult.status, "succeeded");
|
|
228
|
-
assert.equal(result.candidates[0]?.status, "succeeded");
|
|
229
|
-
}
|
|
230
|
-
finally {
|
|
231
|
-
repo.cleanup();
|
|
232
|
-
}
|
|
233
|
-
});
|
|
234
|
-
test("codex config declares a Responses provider without touching Cursor records", () => {
|
|
235
|
-
const toml = codexConfigToml({
|
|
236
|
-
model: "gpt-5.5-codex",
|
|
237
|
-
sandboxMode: "workspace-write",
|
|
238
|
-
approvalPolicy: "never",
|
|
239
|
-
provider: {
|
|
240
|
-
providerId: "warrant-local",
|
|
241
|
-
name: "Warrant Local",
|
|
242
|
-
baseUrl: "https://gateway.example.com/v1/responses",
|
|
243
|
-
apiKeyEnvName: "WARRANT_CODEX_API_KEY",
|
|
244
|
-
requiresOpenAiAuth: true
|
|
245
|
-
}
|
|
246
|
-
});
|
|
247
|
-
assert.match(toml, /model = "gpt-5\.5-codex"/);
|
|
248
|
-
assert.match(toml, /model_provider = "warrant-local"/);
|
|
249
|
-
assert.match(toml, /\[model_providers\.warrant-local\]/);
|
|
250
|
-
assert.match(toml, /base_url = "https:\/\/gateway\.example\.com\/v1"/);
|
|
251
|
-
assert.match(toml, /wire_api = "responses"/);
|
|
252
|
-
assert.match(toml, /env_key = "WARRANT_CODEX_API_KEY"/);
|
|
253
|
-
assert.equal(toml.includes("cursor"), false);
|
|
254
|
-
});
|
|
255
|
-
test("codex adapter emits schema-valid skipped output without credentials", async () => {
|
|
256
|
-
const codexHome = emptyCodexHome();
|
|
257
|
-
try {
|
|
258
|
-
const result = await runEnsemble(descriptor({
|
|
259
|
-
models: [{ id: "codex", model: "gpt-5.5-codex" }],
|
|
260
|
-
harness: codexHarness({
|
|
261
|
-
env: { CODEX_HOME: codexHome.path },
|
|
262
|
-
provider: { kind: "ambient" }
|
|
263
|
-
})
|
|
264
|
-
}));
|
|
265
|
-
assertHarnessRunResultV1(result.harnessRunResult);
|
|
266
|
-
assert.equal(result.harnessRunResult.status, "skipped");
|
|
267
|
-
assert.equal(result.candidates[0]?.status, "skipped");
|
|
268
|
-
assert.equal(result.candidates[0]?.error?.kind, "capability_missing");
|
|
269
|
-
assert.match(result.candidates[0]?.error?.message ?? "", /Codex credentials are absent/);
|
|
270
|
-
assert.match(result.summary?.candidates[0]?.verification?.evidence[0] ?? "", /Codex credentials/);
|
|
271
|
-
}
|
|
272
|
-
finally {
|
|
273
|
-
codexHome.cleanup();
|
|
274
|
-
}
|
|
275
|
-
});
|
|
276
|
-
test("codex adapter runs through an injected Responses runner and records evidence", async () => {
|
|
277
|
-
const calls = [];
|
|
278
|
-
const result = await runEnsemble(descriptor({
|
|
279
|
-
models: [{ id: "codex", model: "gpt-5.5-codex" }],
|
|
280
|
-
harness: codexHarness({
|
|
281
|
-
env: {},
|
|
282
|
-
provider: {
|
|
283
|
-
kind: "responses",
|
|
284
|
-
baseUrl: "http://127.0.0.1:8787/v1/responses",
|
|
285
|
-
apiKey: "inline-test-key",
|
|
286
|
-
requiresOpenAiAuth: true,
|
|
287
|
-
providerId: "local-responses"
|
|
288
|
-
},
|
|
289
|
-
runner: (input) => {
|
|
290
|
-
calls.push(input);
|
|
291
|
-
const codexHome = input.env.CODEX_HOME;
|
|
292
|
-
assert.ok(codexHome);
|
|
293
|
-
const config = readFileSync(join(codexHome, "config.toml"), "utf8");
|
|
294
|
-
assert.match(config, /model_provider = "local-responses"/);
|
|
295
|
-
assert.match(config, /base_url = "http:\/\/127\.0\.0\.1:8787\/v1"/);
|
|
296
|
-
assert.match(config, /wire_api = "responses"/);
|
|
297
|
-
assert.match(config, /env_key = "WARRANT_CODEX_PROVIDER_API_KEY"/);
|
|
298
|
-
assert.equal(input.env.WARRANT_CODEX_PROVIDER_API_KEY, "inline-test-key");
|
|
299
|
-
return {
|
|
300
|
-
stdout: '{"type":"message","content":"codex-ok"}\n',
|
|
301
|
-
stderr: "",
|
|
302
|
-
exitCode: 0
|
|
303
|
-
};
|
|
304
|
-
}
|
|
305
|
-
})
|
|
306
|
-
}));
|
|
307
|
-
assert.equal(calls.length, 1);
|
|
308
|
-
assert.deepEqual(calls[0]?.args.slice(0, 3), [
|
|
309
|
-
"exec",
|
|
310
|
-
"--json",
|
|
311
|
-
"--skip-git-repo-check"
|
|
312
|
-
]);
|
|
313
|
-
assert.equal(result.harnessRunResult.status, "succeeded");
|
|
314
|
-
assert.equal(result.candidates[0]?.status, "succeeded");
|
|
315
|
-
assert.equal(result.toolRecords[0]?.status, "succeeded");
|
|
316
|
-
assert.match(result.candidates[0]?.metadata?.adapter, /codex/);
|
|
317
|
-
});
|
|
318
|
-
function liveClaudeSmokeSkipReason() {
|
|
319
|
-
if (process.env.WARRANT_CLAUDE_SMOKE !== "1") {
|
|
320
|
-
return "set WARRANT_CLAUDE_SMOKE=1 plus Claude Code credentials to run the live Claude Code smoke";
|
|
321
|
-
}
|
|
322
|
-
return claudeCodeHarnessCredentialSkipReason() ?? false;
|
|
323
|
-
}
|
|
324
|
-
function liveCodexSmokeSkipReason() {
|
|
325
|
-
if (process.env.WARRANT_CODEX_SMOKE !== "1") {
|
|
326
|
-
return "set WARRANT_CODEX_SMOKE=1 plus Codex credentials to run the live Codex smoke";
|
|
327
|
-
}
|
|
328
|
-
return codexHarnessCredentialSkipReason() ?? false;
|
|
329
|
-
}
|
|
330
|
-
test("codex adapter live smoke is credential-gated", { skip: liveCodexSmokeSkipReason() }, async () => {
|
|
331
|
-
const repo = makeRepo();
|
|
332
|
-
try {
|
|
333
|
-
const result = await runEnsemble(descriptor({
|
|
334
|
-
prompt: "Read README.md if present, then reply exactly CODEX_LIVE_SMOKE_OK. Do not modify files.",
|
|
335
|
-
models: [{ id: "codex", model: process.env.WARRANT_CODEX_SMOKE_MODEL ?? "gpt-5.5-codex" }],
|
|
336
|
-
harness: codexHarness({
|
|
337
|
-
timeoutMs: 60_000,
|
|
338
|
-
sandboxMode: "read-only",
|
|
339
|
-
approvalPolicy: "never"
|
|
340
|
-
}),
|
|
341
|
-
workspace: repo.repo,
|
|
342
|
-
baseGitSha: repo.head,
|
|
343
|
-
outputRoot: repo.outputRoot
|
|
344
|
-
}));
|
|
345
|
-
assertHarnessRunResultV1(result.harnessRunResult);
|
|
346
|
-
assert.notEqual(result.harnessRunResult.status, "skipped");
|
|
347
|
-
}
|
|
348
|
-
finally {
|
|
349
|
-
repo.cleanup();
|
|
350
|
-
}
|
|
351
|
-
});
|
|
352
125
|
test("command adapter records optional container hardening metadata", async () => {
|
|
353
126
|
const driver = {
|
|
354
127
|
id: "fake-ensemble-container",
|
package/dist/unified.d.ts
CHANGED
|
@@ -1,7 +1,33 @@
|
|
|
1
1
|
import type { JsonValue, ModelFusionStatus } from "@fusionkit/protocol";
|
|
2
|
-
import type { EnsembleModel, EnsembleRunResult } from "./harness.js";
|
|
2
|
+
import type { EnsembleDescriptor, EnsembleModel, EnsembleRunResult, HarnessAdapter } from "./harness.js";
|
|
3
3
|
import type { JudgeSynthesizer } from "./judge.js";
|
|
4
4
|
export type UnifiedHarnessKind = "mock" | "command" | "agent" | "codex" | "claude-code" | "cursor-acp" | "cursor-desktop";
|
|
5
|
+
/**
|
|
6
|
+
* Options the unified runner passes to a tool's harness factory. The per-tool
|
|
7
|
+
* packages map these onto their own harness options (provider base URL, etc.).
|
|
8
|
+
*/
|
|
9
|
+
export type ToolHarnessResolveOptions = {
|
|
10
|
+
fusionBackendUrl: string;
|
|
11
|
+
fusionApiKey?: string;
|
|
12
|
+
timeoutMs?: number;
|
|
13
|
+
};
|
|
14
|
+
/**
|
|
15
|
+
* Provides everything ensemble needs about a tool-backed harness kind (codex,
|
|
16
|
+
* claude-code, cursor-*) without ensemble depending on any per-tool package. The
|
|
17
|
+
* fusionkit CLI registers one (built from its tool registry) via
|
|
18
|
+
* {@link setToolHarnessProvider}; without it, requesting a tool harness kind
|
|
19
|
+
* throws a clear error.
|
|
20
|
+
*/
|
|
21
|
+
export type ToolHarnessProvider = {
|
|
22
|
+
adapter(kind: UnifiedHarnessKind, options: ToolHarnessResolveOptions): HarnessAdapter;
|
|
23
|
+
sideEffects(kind: UnifiedHarnessKind): EnsembleDescriptor["policy"]["sideEffects"];
|
|
24
|
+
responseShape(kind: UnifiedHarnessKind): string;
|
|
25
|
+
};
|
|
26
|
+
/**
|
|
27
|
+
* Register the provider that resolves tool-backed harness kinds. The fusionkit
|
|
28
|
+
* CLI wires this at startup from its tool registry.
|
|
29
|
+
*/
|
|
30
|
+
export declare function setToolHarnessProvider(provider: ToolHarnessProvider | undefined): void;
|
|
5
31
|
export type UnifiedHarnessMatrixResult = {
|
|
6
32
|
harness: UnifiedHarnessKind;
|
|
7
33
|
modelIds: string[];
|
|
@@ -26,7 +52,6 @@ export type CursorHarnessRunnerInput = {
|
|
|
26
52
|
repo: string;
|
|
27
53
|
outDir: string;
|
|
28
54
|
timeoutMs?: number;
|
|
29
|
-
cursorKitDir?: string;
|
|
30
55
|
};
|
|
31
56
|
export type CursorHarnessRunnerResult = {
|
|
32
57
|
status: ModelFusionStatus;
|
|
@@ -46,7 +71,6 @@ export type UnifiedHarnessE2EOptions = {
|
|
|
46
71
|
command?: string;
|
|
47
72
|
timeoutMs?: number;
|
|
48
73
|
judgeModel?: string;
|
|
49
|
-
cursorKitDir?: string;
|
|
50
74
|
cursorRunner?: (input: CursorHarnessRunnerInput) => Promise<CursorHarnessRunnerResult>;
|
|
51
75
|
/**
|
|
52
76
|
* Per-candidate model backend URLs keyed by `EnsembleModel.id`. When a
|
package/dist/unified.js
CHANGED
|
@@ -4,11 +4,32 @@ import { join, resolve } from "node:path";
|
|
|
4
4
|
import { newSpanId, TRACE_ID_HEADER, TRACE_SPAN_HEADER } from "@fusionkit/protocol";
|
|
5
5
|
import { gitText } from "@fusionkit/workspace";
|
|
6
6
|
import { createAgentHarness } from "./agent.js";
|
|
7
|
-
import { claudeCodeHarness } from "./claude-code.js";
|
|
8
7
|
import { createCommandHarness } from "./command.js";
|
|
9
|
-
import {
|
|
8
|
+
import { resolveCursorkitCli } from "./cursorkit-path.js";
|
|
10
9
|
import { createMockHarness } from "./mock.js";
|
|
11
10
|
import { runEnsemble } from "./run.js";
|
|
11
|
+
let toolHarnessProvider;
|
|
12
|
+
/**
|
|
13
|
+
* Register the provider that resolves tool-backed harness kinds. The fusionkit
|
|
14
|
+
* CLI wires this at startup from its tool registry.
|
|
15
|
+
*/
|
|
16
|
+
export function setToolHarnessProvider(provider) {
|
|
17
|
+
toolHarnessProvider = provider;
|
|
18
|
+
}
|
|
19
|
+
function requireToolHarnessProvider(kind) {
|
|
20
|
+
if (toolHarnessProvider === undefined) {
|
|
21
|
+
throw new Error(`no tool harness provider registered for harness kind "${kind}"; ` +
|
|
22
|
+
"the fusionkit CLI wires this via setToolHarnessProvider (build the tool registry first).");
|
|
23
|
+
}
|
|
24
|
+
return toolHarnessProvider;
|
|
25
|
+
}
|
|
26
|
+
function resolveToolAdapter(kind, options) {
|
|
27
|
+
return requireToolHarnessProvider(kind).adapter(kind, {
|
|
28
|
+
fusionBackendUrl: normalizeFusionBackendUrl(options.fusionBackendUrl),
|
|
29
|
+
...(options.fusionApiKey !== undefined ? { fusionApiKey: options.fusionApiKey } : {}),
|
|
30
|
+
...(options.timeoutMs !== undefined ? { timeoutMs: options.timeoutMs } : {})
|
|
31
|
+
});
|
|
32
|
+
}
|
|
12
33
|
function normalizeFusionBackendUrl(value) {
|
|
13
34
|
return value.replace(/\/+$/, "");
|
|
14
35
|
}
|
|
@@ -30,7 +51,7 @@ function sideEffectsForHarness(kind) {
|
|
|
30
51
|
case "claude-code":
|
|
31
52
|
case "cursor-acp":
|
|
32
53
|
case "cursor-desktop":
|
|
33
|
-
return
|
|
54
|
+
return requireToolHarnessProvider(kind).sideEffects(kind);
|
|
34
55
|
default: {
|
|
35
56
|
const exhausted = kind;
|
|
36
57
|
throw new Error(`unsupported unified harness: ${String(exhausted)}`);
|
|
@@ -72,19 +93,10 @@ function harnessAdapter(kind, options) {
|
|
|
72
93
|
});
|
|
73
94
|
}
|
|
74
95
|
case "codex":
|
|
75
|
-
return codexHarness({
|
|
76
|
-
timeoutMs: options.timeoutMs,
|
|
77
|
-
provider: {
|
|
78
|
-
kind: "openai-compatible",
|
|
79
|
-
baseUrl: normalizeFusionBackendUrl(options.fusionBackendUrl),
|
|
80
|
-
...(options.fusionApiKey ? { apiKey: options.fusionApiKey } : {})
|
|
81
|
-
}
|
|
82
|
-
});
|
|
83
96
|
case "claude-code":
|
|
84
|
-
return claudeCodeHarness({ timeoutMs: options.timeoutMs });
|
|
85
97
|
case "cursor-acp":
|
|
86
98
|
case "cursor-desktop":
|
|
87
|
-
|
|
99
|
+
return resolveToolAdapter(kind, options);
|
|
88
100
|
default: {
|
|
89
101
|
const exhausted = kind;
|
|
90
102
|
throw new Error(`unsupported unified harness: ${String(exhausted)}`);
|
|
@@ -100,12 +112,10 @@ function responseShapeFor(kind) {
|
|
|
100
112
|
return ("Respond to the user in the natural shape the request calls for: a direct answer, " +
|
|
101
113
|
"a plan, or the concrete code change. Reply in first person as the assistant.");
|
|
102
114
|
case "codex":
|
|
103
|
-
return "Return a Codex-style result summary with patch and verification evidence.";
|
|
104
115
|
case "claude-code":
|
|
105
|
-
return "Return a Claude Code-style transcript summary with patch/worktree evidence.";
|
|
106
116
|
case "cursor-acp":
|
|
107
117
|
case "cursor-desktop":
|
|
108
|
-
return
|
|
118
|
+
return requireToolHarnessProvider(kind).responseShape(kind);
|
|
109
119
|
default: {
|
|
110
120
|
const exhausted = kind;
|
|
111
121
|
throw new Error(`unsupported unified harness: ${String(exhausted)}`);
|
|
@@ -304,17 +314,10 @@ function statusForResult(result) {
|
|
|
304
314
|
return result.failureSummary ? "failed" : result.harnessRunResult.status;
|
|
305
315
|
}
|
|
306
316
|
async function defaultCursorRunner(input) {
|
|
307
|
-
|
|
308
|
-
return {
|
|
309
|
-
status: "skipped",
|
|
310
|
-
message: "Cursorkit directory not configured",
|
|
311
|
-
details: { reason: "cursor_kit_dir_missing" }
|
|
312
|
-
};
|
|
313
|
-
}
|
|
317
|
+
const { harnessCli } = resolveCursorkitCli();
|
|
314
318
|
const suite = input.kind === "cursor-acp" ? "acp" : "desktop-route";
|
|
315
319
|
const args = [
|
|
316
|
-
|
|
317
|
-
"--",
|
|
320
|
+
harnessCli,
|
|
318
321
|
"--suite",
|
|
319
322
|
suite,
|
|
320
323
|
"--base-url",
|
|
@@ -328,8 +331,8 @@ async function defaultCursorRunner(input) {
|
|
|
328
331
|
];
|
|
329
332
|
mkdirSync(input.outDir, { recursive: true });
|
|
330
333
|
return await new Promise((resolveResult) => {
|
|
331
|
-
const child = spawn(
|
|
332
|
-
cwd: input.
|
|
334
|
+
const child = spawn(process.execPath, args, {
|
|
335
|
+
cwd: input.outDir,
|
|
333
336
|
stdio: ["ignore", "pipe", "pipe"]
|
|
334
337
|
});
|
|
335
338
|
let stdout = "";
|
|
@@ -360,8 +363,7 @@ async function runCursorHarness(kind, options) {
|
|
|
360
363
|
fusionBackendUrl: options.fusionBackendUrl,
|
|
361
364
|
repo: options.repo,
|
|
362
365
|
outDir: join(options.outputRoot, `${kind}-${model.id}`),
|
|
363
|
-
timeoutMs: options.timeoutMs
|
|
364
|
-
cursorKitDir: options.cursorKitDir
|
|
366
|
+
timeoutMs: options.timeoutMs
|
|
365
367
|
})));
|
|
366
368
|
const failed = perModel.find((result) => result.status === "failed");
|
|
367
369
|
const skipped = perModel.every((result) => result.status === "skipped");
|
|
@@ -385,7 +387,10 @@ export async function runUnifiedHarnessE2E(options) {
|
|
|
385
387
|
mkdirSync(outputRoot, { recursive: true });
|
|
386
388
|
const results = [];
|
|
387
389
|
for (const kind of options.harnesses) {
|
|
388
|
-
if (kind === "cursor-acp" || kind === "cursor-desktop")
|
|
390
|
+
if ((kind === "cursor-acp" || kind === "cursor-desktop") &&
|
|
391
|
+
options.cursorRunner !== undefined) {
|
|
392
|
+
// Explicit probe runner: drive the Cursorkit harness suite and record a
|
|
393
|
+
// route/transcript probe instead of producing real ensemble candidates.
|
|
389
394
|
results.push(await runCursorHarness(kind, options));
|
|
390
395
|
continue;
|
|
391
396
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fusionkit/ensemble",
|
|
3
3
|
"private": false,
|
|
4
|
-
"version": "0.1.4",
|
|
4
|
+
"version": "0.1.6",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
7
7
|
"url": "git+https://github.com/velum-labs/handoffkit.git",
|
|
@@ -25,11 +25,12 @@
|
|
|
25
25
|
"provenance": true
|
|
26
26
|
},
|
|
27
27
|
"dependencies": {
|
|
28
|
-
"@
|
|
29
|
-
"@fusionkit/
|
|
30
|
-
"@fusionkit/
|
|
31
|
-
"@fusionkit/
|
|
32
|
-
"@fusionkit/session-harness": "0.1.
|
|
33
|
-
"@fusionkit/
|
|
28
|
+
"@velum-labs/cursorkit": "0.1.2",
|
|
29
|
+
"@fusionkit/adapter-ai-sdk": "0.1.6",
|
|
30
|
+
"@fusionkit/model-gateway": "0.1.6",
|
|
31
|
+
"@fusionkit/protocol": "0.1.6",
|
|
32
|
+
"@fusionkit/session-harness": "0.1.6",
|
|
33
|
+
"@fusionkit/runner": "0.1.6",
|
|
34
|
+
"@fusionkit/workspace": "0.1.6"
|
|
34
35
|
}
|
|
35
36
|
}
|
package/dist/claude-code.d.ts
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import type { NetworkPolicy } from "@fusionkit/protocol";
|
|
2
|
-
import type { SessionBackend } from "@fusionkit/runner";
|
|
3
|
-
import type { ClaudeCodeBindingOptions } from "@fusionkit/session-harness";
|
|
4
|
-
import type { HarnessAdapter } from "./harness.js";
|
|
5
|
-
export type ClaudeCodeHarnessEnv = Record<string, string | undefined>;
|
|
6
|
-
export type ClaudeCodeHarnessOptions = ClaudeCodeBindingOptions & {
|
|
7
|
-
id?: string;
|
|
8
|
-
/** Defaults to `process.env`; tests can pass `{}` for deterministic skips. */
|
|
9
|
-
env?: ClaudeCodeHarnessEnv;
|
|
10
|
-
/** Already-released secret values forwarded through the session backend seam. */
|
|
11
|
-
secrets?: {
|
|
12
|
-
name: string;
|
|
13
|
-
value: string;
|
|
14
|
-
}[];
|
|
15
|
-
/** Test/extension seam. Defaults to `aiSdkHarnessBackend(...)`. */
|
|
16
|
-
backend?: SessionBackend;
|
|
17
|
-
pool?: string;
|
|
18
|
-
network?: NetworkPolicy;
|
|
19
|
-
timeoutMs?: number;
|
|
20
|
-
logMaxBytes?: number;
|
|
21
|
-
skipWhenUnavailable?: boolean;
|
|
22
|
-
};
|
|
23
|
-
export declare function claudeCodeHarnessCredentialSkipReason(env?: ClaudeCodeHarnessEnv, options?: ClaudeCodeHarnessOptions): string | undefined;
|
|
24
|
-
export declare function createClaudeCodeHarness(options?: ClaudeCodeHarnessOptions): HarnessAdapter;
|
|
25
|
-
export declare function claudeCodeHarness(options?: ClaudeCodeHarnessOptions): HarnessAdapter;
|