@fusionkit/ensemble 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/dist/agent.d.ts +21 -0
  2. package/dist/agent.js +186 -0
  3. package/dist/artifacts.d.ts +21 -0
  4. package/dist/artifacts.js +36 -0
  5. package/dist/claude-code.d.ts +25 -0
  6. package/dist/claude-code.js +398 -0
  7. package/dist/codex.d.ts +69 -0
  8. package/dist/codex.js +467 -0
  9. package/dist/command.d.ts +15 -0
  10. package/dist/command.js +82 -0
  11. package/dist/dashboard.d.ts +62 -0
  12. package/dist/dashboard.js +788 -0
  13. package/dist/external-executor.d.ts +56 -0
  14. package/dist/external-executor.js +288 -0
  15. package/dist/harness.d.ts +337 -0
  16. package/dist/harness.js +1 -0
  17. package/dist/index.d.ts +30 -0
  18. package/dist/index.js +15 -0
  19. package/dist/isolation.d.ts +25 -0
  20. package/dist/isolation.js +509 -0
  21. package/dist/judge.d.ts +77 -0
  22. package/dist/judge.js +16 -0
  23. package/dist/mock.d.ts +20 -0
  24. package/dist/mock.js +56 -0
  25. package/dist/run.d.ts +5 -0
  26. package/dist/run.js +520 -0
  27. package/dist/synthesis.d.ts +25 -0
  28. package/dist/synthesis.js +221 -0
  29. package/dist/test/codex.test.d.ts +1 -0
  30. package/dist/test/codex.test.js +237 -0
  31. package/dist/test/dashboard.test.d.ts +1 -0
  32. package/dist/test/dashboard.test.js +214 -0
  33. package/dist/test/ensemble.test.d.ts +1 -0
  34. package/dist/test/ensemble.test.js +780 -0
  35. package/dist/test/external-executor.test.d.ts +1 -0
  36. package/dist/test/external-executor.test.js +273 -0
  37. package/dist/test/isolation.test.d.ts +1 -0
  38. package/dist/test/isolation.test.js +359 -0
  39. package/dist/test/tool-executor.test.d.ts +1 -0
  40. package/dist/test/tool-executor.test.js +113 -0
  41. package/dist/test/unified.test.d.ts +1 -0
  42. package/dist/test/unified.test.js +150 -0
  43. package/dist/tool-executor.d.ts +14 -0
  44. package/dist/tool-executor.js +156 -0
  45. package/dist/trace.d.ts +8 -0
  46. package/dist/trace.js +7 -0
  47. package/dist/unified.d.ts +101 -0
  48. package/dist/unified.js +422 -0
  49. package/dist/worktree.d.ts +25 -0
  50. package/dist/worktree.js +75 -0
  51. package/package.json +35 -0
@@ -0,0 +1,21 @@
1
+ import type { HarnessAdapter } from "./harness.js";
2
/** Configuration accepted by `createAgentHarness`. */
export type AgentHarnessOptions = {
    /** Harness identifier; the implementation uses "agent" when omitted. */
    id?: string;
    /** OpenAI-compatible base URL for each candidate, keyed by `EnsembleModel.id`. */
    modelEndpoints: Record<string, string>;
    /** Base URL used for any model that has no entry in `modelEndpoints`. */
    fallbackBaseUrl?: string;
    /** API key forwarded to the agent run when provided. */
    apiKey?: string;
    /** Step limit forwarded to the agent run when provided. */
    maxSteps?: number;
    /** Timeout (ms) applied to each shell command issued through the `run` tool. */
    timeoutMs?: number;
    /** Wall-clock budget (ms) for one model's entire agent run. */
    modelTimeoutMs?: number;
    /** Observability correlation id; candidates are traced only when this is set. */
    traceId?: string;
    /** Session root span under which candidate spans are parented. */
    parentSpanId?: string;
    /** Index of the user turn this panel run belongs to; stamped on candidate events. */
    turn?: number;
};
/** Creates the uniform panel-agent harness adapter from the given options. */
export declare function createAgentHarness(options: AgentHarnessOptions): HarnessAdapter;
package/dist/agent.js ADDED
@@ -0,0 +1,186 @@
1
+ import { runWorktreeAgent } from "@fusionkit/adapter-ai-sdk";
2
+ import { artifactHash, emitTrace, newSpanId } from "@fusionkit/protocol";
3
+ /**
4
+ * The uniform panel agent for trajectory-level fusion. Each panel model drives
5
+ * a real AI SDK tool loop (read/list/grep/write/run) over its own git worktree
6
+ * and produces a normalized trajectory. The same agent runs for every model, so
7
+ * trajectories are directly comparable and only the model varies.
8
+ */
9
/** Default ceiling (ms) on one panel model's whole agent run, covering model calls and tool use: ten minutes. */
const DEFAULT_MODEL_TIMEOUT_MS = 10 * 60_000;
11
/**
 * Derives a verification record from the commands an agent ran.
 *
 * Verification is informational rather than a gate: the exit code of the last
 * observed command becomes the trajectory's verification status.
 *
 * @param steps Trajectory steps; only `observation` steps with string text are inspected.
 * @returns `undefined` when no command result was observed, otherwise an object
 *   carrying `status`, `evidence` and `exitCode`.
 */
function deriveVerification(steps) {
    // The `run` tool starts its observation with `exit_code=<n>`. Matching only at
    // the very beginning keeps other tool output that merely mentions the
    // substring from being read as a command result.
    const exitCodePrefix = /^exit_code=(-?\d+)/;
    let exitCode;
    for (const { type, text } of steps) {
        if (type !== "observation" || typeof text !== "string") {
            continue;
        }
        const parsed = exitCodePrefix.exec(text);
        if (parsed !== null) {
            // Later observations overwrite earlier ones: the last command wins.
            exitCode = Number(parsed[1]);
        }
    }
    if (exitCode === undefined) {
        return undefined;
    }
    const status = exitCode === 0 ? "succeeded" : "failed";
    return { status, evidence: [`exit_code=${exitCode}`], exitCode };
}
35
/**
 * Creates the uniform panel-agent harness adapter.
 *
 * Each candidate is driven through `runWorktreeAgent` with the same settings,
 * so only the model varies between trajectories. A run resolves the model's
 * endpoint, optionally emits trace events, executes the agent under a
 * wall-clock abort signal and packages the outcome as a candidate result.
 *
 * @param options Harness configuration; `modelEndpoints` maps model ids to base URLs.
 * @returns An adapter object exposing `prepare`, `capabilities`,
 *   `verificationProfile`, `run` and `collectArtifacts`.
 */
export function createAgentHarness(options) {
    const id = options.id ?? "agent";
    /** Runs a single panel model; rejects when no endpoint is configured for it. */
    const runCandidate = async ({ descriptor, model, ordinal, worktree }) => {
        const baseUrl = options.modelEndpoints[model.id] ?? options.fallbackBaseUrl;
        if (baseUrl === undefined) {
            throw new Error(`no model endpoint configured for panel model "${model.id}"`);
        }
        // Without a dedicated worktree the agent works in the current directory.
        const root = worktree?.path ?? process.cwd();
        const candidateId = `${descriptor.id}_${model.id}_${ordinal}`;
        const executionId = `exec_${candidateId}`;
        const planId = `plan_${candidateId}`;
        const { traceId } = options;
        const tracing = traceId !== undefined;
        const candidateSpan = newSpanId();
        // Optional fields are merged in only when set, so payloads never carry
        // explicit `undefined` keys.
        const turnField = options.turn !== undefined ? { turn: options.turn } : {};
        const emitCandidateEvent = (eventType, payload, spanExtras = {}) => emitTrace({
            component: "panel-model",
            event_type: eventType,
            traceId,
            spanId: candidateSpan,
            ...spanExtras,
            candidateId,
            modelId: model.id,
            payload
        });
        if (tracing) {
            const worktreeFields = worktree
                ? { branch_name: worktree.branchName, worktree_path: worktree.path }
                : {};
            const parentField = options.parentSpanId !== undefined ? { parentSpanId: options.parentSpanId } : {};
            emitCandidateEvent("harness.candidate.started", { model: model.model, ...turnField, ...worktreeFields }, parentField);
        }
        // The per-command timeout only bounds `run` tool calls; this signal bounds
        // the whole agent run so a hung model HTTP call cannot wedge a candidate.
        const modelTimeoutMs = options.modelTimeoutMs ?? DEFAULT_MODEL_TIMEOUT_MS;
        const agentRequest = {
            worktree: root,
            prompt: descriptor.prompt,
            baseUrl,
            model: model.model,
            abortSignal: AbortSignal.timeout(modelTimeoutMs)
        };
        Object.assign(agentRequest, turnField);
        if (options.apiKey !== undefined) {
            agentRequest.apiKey = options.apiKey;
        }
        if (options.maxSteps !== undefined) {
            agentRequest.maxSteps = options.maxSteps;
        }
        if (options.timeoutMs !== undefined) {
            agentRequest.commandTimeoutMs = options.timeoutMs;
        }
        if (tracing) {
            Object.assign(agentRequest, { traceId, candidateId, parentSpanId: candidateSpan });
        }
        const result = await runWorktreeAgent(agentRequest);
        const steps = result.steps;
        // Anything other than an explicit "failed" counts as success.
        const status = result.status === "failed" ? "failed" : "succeeded";
        const verification = deriveVerification(steps);
        const hasVerification = verification !== undefined;
        const trajectory = {
            trajectoryId: candidateId,
            modelId: model.id,
            model: model.model,
            candidateId,
            harnessKind: "generic",
            status,
            steps,
            finalOutput: result.finalOutput,
            ...(hasVerification ? { verification } : {})
        };
        const transcript = JSON.stringify(steps, null, 2);
        const outputHash = artifactHash(transcript);
        if (tracing) {
            emitCandidateEvent("harness.candidate.finished", {
                status,
                ...turnField,
                tool_call_count: result.toolCallCount,
                finish_reason: result.finishReason,
                step_count: steps.length,
                final_output_preview: result.finalOutput.slice(0, 400),
                ...(hasVerification ? { verification_status: verification.status } : {})
            });
            emitCandidateEvent("tool.execution", {
                execution_id: executionId,
                plan_id: planId,
                status,
                ...turnField,
                output_hash: outputHash,
                tool_call_count: result.toolCallCount
            });
        }
        // With no observed command result, fall back to evidence derived from the
        // final output itself.
        let verificationRecord;
        if (hasVerification) {
            verificationRecord = {
                status: verification.status,
                evidence: verification.evidence,
                ...(verification.exitCode !== undefined ? { exitCode: verification.exitCode } : {})
            };
        }
        else {
            verificationRecord = {
                status,
                evidence: [`final_output_chars=${result.finalOutput.length}`, outputHash]
            };
        }
        const worktreeResultFields = worktree
            ? { branchName: worktree.branchName, worktreePath: worktree.path }
            : {};
        return {
            candidateId,
            model,
            status,
            ...worktreeResultFields,
            transcript,
            trajectory,
            diff: "",
            summary: result.finalOutput.slice(0, 280),
            artifacts: [
                {
                    artifact_id: `artifact_${descriptor.id}_${model.id}_agent_trajectory`,
                    kind: "transcript",
                    hash: outputHash,
                    redaction_status: "synthetic"
                }
            ],
            toolRecords: [
                {
                    execution_id: executionId,
                    plan_id: planId,
                    status,
                    output_hash: outputHash
                }
            ],
            verification: verificationRecord,
            metadata: {
                adapter: "agent",
                model_id: model.id,
                tool_call_count: result.toolCallCount,
                finish_reason: result.finishReason
            }
        };
    };
    return {
        id,
        harnessKind: "generic",
        prepare: () => ({ id, timeoutMs: options.timeoutMs }),
        capabilities: () => ({
            shell_command: "supported",
            artifact_capture: "supported",
            verification: "supported",
            tool_call_loop: "supported"
        }),
        verificationProfile: () => ({
            id: `${id}-verification`,
            requiredEvidence: ["agent trajectory", "final output"]
        }),
        run: runCandidate,
        collectArtifacts: () => []
    };
}
@@ -0,0 +1,21 @@
1
+ import type { ModelFusionArtifactKind } from "@fusionkit/protocol";
2
+ import type { HarnessArtifact } from "./harness.js";
3
/** File-backed store that persists artifacts beneath a single root directory. */
export type ArtifactStore = {
    /** Resolved directory that receives every written artifact. */
    root: string;
    /**
     * Writes text content to a file under `root` and returns its artifact
     * record together with the path that was written.
     */
    writeText(input: {
        artifactId: string;
        kind: ModelFusionArtifactKind;
        content: string;
        /** File-name suffix; the implementation uses ".txt" when omitted. */
        suffix?: string;
    }): HarnessArtifact & { path: string };
    /**
     * Serializes `value` as JSON, writes it under `root`, and returns its
     * artifact record together with the path that was written.
     */
    writeJson(input: {
        artifactId: string;
        kind: ModelFusionArtifactKind;
        value: unknown;
    }): HarnessArtifact & { path: string };
};
/** Creates an artifact store rooted at `root`. */
export declare function createArtifactStore(root: string): ArtifactStore;
@@ -0,0 +1,36 @@
1
+ import { mkdirSync, writeFileSync } from "node:fs";
2
+ import { join, resolve } from "node:path";
3
+ import { pathToFileURL } from "node:url";
4
+ import { artifactHash } from "@fusionkit/protocol";
5
/**
 * Converts an arbitrary identifier into a portable file-name fragment.
 *
 * Every character outside `A-Z a-z 0-9 _ . -` is replaced with `_`. The colon
 * is deliberately not allowed: it cannot appear in Windows file names, where
 * NTFS reads it as an alternate-data-stream separator.
 *
 * @param value Identifier to sanitize.
 * @returns The identifier with every disallowed character replaced by `_`.
 */
function safeFileName(value) {
    return value.replace(/[^A-Za-z0-9_.-]/g, "_");
}
8
/**
 * Creates a file-backed artifact store rooted at `root`.
 *
 * The root directory is created (recursively) if it does not exist. Each
 * artifact is written to `<sanitized artifactId>-<hash><suffix>` inside it.
 *
 * `writeText` and `writeJson` are closures rather than `this`-dependent
 * methods, so they keep working when destructured from the store or passed
 * around as callbacks.
 *
 * @param root Directory, resolved against the current working directory, that receives the artifacts.
 * @returns The store, exposing `root`, `writeText` and `writeJson`.
 */
export function createArtifactStore(root) {
    const resolvedRoot = resolve(root);
    mkdirSync(resolvedRoot, { recursive: true });
    /** Writes `input.content` to disk and returns the artifact record plus its path. */
    const writeText = (input) => {
        const hash = artifactHash(input.content);
        // Drop the "sha256:" prefix so the hash can be embedded in a file name.
        const hashPart = hash.replace("sha256:", "");
        const path = join(resolvedRoot, `${safeFileName(input.artifactId)}-${hashPart}${input.suffix ?? ".txt"}`);
        writeFileSync(path, input.content);
        return {
            artifact_id: input.artifactId,
            kind: input.kind,
            hash,
            uri: pathToFileURL(path).toString(),
            redaction_status: "synthetic",
            path
        };
    };
    /** Serializes `input.value` as indented JSON with a trailing newline and stores it as `.json`. */
    const writeJson = (input) => writeText({
        artifactId: input.artifactId,
        kind: input.kind,
        content: JSON.stringify(input.value, null, 2) + "\n",
        suffix: ".json"
    });
    return {
        root: resolvedRoot,
        writeText,
        writeJson
    };
}
@@ -0,0 +1,25 @@
1
+ import type { NetworkPolicy } from "@fusionkit/protocol";
2
+ import type { SessionBackend } from "@fusionkit/runner";
3
+ import type { ClaudeCodeBindingOptions } from "@fusionkit/session-harness";
4
+ import type { HarnessAdapter } from "./harness.js";
5
/** Environment map consulted for credentials; a variable may be absent or `undefined`. */
export type ClaudeCodeHarnessEnv = Record<string, string | undefined>;
/** Options for the Claude Code harness, layered on the session-harness binding options. */
export type ClaudeCodeHarnessOptions = ClaudeCodeBindingOptions & {
    /** Harness id; the implementation uses "claude-code" when omitted. */
    id?: string;
    /** Environment to read; defaults to `process.env`. Tests can pass `{}` for deterministic skips. */
    env?: ClaudeCodeHarnessEnv;
    /** Secret values that were already released, forwarded through the session backend seam. */
    secrets?: Array<{
        name: string;
        value: string;
    }>;
    /** Test/extension seam; defaults to `aiSdkHarnessBackend(...)`. */
    backend?: SessionBackend;
    /** Runner pool named in the generated contract. */
    pool?: string;
    /** Network policy that overrides the descriptor's policy and the built-in default. */
    network?: NetworkPolicy;
    /** Execution timeout in milliseconds. */
    timeoutMs?: number;
    /** Upper bound, in bytes, on the captured log. */
    logMaxBytes?: number;
    /** When credentials are missing: skip the candidate (the default) or throw from `prepare`. */
    skipWhenUnavailable?: boolean;
};
/** Returns the reason the harness would be skipped, or `undefined` when credentials are available. */
export declare function claudeCodeHarnessCredentialSkipReason(env?: ClaudeCodeHarnessEnv, options?: ClaudeCodeHarnessOptions): string | undefined;
/** Creates the Claude Code harness adapter. */
export declare function createClaudeCodeHarness(options?: ClaudeCodeHarnessOptions): HarnessAdapter;
/** Alias of `createClaudeCodeHarness`. */
export declare function claudeCodeHarness(options?: ClaudeCodeHarnessOptions): HarnessAdapter;
@@ -0,0 +1,398 @@
1
+ import { artifactHash } from "@fusionkit/protocol";
2
+ import { CapabilityMismatchError, prepareExecution } from "@fusionkit/runner";
3
+ import { aiSdkHarnessBackend } from "@fusionkit/session-harness";
4
/** All-zero 64-character placeholder used for the contract's `bundleHash` and `policyHash`. */
const ZERO_HASH = "".padEnd(64, "0");
/** All-zero 40-character placeholder used as `baseRef` when no base git SHA is known. */
const ZERO_GIT_SHA = "".padEnd(40, "0");
/** Runner pool used when neither the options nor the descriptor name one. */
const DEFAULT_POOL = "ensemble";
/** Runtime reported when neither the options nor the descriptor specify one. */
const DEFAULT_RUNTIME = "node24";
/** Fallback execution timeout: ten minutes, in milliseconds. */
const DEFAULT_TIMEOUT_MS = 10 * 60_000;
/** Fallback cap on captured log size: 256 KiB. */
const DEFAULT_LOG_MAX_BYTES = 1024 * 256;
/** Deny-by-default network policy used when no other policy is supplied. */
const DEFAULT_CLAUDE_NETWORK = {
    defaultDeny: true,
    allowHosts: ["registry.npmjs.org", "api.anthropic.com", "ai-gateway.vercel.sh"]
};
/** Environment variables copied into the execution's auth environment when set. */
const AUTH_ENV_NAMES = [
    "AI_GATEWAY_API_KEY",
    "AI_GATEWAY_BASE_URL",
    "ANTHROPIC_API_KEY",
    "ANTHROPIC_AUTH_TOKEN",
    "ANTHROPIC_BASE_URL"
];
21
/**
 * Builds the candidate id for a run input.
 *
 * @param input Run input carrying `descriptor.id`, `model.id` and `ordinal`.
 * @returns The three values joined with underscores.
 */
function candidateId(input) {
    const { descriptor, model, ordinal } = input;
    return `${descriptor.id}_${model.id}_${ordinal}`;
}
24
/**
 * Looks up an environment variable, treating an empty value as unset.
 *
 * @param env Environment map to read from.
 * @param name Variable name.
 * @returns The value when it is non-empty, otherwise `undefined`.
 */
function envValue(env, name) {
    const raw = env[name];
    if (!raw) {
        return undefined;
    }
    return raw.length > 0 ? raw : undefined;
}
28
/**
 * Collects the authentication-related variables that are set in `env`.
 *
 * @param env Environment map to read from.
 * @returns An object holding each name from `AUTH_ENV_NAMES` that has a non-empty value.
 */
function authEnvFrom(env) {
    const present = AUTH_ENV_NAMES
        .map((name) => [name, envValue(env, name)])
        .filter(([, value]) => value !== undefined);
    return Object.fromEntries(present);
}
37
/**
 * Decides whether the Claude Code harness has the credentials it needs.
 *
 * Two things are required: a sandbox credential (an injected backend, a
 * sandbox-provider factory, an explicit token, or `VERCEL_TOKEN`) and a
 * provider credential (any one of `AI_GATEWAY_API_KEY`, `ANTHROPIC_API_KEY`
 * or `ANTHROPIC_AUTH_TOKEN`).
 *
 * @param env Environment map to read from.
 * @param options Harness options.
 * @returns `{ available: true, authEnv }` when both are present, otherwise
 *   `{ available: false, missing, reason }`.
 */
function credentialGate(env, options) {
    const providerSet = ["AI_GATEWAY_API_KEY", "ANTHROPIC_API_KEY", "ANTHROPIC_AUTH_TOKEN"]
        .some((name) => envValue(env, name) !== undefined);
    const sandboxSet = [options.backend, options.createSandboxProvider, options.token, envValue(env, "VERCEL_TOKEN")]
        .some((candidate) => candidate !== undefined);
    const missing = [];
    if (!sandboxSet) {
        missing.push("VERCEL_TOKEN");
    }
    if (!providerSet) {
        missing.push("ANTHROPIC_API_KEY|ANTHROPIC_AUTH_TOKEN|AI_GATEWAY_API_KEY");
    }
    if (missing.length === 0) {
        return { available: true, authEnv: authEnvFrom(env) };
    }
    const reason = "Claude Code harness skipped: missing Claude Code credential/env; set VERCEL_TOKEN and one of " +
        "ANTHROPIC_API_KEY, ANTHROPIC_AUTH_TOKEN, or AI_GATEWAY_API_KEY.";
    return { available: false, missing, reason };
}
61
/**
 * Reports why the Claude Code harness would be skipped.
 *
 * @param env Environment map; defaults to `process.env`.
 * @param options Harness options; defaults to none.
 * @returns The skip reason, or `undefined` when the credential gate passes.
 */
export function claudeCodeHarnessCredentialSkipReason(env = process.env, options = {}) {
    const gate = credentialGate(env, options);
    if (gate.available) {
        return undefined;
    }
    return gate.reason;
}
65
/**
 * Resolves the session backend for a harness.
 *
 * An injected `options.backend` is returned as-is. Otherwise a backend is
 * built with `aiSdkHarnessBackend`, with the token, team and project falling
 * back to `VERCEL_TOKEN`, `VERCEL_TEAM_ID` and `VERCEL_PROJECT_ID`.
 *
 * @param options Harness options.
 * @param env Environment map used for the Vercel fallbacks.
 * @returns The session backend to execute against.
 */
function backendFor(options, env) {
    if (options.backend != null) {
        return options.backend;
    }
    const config = {};
    // Optional settings are copied only when set so the backend sees no
    // explicit `undefined` for them. Insertion order matches the option order.
    const copyIfSet = (key) => {
        if (options[key] !== undefined) {
            config[key] = options[key];
        }
    };
    copyIfSet("runtime");
    copyIfSet("bridgePort");
    config.token = options.token ?? envValue(env, "VERCEL_TOKEN");
    config.teamId = options.teamId ?? envValue(env, "VERCEL_TEAM_ID");
    config.projectId = options.projectId ?? envValue(env, "VERCEL_PROJECT_ID");
    copyIfSet("model");
    copyIfSet("maxTurns");
    copyIfSet("thinking");
    copyIfSet("startupTimeoutMs");
    copyIfSet("createHarness");
    copyIfSet("createSandboxProvider");
    return aiSdkHarnessBackend(config);
}
85
/**
 * Builds the unsigned `warrant.contract.v1` contract for one candidate.
 *
 * The timeout comes from the options, then the descriptor policy, then
 * `DEFAULT_TIMEOUT_MS`. It drives `budget.maxDurationMin`,
 * `execution.timeoutMs` and `expiresAt`.
 *
 * @param input Object with `descriptor`, `candidateId`, `options`, `gate` and an optional `repoBaseSha`.
 * @returns The contract object with an empty `signatures` list.
 */
function contractFor(input) {
    const { descriptor, options } = input;
    const timeoutMs = options.timeoutMs ?? descriptor.policy.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    // An explicit policy wins; a descriptor policy is copied; otherwise the
    // built-in default is used.
    const resolveNetwork = () => {
        if (options.network != null) {
            return options.network;
        }
        const declared = descriptor.runtime.isolation?.networkPolicy;
        if (!declared) {
            return DEFAULT_CLAUDE_NETWORK;
        }
        return { defaultDeny: declared.defaultDeny, allowHosts: [...declared.allowHosts] };
    };
    const resolveBudget = () => {
        const budget = {};
        if (descriptor.policy.budgetUsd !== undefined) {
            budget.maxSpendUsd = descriptor.policy.budgetUsd;
        }
        budget.maxDurationMin = Math.ceil(timeoutMs / 60_000);
        return budget;
    };
    // Only secret names go into the contract, never the values.
    const describeSecret = (secret) => ({ name: secret.name, scope: "ensemble" });
    return {
        version: "warrant.contract.v1",
        runId: `ensemble_${input.candidateId}`,
        issuedAt: new Date().toISOString(),
        issuer: { keyId: "ensemble-claude-code", role: "plane" },
        requestedBy: { kind: "service", id: "handoffkit-ensemble" },
        agent: { kind: "claude-code" },
        task: { prompt: descriptor.prompt },
        runner: {
            pool: options.pool ?? descriptor.runtime.environmentId ?? descriptor.runtime.id ?? DEFAULT_POOL
        },
        workspace: {
            version: "warrant.manifest.v1",
            // `||` rather than `??` so an empty SHA string also falls back to zeros.
            baseRef: (input.repoBaseSha ?? descriptor.baseGitSha) || ZERO_GIT_SHA,
            bundleHash: ZERO_HASH,
            untrackedFiles: [],
            deniedPatterns: [],
            deniedPaths: []
        },
        policyHash: ZERO_HASH,
        secrets: options.secrets?.map(describeSecret) ?? [],
        network: resolveNetwork(),
        budget: resolveBudget(),
        disclosure: "minimal-context",
        isolation: "vercel-sandbox",
        execution: {
            kind: "agent",
            agent: { kind: "claude-code" },
            prompt: descriptor.prompt,
            timeoutMs,
            env: { vars: input.gate.authEnv, egressProxy: false },
            log: {
                stdout: "capture",
                stderr: "merge",
                maxBytes: options.logMaxBytes ?? DEFAULT_LOG_MAX_BYTES
            }
        },
        expiresAt: new Date(Date.now() + timeoutMs).toISOString(),
        signatures: []
    };
}
142
/**
 * Builds the hardening record stored in a candidate's metadata.
 *
 * `input.finished` tells whether the backend execution completed. It selects
 * the reported `actual_isolation`, whether the network policy is marked
 * enforced, and the cleanup status.
 *
 * @param input Object with `descriptor`, `options`, `repoDir`, `authEnvNames` and `finished`.
 * @returns The hardening record (isolation, runtime, mount, network, cleanup, secret absence).
 */
function hardeningFor(input) {
    const { descriptor, options, finished } = input;
    const isolation = descriptor.runtime.isolation;
    const mountPolicy = isolation?.mountPolicy;
    const secretPolicy = isolation?.secretPolicy;
    const networkPolicy = options.network ?? isolation?.networkPolicy ?? DEFAULT_CLAUDE_NETWORK;
    // The descriptor's runtime is used only when it describes a microvm.
    const descriptorRuntime = isolation?.kind === "microvm" ? isolation.runtime : undefined;
    const secretNames = secretPolicy?.secretNames ?? options.secrets?.map((secret) => secret.name) ?? [];
    const cleanup = finished
        ? { attempted: true, succeeded: true, status: "succeeded" }
        : { attempted: false, succeeded: true, status: "not_required" };
    return {
        requested_isolation: "microvm",
        actual_isolation: finished ? "vercel-sandbox" : "process",
        runtime: {
            provider: "vercel-sandbox",
            runtime: options.runtime ?? descriptorRuntime ?? DEFAULT_RUNTIME,
            workdir: mountPolicy?.workdir ?? input.repoDir
        },
        mount_policy: {
            worktree_writable: mountPolicy?.worktreeWritable ?? true,
            read_only_caches: [...(mountPolicy?.readOnlyCachePaths ?? [])],
            ignored_dirs: [...(mountPolicy?.ignoredDirs ?? [".git", "node_modules", ".warrant"])]
        },
        network_policy: {
            default_deny: networkPolicy.defaultDeny,
            allow_hosts: [...networkPolicy.allowHosts],
            enforced: finished
        },
        cleanup,
        secret_absence: {
            secret_names: [...secretNames],
            secret_value_hashes: [...(secretPolicy?.secretValueHashes ?? [])],
            injected_env_names: [...(secretPolicy?.injectedEnvNames ?? input.authEnvNames)],
            // No leak scan is run here, so these fields are recorded as not scanned.
            scanned: false,
            leaks_found: false,
            scan_scope: [],
            leak_count: 0
        }
    };
}
186
/**
 * Builds the candidate result for a skipped Claude Code run.
 *
 * The result has status "skipped", a non-retryable `capability_missing`
 * error, and a hardening record marked as not finished.
 *
 * @param input Object with `runInput`, `reason` (human-readable skip reason),
 *   `missing` (names of the missing credentials/capabilities) and `options`.
 * @returns The skipped candidate result.
 */
function skippedOutput(input) {
    const { runInput, reason, options } = input;
    const { descriptor, worktree } = runInput;
    const evidenceHash = artifactHash(reason);
    // Resolve the working directory the same way `run` does (worktree, then the
    // descriptor's workspace, then the source repo), so the hardening record
    // reports the same workdir for skipped and executed candidates.
    const repoDir = worktree?.path ?? descriptor.workspace ?? descriptor.sourceRepo;
    return {
        candidateId: candidateId(runInput),
        model: runInput.model,
        status: "skipped",
        ...(worktree
            ? {
                branchName: worktree.branchName,
                worktreePath: worktree.path
            }
            : {}),
        transcript: reason,
        summary: reason,
        error: {
            kind: "capability_missing",
            message: reason,
            retryable: false
        },
        verification: {
            status: "skipped",
            evidence: [reason, evidenceHash],
            exitCode: 0
        },
        metadata: {
            adapter: "claude-code",
            credential_gate: "skipped",
            missing_credentials: [...input.missing],
            hardening: hardeningFor({
                descriptor,
                options,
                repoDir,
                // Nothing was injected because the run never started.
                authEnvNames: [],
                finished: false
            })
        }
    };
}
225
/**
 * Builds the candidate result for a Claude Code run whose backend threw.
 *
 * The result has status "failed", a retryable `provider_error`, and a
 * hardening record marked as not finished.
 *
 * @param input Object with `runInput`, `error` (the thrown value), `options`
 *   and `authEnvNames` (names of the auth variables that were injected).
 * @returns The failed candidate result.
 */
function failureOutput(input) {
    const { runInput, error, options, authEnvNames } = input;
    const { descriptor, worktree } = runInput;
    // Thrown values are not guaranteed to be Error instances.
    const message = error instanceof Error ? error.message : String(error);
    const errorHash = artifactHash(message);
    // Resolve the working directory the same way `run` does (worktree, then the
    // descriptor's workspace, then the source repo), so the hardening record
    // names the directory the failed execution was actually given.
    const repoDir = worktree?.path ?? descriptor.workspace ?? descriptor.sourceRepo;
    return {
        candidateId: candidateId(runInput),
        model: runInput.model,
        status: "failed",
        ...(worktree
            ? {
                branchName: worktree.branchName,
                worktreePath: worktree.path
            }
            : {}),
        transcript: `Claude Code harness failed: ${message}`,
        error: {
            kind: "provider_error",
            message,
            retryable: true
        },
        verification: {
            status: "failed",
            evidence: [errorHash],
            exitCode: 1
        },
        metadata: {
            adapter: "claude-code",
            credential_gate: "available",
            event_count: 0,
            auth_env_names: [...authEnvNames],
            hardening: hardeningFor({
                descriptor,
                options,
                repoDir,
                authEnvNames,
                finished: false
            })
        }
    };
}
265
/**
 * Creates the Claude Code harness adapter.
 *
 * Credentials are checked by `credentialGate`. When they are missing the
 * candidate is reported as skipped, unless `skipWhenUnavailable` is `false`,
 * in which case `prepare` throws a `CapabilityMismatchError`. When they are
 * present, `run` builds a contract, executes it on the session backend and
 * turns the exit code and captured log into a candidate result.
 *
 * @param options Harness options; `env` defaults to `process.env` and
 *   `skipWhenUnavailable` defaults to `true`.
 * @returns An adapter object exposing `prepare`, `capabilities`,
 *   `verificationProfile`, `run` and `collectArtifacts`.
 */
export function createClaudeCodeHarness(options = {}) {
    const id = options.id ?? "claude-code";
    const env = options.env ?? process.env;
    const skipWhenUnavailable = options.skipWhenUnavailable ?? true;
    return {
        id,
        harnessKind: "claude_code",
        prepare: () => {
            const gate = credentialGate(env, options);
            if (!gate.available) {
                if (skipWhenUnavailable)
                    return { gate };
                throw new CapabilityMismatchError(gate.reason);
            }
            return { gate, backend: backendFor(options, env) };
        },
        capabilities: () => {
            const gate = credentialGate(env, options);
            // Everything except tool records depends on credentials being present.
            const gated = gate.available ? "supported" : "degraded";
            return {
                workspace_read: gated,
                workspace_write: gated,
                apply_patch: gated,
                tool_records: "supported",
                verification: gated,
                microvm_isolation: gated,
                credential_gate: gated
            };
        },
        verificationProfile: () => ({
            id: `${id}-verification`,
            requiredEvidence: ["structured transcript", "exit code", "worktree diff or skip reason"]
        }),
        run: async (runInput) => {
            const state = runInput.prepared;
            if (!state.gate.available) {
                return skippedOutput({
                    runInput,
                    reason: state.gate.reason,
                    missing: state.gate.missing,
                    options
                });
            }
            // Named apart from the harness `id` above to avoid shadowing it.
            const candidate = candidateId(runInput);
            const repoDir = runInput.worktree?.path ?? runInput.descriptor.workspace ?? runInput.descriptor.sourceRepo;
            const backend = state.backend ?? backendFor(options, env);
            const contract = contractFor({
                descriptor: runInput.descriptor,
                candidateId: candidate,
                options,
                gate: state.gate,
                ...(runInput.worktree ? { repoBaseSha: runInput.worktree.baseGitSha } : {})
            });
            const events = [];
            const authEnvNames = Object.keys(state.gate.authEnv);
            try {
                const result = await backend.execute({
                    contract,
                    repoDir,
                    secrets: options.secrets ?? [],
                    execution: prepareExecution({ contract, mockScriptPath: "/tmp/mock-agent.js" }),
                    emit: (event) => {
                        events.push(event);
                    }
                });
                const transcript = result.log.toString("utf8");
                const outputHash = artifactHash(transcript);
                const status = result.exitCode === 0 ? "succeeded" : "failed";
                return {
                    candidateId: candidate,
                    model: runInput.model,
                    status,
                    ...(runInput.worktree
                        ? {
                            branchName: runInput.worktree.branchName,
                            worktreePath: runInput.worktree.path
                        }
                        : {}),
                    transcript,
                    toolRecords: [
                        {
                            execution_id: `exec_${candidate}`,
                            plan_id: `plan_${candidate}`,
                            status,
                            output_hash: outputHash
                        }
                    ],
                    verification: {
                        status,
                        evidence: [`exit_code=${result.exitCode}`, outputHash],
                        exitCode: result.exitCode
                    },
                    ...(status === "failed"
                        ? {
                            error: {
                                kind: "provider_error",
                                message: "Claude Code harness exited non-zero",
                                retryable: true
                            }
                        }
                        : {}),
                    metadata: {
                        adapter: "claude-code",
                        backend_isolation: backend.isolation,
                        credential_gate: "available",
                        event_count: events.length,
                        auth_env_names: authEnvNames,
                        hardening: hardeningFor({
                            descriptor: runInput.descriptor,
                            options,
                            repoDir,
                            authEnvNames,
                            finished: true
                        })
                    }
                };
            }
            catch (error) {
                if (skipWhenUnavailable && error instanceof CapabilityMismatchError) {
                    return skippedOutput({
                        runInput,
                        reason: error.message,
                        missing: ["capability_mismatch"],
                        options
                    });
                }
                const failure = failureOutput({ runInput, error, options, authEnvNames });
                // The backend may have emitted events before it threw; report how
                // many were actually observed rather than a fixed zero.
                return {
                    ...failure,
                    metadata: { ...failure.metadata, event_count: events.length }
                };
            }
        },
        collectArtifacts: () => []
    };
}
396
/**
 * Alias of `createClaudeCodeHarness`.
 *
 * @param options Harness options; defaults to none.
 * @returns The adapter produced by `createClaudeCodeHarness`.
 */
export function claudeCodeHarness(options = {}) {
    const harness = createClaudeCodeHarness(options);
    return harness;
}