@fusionkit/ensemble 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.d.ts +21 -0
- package/dist/agent.js +186 -0
- package/dist/artifacts.d.ts +21 -0
- package/dist/artifacts.js +36 -0
- package/dist/claude-code.d.ts +25 -0
- package/dist/claude-code.js +398 -0
- package/dist/codex.d.ts +69 -0
- package/dist/codex.js +467 -0
- package/dist/command.d.ts +15 -0
- package/dist/command.js +82 -0
- package/dist/dashboard.d.ts +62 -0
- package/dist/dashboard.js +788 -0
- package/dist/external-executor.d.ts +56 -0
- package/dist/external-executor.js +288 -0
- package/dist/harness.d.ts +337 -0
- package/dist/harness.js +1 -0
- package/dist/index.d.ts +30 -0
- package/dist/index.js +15 -0
- package/dist/isolation.d.ts +25 -0
- package/dist/isolation.js +509 -0
- package/dist/judge.d.ts +77 -0
- package/dist/judge.js +16 -0
- package/dist/mock.d.ts +20 -0
- package/dist/mock.js +56 -0
- package/dist/run.d.ts +5 -0
- package/dist/run.js +520 -0
- package/dist/synthesis.d.ts +25 -0
- package/dist/synthesis.js +221 -0
- package/dist/test/codex.test.d.ts +1 -0
- package/dist/test/codex.test.js +237 -0
- package/dist/test/dashboard.test.d.ts +1 -0
- package/dist/test/dashboard.test.js +214 -0
- package/dist/test/ensemble.test.d.ts +1 -0
- package/dist/test/ensemble.test.js +780 -0
- package/dist/test/external-executor.test.d.ts +1 -0
- package/dist/test/external-executor.test.js +273 -0
- package/dist/test/isolation.test.d.ts +1 -0
- package/dist/test/isolation.test.js +359 -0
- package/dist/test/tool-executor.test.d.ts +1 -0
- package/dist/test/tool-executor.test.js +113 -0
- package/dist/test/unified.test.d.ts +1 -0
- package/dist/test/unified.test.js +150 -0
- package/dist/tool-executor.d.ts +14 -0
- package/dist/tool-executor.js +156 -0
- package/dist/trace.d.ts +8 -0
- package/dist/trace.js +7 -0
- package/dist/unified.d.ts +101 -0
- package/dist/unified.js +422 -0
- package/dist/worktree.d.ts +25 -0
- package/dist/worktree.js +75 -0
- package/package.json +35 -0
package/dist/agent.d.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { HarnessAdapter } from "./harness.js";
|
|
2
|
+
/** Options accepted by `createAgentHarness`. */
export type AgentHarnessOptions = {
    /** Identifier reported by the adapter. */
    id?: string;
    /** OpenAI-compatible base URL for each candidate, keyed by `EnsembleModel.id`. */
    modelEndpoints: Record<string, string>;
    /** Base URL used for any model that has no entry in `modelEndpoints`. */
    fallbackBaseUrl?: string;
    /** API key handed to the agent run when provided. */
    apiKey?: string;
    /** Step limit handed to the agent run when provided. */
    maxSteps?: number;
    /** Timeout, in milliseconds, for each shell command issued through the `run` tool. */
    timeoutMs?: number;
    /** Overall wall-clock budget, in milliseconds, for one model's whole agent run. */
    modelTimeoutMs?: number;
    /** Observability correlation id; candidates are traced only when it is set. */
    traceId?: string;
    /** Span of the session root; candidate spans are parented under it so the trace tree is correct. */
    parentSpanId?: string;
    /** Index of the user turn this panel run belongs to; stamped on candidate events. */
    turn?: number;
};
|
|
21
|
+
/**
 * Creates the panel-agent harness adapter.
 *
 * @param options Endpoint, timeout and tracing configuration for the agent runs.
 * @returns A harness adapter whose `run` drives one agent run per panel model.
 */
export declare function createAgentHarness(options: AgentHarnessOptions): HarnessAdapter;
|
package/dist/agent.js
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
import { runWorktreeAgent } from "@fusionkit/adapter-ai-sdk";
|
|
2
|
+
import { artifactHash, emitTrace, newSpanId } from "@fusionkit/protocol";
|
|
3
|
+
/**
|
|
4
|
+
* The uniform panel agent for trajectory-level fusion. Each panel model drives
|
|
5
|
+
* a real AI SDK tool loop (read/list/grep/write/run) over its own git worktree
|
|
6
|
+
* and produces a normalized trajectory. The same agent runs for every model, so
|
|
7
|
+
* trajectories are directly comparable and only the model varies.
|
|
8
|
+
*/
|
|
9
|
+
/** Default wall-clock budget, in milliseconds (ten minutes), for one panel model's agent run (model + tools). */
const DEFAULT_MODEL_TIMEOUT_MS = 10 * 60_000;
|
|
11
|
+
/**
|
|
12
|
+
* Verification is a signal, not a gate: if the agent ran a command (e.g. tests)
|
|
13
|
+
* the last observed exit code becomes the trajectory's verification status.
|
|
14
|
+
*/
|
|
15
|
+
function deriveVerification(steps) {
|
|
16
|
+
let lastExitCode;
|
|
17
|
+
for (const step of steps) {
|
|
18
|
+
// The `run` tool always prefixes its observation with `exit_code=<n>`; anchor
|
|
19
|
+
// to the start so unrelated tool output that happens to contain the substring
|
|
20
|
+
// cannot be mistaken for a command result.
|
|
21
|
+
if (step.type === "observation" && typeof step.text === "string") {
|
|
22
|
+
const match = step.text.match(/^exit_code=(-?\d+)/);
|
|
23
|
+
if (match)
|
|
24
|
+
lastExitCode = Number(match[1]);
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
if (lastExitCode === undefined)
|
|
28
|
+
return undefined;
|
|
29
|
+
return {
|
|
30
|
+
status: lastExitCode === 0 ? "succeeded" : "failed",
|
|
31
|
+
evidence: [`exit_code=${lastExitCode}`],
|
|
32
|
+
exitCode: lastExitCode
|
|
33
|
+
};
|
|
34
|
+
}
|
|
35
|
+
export function createAgentHarness(options) {
|
|
36
|
+
const id = options.id ?? "agent";
|
|
37
|
+
return {
|
|
38
|
+
id,
|
|
39
|
+
harnessKind: "generic",
|
|
40
|
+
prepare: () => ({ id, timeoutMs: options.timeoutMs }),
|
|
41
|
+
capabilities: () => ({
|
|
42
|
+
shell_command: "supported",
|
|
43
|
+
artifact_capture: "supported",
|
|
44
|
+
verification: "supported",
|
|
45
|
+
tool_call_loop: "supported"
|
|
46
|
+
}),
|
|
47
|
+
verificationProfile: () => ({
|
|
48
|
+
id: `${id}-verification`,
|
|
49
|
+
requiredEvidence: ["agent trajectory", "final output"]
|
|
50
|
+
}),
|
|
51
|
+
run: async ({ descriptor, model, ordinal, worktree }) => {
|
|
52
|
+
const baseUrl = options.modelEndpoints[model.id] ?? options.fallbackBaseUrl;
|
|
53
|
+
if (baseUrl === undefined) {
|
|
54
|
+
throw new Error(`no model endpoint configured for panel model "${model.id}"`);
|
|
55
|
+
}
|
|
56
|
+
const root = worktree?.path ?? process.cwd();
|
|
57
|
+
const candidateId = `${descriptor.id}_${model.id}_${ordinal}`;
|
|
58
|
+
const executionId = `exec_${candidateId}`;
|
|
59
|
+
const planId = `plan_${candidateId}`;
|
|
60
|
+
const traceId = options.traceId;
|
|
61
|
+
const candidateSpan = newSpanId();
|
|
62
|
+
if (traceId !== undefined) {
|
|
63
|
+
emitTrace({
|
|
64
|
+
component: "panel-model",
|
|
65
|
+
event_type: "harness.candidate.started",
|
|
66
|
+
traceId,
|
|
67
|
+
spanId: candidateSpan,
|
|
68
|
+
...(options.parentSpanId !== undefined ? { parentSpanId: options.parentSpanId } : {}),
|
|
69
|
+
candidateId,
|
|
70
|
+
modelId: model.id,
|
|
71
|
+
payload: {
|
|
72
|
+
model: model.model,
|
|
73
|
+
...(options.turn !== undefined ? { turn: options.turn } : {}),
|
|
74
|
+
...(worktree ? { branch_name: worktree.branchName, worktree_path: worktree.path } : {})
|
|
75
|
+
}
|
|
76
|
+
});
|
|
77
|
+
}
|
|
78
|
+
// Bound the whole agent run so a hung model HTTP call cannot wedge a
|
|
79
|
+
// candidate forever (the per-command timeout only bounds `run`).
|
|
80
|
+
const modelTimeoutMs = options.modelTimeoutMs ?? DEFAULT_MODEL_TIMEOUT_MS;
|
|
81
|
+
const result = await runWorktreeAgent({
|
|
82
|
+
worktree: root,
|
|
83
|
+
prompt: descriptor.prompt,
|
|
84
|
+
baseUrl,
|
|
85
|
+
model: model.model,
|
|
86
|
+
abortSignal: AbortSignal.timeout(modelTimeoutMs),
|
|
87
|
+
...(options.turn !== undefined ? { turn: options.turn } : {}),
|
|
88
|
+
...(options.apiKey !== undefined ? { apiKey: options.apiKey } : {}),
|
|
89
|
+
...(options.maxSteps !== undefined ? { maxSteps: options.maxSteps } : {}),
|
|
90
|
+
...(options.timeoutMs !== undefined ? { commandTimeoutMs: options.timeoutMs } : {}),
|
|
91
|
+
...(traceId !== undefined ? { traceId, candidateId, parentSpanId: candidateSpan } : {})
|
|
92
|
+
});
|
|
93
|
+
const steps = result.steps;
|
|
94
|
+
const status = result.status === "failed" ? "failed" : "succeeded";
|
|
95
|
+
const verification = deriveVerification(steps);
|
|
96
|
+
const trajectory = {
|
|
97
|
+
trajectoryId: candidateId,
|
|
98
|
+
modelId: model.id,
|
|
99
|
+
model: model.model,
|
|
100
|
+
candidateId,
|
|
101
|
+
harnessKind: "generic",
|
|
102
|
+
status,
|
|
103
|
+
steps,
|
|
104
|
+
finalOutput: result.finalOutput,
|
|
105
|
+
...(verification !== undefined ? { verification } : {})
|
|
106
|
+
};
|
|
107
|
+
const transcript = JSON.stringify(steps, null, 2);
|
|
108
|
+
const outputHash = artifactHash(transcript);
|
|
109
|
+
if (traceId !== undefined) {
|
|
110
|
+
emitTrace({
|
|
111
|
+
component: "panel-model",
|
|
112
|
+
event_type: "harness.candidate.finished",
|
|
113
|
+
traceId,
|
|
114
|
+
spanId: candidateSpan,
|
|
115
|
+
candidateId,
|
|
116
|
+
modelId: model.id,
|
|
117
|
+
payload: {
|
|
118
|
+
status,
|
|
119
|
+
...(options.turn !== undefined ? { turn: options.turn } : {}),
|
|
120
|
+
tool_call_count: result.toolCallCount,
|
|
121
|
+
finish_reason: result.finishReason,
|
|
122
|
+
step_count: steps.length,
|
|
123
|
+
final_output_preview: result.finalOutput.slice(0, 400),
|
|
124
|
+
...(verification !== undefined ? { verification_status: verification.status } : {})
|
|
125
|
+
}
|
|
126
|
+
});
|
|
127
|
+
emitTrace({
|
|
128
|
+
component: "panel-model",
|
|
129
|
+
event_type: "tool.execution",
|
|
130
|
+
traceId,
|
|
131
|
+
spanId: candidateSpan,
|
|
132
|
+
candidateId,
|
|
133
|
+
modelId: model.id,
|
|
134
|
+
payload: {
|
|
135
|
+
execution_id: executionId,
|
|
136
|
+
plan_id: planId,
|
|
137
|
+
status,
|
|
138
|
+
...(options.turn !== undefined ? { turn: options.turn } : {}),
|
|
139
|
+
output_hash: outputHash,
|
|
140
|
+
tool_call_count: result.toolCallCount
|
|
141
|
+
}
|
|
142
|
+
});
|
|
143
|
+
}
|
|
144
|
+
return {
|
|
145
|
+
candidateId,
|
|
146
|
+
model,
|
|
147
|
+
status,
|
|
148
|
+
...(worktree ? { branchName: worktree.branchName, worktreePath: worktree.path } : {}),
|
|
149
|
+
transcript,
|
|
150
|
+
trajectory,
|
|
151
|
+
diff: "",
|
|
152
|
+
summary: result.finalOutput.slice(0, 280),
|
|
153
|
+
artifacts: [
|
|
154
|
+
{
|
|
155
|
+
artifact_id: `artifact_${descriptor.id}_${model.id}_agent_trajectory`,
|
|
156
|
+
kind: "transcript",
|
|
157
|
+
hash: outputHash,
|
|
158
|
+
redaction_status: "synthetic"
|
|
159
|
+
}
|
|
160
|
+
],
|
|
161
|
+
toolRecords: [
|
|
162
|
+
{
|
|
163
|
+
execution_id: executionId,
|
|
164
|
+
plan_id: planId,
|
|
165
|
+
status,
|
|
166
|
+
output_hash: outputHash
|
|
167
|
+
}
|
|
168
|
+
],
|
|
169
|
+
verification: verification !== undefined
|
|
170
|
+
? {
|
|
171
|
+
status: verification.status,
|
|
172
|
+
evidence: verification.evidence,
|
|
173
|
+
...(verification.exitCode !== undefined ? { exitCode: verification.exitCode } : {})
|
|
174
|
+
}
|
|
175
|
+
: { status, evidence: [`final_output_chars=${result.finalOutput.length}`, outputHash] },
|
|
176
|
+
metadata: {
|
|
177
|
+
adapter: "agent",
|
|
178
|
+
model_id: model.id,
|
|
179
|
+
tool_call_count: result.toolCallCount,
|
|
180
|
+
finish_reason: result.finishReason
|
|
181
|
+
}
|
|
182
|
+
};
|
|
183
|
+
},
|
|
184
|
+
collectArtifacts: () => []
|
|
185
|
+
};
|
|
186
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { ModelFusionArtifactKind } from "@fusionkit/protocol";
|
|
2
|
+
import type { HarnessArtifact } from "./harness.js";
|
|
3
|
+
/** File-backed store that persists artifacts under a single root directory. */
export type ArtifactStore = {
    /** Resolved directory the artifact files are written into. */
    root: string;
    /**
     * Writes `content` to a file under `root` and returns the artifact record
     * together with the path of the written file.
     */
    writeText(input: {
        artifactId: string;
        kind: ModelFusionArtifactKind;
        content: string;
        /** File-name suffix for the written file. */
        suffix?: string;
    }): HarnessArtifact & {
        path: string;
    };
    /**
     * Serializes `value` as JSON, writes it under `root`, and returns the artifact
     * record together with the path of the written file.
     */
    writeJson(input: {
        artifactId: string;
        kind: ModelFusionArtifactKind;
        value: unknown;
    }): HarnessArtifact & {
        path: string;
    };
};
|
|
21
|
+
/**
 * Creates an artifact store rooted at `root`.
 *
 * @param root Directory that will hold the artifact files.
 * @returns The store bound to that directory.
 */
export declare function createArtifactStore(root: string): ArtifactStore;
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { mkdirSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { join, resolve } from "node:path";
|
|
3
|
+
import { pathToFileURL } from "node:url";
|
|
4
|
+
import { artifactHash } from "@fusionkit/protocol";
|
|
5
|
+
/**
 * Makes an artifact id usable as a file-name fragment.
 *
 * Every UTF-16 code unit other than ASCII letters, digits, `_`, `.`, `:` and `-`
 * is replaced with an underscore.
 *
 * @param value Raw identifier.
 * @returns The sanitized identifier, same length as the input.
 */
function safeFileName(value) {
    const disallowed = /[^A-Za-z0-9_.:-]/g;
    return value.replace(disallowed, "_");
}
|
|
8
|
+
export function createArtifactStore(root) {
|
|
9
|
+
const resolvedRoot = resolve(root);
|
|
10
|
+
mkdirSync(resolvedRoot, { recursive: true });
|
|
11
|
+
return {
|
|
12
|
+
root: resolvedRoot,
|
|
13
|
+
writeText(input) {
|
|
14
|
+
const hash = artifactHash(input.content);
|
|
15
|
+
const hashPart = hash.replace("sha256:", "");
|
|
16
|
+
const path = join(resolvedRoot, `${safeFileName(input.artifactId)}-${hashPart}${input.suffix ?? ".txt"}`);
|
|
17
|
+
writeFileSync(path, input.content);
|
|
18
|
+
return {
|
|
19
|
+
artifact_id: input.artifactId,
|
|
20
|
+
kind: input.kind,
|
|
21
|
+
hash,
|
|
22
|
+
uri: pathToFileURL(path).toString(),
|
|
23
|
+
redaction_status: "synthetic",
|
|
24
|
+
path
|
|
25
|
+
};
|
|
26
|
+
},
|
|
27
|
+
writeJson(input) {
|
|
28
|
+
return this.writeText({
|
|
29
|
+
artifactId: input.artifactId,
|
|
30
|
+
kind: input.kind,
|
|
31
|
+
content: JSON.stringify(input.value, null, 2) + "\n",
|
|
32
|
+
suffix: ".json"
|
|
33
|
+
});
|
|
34
|
+
}
|
|
35
|
+
};
|
|
36
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { NetworkPolicy } from "@fusionkit/protocol";
|
|
2
|
+
import type { SessionBackend } from "@fusionkit/runner";
|
|
3
|
+
import type { ClaudeCodeBindingOptions } from "@fusionkit/session-harness";
|
|
4
|
+
import type { HarnessAdapter } from "./harness.js";
|
|
5
|
+
/** Environment-variable map consulted by the Claude Code harness; unset variables may be `undefined`. */
export type ClaudeCodeHarnessEnv = Record<string, string | undefined>;
|
|
6
|
+
/** Options for the Claude Code harness, layered on top of the session-harness binding options. */
export type ClaudeCodeHarnessOptions = ClaudeCodeBindingOptions & {
    /** Identifier reported by the adapter. */
    id?: string;
    /** Environment to read credentials from. Defaults to `process.env`; tests can pass `{}` for deterministic skips. */
    env?: ClaudeCodeHarnessEnv;
    /** Secret values that have already been released, forwarded through the session backend seam. */
    secrets?: {
        name: string;
        value: string;
    }[];
    /** Seam for tests and extensions. Defaults to `aiSdkHarnessBackend(...)`. */
    backend?: SessionBackend;
    /** Runner pool to request. */
    pool?: string;
    /** Network policy override. */
    network?: NetworkPolicy;
    /** Execution timeout in milliseconds. */
    timeoutMs?: number;
    /** Upper bound, in bytes, on the captured log. */
    logMaxBytes?: number;
    /** Whether an unavailable harness yields a skipped result instead of an error. */
    skipWhenUnavailable?: boolean;
};
|
|
23
|
+
/**
 * Reports why the Claude Code harness would be skipped for lack of credentials.
 *
 * @param env Environment to inspect.
 * @param options Harness options that may themselves satisfy the credential requirements.
 * @returns A human-readable reason, or `undefined` when the harness can run.
 */
export declare function claudeCodeHarnessCredentialSkipReason(env?: ClaudeCodeHarnessEnv, options?: ClaudeCodeHarnessOptions): string | undefined;
|
|
24
|
+
/**
 * Creates the Claude Code harness adapter.
 *
 * @param options Harness configuration; all fields are optional.
 * @returns The harness adapter.
 */
export declare function createClaudeCodeHarness(options?: ClaudeCodeHarnessOptions): HarnessAdapter;
|
|
25
|
+
/**
 * Creates a Claude Code harness adapter; accepts the same options as `createClaudeCodeHarness`.
 *
 * @param options Harness configuration; all fields are optional.
 * @returns The harness adapter.
 */
export declare function claudeCodeHarness(options?: ClaudeCodeHarnessOptions): HarnessAdapter;
|
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
import { artifactHash } from "@fusionkit/protocol";
|
|
2
|
+
import { CapabilityMismatchError, prepareExecution } from "@fusionkit/runner";
|
|
3
|
+
import { aiSdkHarnessBackend } from "@fusionkit/session-harness";
|
|
4
|
+
/** 64 zero characters: an all-zero hex digest used where no real hash is available. */
const ZERO_HASH = "0".repeat(64);
/** 40 zero characters: an all-zero git SHA used when no base revision is known. */
const ZERO_GIT_SHA = "0".repeat(40);
/** Runner pool used when neither the options nor the descriptor name one. */
const DEFAULT_POOL = "ensemble";
/** Sandbox runtime used when neither the options nor the descriptor name one. */
const DEFAULT_RUNTIME = "node24";
/** Default execution timeout: ten minutes, in milliseconds. */
const DEFAULT_TIMEOUT_MS = 10 * 60_000;
/** Default cap on captured log size: 256 KiB. */
const DEFAULT_LOG_MAX_BYTES = 256 * 1024;
/** Default network policy: deny by default, with a short host allow-list. */
const DEFAULT_CLAUDE_NETWORK = {
    defaultDeny: true,
    allowHosts: [
        "registry.npmjs.org",
        "api.anthropic.com",
        "ai-gateway.vercel.sh"
    ]
};
/** Environment variables copied into the execution's auth environment when set. */
const AUTH_ENV_NAMES = [
    "AI_GATEWAY_API_KEY",
    "AI_GATEWAY_BASE_URL",
    "ANTHROPIC_API_KEY",
    "ANTHROPIC_AUTH_TOKEN",
    "ANTHROPIC_BASE_URL"
];
|
|
21
|
+
/**
 * Builds the candidate identifier for a run input.
 *
 * @param input Object carrying `descriptor.id`, `model.id` and `ordinal`.
 * @returns `<descriptor id>_<model id>_<ordinal>`.
 */
function candidateId(input) {
    const { descriptor, model, ordinal } = input;
    return `${descriptor.id}_${model.id}_${ordinal}`;
}
|
|
24
|
+
/**
 * Reads an environment variable, treating unset and empty values alike.
 *
 * @param env Environment map.
 * @param name Variable name.
 * @returns The value when it is set and non-empty, otherwise `undefined`.
 */
function envValue(env, name) {
    const raw = env[name];
    if (!raw) {
        return undefined;
    }
    return raw.length > 0 ? raw : undefined;
}
|
|
28
|
+
function authEnvFrom(env) {
|
|
29
|
+
const authEnv = {};
|
|
30
|
+
for (const name of AUTH_ENV_NAMES) {
|
|
31
|
+
const value = envValue(env, name);
|
|
32
|
+
if (value !== undefined)
|
|
33
|
+
authEnv[name] = value;
|
|
34
|
+
}
|
|
35
|
+
return authEnv;
|
|
36
|
+
}
|
|
37
|
+
function credentialGate(env, options) {
|
|
38
|
+
const missing = [];
|
|
39
|
+
const hasProviderCredential = envValue(env, "AI_GATEWAY_API_KEY") ??
|
|
40
|
+
envValue(env, "ANTHROPIC_API_KEY") ??
|
|
41
|
+
envValue(env, "ANTHROPIC_AUTH_TOKEN");
|
|
42
|
+
const hasSandboxCredential = options.backend !== undefined ||
|
|
43
|
+
options.createSandboxProvider !== undefined ||
|
|
44
|
+
options.token !== undefined ||
|
|
45
|
+
envValue(env, "VERCEL_TOKEN") !== undefined;
|
|
46
|
+
if (!hasSandboxCredential)
|
|
47
|
+
missing.push("VERCEL_TOKEN");
|
|
48
|
+
if (!hasProviderCredential) {
|
|
49
|
+
missing.push("ANTHROPIC_API_KEY|ANTHROPIC_AUTH_TOKEN|AI_GATEWAY_API_KEY");
|
|
50
|
+
}
|
|
51
|
+
if (missing.length > 0) {
|
|
52
|
+
return {
|
|
53
|
+
available: false,
|
|
54
|
+
missing,
|
|
55
|
+
reason: "Claude Code harness skipped: missing Claude Code credential/env; set VERCEL_TOKEN and one of " +
|
|
56
|
+
"ANTHROPIC_API_KEY, ANTHROPIC_AUTH_TOKEN, or AI_GATEWAY_API_KEY."
|
|
57
|
+
};
|
|
58
|
+
}
|
|
59
|
+
return { available: true, authEnv: authEnvFrom(env) };
|
|
60
|
+
}
|
|
61
|
+
export function claudeCodeHarnessCredentialSkipReason(env = process.env, options = {}) {
|
|
62
|
+
const gate = credentialGate(env, options);
|
|
63
|
+
return gate.available ? undefined : gate.reason;
|
|
64
|
+
}
|
|
65
|
+
function backendFor(options, env) {
|
|
66
|
+
return (options.backend ??
|
|
67
|
+
aiSdkHarnessBackend({
|
|
68
|
+
...(options.runtime !== undefined ? { runtime: options.runtime } : {}),
|
|
69
|
+
...(options.bridgePort !== undefined ? { bridgePort: options.bridgePort } : {}),
|
|
70
|
+
token: options.token ?? envValue(env, "VERCEL_TOKEN"),
|
|
71
|
+
teamId: options.teamId ?? envValue(env, "VERCEL_TEAM_ID"),
|
|
72
|
+
projectId: options.projectId ?? envValue(env, "VERCEL_PROJECT_ID"),
|
|
73
|
+
...(options.model !== undefined ? { model: options.model } : {}),
|
|
74
|
+
...(options.maxTurns !== undefined ? { maxTurns: options.maxTurns } : {}),
|
|
75
|
+
...(options.thinking !== undefined ? { thinking: options.thinking } : {}),
|
|
76
|
+
...(options.startupTimeoutMs !== undefined
|
|
77
|
+
? { startupTimeoutMs: options.startupTimeoutMs }
|
|
78
|
+
: {}),
|
|
79
|
+
...(options.createHarness !== undefined ? { createHarness: options.createHarness } : {}),
|
|
80
|
+
...(options.createSandboxProvider !== undefined
|
|
81
|
+
? { createSandboxProvider: options.createSandboxProvider }
|
|
82
|
+
: {})
|
|
83
|
+
}));
|
|
84
|
+
}
|
|
85
|
+
function contractFor(input) {
|
|
86
|
+
const timeoutMs = input.options.timeoutMs ?? input.descriptor.policy.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
|
87
|
+
return {
|
|
88
|
+
version: "warrant.contract.v1",
|
|
89
|
+
runId: `ensemble_${input.candidateId}`,
|
|
90
|
+
issuedAt: new Date().toISOString(),
|
|
91
|
+
issuer: { keyId: "ensemble-claude-code", role: "plane" },
|
|
92
|
+
requestedBy: { kind: "service", id: "handoffkit-ensemble" },
|
|
93
|
+
agent: { kind: "claude-code" },
|
|
94
|
+
task: { prompt: input.descriptor.prompt },
|
|
95
|
+
runner: {
|
|
96
|
+
pool: input.options.pool ??
|
|
97
|
+
input.descriptor.runtime.environmentId ??
|
|
98
|
+
input.descriptor.runtime.id ??
|
|
99
|
+
DEFAULT_POOL
|
|
100
|
+
},
|
|
101
|
+
workspace: {
|
|
102
|
+
version: "warrant.manifest.v1",
|
|
103
|
+
baseRef: (input.repoBaseSha ?? input.descriptor.baseGitSha) || ZERO_GIT_SHA,
|
|
104
|
+
bundleHash: ZERO_HASH,
|
|
105
|
+
untrackedFiles: [],
|
|
106
|
+
deniedPatterns: [],
|
|
107
|
+
deniedPaths: []
|
|
108
|
+
},
|
|
109
|
+
policyHash: ZERO_HASH,
|
|
110
|
+
secrets: input.options.secrets?.map((secret) => ({ name: secret.name, scope: "ensemble" })) ?? [],
|
|
111
|
+
network: input.options.network ??
|
|
112
|
+
(input.descriptor.runtime.isolation?.networkPolicy
|
|
113
|
+
? {
|
|
114
|
+
defaultDeny: input.descriptor.runtime.isolation.networkPolicy.defaultDeny,
|
|
115
|
+
allowHosts: [...input.descriptor.runtime.isolation.networkPolicy.allowHosts]
|
|
116
|
+
}
|
|
117
|
+
: DEFAULT_CLAUDE_NETWORK),
|
|
118
|
+
budget: {
|
|
119
|
+
...(input.descriptor.policy.budgetUsd !== undefined
|
|
120
|
+
? { maxSpendUsd: input.descriptor.policy.budgetUsd }
|
|
121
|
+
: {}),
|
|
122
|
+
maxDurationMin: Math.ceil(timeoutMs / 60_000)
|
|
123
|
+
},
|
|
124
|
+
disclosure: "minimal-context",
|
|
125
|
+
isolation: "vercel-sandbox",
|
|
126
|
+
execution: {
|
|
127
|
+
kind: "agent",
|
|
128
|
+
agent: { kind: "claude-code" },
|
|
129
|
+
prompt: input.descriptor.prompt,
|
|
130
|
+
timeoutMs,
|
|
131
|
+
env: { vars: input.gate.authEnv, egressProxy: false },
|
|
132
|
+
log: {
|
|
133
|
+
stdout: "capture",
|
|
134
|
+
stderr: "merge",
|
|
135
|
+
maxBytes: input.options.logMaxBytes ?? DEFAULT_LOG_MAX_BYTES
|
|
136
|
+
}
|
|
137
|
+
},
|
|
138
|
+
expiresAt: new Date(Date.now() + timeoutMs).toISOString(),
|
|
139
|
+
signatures: []
|
|
140
|
+
};
|
|
141
|
+
}
|
|
142
|
+
function hardeningFor(input) {
|
|
143
|
+
const networkPolicy = input.options.network ??
|
|
144
|
+
input.descriptor.runtime.isolation?.networkPolicy ??
|
|
145
|
+
DEFAULT_CLAUDE_NETWORK;
|
|
146
|
+
const mountPolicy = input.descriptor.runtime.isolation?.mountPolicy;
|
|
147
|
+
const secretPolicy = input.descriptor.runtime.isolation?.secretPolicy;
|
|
148
|
+
return {
|
|
149
|
+
requested_isolation: "microvm",
|
|
150
|
+
actual_isolation: input.finished ? "vercel-sandbox" : "process",
|
|
151
|
+
runtime: {
|
|
152
|
+
provider: "vercel-sandbox",
|
|
153
|
+
runtime: input.options.runtime ??
|
|
154
|
+
(input.descriptor.runtime.isolation?.kind === "microvm"
|
|
155
|
+
? input.descriptor.runtime.isolation.runtime
|
|
156
|
+
: undefined) ??
|
|
157
|
+
DEFAULT_RUNTIME,
|
|
158
|
+
workdir: mountPolicy?.workdir ?? input.repoDir
|
|
159
|
+
},
|
|
160
|
+
mount_policy: {
|
|
161
|
+
worktree_writable: mountPolicy?.worktreeWritable ?? true,
|
|
162
|
+
read_only_caches: [...(mountPolicy?.readOnlyCachePaths ?? [])],
|
|
163
|
+
ignored_dirs: [...(mountPolicy?.ignoredDirs ?? [".git", "node_modules", ".warrant"])]
|
|
164
|
+
},
|
|
165
|
+
network_policy: {
|
|
166
|
+
default_deny: networkPolicy.defaultDeny,
|
|
167
|
+
allow_hosts: [...networkPolicy.allowHosts],
|
|
168
|
+
enforced: input.finished
|
|
169
|
+
},
|
|
170
|
+
cleanup: input.finished
|
|
171
|
+
? { attempted: true, succeeded: true, status: "succeeded" }
|
|
172
|
+
: { attempted: false, succeeded: true, status: "not_required" },
|
|
173
|
+
secret_absence: {
|
|
174
|
+
secret_names: [
|
|
175
|
+
...(secretPolicy?.secretNames ?? input.options.secrets?.map((secret) => secret.name) ?? [])
|
|
176
|
+
],
|
|
177
|
+
secret_value_hashes: [...(secretPolicy?.secretValueHashes ?? [])],
|
|
178
|
+
injected_env_names: [...(secretPolicy?.injectedEnvNames ?? input.authEnvNames)],
|
|
179
|
+
scanned: false,
|
|
180
|
+
leaks_found: false,
|
|
181
|
+
scan_scope: [],
|
|
182
|
+
leak_count: 0
|
|
183
|
+
}
|
|
184
|
+
};
|
|
185
|
+
}
|
|
186
|
+
function skippedOutput(input) {
|
|
187
|
+
const evidenceHash = artifactHash(input.reason);
|
|
188
|
+
const repoDir = input.runInput.worktree?.path ?? input.runInput.descriptor.sourceRepo;
|
|
189
|
+
return {
|
|
190
|
+
candidateId: candidateId(input.runInput),
|
|
191
|
+
model: input.runInput.model,
|
|
192
|
+
status: "skipped",
|
|
193
|
+
...(input.runInput.worktree
|
|
194
|
+
? {
|
|
195
|
+
branchName: input.runInput.worktree.branchName,
|
|
196
|
+
worktreePath: input.runInput.worktree.path
|
|
197
|
+
}
|
|
198
|
+
: {}),
|
|
199
|
+
transcript: input.reason,
|
|
200
|
+
summary: input.reason,
|
|
201
|
+
error: {
|
|
202
|
+
kind: "capability_missing",
|
|
203
|
+
message: input.reason,
|
|
204
|
+
retryable: false
|
|
205
|
+
},
|
|
206
|
+
verification: {
|
|
207
|
+
status: "skipped",
|
|
208
|
+
evidence: [input.reason, evidenceHash],
|
|
209
|
+
exitCode: 0
|
|
210
|
+
},
|
|
211
|
+
metadata: {
|
|
212
|
+
adapter: "claude-code",
|
|
213
|
+
credential_gate: "skipped",
|
|
214
|
+
missing_credentials: [...input.missing],
|
|
215
|
+
hardening: hardeningFor({
|
|
216
|
+
descriptor: input.runInput.descriptor,
|
|
217
|
+
options: input.options,
|
|
218
|
+
repoDir,
|
|
219
|
+
authEnvNames: [],
|
|
220
|
+
finished: false
|
|
221
|
+
})
|
|
222
|
+
}
|
|
223
|
+
};
|
|
224
|
+
}
|
|
225
|
+
function failureOutput(input) {
|
|
226
|
+
const message = input.error instanceof Error ? input.error.message : String(input.error);
|
|
227
|
+
const errorHash = artifactHash(message);
|
|
228
|
+
const repoDir = input.runInput.worktree?.path ?? input.runInput.descriptor.sourceRepo;
|
|
229
|
+
return {
|
|
230
|
+
candidateId: candidateId(input.runInput),
|
|
231
|
+
model: input.runInput.model,
|
|
232
|
+
status: "failed",
|
|
233
|
+
...(input.runInput.worktree
|
|
234
|
+
? {
|
|
235
|
+
branchName: input.runInput.worktree.branchName,
|
|
236
|
+
worktreePath: input.runInput.worktree.path
|
|
237
|
+
}
|
|
238
|
+
: {}),
|
|
239
|
+
transcript: `Claude Code harness failed: ${message}`,
|
|
240
|
+
error: {
|
|
241
|
+
kind: "provider_error",
|
|
242
|
+
message,
|
|
243
|
+
retryable: true
|
|
244
|
+
},
|
|
245
|
+
verification: {
|
|
246
|
+
status: "failed",
|
|
247
|
+
evidence: [errorHash],
|
|
248
|
+
exitCode: 1
|
|
249
|
+
},
|
|
250
|
+
metadata: {
|
|
251
|
+
adapter: "claude-code",
|
|
252
|
+
credential_gate: "available",
|
|
253
|
+
event_count: 0,
|
|
254
|
+
auth_env_names: [...input.authEnvNames],
|
|
255
|
+
hardening: hardeningFor({
|
|
256
|
+
descriptor: input.runInput.descriptor,
|
|
257
|
+
options: input.options,
|
|
258
|
+
repoDir,
|
|
259
|
+
authEnvNames: input.authEnvNames,
|
|
260
|
+
finished: false
|
|
261
|
+
})
|
|
262
|
+
}
|
|
263
|
+
};
|
|
264
|
+
}
|
|
265
|
+
export function createClaudeCodeHarness(options = {}) {
|
|
266
|
+
const id = options.id ?? "claude-code";
|
|
267
|
+
const env = options.env ?? process.env;
|
|
268
|
+
const skipWhenUnavailable = options.skipWhenUnavailable ?? true;
|
|
269
|
+
return {
|
|
270
|
+
id,
|
|
271
|
+
harnessKind: "claude_code",
|
|
272
|
+
prepare: () => {
|
|
273
|
+
const gate = credentialGate(env, options);
|
|
274
|
+
if (!gate.available) {
|
|
275
|
+
if (skipWhenUnavailable)
|
|
276
|
+
return { gate };
|
|
277
|
+
throw new CapabilityMismatchError(gate.reason);
|
|
278
|
+
}
|
|
279
|
+
return { gate, backend: backendFor(options, env) };
|
|
280
|
+
},
|
|
281
|
+
capabilities: () => {
|
|
282
|
+
const gate = credentialGate(env, options);
|
|
283
|
+
return {
|
|
284
|
+
workspace_read: gate.available ? "supported" : "degraded",
|
|
285
|
+
workspace_write: gate.available ? "supported" : "degraded",
|
|
286
|
+
apply_patch: gate.available ? "supported" : "degraded",
|
|
287
|
+
tool_records: "supported",
|
|
288
|
+
verification: gate.available ? "supported" : "degraded",
|
|
289
|
+
microvm_isolation: gate.available ? "supported" : "degraded",
|
|
290
|
+
credential_gate: gate.available ? "supported" : "degraded"
|
|
291
|
+
};
|
|
292
|
+
},
|
|
293
|
+
verificationProfile: () => ({
|
|
294
|
+
id: `${id}-verification`,
|
|
295
|
+
requiredEvidence: ["structured transcript", "exit code", "worktree diff or skip reason"]
|
|
296
|
+
}),
|
|
297
|
+
run: async (runInput) => {
|
|
298
|
+
const state = runInput.prepared;
|
|
299
|
+
if (!state.gate.available) {
|
|
300
|
+
return skippedOutput({
|
|
301
|
+
runInput,
|
|
302
|
+
reason: state.gate.reason,
|
|
303
|
+
missing: state.gate.missing,
|
|
304
|
+
options
|
|
305
|
+
});
|
|
306
|
+
}
|
|
307
|
+
const id = candidateId(runInput);
|
|
308
|
+
const repoDir = runInput.worktree?.path ?? runInput.descriptor.workspace ?? runInput.descriptor.sourceRepo;
|
|
309
|
+
const backend = state.backend ?? backendFor(options, env);
|
|
310
|
+
const contract = contractFor({
|
|
311
|
+
descriptor: runInput.descriptor,
|
|
312
|
+
candidateId: id,
|
|
313
|
+
options,
|
|
314
|
+
gate: state.gate,
|
|
315
|
+
...(runInput.worktree ? { repoBaseSha: runInput.worktree.baseGitSha } : {})
|
|
316
|
+
});
|
|
317
|
+
const events = [];
|
|
318
|
+
const authEnvNames = Object.keys(state.gate.authEnv);
|
|
319
|
+
try {
|
|
320
|
+
const result = await backend.execute({
|
|
321
|
+
contract,
|
|
322
|
+
repoDir,
|
|
323
|
+
secrets: options.secrets ?? [],
|
|
324
|
+
execution: prepareExecution({ contract, mockScriptPath: "/tmp/mock-agent.js" }),
|
|
325
|
+
emit: (event) => {
|
|
326
|
+
events.push(event);
|
|
327
|
+
}
|
|
328
|
+
});
|
|
329
|
+
const transcript = result.log.toString("utf8");
|
|
330
|
+
const outputHash = artifactHash(transcript);
|
|
331
|
+
const status = result.exitCode === 0 ? "succeeded" : "failed";
|
|
332
|
+
return {
|
|
333
|
+
candidateId: id,
|
|
334
|
+
model: runInput.model,
|
|
335
|
+
status,
|
|
336
|
+
...(runInput.worktree
|
|
337
|
+
? {
|
|
338
|
+
branchName: runInput.worktree.branchName,
|
|
339
|
+
worktreePath: runInput.worktree.path
|
|
340
|
+
}
|
|
341
|
+
: {}),
|
|
342
|
+
transcript,
|
|
343
|
+
toolRecords: [
|
|
344
|
+
{
|
|
345
|
+
execution_id: `exec_${id}`,
|
|
346
|
+
plan_id: `plan_${id}`,
|
|
347
|
+
status,
|
|
348
|
+
output_hash: outputHash
|
|
349
|
+
}
|
|
350
|
+
],
|
|
351
|
+
verification: {
|
|
352
|
+
status,
|
|
353
|
+
evidence: [`exit_code=${result.exitCode}`, outputHash],
|
|
354
|
+
exitCode: result.exitCode
|
|
355
|
+
},
|
|
356
|
+
...(status === "failed"
|
|
357
|
+
? {
|
|
358
|
+
error: {
|
|
359
|
+
kind: "provider_error",
|
|
360
|
+
message: "Claude Code harness exited non-zero",
|
|
361
|
+
retryable: true
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
: {}),
|
|
365
|
+
metadata: {
|
|
366
|
+
adapter: "claude-code",
|
|
367
|
+
backend_isolation: backend.isolation,
|
|
368
|
+
credential_gate: "available",
|
|
369
|
+
event_count: events.length,
|
|
370
|
+
auth_env_names: authEnvNames,
|
|
371
|
+
hardening: hardeningFor({
|
|
372
|
+
descriptor: runInput.descriptor,
|
|
373
|
+
options,
|
|
374
|
+
repoDir,
|
|
375
|
+
authEnvNames,
|
|
376
|
+
finished: true
|
|
377
|
+
})
|
|
378
|
+
}
|
|
379
|
+
};
|
|
380
|
+
}
|
|
381
|
+
catch (error) {
|
|
382
|
+
if (skipWhenUnavailable && error instanceof CapabilityMismatchError) {
|
|
383
|
+
return skippedOutput({
|
|
384
|
+
runInput,
|
|
385
|
+
reason: error.message,
|
|
386
|
+
missing: ["capability_mismatch"],
|
|
387
|
+
options
|
|
388
|
+
});
|
|
389
|
+
}
|
|
390
|
+
return failureOutput({ runInput, error, options, authEnvNames });
|
|
391
|
+
}
|
|
392
|
+
},
|
|
393
|
+
collectArtifacts: () => []
|
|
394
|
+
};
|
|
395
|
+
}
|
|
396
|
+
export function claudeCodeHarness(options = {}) {
|
|
397
|
+
return createClaudeCodeHarness(options);
|
|
398
|
+
}
|