@fusionkit/ensemble 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.d.ts +21 -0
- package/dist/agent.js +186 -0
- package/dist/artifacts.d.ts +21 -0
- package/dist/artifacts.js +36 -0
- package/dist/claude-code.d.ts +25 -0
- package/dist/claude-code.js +398 -0
- package/dist/codex.d.ts +69 -0
- package/dist/codex.js +467 -0
- package/dist/command.d.ts +15 -0
- package/dist/command.js +82 -0
- package/dist/dashboard.d.ts +62 -0
- package/dist/dashboard.js +788 -0
- package/dist/external-executor.d.ts +56 -0
- package/dist/external-executor.js +288 -0
- package/dist/harness.d.ts +337 -0
- package/dist/harness.js +1 -0
- package/dist/index.d.ts +30 -0
- package/dist/index.js +15 -0
- package/dist/isolation.d.ts +25 -0
- package/dist/isolation.js +509 -0
- package/dist/judge.d.ts +77 -0
- package/dist/judge.js +16 -0
- package/dist/mock.d.ts +20 -0
- package/dist/mock.js +56 -0
- package/dist/run.d.ts +5 -0
- package/dist/run.js +520 -0
- package/dist/synthesis.d.ts +25 -0
- package/dist/synthesis.js +221 -0
- package/dist/test/codex.test.d.ts +1 -0
- package/dist/test/codex.test.js +237 -0
- package/dist/test/dashboard.test.d.ts +1 -0
- package/dist/test/dashboard.test.js +214 -0
- package/dist/test/ensemble.test.d.ts +1 -0
- package/dist/test/ensemble.test.js +780 -0
- package/dist/test/external-executor.test.d.ts +1 -0
- package/dist/test/external-executor.test.js +273 -0
- package/dist/test/isolation.test.d.ts +1 -0
- package/dist/test/isolation.test.js +359 -0
- package/dist/test/tool-executor.test.d.ts +1 -0
- package/dist/test/tool-executor.test.js +113 -0
- package/dist/test/unified.test.d.ts +1 -0
- package/dist/test/unified.test.js +150 -0
- package/dist/tool-executor.d.ts +14 -0
- package/dist/tool-executor.js +156 -0
- package/dist/trace.d.ts +8 -0
- package/dist/trace.js +7 -0
- package/dist/unified.d.ts +101 -0
- package/dist/unified.js +422 -0
- package/dist/worktree.d.ts +25 -0
- package/dist/worktree.js +75 -0
- package/package.json +35 -0
package/dist/codex.d.ts
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import type { HarnessAdapter } from "./harness.js";
|
|
2
|
+
/** Value written to `sandbox_mode` in the generated Codex `config.toml`. */
export type CodexSandboxMode = "read-only" | "workspace-write" | "danger-full-access";
|
|
3
|
+
/** Value written to `approval_policy` in the generated Codex `config.toml`; the harness defaults to `"never"`. */
export type CodexApprovalPolicy = "untrusted" | "on-failure" | "on-request" | "never";
|
|
4
|
+
/**
 * Provider mode that relies on credentials the Codex CLI can already see:
 * either one of the named environment variables or an `auth.json` file in the
 * Codex home directory.
 */
export type CodexAmbientProvider = {
    kind: "ambient";
    /**
     * Environment variable names probed for a credential. When omitted the
     * harness checks `CODEX_API_KEY` and then `OPENAI_API_KEY`.
     */
    credentialEnvNames?: readonly string[];
};
|
|
8
|
+
/**
 * Provider mode that points Codex directly at a Responses-API endpoint via a
 * `[model_providers.*]` table in the generated `config.toml`.
 */
export type CodexResponsesProvider = {
    kind: "responses";
    /**
     * Endpoint URL. When written to the config a trailing `/responses` route
     * is removed and `/v1` is appended if the URL does not already end in it.
     */
    baseUrl: string;
    /**
     * Inline API key. It is exported to the Codex child process under
     * `WARRANT_CODEX_PROVIDER_API_KEY` and takes precedence over `apiKeyEnvName`.
     */
    apiKey?: string;
    /** Name of the environment variable holding the API key (written as `env_key`). */
    apiKeyEnvName?: string;
    /**
     * Written as `requires_openai_auth`; treated as `true` when omitted.
     * Setting it to `false` also disables the harness's credential pre-check.
     */
    requiresOpenAiAuth?: boolean;
    /** Provider table id; defaults to `warrant-codex`. */
    providerId?: string;
    /** Display name for the provider; defaults to `Warrant Codex`. */
    name?: string;
};
|
|
17
|
+
/**
 * Provider mode for OpenAI-compatible backends. For each run the harness
 * starts a local model gateway in front of `baseUrl` and writes the gateway's
 * URL into the Codex config instead of the backend URL.
 */
export type CodexOpenAiCompatibleProvider = {
    kind: "openai-compatible";
    /** Backend URL; `/v1` is appended when the URL does not already end in it. */
    baseUrl: string;
    /** Inline API key handed to the gateway backend; wins over `apiKeyEnvName`. */
    apiKey?: string;
    /**
     * Environment variable to read the API key from. When omitted the harness
     * falls back to `OPENAI_API_KEY`.
     */
    apiKeyEnvName?: string;
    /** Backend default model; falls back to the candidate model's `model`. */
    defaultModel?: string;
    /** Provider table id; defaults to `warrant-codex`. */
    providerId?: string;
    /** Display name for the provider; defaults to `Warrant Codex`. */
    name?: string;
};
|
|
26
|
+
/** Union of the supported provider configurations, discriminated by `kind`. */
export type CodexProvider = CodexAmbientProvider | CodexResponsesProvider | CodexOpenAiCompatibleProvider;
|
|
27
|
+
/** Everything a runner needs to launch one Codex CLI invocation. */
export type CodexExecInput = {
    /** Executable to spawn. */
    command: string;
    /** Arguments passed to the executable. */
    args: string[];
    /** Working directory of the child process. */
    cwd: string;
    /** Complete environment of the child process (includes `CODEX_HOME`). */
    env: Record<string, string>;
    /** Optional limit in milliseconds after which the run is terminated. */
    timeoutMs?: number;
};
|
|
34
|
+
/** Outcome of one Codex CLI invocation as reported by a runner. */
export type CodexExecResult = {
    /** Captured standard output. */
    stdout: string;
    /** Captured standard error. */
    stderr: string;
    /** Process exit code; the harness treats only `0` as success. */
    exitCode: number;
    /** Set to `true` when the run was stopped by the timeout. */
    timedOut?: boolean;
};
|
|
40
|
+
/** Pluggable process runner; may answer synchronously or with a promise. The harness awaits the result. */
export type CodexExecRunner = (input: CodexExecInput) => Promise<CodexExecResult> | CodexExecResult;
|
|
41
|
+
/** Options accepted by `createCodexHarness`; every field is optional. */
export type CodexHarnessOptions = {
    /** Adapter id; defaults to `codex`. */
    id?: string;
    /** Codex executable; defaults to `codex`. */
    command?: string;
    /**
     * Working directory used when the run has no worktree. Falls back to the
     * descriptor's workspace and then to `process.cwd()`.
     */
    cwd?: string;
    /** Timeout in milliseconds; overrides the descriptor policy's `timeoutMs`. */
    timeoutMs?: number;
    /** Environment to use instead of `process.env`; `undefined` values are dropped. */
    env?: Record<string, string | undefined>;
    /** Explicit provider; when omitted it is derived from the environment. */
    provider?: CodexProvider;
    /** Custom process runner; defaults to spawning `command` as a child process. */
    runner?: CodexExecRunner;
    /** Overrides the sandbox mode otherwise derived from the descriptor's side-effects policy. */
    sandboxMode?: CodexSandboxMode;
    /** Approval policy written to the Codex config; defaults to `never`. */
    approvalPolicy?: CodexApprovalPolicy;
    /** When `true`, cleanup leaves the temporary Codex home directory on disk. */
    keepCodexHome?: boolean;
};
|
|
53
|
+
/** Environment map in the shape of `process.env`, where entries may be `undefined`. */
export type CodexHarnessEnv = Record<string, string | undefined>;
|
|
54
|
+
/** Input for rendering a Codex `config.toml` document. */
export type CodexConfigTomlInput = {
    /** Written as `model`. */
    model: string;
    /** Written as `sandbox_mode`. */
    sandboxMode: CodexSandboxMode;
    /** Written as `approval_policy`. */
    approvalPolicy: CodexApprovalPolicy;
    /**
     * Custom model provider. When omitted the config selects the built-in
     * `openai` provider and no `[model_providers.*]` table is emitted.
     */
    provider?: {
        /** Provider table id; defaults to `warrant-codex`. */
        providerId?: string;
        /** Written as `name`; defaults to `Warrant Codex`. */
        name?: string;
        /** Written as `base_url` after stripping a `/responses` route and ensuring a `/v1` suffix. */
        baseUrl: string;
        /** Written as `env_key` when present. */
        apiKeyEnvName?: string;
        /** Written as `requires_openai_auth`. */
        requiresOpenAiAuth: boolean;
    };
};
|
|
66
|
+
/**
 * Explains why the Codex harness would skip a run for lack of credentials.
 *
 * @param env - Environment to inspect; defaults to `process.env`.
 * @param options - Optional explicit provider; otherwise the provider is derived from `env`.
 * @returns A human-readable reason, or `undefined` when credentials are available or not required.
 */
export declare function codexHarnessCredentialSkipReason(env?: CodexHarnessEnv, options?: Pick<CodexHarnessOptions, "provider">): string | undefined;
|
|
67
|
+
/** Renders the contents of a Codex `config.toml` for the given model, policies and optional provider. */
export declare function codexConfigToml(input: CodexConfigTomlInput): string;
|
|
68
|
+
/** Builds a harness adapter that runs candidates through the Codex CLI (`codex exec --json`). */
export declare function createCodexHarness(options?: CodexHarnessOptions): HarnessAdapter;
|
|
69
|
+
/** Alias of `createCodexHarness`. */
export declare const codexHarness: typeof createCodexHarness;
|
package/dist/codex.js
ADDED
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
import { copyFileSync, existsSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { homedir, tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { artifactHash } from "@fusionkit/protocol";
|
|
6
|
+
import { OpenAiBackend, startGateway } from "@fusionkit/model-gateway";
|
|
7
|
+
// Executable spawned when the harness options do not name a command.
const DEFAULT_CODEX_COMMAND = "codex";
|
|
8
|
+
// Provider id used in the generated config when the provider does not supply one.
const DEFAULT_PROVIDER_ID = "warrant-codex";
|
|
9
|
+
// Provider display name used in the generated config when the provider does not supply one.
const DEFAULT_PROVIDER_NAME = "Warrant Codex";
|
|
10
|
+
// Environment variables probed, in this order, when looking for an API credential.
const DEFAULT_CREDENTIAL_ENV_NAMES = ["CODEX_API_KEY", "OPENAI_API_KEY"];
|
|
11
|
+
// Environment variable under which an inline provider apiKey is handed to the Codex child process.
const INLINE_PROVIDER_API_KEY_ENV = "WARRANT_CODEX_PROVIDER_API_KEY";
|
|
12
|
+
// File name of the Codex auth file looked up inside the Codex home directory.
const CODEX_AUTH_FILE = "auth.json";
|
|
13
|
+
/**
 * Encodes a value as a double-quoted string literal for the generated TOML.
 * The encoding is delegated to JSON string serialization.
 */
function tomlString(value) {
    const quoted = JSON.stringify(value);
    return quoted;
}
|
|
16
|
+
/**
 * Removes trailing slashes from `baseUrl` and makes sure the result ends in
 * `/v1`, appending that segment when it is missing.
 */
function normalizeApiBaseUrl(baseUrl) {
    const withoutTrailingSlashes = baseUrl.replace(/\/+$/, "");
    if (withoutTrailingSlashes.endsWith("/v1")) {
        return withoutTrailingSlashes;
    }
    return `${withoutTrailingSlashes}/v1`;
}
|
|
20
|
+
/**
 * Drops a trailing `/responses` route (with or without a final slash) from
 * `baseUrl`; any other URL is returned unchanged.
 */
function stripResponsesRoute(baseUrl) {
    const responsesSuffix = /\/responses\/?$/;
    return baseUrl.replace(responsesSuffix, "");
}
|
|
23
|
+
/**
 * Reports whether `baseUrl` addresses the local machine.
 *
 * Recognized hosts are `localhost`, `127.0.0.1` and the IPv6 loopback `::1`.
 * A string that cannot be parsed as a URL is treated as not loopback.
 *
 * @param baseUrl - Absolute URL to inspect.
 * @returns `true` for a loopback host, otherwise `false`.
 */
function isLoopbackUrl(baseUrl) {
    let hostname;
    try {
        hostname = new URL(baseUrl).hostname;
    }
    catch {
        // Unparseable input cannot be proven local.
        return false;
    }
    // The URL parser keeps the square brackets on IPv6 literals, so
    // "http://[::1]:8080" reports the hostname "[::1]", never the bare "::1".
    // The bare form is still accepted so previously matching values keep working.
    return (hostname === "localhost" ||
        hostname === "127.0.0.1" ||
        hostname === "[::1]" ||
        hostname === "::1");
}
|
|
32
|
+
/**
 * Copies `env` into a fresh object, leaving out every entry whose value is
 * `undefined`. Empty strings are kept.
 */
function definedEnv(env) {
    const present = {};
    Object.keys(env).forEach((name) => {
        const raw = env[name];
        if (raw === undefined) {
            return;
        }
        present[name] = raw;
    });
    return present;
}
|
|
40
|
+
/**
 * Returns the first name in `names` whose value in `env` is a non-empty
 * string, or `undefined` when none qualifies.
 */
function firstPresentEnv(env, names) {
    for (const candidate of names) {
        const held = env[candidate];
        if (held !== undefined && held.length > 0) {
            return candidate;
        }
    }
    return undefined;
}
|
|
43
|
+
/**
 * Resolves the Codex home directory: a non-empty `CODEX_HOME` from `env`, or
 * `.codex` inside the current user's home directory.
 */
function codexHome(env) {
    const configured = env.CODEX_HOME;
    if (configured && configured.length > 0) {
        return configured;
    }
    return join(homedir(), ".codex");
}
|
|
48
|
+
/**
 * Returns the path of the auth file inside the Codex home directory when
 * that file exists, otherwise `undefined`.
 */
function codexAuthFile(env) {
    const candidate = join(codexHome(env), CODEX_AUTH_FILE);
    if (existsSync(candidate)) {
        return candidate;
    }
    return undefined;
}
|
|
52
|
+
/**
 * Derives a provider configuration from environment variables.
 *
 * Precedence:
 *  1. A non-empty Responses base URL (`WARRANT_CODEX_RESPONSES_BASE_URL`,
 *     else `CODEX_RESPONSES_BASE_URL`) selects a "responses" provider. Auth is
 *     required unless the URL is a loopback address.
 *  2. A non-empty OpenAI base URL (`WARRANT_CODEX_OPENAI_BASE_URL`, else
 *     `OPENAI_BASE_URL`) selects an "openai-compatible" provider.
 *  3. Otherwise the "ambient" provider is used.
 *
 * `apiKeyEnvName` is only set when one of the candidate variables holds a
 * non-empty value.
 */
function providerFromEnv(env) {
    const hasText = (value) => value !== undefined && value.length > 0;
    const responsesUrl = env.WARRANT_CODEX_RESPONSES_BASE_URL ?? env.CODEX_RESPONSES_BASE_URL;
    if (hasText(responsesUrl)) {
        const responsesProvider = { kind: "responses", baseUrl: responsesUrl };
        const keyName = firstPresentEnv(env, [
            "WARRANT_CODEX_API_KEY",
            "CODEX_API_KEY",
            "OPENAI_API_KEY"
        ]);
        if (keyName) {
            responsesProvider.apiKeyEnvName = keyName;
        }
        responsesProvider.requiresOpenAiAuth = !isLoopbackUrl(responsesUrl);
        return responsesProvider;
    }
    const openAiUrl = env.WARRANT_CODEX_OPENAI_BASE_URL ?? env.OPENAI_BASE_URL;
    if (hasText(openAiUrl)) {
        const compatibleProvider = { kind: "openai-compatible", baseUrl: openAiUrl };
        const keyName = firstPresentEnv(env, [
            "WARRANT_CODEX_OPENAI_API_KEY",
            "OPENAI_API_KEY"
        ]);
        if (keyName) {
            compatibleProvider.apiKeyEnvName = keyName;
        }
        return compatibleProvider;
    }
    return { kind: "ambient" };
}
|
|
81
|
+
/**
 * Picks the environment variable name Codex should read its API key from:
 * the reserved inline-key variable when the provider carries an `apiKey`,
 * else the provider's own `apiKeyEnvName`, else the first default credential
 * variable that is set in `env` (possibly `undefined`).
 */
function credentialEnvName(provider, env) {
    const { apiKey, apiKeyEnvName } = provider;
    if (apiKey !== undefined) {
        return INLINE_PROVIDER_API_KEY_ENV;
    }
    return apiKeyEnvName !== undefined
        ? apiKeyEnvName
        : firstPresentEnv(env, DEFAULT_CREDENTIAL_ENV_NAMES);
}
|
|
88
|
+
/**
 * Explains why a provider cannot be used for lack of credentials.
 *
 * - "ambient": needs one of the credential variables in `env` or an auth file
 *   in the Codex home directory.
 * - "responses": needs a credential unless `requiresOpenAiAuth` is `false` or
 *   an inline `apiKey` is present.
 * - "openai-compatible": never reported as missing here.
 *
 * @param provider - Provider configuration, discriminated by `kind`.
 * @param env - Environment with only defined values.
 * @returns A human-readable reason, or `undefined` when nothing is missing.
 * @throws Error when `provider.kind` is not a supported kind.
 */
function missingCredentialReason(provider, env) {
    switch (provider.kind) {
        case "ambient": {
            const names = provider.credentialEnvNames ?? DEFAULT_CREDENTIAL_ENV_NAMES;
            return firstPresentEnv(env, names) === undefined && codexAuthFile(env) === undefined
                ? `Codex credentials are absent; set ${names.join(" or ")} or configure a Responses/OpenAI-compatible provider.`
                : undefined;
        }
        case "responses": {
            if (provider.requiresOpenAiAuth === false || provider.apiKey !== undefined)
                return undefined;
            const envName = credentialEnvName(provider, env);
            return envName === undefined || env[envName] === undefined || env[envName].length === 0
                ? `Codex Responses provider credentials are absent; set ${provider.apiKeyEnvName ?? DEFAULT_CREDENTIAL_ENV_NAMES.join(" or ")} or mark the provider requiresOpenAiAuth=false for local endpoints.`
                : undefined;
        }
        case "openai-compatible":
            return undefined;
        default: {
            // Stringifying the whole provider object yields "[object Object]".
            // Report the offending discriminant instead; the object itself is
            // not dumped because it may carry an inline apiKey.
            throw new Error(`unsupported Codex provider: ${String(provider.kind)}`);
        }
    }
}
|
|
112
|
+
/**
 * Public credential pre-check: returns the reason the Codex harness would skip
 * a run, or `undefined` when it can proceed.
 *
 * @param env - Environment to inspect; defaults to `process.env`.
 * @param options - May carry an explicit `provider`; otherwise the provider is
 *   derived from the environment.
 */
export function codexHarnessCredentialSkipReason(env = process.env, options = {}) {
    const presentEnv = definedEnv(env);
    const provider = options.provider ?? providerFromEnv(presentEnv);
    return missingCredentialReason(provider, presentEnv);
}
|
|
116
|
+
/**
 * Chooses the Codex sandbox mode for a run.
 *
 * An explicit `override` always wins. Otherwise the descriptor's side-effects
 * policy decides: "none" and "read_only" map to "read-only"; "writes_workspace",
 * "network", "tool_execution" and "unknown" map to "workspace-write".
 *
 * @throws Error for any other side-effects value.
 */
function sandboxModeFor(descriptor, override) {
    if (override !== undefined) {
        return override;
    }
    const sideEffects = descriptor.policy.sideEffects;
    if (sideEffects === "none" || sideEffects === "read_only") {
        return "read-only";
    }
    if (sideEffects === "writes_workspace" ||
        sideEffects === "network" ||
        sideEffects === "tool_execution" ||
        sideEffects === "unknown") {
        return "workspace-write";
    }
    throw new Error(`unsupported side effects policy: ${String(sideEffects)}`);
}
|
|
134
|
+
/**
 * Renders a Codex `config.toml` document.
 *
 * The document always sets `model`, `model_provider`, `approval_policy` and
 * `sandbox_mode`. When `input.provider` is given, a `[model_providers.<id>]`
 * table follows with `name`, `base_url`, `wire_api = "responses"`,
 * `requires_openai_auth` and, if known, `env_key`.
 *
 * @param input - Model, policies and optional custom provider.
 * @returns The TOML text, with lines joined by "\n".
 */
export function codexConfigToml(input) {
    const provider = input.provider;
    const providerId = provider?.providerId ?? DEFAULT_PROVIDER_ID;
    const lines = [
        `model = ${tomlString(input.model)}`,
        provider
            ? `model_provider = ${tomlString(providerId)}`
            : `model_provider = "openai"`,
        `approval_policy = ${tomlString(input.approvalPolicy)}`,
        `sandbox_mode = ${tomlString(input.sandboxMode)}`,
        ""
    ];
    if (provider !== undefined) {
        // A TOML bare key may only contain ASCII letters, digits, "_" and "-".
        // Any other id (for example one with a dot, which TOML would read as a
        // nested table) must be written as a quoted key so that the table name
        // matches the `model_provider` value above.
        const tableKey = /^[A-Za-z0-9_-]+$/.test(providerId)
            ? providerId
            : tomlString(providerId);
        lines.push(`[model_providers.${tableKey}]`, `name = ${tomlString(provider.name ?? DEFAULT_PROVIDER_NAME)}`, `base_url = ${tomlString(normalizeApiBaseUrl(stripResponsesRoute(provider.baseUrl)))}`, `wire_api = "responses"`, `requires_openai_auth = ${provider.requiresOpenAiAuth ? "true" : "false"}`);
        if (provider.apiKeyEnvName !== undefined) {
            lines.push(`env_key = ${tomlString(provider.apiKeyEnvName)}`);
        }
        lines.push("");
    }
    return lines.join("\n");
}
|
|
154
|
+
/**
 * Builds the argument list for a non-interactive Codex run: the `exec`
 * subcommand with JSON output and the git-repository check disabled, followed
 * by the prompt as the final argument.
 */
function codexArgs(prompt) {
    const flags = ["--json", "--skip-git-repo-check"];
    return ["exec", ...flags, prompt];
}
|
|
157
|
+
/**
 * Creates a fresh per-candidate Codex home directory under `input.tempRoot`
 * and writes its `config.toml`.
 *
 * For non-ambient providers a provider table is included. Only "responses"
 * providers get an `env_key` and may require OpenAI auth (defaulting to
 * `true`); other providers are written with auth disabled. The base URL is
 * `input.providerBaseUrl` when given, else the provider's own.
 *
 * For the ambient provider, when none of its credential variables is set, an
 * existing auth file from the user's Codex home is copied into the new home.
 *
 * @returns The path of the created directory.
 */
function writeCodexHome(input) {
    const homeDir = mkdtempSync(join(input.tempRoot, "candidate-"));
    const { provider } = input;
    let providerConfig;
    if (provider.kind !== "ambient") {
        const isResponses = provider.kind === "responses";
        providerConfig = {
            providerId: provider.providerId,
            name: provider.name,
            baseUrl: input.providerBaseUrl ?? provider.baseUrl,
            apiKeyEnvName: isResponses ? credentialEnvName(provider, input.env) : undefined,
            requiresOpenAiAuth: isResponses ? (provider.requiresOpenAiAuth ?? true) : false
        };
    }
    const configPath = join(homeDir, "config.toml");
    const configText = codexConfigToml({
        model: input.model.model,
        sandboxMode: sandboxModeFor(input.descriptor, input.sandboxMode),
        approvalPolicy: input.approvalPolicy,
        ...(providerConfig ? { provider: providerConfig } : {})
    });
    writeFileSync(configPath, configText);
    if (provider.kind === "ambient") {
        const envNames = provider.credentialEnvNames ?? DEFAULT_CREDENTIAL_ENV_NAMES;
        if (firstPresentEnv(input.env, envNames) === undefined) {
            // No key in the environment: fall back to the user's stored login.
            const authFile = codexAuthFile(input.env);
            if (authFile !== undefined) {
                copyFileSync(authFile, join(homeDir, CODEX_AUTH_FILE));
            }
        }
    }
    return homeDir;
}
|
|
187
|
+
/**
 * Default runner: spawns the Codex CLI as a child process and collects its
 * output.
 *
 * stdin is ignored; stdout and stderr are buffered and decoded as UTF-8.
 * When `input.timeoutMs` is set and elapses, the child receives SIGTERM and
 * the result reports exit code 124 with `timedOut: true`.
 *
 * @param input - Command, arguments, working directory, environment and
 *   optional timeout.
 * @returns The captured output and exit code.
 * @throws Rejects with the spawn error when the process cannot be started.
 */
async function defaultCodexRunner(input) {
    return await new Promise((resolve, reject) => {
        const child = spawn(input.command, input.args, {
            cwd: input.cwd,
            env: input.env,
            stdio: ["ignore", "pipe", "pipe"]
        });
        const stdout = [];
        const stderr = [];
        let timedOut = false;
        let timer;
        const stopTimer = () => {
            if (timer !== undefined)
                clearTimeout(timer);
        };
        if (input.timeoutMs !== undefined) {
            timer = setTimeout(() => {
                timedOut = true;
                child.kill("SIGTERM");
            }, input.timeoutMs);
        }
        child.stdout.on("data", (chunk) => stdout.push(chunk));
        child.stderr.on("data", (chunk) => stderr.push(chunk));
        child.on("error", (error) => {
            // A failed spawn never reaches "exit"; without this the pending
            // timer would keep the event loop alive and fire on a dead child.
            stopTimer();
            reject(error);
        });
        // NOTE(review): "exit" can fire before the stdio pipes are drained, so
        // trailing output may be missing. "close" would wait for the pipes but
        // can block when a descendant keeps them open; left unchanged here.
        child.on("exit", (code) => {
            stopTimer();
            resolve({
                stdout: Buffer.concat(stdout).toString("utf8"),
                stderr: Buffer.concat(stderr).toString("utf8"),
                // `code` is null when the child was ended by a signal. That is
                // not a successful run, so it must not be reported as 0.
                exitCode: timedOut ? 124 : code ?? 1,
                ...(timedOut ? { timedOut } : {})
            });
        });
    });
}
|
|
219
|
+
/**
 * Prepares the provider for one run and returns a handle to it.
 *
 * "ambient" and "responses" providers need no setup: the handle has no
 * `configBaseUrl`, an empty record list and a no-op `close`.
 *
 * "openai-compatible" providers get a local gateway in front of the backend.
 * The API key is the inline `apiKey`, else the value of `apiKeyEnvName` in
 * `input.env`, else `OPENAI_API_KEY`. The handle exposes the gateway URL as
 * `configBaseUrl`, collects every model-call record reported by the gateway
 * and shuts the gateway down on `close`.
 *
 * @param input - Provider, environment and the candidate model.
 * @throws Error when `input.provider.kind` is not a supported kind.
 */
async function runProvider(input) {
    switch (input.provider.kind) {
        case "ambient":
        case "responses":
            return {
                provider: input.provider,
                modelCallRecords: [],
                close: async () => undefined
            };
        case "openai-compatible": {
            const records = [];
            const apiKey = input.provider.apiKey ??
                (input.provider.apiKeyEnvName !== undefined
                    ? input.env[input.provider.apiKeyEnvName]
                    : input.env.OPENAI_API_KEY);
            const gateway = await startGateway({
                backend: new OpenAiBackend({
                    baseUrl: normalizeApiBaseUrl(input.provider.baseUrl),
                    ...(apiKey !== undefined ? { apiKey } : {}),
                    defaultModel: input.provider.defaultModel ?? input.model.model
                }),
                provenance: {
                    onModelCall(record) {
                        records.push(record);
                    }
                }
            });
            return {
                provider: input.provider,
                configBaseUrl: gateway.url(),
                modelCallRecords: records,
                close: () => gateway.close()
            };
        }
        default: {
            // Stringifying the whole provider object yields "[object Object]".
            // Report the offending discriminant instead; the object itself is
            // not dumped because it may carry an inline apiKey.
            throw new Error(`unsupported Codex provider: ${String(input.provider.kind)}`);
        }
    }
}
|
|
259
|
+
/**
 * Summarizes a finished Codex run as candidate metadata: adapter name,
 * command line, provider kind, UTF-8 byte sizes of both output streams, the
 * timeout flag (strictly `true` only) and the number of model calls recorded.
 */
function metadataFor(input) {
    const { command, args, provider, stdout, stderr, timedOut, modelCallRecords } = input;
    const summary = {
        adapter: "codex",
        command,
        args,
        provider_kind: provider.kind
    };
    summary.stdout_bytes = Buffer.byteLength(stdout);
    summary.stderr_bytes = Buffer.byteLength(stderr);
    summary.timed_out = timedOut === true;
    summary.model_call_count = modelCallRecords.length;
    return summary;
}
|
|
271
|
+
/**
 * Builds the candidate result for a run that was not executed.
 *
 * The transcript and log are the same one-line explanation. A single log
 * artifact carries the hash of that line, and the reason is repeated as
 * verification evidence, as a non-retryable `capability_missing` error and in
 * the metadata.
 */
function skippedCandidate(input) {
    const { descriptor, model, ordinal, reason, provider } = input;
    const transcript = `Codex adapter skipped: ${reason}`;
    const skipArtifact = {
        artifact_id: `artifact_${descriptor.id}_${model.id}_codex_skip`,
        kind: "log",
        hash: artifactHash(transcript),
        redaction_status: "synthetic"
    };
    return {
        candidateId: `${descriptor.id}_${model.id}_${ordinal}`,
        model,
        status: "skipped",
        transcript,
        log: transcript,
        artifacts: [skipArtifact],
        verification: {
            status: "skipped",
            evidence: [reason]
        },
        error: {
            kind: "capability_missing",
            message: reason,
            retryable: false
        },
        metadata: {
            adapter: "codex",
            provider_kind: provider.kind,
            skip_reason: reason
        }
    };
}
|
|
304
|
+
/**
 * Converts an exception thrown by the runner into a skipped candidate.
 *
 * An `ENOENT` error is reported as a missing Codex binary. Any other `Error`
 * contributes its message, and any other thrown value is stringified.
 *
 * @param input - Descriptor, model, ordinal, provider and the caught `error`.
 */
function failedToSpawnCandidate(input) {
    const thrown = input.error;
    let reason;
    // Runners are pluggable and may reject with anything, including
    // `undefined` or `null`; the optional access keeps this handler from
    // throwing a TypeError of its own.
    if (thrown?.code === "ENOENT") {
        reason = "Codex CLI binary was not found on PATH.";
    }
    else if (thrown instanceof Error) {
        reason = thrown.message;
    }
    else {
        reason = String(thrown);
    }
    return skippedCandidate({
        descriptor: input.descriptor,
        model: input.model,
        ordinal: input.ordinal,
        reason,
        provider: input.provider
    });
}
|
|
319
|
+
/**
 * Builds the Codex harness adapter.
 *
 * Defaults: id "codex", command "codex", the built-in child-process runner
 * and approval policy "never".
 *
 * Lifecycle:
 *  - `prepare` snapshots the environment (`options.env` or `process.env`),
 *    creates a temporary root directory and resolves the provider.
 *  - `run` skips the candidate when credentials are missing. Otherwise it
 *    prepares the provider, writes a per-candidate Codex home, runs the CLI
 *    and converts the outcome into a candidate result. The provider handle is
 *    always closed afterwards.
 *  - `cleanup` removes the temporary root unless `keepCodexHome` is `true`.
 */
export function createCodexHarness(options = {}) {
    const id = options.id ?? "codex";
    const command = options.command ?? DEFAULT_CODEX_COMMAND;
    const runner = options.runner ?? defaultCodexRunner;
    const approvalPolicy = options.approvalPolicy ?? "never";
    const prepare = () => {
        const env = definedEnv(options.env ?? process.env);
        const tempRoot = mkdtempSync(join(tmpdir(), "warrant-codex-"));
        const provider = options.provider ?? providerFromEnv(env);
        return { tempRoot, env, provider };
    };
    const capabilities = () => ({
        workspace_read: "supported",
        apply_patch: "supported",
        shell_command: "degraded",
        artifact_capture: "supported",
        model_gateway_responses: "supported",
        openai_compatible_gateway: "supported",
        verification: "supported"
    });
    const verificationProfile = () => ({
        id: `${id}-verification`,
        requiredEvidence: ["codex transcript", "exit code", "optional model-call record"]
    });
    const run = async ({ descriptor, model, ordinal, prepared, worktree }) => {
        const state = prepared;
        const skipReason = missingCredentialReason(state.provider, state.env);
        if (skipReason !== undefined) {
            return skippedCandidate({ descriptor, model, ordinal, reason: skipReason, provider: state.provider });
        }
        const session = await runProvider({
            provider: state.provider,
            env: state.env,
            model
        });
        try {
            const activeProvider = session.provider;
            const childEnv = { ...state.env };
            if (activeProvider.kind === "responses" && activeProvider.apiKey !== undefined) {
                // Inline keys reach the CLI through a reserved variable.
                childEnv[INLINE_PROVIDER_API_KEY_ENV] = activeProvider.apiKey;
            }
            // The home is written from the environment as it is before
            // CODEX_HOME is redirected to the new directory.
            childEnv.CODEX_HOME = writeCodexHome({
                tempRoot: state.tempRoot,
                model,
                providerBaseUrl: session.configBaseUrl,
                provider: activeProvider,
                env: childEnv,
                descriptor,
                sandboxMode: options.sandboxMode,
                approvalPolicy
            });
            const args = codexArgs(descriptor.prompt);
            const cwd = worktree?.path ?? options.cwd ?? descriptor.workspace ?? process.cwd();
            const timeoutMs = options.timeoutMs ?? descriptor.policy.timeoutMs;
            let outcome;
            try {
                outcome = await runner({ command, args, cwd, env: childEnv, timeoutMs });
            }
            catch (error) {
                return failedToSpawnCandidate({
                    descriptor,
                    model,
                    ordinal,
                    error,
                    provider: activeProvider
                });
            }
            const didTimeOut = outcome.timedOut === true;
            const transcript = [outcome.stdout, outcome.stderr].filter(Boolean).join("\n");
            const status = outcome.exitCode === 0 && !didTimeOut ? "succeeded" : "failed";
            const failed = status === "failed";
            const outputHash = artifactHash(transcript);
            const lastModelCall = session.modelCallRecords.at(-1);
            // Called once per use so the tool record and the candidate each
            // receive their own error object.
            const failureDetails = () => ({
                error: {
                    kind: didTimeOut ? "timeout" : "provider_error",
                    message: didTimeOut ? "Codex CLI timed out." : outcome.stderr.slice(0, 500),
                    retryable: didTimeOut
                }
            });
            return {
                candidateId: `${descriptor.id}_${model.id}_${ordinal}`,
                model,
                status,
                ...(lastModelCall ? { modelCallId: lastModelCall.call_id, modelCallRecord: lastModelCall } : {}),
                ...(worktree ? { branchName: worktree.branchName, worktreePath: worktree.path } : {}),
                transcript,
                log: transcript,
                artifacts: [
                    {
                        artifact_id: `artifact_${descriptor.id}_${model.id}_codex_output`,
                        kind: "log",
                        hash: outputHash,
                        redaction_status: "synthetic"
                    }
                ],
                toolRecords: [
                    {
                        execution_id: `exec_${descriptor.id}_${model.id}_${ordinal}_codex`,
                        plan_id: `plan_${descriptor.id}_${model.id}_${ordinal}_codex`,
                        status,
                        output_hash: outputHash,
                        ...(failed ? failureDetails() : {})
                    }
                ],
                verification: {
                    status,
                    evidence: [`exit_code=${outcome.exitCode}`, outputHash],
                    exitCode: outcome.exitCode
                },
                ...(failed ? failureDetails() : {}),
                metadata: metadataFor({
                    command,
                    args,
                    provider: activeProvider,
                    stdout: outcome.stdout,
                    stderr: outcome.stderr,
                    ...(outcome.timedOut !== undefined ? { timedOut: outcome.timedOut } : {}),
                    modelCallRecords: session.modelCallRecords
                })
            };
        }
        finally {
            await session.close();
        }
    };
    const collectArtifacts = () => [];
    const cleanup = ({ prepared }) => {
        if (options.keepCodexHome === true) {
            return;
        }
        if (prepared !== undefined) {
            rmSync(prepared.tempRoot, { recursive: true, force: true });
        }
    };
    return {
        id,
        harnessKind: "codex",
        prepare,
        capabilities,
        verificationProfile,
        run,
        collectArtifacts,
        cleanup
    };
}
|
|
467
|
+
// Alias: the same factory function exported under a second name.
export const codexHarness = createCodexHarness;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { HarnessAdapter } from "./harness.js";
|
|
2
|
+
import type { EnsembleModel } from "./harness.js";
|
|
3
|
+
/** Argument handed to a function-valued `env` option for each candidate run. */
export type CommandHarnessEnvInput = {
    /** Model the candidate is run for. */
    model: EnsembleModel;
    /** Ordinal of the candidate, as passed to the harness `run` call. */
    ordinal: number;
    /** Id of the descriptor being run. */
    descriptorId: string;
};
|
|
8
|
+
/** Options accepted by `createCommandHarness`. */
export type CommandHarnessOptions = {
    /** Adapter id; defaults to `command`. */
    id?: string;
    /** Command executed for every candidate. */
    command: string;
    /** Working directory used when the run has no worktree; falls back to `process.cwd()`. */
    cwd?: string;
    /** Timeout in milliseconds; overrides the descriptor policy's `timeoutMs`. */
    timeoutMs?: number;
    /**
     * Extra environment entries, either fixed or computed per candidate. They
     * are applied after the `HARNESS_*` variables and can therefore override them.
     */
    env?: Record<string, string | undefined> | ((input: CommandHarnessEnvInput) => Record<string, string | undefined>);
};
|
|
15
|
+
/** Builds a harness adapter that runs a fixed command for every candidate. */
export declare function createCommandHarness(options: CommandHarnessOptions): HarnessAdapter;
|
package/dist/command.js
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import { artifactHash } from "@fusionkit/protocol";
|
|
2
|
+
import { runCandidateCommandWithIsolation } from "./isolation.js";
|
|
3
|
+
/**
 * Builds the command harness adapter.
 *
 * Each run executes `options.command` through the isolation helper, in the
 * worktree path when there is one (else `options.cwd`, else `process.cwd()`).
 * The child environment carries `HARNESS_MODEL_ID`, `HARNESS_MODEL`,
 * `HARNESS_PROMPT` and, when the model has one, `HARNESS_ENDPOINT_ID`,
 * followed by the entries from `options.env`.
 *
 * A zero exit code yields status "succeeded", anything else "failed". The
 * hash of the combined output is used for the log artifact, the tool record
 * and the verification evidence.
 */
export function createCommandHarness(options) {
    const id = options.id ?? "command";
    const extraEnvFor = (model, ordinal, descriptorId) => typeof options.env === "function"
        ? options.env({ model, ordinal, descriptorId })
        : options.env;
    const prepare = () => ({
        command: options.command,
        cwd: options.cwd,
        timeoutMs: options.timeoutMs
    });
    const capabilities = () => ({
        shell_command: "supported",
        artifact_capture: "supported",
        verification: "supported"
    });
    const verificationProfile = () => ({
        id: `${id}-verification`,
        command: options.command,
        requiredEvidence: ["command output", "exit code", "tool execution record"]
    });
    const run = async ({ descriptor, model, ordinal, worktree }) => {
        const extraEnv = extraEnvFor(model, ordinal, descriptor.id);
        const baseEnv = {
            HARNESS_MODEL_ID: model.id,
            HARNESS_MODEL: model.model,
            HARNESS_PROMPT: descriptor.prompt
        };
        if (model.endpointId !== undefined) {
            baseEnv.HARNESS_ENDPOINT_ID = model.endpointId;
        }
        const execution = await runCandidateCommandWithIsolation({
            command: options.command,
            cwd: worktree?.path ?? options.cwd ?? process.cwd(),
            timeoutMs: options.timeoutMs ?? descriptor.policy.timeoutMs,
            isolation: descriptor.runtime.isolation,
            env: { ...baseEnv, ...extraEnv }
        });
        const { stdout, stderr, exitCode } = execution;
        const transcript = [stdout, stderr].filter(Boolean).join("\n");
        const status = exitCode === 0 ? "succeeded" : "failed";
        const outputHash = artifactHash(transcript);
        const suffix = `${descriptor.id}_${model.id}_${ordinal}`;
        return {
            candidateId: suffix,
            model,
            status,
            ...(worktree ? { branchName: worktree.branchName, worktreePath: worktree.path } : {}),
            transcript,
            diff: "",
            artifacts: [
                {
                    artifact_id: `artifact_${descriptor.id}_${model.id}_command_output`,
                    kind: "log",
                    hash: outputHash,
                    redaction_status: "synthetic"
                }
            ],
            toolRecords: [
                {
                    execution_id: `exec_${suffix}`,
                    plan_id: `plan_${suffix}`,
                    status,
                    output_hash: outputHash
                }
            ],
            verification: {
                status,
                evidence: [`exit_code=${exitCode}`, outputHash],
                exitCode
            },
            metadata: {
                command: options.command,
                stdout_bytes: Buffer.byteLength(stdout),
                stderr_bytes: Buffer.byteLength(stderr),
                timed_out: execution.timedOut,
                hardening: execution.hardening
            }
        };
    };
    const collectArtifacts = () => [];
    return {
        id,
        prepare,
        capabilities,
        verificationProfile,
        run,
        collectArtifacts
    };
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import type { HarnessRunResultV1, ModelFusionHarnessKind } from "@fusionkit/protocol";
|
|
2
|
+
import type { HarnessAdapter, HarnessCapabilities } from "./harness.js";
|
|
3
|
+
/** Module-private tuple of harness ids; its element type is exported as `HarnessLiveSmokeTarget`. */
declare const LIVE_SMOKE_TARGETS: readonly ["claude-code", "codex"];
|
|
4
|
+
/** Harness ids used as `harnessId` in matrix rows, readiness entries and smoke records. */
export type HarnessCapabilityTarget = "cursor" | "claude-code" | "codex" | "command" | "mock";
|
|
5
|
+
/** Availability value carried by a capability matrix row. */
export type HarnessAvailability = "available" | "credential_gated" | "missing";
|
|
6
|
+
/** Element type of `LIVE_SMOKE_TARGETS`, i.e. `"claude-code" | "codex"`. */
export type HarnessLiveSmokeTarget = (typeof LIVE_SMOKE_TARGETS)[number];
|
|
7
|
+
/** Purpose label carried by a smoke record. */
export type HarnessSmokePurpose = "contract" | "credential-skip" | "live" | "missing";
|
|
8
|
+
/**
 * Readiness entry for one harness, as listed under
 * `HarnessSmokeDashboard.readiness`.
 *
 * NOTE(review): the string fields are free-form here; their vocabulary is set
 * by the dashboard implementation, which is not part of this declaration.
 */
export type HarnessAdapterReadiness = {
    /** Harness this entry describes. */
    harnessId: HarnessCapabilityTarget;
    displayName: string;
    contractReadiness: string;
    credentialState: string;
    liveSmoke: string;
    evidence: string[];
    artifactRefs: string[];
};
|
|
17
|
+
/** One row of the capability matrix: a harness with its kind, availability, capabilities and notes. */
export type HarnessCapabilityMatrixRow = {
    /** Harness this row describes. */
    harnessId: HarnessCapabilityTarget;
    /** Harness kind as defined by `@fusionkit/protocol`. */
    harnessKind: ModelFusionHarnessKind;
    displayName: string;
    availability: HarnessAvailability;
    /** Capability map in the shape a harness adapter reports. */
    capabilities: HarnessCapabilities;
    notes: string[];
};
|
|
25
|
+
/** Capability matrix: a list of capability names plus one row per harness. */
export type HarnessCapabilityMatrix = {
    /** Capability names; presumably the matrix columns (confirm against dashboard.js). */
    capabilities: string[];
    rows: HarnessCapabilityMatrixRow[];
};
|
|
29
|
+
/** Outcome label carried by a smoke record. */
export type HarnessSmokeOutcome = "success" | "failure" | "missing" | "skipped";
|
|
30
|
+
/** Record of one smoke task run against one harness. */
export type HarnessSmokeRecord = {
    taskId: string;
    /** Harness the task was run against. */
    harnessId: HarnessCapabilityTarget;
    purpose: HarnessSmokePurpose;
    outcome: HarnessSmokeOutcome;
    /** Harness run result in the `@fusionkit/protocol` v1 shape. */
    result: HarnessRunResultV1;
    /** Presumably where `result` was written (confirm against dashboard.js). */
    resultPath: string;
};
|
|
38
|
+
/** Result of `runHarnessSmokeDashboard`: output locations, the capability matrix, smoke records and readiness entries. */
export type HarnessSmokeDashboard = {
    outputRoot: string;
    dashboardPath: string;
    matrix: HarnessCapabilityMatrix;
    records: HarnessSmokeRecord[];
    readiness: HarnessAdapterReadiness[];
};
|
|
45
|
+
/**
 * Options shared by `createHarnessCapabilityMatrix` and
 * `runHarnessSmokeDashboard`; every field is optional.
 *
 * NOTE(review): defaults and exact semantics live in the dashboard
 * implementation, which is not part of this declaration.
 */
export type HarnessSmokeDashboardOptions = {
    repo?: string;
    outputRoot?: string;
    timeoutMs?: number;
    createdAt?: string;
    /** Environment map in the shape of `process.env`. */
    env?: Record<string, string | undefined>;
    commandSuccess?: string;
    commandFailure?: string;
    /** Harness ids selected for live smoke runs. */
    liveSmoke?: readonly HarnessLiveSmokeTarget[];
    /** Harness adapters keyed by live smoke target id. */
    liveSmokeHarnesses?: Partial<Record<HarnessLiveSmokeTarget, HarnessAdapter>>;
};
|
|
56
|
+
/** Builds the harness capability matrix from the given options. */
export declare function createHarnessCapabilityMatrix(options?: HarnessSmokeDashboardOptions): HarnessCapabilityMatrix;
|
|
57
|
+
/** Runs the harness smoke dashboard and resolves with the matrix, smoke records and readiness entries. */
export declare function runHarnessSmokeDashboard(options?: HarnessSmokeDashboardOptions): Promise<HarnessSmokeDashboard>;
|
|
58
|
+
/** Convenience object grouping the two dashboard entry points under short names. */
export declare const harnessDashboard: {
    /** Same function type as `createHarnessCapabilityMatrix`. */
    readonly capabilities: typeof createHarnessCapabilityMatrix;
    /** Same function type as `runHarnessSmokeDashboard`. */
    readonly run: typeof runHarnessSmokeDashboard;
};
|
|
62
|
+
export {};
|