@vercel/agent-eval 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +370 -0
- package/dist/cli.d.ts +6 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +166 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.d.ts +21 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +17 -0
- package/dist/index.js.map +1 -0
- package/dist/lib/agents/claude-code.d.ts +12 -0
- package/dist/lib/agents/claude-code.d.ts.map +1 -0
- package/dist/lib/agents/claude-code.js +203 -0
- package/dist/lib/agents/claude-code.js.map +1 -0
- package/dist/lib/agents/codex.d.ts +12 -0
- package/dist/lib/agents/codex.d.ts.map +1 -0
- package/dist/lib/agents/codex.js +247 -0
- package/dist/lib/agents/codex.js.map +1 -0
- package/dist/lib/agents/index.d.ts +7 -0
- package/dist/lib/agents/index.d.ts.map +1 -0
- package/dist/lib/agents/index.js +14 -0
- package/dist/lib/agents/index.js.map +1 -0
- package/dist/lib/agents/registry.d.ts +23 -0
- package/dist/lib/agents/registry.d.ts.map +1 -0
- package/dist/lib/agents/registry.js +35 -0
- package/dist/lib/agents/registry.js.map +1 -0
- package/dist/lib/agents/shared.d.ts +47 -0
- package/dist/lib/agents/shared.d.ts.map +1 -0
- package/dist/lib/agents/shared.js +99 -0
- package/dist/lib/agents/shared.js.map +1 -0
- package/dist/lib/agents/types.d.ts +69 -0
- package/dist/lib/agents/types.d.ts.map +1 -0
- package/dist/lib/agents/types.js +5 -0
- package/dist/lib/agents/types.js.map +1 -0
- package/dist/lib/config.d.ts +34 -0
- package/dist/lib/config.d.ts.map +1 -0
- package/dist/lib/config.js +117 -0
- package/dist/lib/config.js.map +1 -0
- package/dist/lib/fixture.d.ts +52 -0
- package/dist/lib/fixture.d.ts.map +1 -0
- package/dist/lib/fixture.js +175 -0
- package/dist/lib/fixture.js.map +1 -0
- package/dist/lib/init.d.ts +21 -0
- package/dist/lib/init.d.ts.map +1 -0
- package/dist/lib/init.js +250 -0
- package/dist/lib/init.js.map +1 -0
- package/dist/lib/results.d.ts +54 -0
- package/dist/lib/results.d.ts.map +1 -0
- package/dist/lib/results.js +186 -0
- package/dist/lib/results.js.map +1 -0
- package/dist/lib/runner.d.ts +43 -0
- package/dist/lib/runner.d.ts.map +1 -0
- package/dist/lib/runner.js +142 -0
- package/dist/lib/runner.js.map +1 -0
- package/dist/lib/sandbox.d.ts +117 -0
- package/dist/lib/sandbox.d.ts.map +1 -0
- package/dist/lib/sandbox.js +248 -0
- package/dist/lib/sandbox.js.map +1 -0
- package/dist/lib/types.d.ts +166 -0
- package/dist/lib/types.d.ts.map +1 -0
- package/dist/lib/types.js +14 -0
- package/dist/lib/types.js.map +1 -0
- package/dist/test-setup.d.ts +2 -0
- package/dist/test-setup.d.ts.map +1 -0
- package/dist/test-setup.js +6 -0
- package/dist/test-setup.js.map +1 -0
- package/package.json +58 -0
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Claude Code agent implementation.
|
|
3
|
+
* Uses Vercel AI Gateway for model access.
|
|
4
|
+
*/
|
|
5
|
+
import { SandboxManager, collectLocalFiles, splitTestFiles, verifyNoTestFiles, } from '../sandbox.js';
|
|
6
|
+
import { runValidation, captureGeneratedFiles, createVitestConfig, AI_GATEWAY, ANTHROPIC_DIRECT, } from './shared.js';
|
|
7
|
+
/**
|
|
8
|
+
* Capture the Claude Code transcript from the sandbox.
|
|
9
|
+
* Claude Code stores transcripts at ~/.claude/projects/-{workdir}/{session-id}.jsonl
|
|
10
|
+
*/
|
|
11
|
+
async function captureTranscript(sandbox) {
|
|
12
|
+
try {
|
|
13
|
+
// Get the working directory to construct the transcript path
|
|
14
|
+
const workdir = sandbox.getWorkingDirectory();
|
|
15
|
+
// Claude Code uses the path with slashes replaced by dashes
|
|
16
|
+
const projectPath = workdir.replace(/\//g, '-');
|
|
17
|
+
const claudeProjectDir = `~/.claude/projects/${projectPath}`;
|
|
18
|
+
// Find the most recent .jsonl file (the transcript)
|
|
19
|
+
const findResult = await sandbox.runShell(`ls -t ${claudeProjectDir}/*.jsonl 2>/dev/null | head -1`);
|
|
20
|
+
if (findResult.exitCode !== 0 || !findResult.stdout.trim()) {
|
|
21
|
+
return undefined;
|
|
22
|
+
}
|
|
23
|
+
const transcriptPath = findResult.stdout.trim();
|
|
24
|
+
const content = await sandbox.readFile(transcriptPath);
|
|
25
|
+
return content;
|
|
26
|
+
}
|
|
27
|
+
catch {
|
|
28
|
+
// Transcript capture is best-effort
|
|
29
|
+
return undefined;
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Create Claude Code agent with specified authentication method.
|
|
34
|
+
*/
|
|
35
|
+
export function createClaudeCodeAgent({ useVercelAiGateway }) {
|
|
36
|
+
return {
|
|
37
|
+
name: useVercelAiGateway ? 'vercel-ai-gateway/claude-code' : 'claude-code',
|
|
38
|
+
displayName: useVercelAiGateway ? 'Claude Code (Vercel AI Gateway)' : 'Claude Code',
|
|
39
|
+
getApiKeyEnvVar() {
|
|
40
|
+
return useVercelAiGateway ? AI_GATEWAY.apiKeyEnvVar : ANTHROPIC_DIRECT.apiKeyEnvVar;
|
|
41
|
+
},
|
|
42
|
+
getDefaultModel() {
|
|
43
|
+
return 'opus';
|
|
44
|
+
},
|
|
45
|
+
async run(fixturePath, options) {
|
|
46
|
+
const startTime = Date.now();
|
|
47
|
+
let sandbox = null;
|
|
48
|
+
let agentOutput = '';
|
|
49
|
+
let aborted = false;
|
|
50
|
+
let sandboxStopped = false;
|
|
51
|
+
// Handle abort signal
|
|
52
|
+
const abortHandler = () => {
|
|
53
|
+
aborted = true;
|
|
54
|
+
if (sandbox && !sandboxStopped) {
|
|
55
|
+
sandboxStopped = true;
|
|
56
|
+
sandbox.stop().catch(() => { });
|
|
57
|
+
}
|
|
58
|
+
};
|
|
59
|
+
if (options.signal) {
|
|
60
|
+
if (options.signal.aborted) {
|
|
61
|
+
return {
|
|
62
|
+
success: false,
|
|
63
|
+
output: '',
|
|
64
|
+
error: 'Aborted before start',
|
|
65
|
+
duration: 0,
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
options.signal.addEventListener('abort', abortHandler);
|
|
69
|
+
}
|
|
70
|
+
try {
|
|
71
|
+
// Collect files from fixture
|
|
72
|
+
const allFiles = await collectLocalFiles(fixturePath);
|
|
73
|
+
const { workspaceFiles, testFiles } = splitTestFiles(allFiles);
|
|
74
|
+
// Check for abort before expensive operations
|
|
75
|
+
if (aborted) {
|
|
76
|
+
return {
|
|
77
|
+
success: false,
|
|
78
|
+
output: '',
|
|
79
|
+
error: 'Aborted',
|
|
80
|
+
duration: Date.now() - startTime,
|
|
81
|
+
};
|
|
82
|
+
}
|
|
83
|
+
// Create sandbox
|
|
84
|
+
sandbox = await SandboxManager.create({
|
|
85
|
+
timeout: options.timeout,
|
|
86
|
+
runtime: 'node24',
|
|
87
|
+
});
|
|
88
|
+
// Check for abort after sandbox creation (abort may have fired during create)
|
|
89
|
+
if (aborted) {
|
|
90
|
+
return {
|
|
91
|
+
success: false,
|
|
92
|
+
output: '',
|
|
93
|
+
error: 'Aborted',
|
|
94
|
+
duration: Date.now() - startTime,
|
|
95
|
+
sandboxId: sandbox.sandboxId,
|
|
96
|
+
};
|
|
97
|
+
}
|
|
98
|
+
// Upload workspace files (excluding tests)
|
|
99
|
+
await sandbox.uploadFiles(workspaceFiles);
|
|
100
|
+
// Run setup function if provided
|
|
101
|
+
if (options.setup) {
|
|
102
|
+
await options.setup(sandbox);
|
|
103
|
+
}
|
|
104
|
+
// Install dependencies
|
|
105
|
+
const installResult = await sandbox.runCommand('npm', ['install']);
|
|
106
|
+
if (installResult.exitCode !== 0) {
|
|
107
|
+
throw new Error(`npm install failed: ${installResult.stderr}`);
|
|
108
|
+
}
|
|
109
|
+
// Install Claude Code CLI globally
|
|
110
|
+
const cliInstall = await sandbox.runCommand('npm', [
|
|
111
|
+
'install',
|
|
112
|
+
'-g',
|
|
113
|
+
'@anthropic-ai/claude-code',
|
|
114
|
+
]);
|
|
115
|
+
if (cliInstall.exitCode !== 0) {
|
|
116
|
+
throw new Error(`Claude Code install failed: ${cliInstall.stderr}`);
|
|
117
|
+
}
|
|
118
|
+
// Verify no test files in sandbox
|
|
119
|
+
await verifyNoTestFiles(sandbox);
|
|
120
|
+
// Prepare enhanced prompt
|
|
121
|
+
const enhancedPrompt = `${options.prompt.trim()}
|
|
122
|
+
|
|
123
|
+
IMPORTANT: Do not run npm, pnpm, yarn, or any package manager commands. Dependencies have already been installed. Do not run build, test, or dev server commands. Just write the code files.`;
|
|
124
|
+
// Run Claude Code with appropriate authentication
|
|
125
|
+
const claudeResult = await sandbox.runCommand('claude', ['--print', '--model', options.model, '--dangerously-skip-permissions', enhancedPrompt], {
|
|
126
|
+
env: useVercelAiGateway
|
|
127
|
+
? {
|
|
128
|
+
// AI Gateway configuration for Claude Code
|
|
129
|
+
ANTHROPIC_BASE_URL: AI_GATEWAY.baseUrl,
|
|
130
|
+
ANTHROPIC_AUTH_TOKEN: options.apiKey,
|
|
131
|
+
ANTHROPIC_API_KEY: '',
|
|
132
|
+
}
|
|
133
|
+
: {
|
|
134
|
+
// Direct Anthropic API
|
|
135
|
+
ANTHROPIC_API_KEY: options.apiKey,
|
|
136
|
+
},
|
|
137
|
+
});
|
|
138
|
+
agentOutput = claudeResult.stdout + claudeResult.stderr;
|
|
139
|
+
if (claudeResult.exitCode !== 0) {
|
|
140
|
+
// Extract meaningful error from output (last few lines usually contain the error)
|
|
141
|
+
const errorLines = agentOutput.trim().split('\n').slice(-5).join('\n');
|
|
142
|
+
return {
|
|
143
|
+
success: false,
|
|
144
|
+
output: agentOutput,
|
|
145
|
+
error: errorLines || `Claude Code exited with code ${claudeResult.exitCode}`,
|
|
146
|
+
duration: Date.now() - startTime,
|
|
147
|
+
sandboxId: sandbox.sandboxId,
|
|
148
|
+
};
|
|
149
|
+
}
|
|
150
|
+
// Upload test files for validation
|
|
151
|
+
await sandbox.uploadFiles(testFiles);
|
|
152
|
+
// Create vitest config for EVAL.ts
|
|
153
|
+
await createVitestConfig(sandbox);
|
|
154
|
+
// Capture the Claude Code transcript
|
|
155
|
+
const transcript = await captureTranscript(sandbox);
|
|
156
|
+
// Run validation scripts
|
|
157
|
+
const validationResults = await runValidation(sandbox, options.scripts ?? []);
|
|
158
|
+
// Capture generated files
|
|
159
|
+
const generatedFiles = await captureGeneratedFiles(sandbox);
|
|
160
|
+
return {
|
|
161
|
+
success: validationResults.allPassed,
|
|
162
|
+
output: agentOutput,
|
|
163
|
+
transcript,
|
|
164
|
+
duration: Date.now() - startTime,
|
|
165
|
+
testResult: validationResults.test,
|
|
166
|
+
scriptsResults: validationResults.scripts,
|
|
167
|
+
sandboxId: sandbox.sandboxId,
|
|
168
|
+
generatedFiles,
|
|
169
|
+
};
|
|
170
|
+
}
|
|
171
|
+
catch (error) {
|
|
172
|
+
// Check if this was an abort
|
|
173
|
+
if (aborted) {
|
|
174
|
+
return {
|
|
175
|
+
success: false,
|
|
176
|
+
output: agentOutput,
|
|
177
|
+
error: 'Aborted',
|
|
178
|
+
duration: Date.now() - startTime,
|
|
179
|
+
sandboxId: sandbox?.sandboxId,
|
|
180
|
+
};
|
|
181
|
+
}
|
|
182
|
+
return {
|
|
183
|
+
success: false,
|
|
184
|
+
output: agentOutput,
|
|
185
|
+
error: error instanceof Error ? error.message : String(error),
|
|
186
|
+
duration: Date.now() - startTime,
|
|
187
|
+
sandboxId: sandbox?.sandboxId,
|
|
188
|
+
};
|
|
189
|
+
}
|
|
190
|
+
finally {
|
|
191
|
+
// Clean up abort listener
|
|
192
|
+
if (options.signal) {
|
|
193
|
+
options.signal.removeEventListener('abort', abortHandler);
|
|
194
|
+
}
|
|
195
|
+
if (sandbox && !sandboxStopped) {
|
|
196
|
+
sandboxStopped = true;
|
|
197
|
+
await sandbox.stop();
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
},
|
|
201
|
+
};
|
|
202
|
+
}
|
|
203
|
+
//# sourceMappingURL=claude-code.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"claude-code.js","sourceRoot":"","sources":["../../../src/lib/agents/claude-code.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,OAAO,EACL,cAAc,EACd,iBAAiB,EACjB,cAAc,EACd,iBAAiB,GAClB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,aAAa,EACb,qBAAqB,EACrB,kBAAkB,EAClB,UAAU,EACV,gBAAgB,GACjB,MAAM,aAAa,CAAC;AAErB;;;GAGG;AACH,KAAK,UAAU,iBAAiB,CAAC,OAAuB;IACtD,IAAI,CAAC;QACH,6DAA6D;QAC7D,MAAM,OAAO,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC;QAC9C,4DAA4D;QAC5D,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAChD,MAAM,gBAAgB,GAAG,sBAAsB,WAAW,EAAE,CAAC;QAE7D,oDAAoD;QACpD,MAAM,UAAU,GAAG,MAAM,OAAO,CAAC,QAAQ,CACvC,SAAS,gBAAgB,gCAAgC,CAC1D,CAAC;QAEF,IAAI,UAAU,CAAC,QAAQ,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC;YAC3D,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,MAAM,cAAc,GAAG,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QAChD,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC;QACvD,OAAO,OAAO,CAAC;IACjB,CAAC;IAAC,MAAM,CAAC;QACP,oCAAoC;QACpC,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,EAAE,kBAAkB,EAAmC;IAC3F,OAAO;QACL,IAAI,EAAE,kBAAkB,CAAC,CAAC,CAAC,+BAA+B,CAAC,CAAC,CAAC,aAAa;QAC1E,WAAW,EAAE,kBAAkB,CAAC,CAAC,CAAC,iCAAiC,CAAC,CAAC,CAAC,aAAa;QAEnF,eAAe;YACb,OAAO,kBAAkB,CAAC,CAAC,CAAC,UAAU,CAAC,YAAY,CAAC,CAAC,CAAC,gBAAgB,CAAC,YAAY,CAAC;QACtF,CAAC;QAED,eAAe;YACb,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,KAAK,CAAC,GAAG,CAAC,WAAmB,EAAE,OAAwB;YACvD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC7B,IAAI,OAAO,GAA0B,IAAI,CAAC;YAC1C,IAAI,WAAW,GAAG,EAAE,CAAC;YACrB,IAAI,OAAO,GAAG,KAAK,CAAC;YACpB,IAAI,cAAc,GAAG,KAAK,CAAC;YAE3B,sBAAsB;YACtB,MAAM,YAAY,GAAG,GAAG,EAAE;gBACxB,OAAO,GAAG,IAAI,CAAC;gBACf,IAAI,OAAO,IAAI,CAAC,cAAc,EAAE,CAAC;oBAC/B,cAAc,GAAG,IAAI,CAAC;oBACtB,OAAO,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;gBACjC,CAAC;YACH,CAAC,CAAC;YAEF,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;gBACnB,IAAI,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;oBAC3B,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,EAAE;wBACV,KAAK,EAAE,sBAAsB;wBAC7B,QAAQ,EAAE,CAAC;qBACZ,CAAC;gBACJ,CAAC;gBACD,OAAO,CAAC,MAAM,CAAC,
gBAAgB,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;YACzD,CAAC;YAED,IAAI,CAAC;gBACH,6BAA6B;gBAC7B,MAAM,QAAQ,GAAG,MAAM,iBAAiB,CAAC,WAAW,CAAC,CAAC;gBACtD,MAAM,EAAE,cAAc,EAAE,SAAS,EAAE,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;gBAE/D,8CAA8C;gBAC9C,IAAI,OAAO,EAAE,CAAC;oBACZ,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,EAAE;wBACV,KAAK,EAAE,SAAS;wBAChB,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;qBACjC,CAAC;gBACJ,CAAC;gBAED,iBAAiB;gBACjB,OAAO,GAAG,MAAM,cAAc,CAAC,MAAM,CAAC;oBACpC,OAAO,EAAE,OAAO,CAAC,OAAO;oBACxB,OAAO,EAAE,QAAQ;iBAClB,CAAC,CAAC;gBAEH,8EAA8E;gBAC9E,IAAI,OAAO,EAAE,CAAC;oBACZ,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,EAAE;wBACV,KAAK,EAAE,SAAS;wBAChB,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;wBAChC,SAAS,EAAE,OAAO,CAAC,SAAS;qBAC7B,CAAC;gBACJ,CAAC;gBAED,2CAA2C;gBAC3C,MAAM,OAAO,CAAC,WAAW,CAAC,cAAc,CAAC,CAAC;gBAE1C,iCAAiC;gBACjC,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;oBAClB,MAAM,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;gBAC/B,CAAC;gBAED,uBAAuB;gBACvB,MAAM,aAAa,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC;gBACnE,IAAI,aAAa,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;oBACjC,MAAM,IAAI,KAAK,CAAC,uBAAuB,aAAa,CAAC,MAAM,EAAE,CAAC,CAAC;gBACjE,CAAC;gBAED,mCAAmC;gBACnC,MAAM,UAAU,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,KAAK,EAAE;oBACjD,SAAS;oBACT,IAAI;oBACJ,2BAA2B;iBAC5B,CAAC,CAAC;gBACH,IAAI,UAAU,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;oBAC9B,MAAM,IAAI,KAAK,CAAC,+BAA+B,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;gBACtE,CAAC;gBAED,kCAAkC;gBAClC,MAAM,iBAAiB,CAAC,OAAO,CAAC,CAAC;gBAEjC,0BAA0B;gBAC1B,MAAM,cAAc,GAAG,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE;;6LAEwI,CAAC;gBAExL,kDAAkD;gBAClD,MAAM,YAAY,GAAG,MAAM,OAAO,CAAC,UAAU,CAC3C,QAAQ,EACR,CAAC,SAAS,EAAE,SAAS,EAAE,OAAO,CAAC,KAAK,EAAE,gCAAgC,EAAE,cAAc,CAAC,EACvF;oBACE,GAAG,EAAE,kBAAkB;wBACrB,CAAC,CAAC;4BACE,2CAA2C;4BAC3C,kBAAkB,EAAE,UAAU,CAAC,OAAO;4BACtC,oBAAoB,EAAE,OAAO,CAAC,MAAM;4BACpC,iBAAiB,EAAE,EAAE;yBACtB;wBACH,CAAC,CAAC;4BACE,uBAAuB;4BACvB,iBAAiB,EAAE,OAAO,CAAC,MAAM;yBAClC;iBACN,CACF,CAAC;gBAEF,WAAW,GAAG,YAAY,CAAC,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC;gBAExD,IAAI,YAAY,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;oBAChC,kFAAkF;oBAClF,
MAAM,UAAU,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACvE,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,WAAW;wBACnB,KAAK,EAAE,UAAU,IAAI,gCAAgC,YAAY,CAAC,QAAQ,EAAE;wBAC5E,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;wBAChC,SAAS,EAAE,OAAO,CAAC,SAAS;qBAC7B,CAAC;gBACJ,CAAC;gBAED,mCAAmC;gBACnC,MAAM,OAAO,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;gBAErC,mCAAmC;gBACnC,MAAM,kBAAkB,CAAC,OAAO,CAAC,CAAC;gBAElC,qCAAqC;gBACrC,MAAM,UAAU,GAAG,MAAM,iBAAiB,CAAC,OAAO,CAAC,CAAC;gBAEpD,yBAAyB;gBACzB,MAAM,iBAAiB,GAAG,MAAM,aAAa,CAAC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;gBAE9E,0BAA0B;gBAC1B,MAAM,cAAc,GAAG,MAAM,qBAAqB,CAAC,OAAO,CAAC,CAAC;gBAE5D,OAAO;oBACL,OAAO,EAAE,iBAAiB,CAAC,SAAS;oBACpC,MAAM,EAAE,WAAW;oBACnB,UAAU;oBACV,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;oBAChC,UAAU,EAAE,iBAAiB,CAAC,IAAI;oBAClC,cAAc,EAAE,iBAAiB,CAAC,OAAO;oBACzC,SAAS,EAAE,OAAO,CAAC,SAAS;oBAC5B,cAAc;iBACf,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,6BAA6B;gBAC7B,IAAI,OAAO,EAAE,CAAC;oBACZ,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,WAAW;wBACnB,KAAK,EAAE,SAAS;wBAChB,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;wBAChC,SAAS,EAAE,OAAO,EAAE,SAAS;qBAC9B,CAAC;gBACJ,CAAC;gBACD,OAAO;oBACL,OAAO,EAAE,KAAK;oBACd,MAAM,EAAE,WAAW;oBACnB,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;oBAC7D,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;oBAChC,SAAS,EAAE,OAAO,EAAE,SAAS;iBAC9B,CAAC;YACJ,CAAC;oBAAS,CAAC;gBACT,0BAA0B;gBAC1B,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;oBACnB,OAAO,CAAC,MAAM,CAAC,mBAAmB,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;gBAC5D,CAAC;gBACD,IAAI,OAAO,IAAI,CAAC,cAAc,EAAE,CAAC;oBAC/B,cAAc,GAAG,IAAI,CAAC;oBACtB,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC;gBACvB,CAAC;YACH,CAAC;QACH,CAAC;KACF,CAAC;AACF,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI Codex CLI agent implementation.
|
|
3
|
+
* Uses Vercel AI Gateway for model access.
|
|
4
|
+
*/
|
|
5
|
+
import type { Agent } from './types.js';
|
|
6
|
+
/**
 * Create Codex agent with specified authentication method.
 *
 * @param options.useVercelAiGateway When true, the agent authenticates through
 *   the Vercel AI Gateway; when false, it uses the direct OpenAI API.
 * @returns The configured Agent.
 */
export declare function createCodexAgent({ useVercelAiGateway }: {
    useVercelAiGateway: boolean;
}): Agent;
|
|
12
|
+
//# sourceMappingURL=codex.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"codex.d.ts","sourceRoot":"","sources":["../../../src/lib/agents/codex.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAmC,MAAM,YAAY,CAAC;AA+EzE;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,EAAE,kBAAkB,EAAE,EAAE;IAAE,kBAAkB,EAAE,OAAO,CAAA;CAAE,GAAG,KAAK,CAiN/F"}
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI Codex CLI agent implementation.
|
|
3
|
+
* Uses Vercel AI Gateway for model access.
|
|
4
|
+
*/
|
|
5
|
+
import { SandboxManager, collectLocalFiles, splitTestFiles, verifyNoTestFiles, } from '../sandbox.js';
|
|
6
|
+
import { runValidation, captureGeneratedFiles, createVitestConfig, AI_GATEWAY, OPENAI_DIRECT, } from './shared.js';
|
|
7
|
+
/**
 * Pull the JSONL transcript out of raw Codex output.
 *
 * With --json, Codex writes one JSON object per line. Any line whose trimmed
 * text starts with "{" and ends with "}" is kept, untrimmed and in its original
 * order; everything else is dropped. Lines are not parsed or validated.
 *
 * @param output Combined CLI output to scan.
 * @returns The kept lines joined with "\n", or undefined when the output is
 *   empty, whitespace-only, or contains no such line.
 */
function extractTranscriptFromOutput(output) {
    // Nothing to extract from missing or blank output.
    if (!output || output.trim() === '') {
        return undefined;
    }
    const jsonLines = [];
    for (const rawLine of output.split('\n')) {
        const candidate = rawLine.trim();
        if (candidate.startsWith('{') && candidate.endsWith('}')) {
            // Keep the line as emitted; trimming is only used for the check.
            jsonLines.push(rawLine);
        }
    }
    return jsonLines.length > 0 ? jsonLines.join('\n') : undefined;
}
|
|
26
|
+
/**
|
|
27
|
+
* Generate Codex config.toml content.
|
|
28
|
+
* For direct mode, we embed the API key directly since env_key doesn't work reliably in the sandbox.
|
|
29
|
+
*/
|
|
30
|
+
function generateCodexConfig(model, useVercelAiGateway, apiKey) {
|
|
31
|
+
const fullModel = model.includes('/') ? model : `openai/${model}`;
|
|
32
|
+
if (useVercelAiGateway) {
|
|
33
|
+
return `# Codex configuration for Vercel AI Gateway
|
|
34
|
+
profile = "default"
|
|
35
|
+
|
|
36
|
+
[model_providers.vercel]
|
|
37
|
+
name = "Vercel AI Gateway"
|
|
38
|
+
base_url = "${AI_GATEWAY.openAiBaseUrl}"
|
|
39
|
+
env_key = "${AI_GATEWAY.apiKeyEnvVar}"
|
|
40
|
+
wire_api = "chat"
|
|
41
|
+
|
|
42
|
+
[profiles.default]
|
|
43
|
+
model_provider = "vercel"
|
|
44
|
+
model = "${fullModel}"
|
|
45
|
+
`;
|
|
46
|
+
}
|
|
47
|
+
else {
|
|
48
|
+
// For direct mode, embed the API key directly in config
|
|
49
|
+
// This is safe because the sandbox is ephemeral and isolated
|
|
50
|
+
return `# Direct OpenAI API configuration
|
|
51
|
+
profile = "default"
|
|
52
|
+
|
|
53
|
+
[model_providers.openai]
|
|
54
|
+
name = "OpenAI"
|
|
55
|
+
base_url = "${OPENAI_DIRECT.baseUrl}"
|
|
56
|
+
api_key = "${apiKey}"
|
|
57
|
+
wire_api = "chat"
|
|
58
|
+
|
|
59
|
+
[profiles.default]
|
|
60
|
+
model_provider = "openai"
|
|
61
|
+
model = "${fullModel}"
|
|
62
|
+
`;
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* Create Codex agent with specified authentication method.
|
|
67
|
+
*/
|
|
68
|
+
export function createCodexAgent({ useVercelAiGateway }) {
|
|
69
|
+
return {
|
|
70
|
+
name: useVercelAiGateway ? 'vercel-ai-gateway/codex' : 'codex',
|
|
71
|
+
displayName: useVercelAiGateway ? 'OpenAI Codex (Vercel AI Gateway)' : 'OpenAI Codex',
|
|
72
|
+
getApiKeyEnvVar() {
|
|
73
|
+
return useVercelAiGateway ? AI_GATEWAY.apiKeyEnvVar : OPENAI_DIRECT.apiKeyEnvVar;
|
|
74
|
+
},
|
|
75
|
+
getDefaultModel() {
|
|
76
|
+
return 'openai/gpt-5.2-codex';
|
|
77
|
+
},
|
|
78
|
+
async run(fixturePath, options) {
|
|
79
|
+
const startTime = Date.now();
|
|
80
|
+
let sandbox = null;
|
|
81
|
+
let agentOutput = '';
|
|
82
|
+
let aborted = false;
|
|
83
|
+
let sandboxStopped = false;
|
|
84
|
+
// Handle abort signal
|
|
85
|
+
const abortHandler = () => {
|
|
86
|
+
aborted = true;
|
|
87
|
+
if (sandbox && !sandboxStopped) {
|
|
88
|
+
sandboxStopped = true;
|
|
89
|
+
sandbox.stop().catch(() => { });
|
|
90
|
+
}
|
|
91
|
+
};
|
|
92
|
+
if (options.signal) {
|
|
93
|
+
if (options.signal.aborted) {
|
|
94
|
+
return {
|
|
95
|
+
success: false,
|
|
96
|
+
output: '',
|
|
97
|
+
error: 'Aborted before start',
|
|
98
|
+
duration: 0,
|
|
99
|
+
};
|
|
100
|
+
}
|
|
101
|
+
options.signal.addEventListener('abort', abortHandler);
|
|
102
|
+
}
|
|
103
|
+
try {
|
|
104
|
+
// Collect files from fixture
|
|
105
|
+
const allFiles = await collectLocalFiles(fixturePath);
|
|
106
|
+
const { workspaceFiles, testFiles } = splitTestFiles(allFiles);
|
|
107
|
+
// Check for abort before expensive operations
|
|
108
|
+
if (aborted) {
|
|
109
|
+
return {
|
|
110
|
+
success: false,
|
|
111
|
+
output: '',
|
|
112
|
+
error: 'Aborted',
|
|
113
|
+
duration: Date.now() - startTime,
|
|
114
|
+
};
|
|
115
|
+
}
|
|
116
|
+
// Create sandbox
|
|
117
|
+
sandbox = await SandboxManager.create({
|
|
118
|
+
timeout: options.timeout,
|
|
119
|
+
runtime: 'node24',
|
|
120
|
+
});
|
|
121
|
+
// Check for abort after sandbox creation (abort may have fired during create)
|
|
122
|
+
if (aborted) {
|
|
123
|
+
return {
|
|
124
|
+
success: false,
|
|
125
|
+
output: '',
|
|
126
|
+
error: 'Aborted',
|
|
127
|
+
duration: Date.now() - startTime,
|
|
128
|
+
sandboxId: sandbox.sandboxId,
|
|
129
|
+
};
|
|
130
|
+
}
|
|
131
|
+
// Upload workspace files (excluding tests)
|
|
132
|
+
await sandbox.uploadFiles(workspaceFiles);
|
|
133
|
+
// Run setup function if provided
|
|
134
|
+
if (options.setup) {
|
|
135
|
+
await options.setup(sandbox);
|
|
136
|
+
}
|
|
137
|
+
// Install dependencies
|
|
138
|
+
const installResult = await sandbox.runCommand('npm', ['install']);
|
|
139
|
+
if (installResult.exitCode !== 0) {
|
|
140
|
+
throw new Error(`npm install failed: ${installResult.stderr}`);
|
|
141
|
+
}
|
|
142
|
+
// Install Codex CLI globally
|
|
143
|
+
const cliInstall = await sandbox.runCommand('npm', [
|
|
144
|
+
'install',
|
|
145
|
+
'-g',
|
|
146
|
+
'@openai/codex',
|
|
147
|
+
]);
|
|
148
|
+
if (cliInstall.exitCode !== 0) {
|
|
149
|
+
throw new Error(`Codex CLI install failed: ${cliInstall.stderr}`);
|
|
150
|
+
}
|
|
151
|
+
// Create Codex config directory and config file
|
|
152
|
+
await sandbox.runShell('mkdir -p ~/.codex');
|
|
153
|
+
const configContent = generateCodexConfig(options.model, useVercelAiGateway, useVercelAiGateway ? undefined : options.apiKey);
|
|
154
|
+
await sandbox.runShell(`cat > ~/.codex/config.toml << 'EOF'
|
|
155
|
+
${configContent}
|
|
156
|
+
EOF`);
|
|
157
|
+
// Verify no test files in sandbox
|
|
158
|
+
await verifyNoTestFiles(sandbox);
|
|
159
|
+
// Prepare enhanced prompt
|
|
160
|
+
const enhancedPrompt = `${options.prompt.trim()}
|
|
161
|
+
|
|
162
|
+
IMPORTANT: Do not run npm, pnpm, yarn, or any package manager commands. Dependencies have already been installed. Do not run build, test, or dev server commands. Just write the code files.`;
|
|
163
|
+
// Run Codex CLI using exec mode for non-interactive execution
|
|
164
|
+
// Use --dangerously-bypass-approvals-and-sandbox since Vercel sandbox provides isolation
|
|
165
|
+
// Use --json for structured output and --skip-git-repo-check since sandbox is not a git repo
|
|
166
|
+
// Model is configured in config.toml, so we don't pass --model here
|
|
167
|
+
const codexResult = await sandbox.runCommand('codex', [
|
|
168
|
+
'exec',
|
|
169
|
+
'--dangerously-bypass-approvals-and-sandbox',
|
|
170
|
+
'--json',
|
|
171
|
+
'--skip-git-repo-check',
|
|
172
|
+
enhancedPrompt,
|
|
173
|
+
], {
|
|
174
|
+
env: useVercelAiGateway
|
|
175
|
+
? {
|
|
176
|
+
[AI_GATEWAY.apiKeyEnvVar]: options.apiKey,
|
|
177
|
+
}
|
|
178
|
+
: {
|
|
179
|
+
[OPENAI_DIRECT.apiKeyEnvVar]: options.apiKey,
|
|
180
|
+
},
|
|
181
|
+
});
|
|
182
|
+
agentOutput = codexResult.stdout + codexResult.stderr;
|
|
183
|
+
if (codexResult.exitCode !== 0) {
|
|
184
|
+
// Extract meaningful error from output (last few lines usually contain the error)
|
|
185
|
+
const errorLines = agentOutput.trim().split('\n').slice(-5).join('\n');
|
|
186
|
+
return {
|
|
187
|
+
success: false,
|
|
188
|
+
output: agentOutput,
|
|
189
|
+
error: errorLines || `Codex CLI exited with code ${codexResult.exitCode}`,
|
|
190
|
+
duration: Date.now() - startTime,
|
|
191
|
+
sandboxId: sandbox.sandboxId,
|
|
192
|
+
};
|
|
193
|
+
}
|
|
194
|
+
// Upload test files for validation
|
|
195
|
+
await sandbox.uploadFiles(testFiles);
|
|
196
|
+
// Create vitest config for EVAL.ts
|
|
197
|
+
await createVitestConfig(sandbox);
|
|
198
|
+
// Extract transcript from the Codex JSON output (--json flag outputs JSONL)
|
|
199
|
+
const transcript = extractTranscriptFromOutput(agentOutput);
|
|
200
|
+
// Run validation scripts
|
|
201
|
+
const validationResults = await runValidation(sandbox, options.scripts ?? []);
|
|
202
|
+
// Capture generated files
|
|
203
|
+
const generatedFiles = await captureGeneratedFiles(sandbox);
|
|
204
|
+
return {
|
|
205
|
+
success: validationResults.allPassed,
|
|
206
|
+
output: agentOutput,
|
|
207
|
+
transcript,
|
|
208
|
+
duration: Date.now() - startTime,
|
|
209
|
+
testResult: validationResults.test,
|
|
210
|
+
scriptsResults: validationResults.scripts,
|
|
211
|
+
sandboxId: sandbox.sandboxId,
|
|
212
|
+
generatedFiles,
|
|
213
|
+
};
|
|
214
|
+
}
|
|
215
|
+
catch (error) {
|
|
216
|
+
// Check if this was an abort
|
|
217
|
+
if (aborted) {
|
|
218
|
+
return {
|
|
219
|
+
success: false,
|
|
220
|
+
output: agentOutput,
|
|
221
|
+
error: 'Aborted',
|
|
222
|
+
duration: Date.now() - startTime,
|
|
223
|
+
sandboxId: sandbox?.sandboxId,
|
|
224
|
+
};
|
|
225
|
+
}
|
|
226
|
+
return {
|
|
227
|
+
success: false,
|
|
228
|
+
output: agentOutput,
|
|
229
|
+
error: error instanceof Error ? error.message : String(error),
|
|
230
|
+
duration: Date.now() - startTime,
|
|
231
|
+
sandboxId: sandbox?.sandboxId,
|
|
232
|
+
};
|
|
233
|
+
}
|
|
234
|
+
finally {
|
|
235
|
+
// Clean up abort listener
|
|
236
|
+
if (options.signal) {
|
|
237
|
+
options.signal.removeEventListener('abort', abortHandler);
|
|
238
|
+
}
|
|
239
|
+
if (sandbox && !sandboxStopped) {
|
|
240
|
+
sandboxStopped = true;
|
|
241
|
+
await sandbox.stop();
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
},
|
|
245
|
+
};
|
|
246
|
+
}
|
|
247
|
+
//# sourceMappingURL=codex.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"codex.js","sourceRoot":"","sources":["../../../src/lib/agents/codex.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,OAAO,EACL,cAAc,EACd,iBAAiB,EACjB,cAAc,EACd,iBAAiB,GAClB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,aAAa,EACb,qBAAqB,EACrB,kBAAkB,EAClB,UAAU,EACV,aAAa,GACd,MAAM,aAAa,CAAC;AAErB;;;GAGG;AACH,SAAS,2BAA2B,CAAC,MAAc;IACjD,IAAI,CAAC,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC;QAC9B,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,8DAA8D;IAC9D,2DAA2D;IAC3D,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE;QAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAC5B,OAAO,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;;GAGG;AACH,SAAS,mBAAmB,CAAC,KAAa,EAAE,kBAA2B,EAAE,MAAe;IACtF,MAAM,SAAS,GAAG,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,UAAU,KAAK,EAAE,CAAC;IAElE,IAAI,kBAAkB,EAAE,CAAC;QACvB,OAAO;;;;;cAKG,UAAU,CAAC,aAAa;aACzB,UAAU,CAAC,YAAY;;;;;WAKzB,SAAS;CACnB,CAAC;IACA,CAAC;SAAM,CAAC;QACN,wDAAwD;QACxD,6DAA6D;QAC7D,OAAO;;;;;cAKG,aAAa,CAAC,OAAO;aACtB,MAAM;;;;;WAKR,SAAS;CACnB,CAAC;IACA,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,EAAE,kBAAkB,EAAmC;IACtF,OAAO;QACL,IAAI,EAAE,kBAAkB,CAAC,CAAC,CAAC,yBAAyB,CAAC,CAAC,CAAC,OAAO;QAC9D,WAAW,EAAE,kBAAkB,CAAC,CAAC,CAAC,kCAAkC,CAAC,CAAC,CAAC,cAAc;QAErF,eAAe;YACb,OAAO,kBAAkB,CAAC,CAAC,CAAC,UAAU,CAAC,YAAY,CAAC,CAAC,CAAC,aAAa,CAAC,YAAY,CAAC;QACnF,CAAC;QAED,eAAe;YACb,OAAO,sBAAsB,CAAC;QAChC,CAAC;QAED,KAAK,CAAC,GAAG,CAAC,WAAmB,EAAE,OAAwB;YACvD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC7B,IAAI,OAAO,GAA0B,IAAI,CAAC;YAC1C,IAAI,WAAW,GAAG,EAAE,CAAC;YACrB,IAAI,OAAO,GAAG,KAAK,CAAC;YACpB,IAAI,cAAc,GAAG,KAAK,CAAC;YAE3B,sBAAsB;YACtB,MAAM,YAAY,GAAG,GAAG,EAAE;gBACxB,OAAO,GAAG,IAAI,CAAC;gBACf,IAAI,OAAO,IAAI,CAAC,cAAc,EAAE,CAAC;oBAC/B,cAAc,GAAG,IAAI,CAAC;oBACtB,OAAO,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;gBACjC,C
AAC;YACH,CAAC,CAAC;YAEF,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;gBACnB,IAAI,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;oBAC3B,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,EAAE;wBACV,KAAK,EAAE,sBAAsB;wBAC7B,QAAQ,EAAE,CAAC;qBACZ,CAAC;gBACJ,CAAC;gBACD,OAAO,CAAC,MAAM,CAAC,gBAAgB,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;YACzD,CAAC;YAED,IAAI,CAAC;gBACH,6BAA6B;gBAC7B,MAAM,QAAQ,GAAG,MAAM,iBAAiB,CAAC,WAAW,CAAC,CAAC;gBACtD,MAAM,EAAE,cAAc,EAAE,SAAS,EAAE,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;gBAE/D,8CAA8C;gBAC9C,IAAI,OAAO,EAAE,CAAC;oBACZ,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,EAAE;wBACV,KAAK,EAAE,SAAS;wBAChB,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;qBACjC,CAAC;gBACJ,CAAC;gBAED,iBAAiB;gBACjB,OAAO,GAAG,MAAM,cAAc,CAAC,MAAM,CAAC;oBACpC,OAAO,EAAE,OAAO,CAAC,OAAO;oBACxB,OAAO,EAAE,QAAQ;iBAClB,CAAC,CAAC;gBAEH,8EAA8E;gBAC9E,IAAI,OAAO,EAAE,CAAC;oBACZ,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,EAAE;wBACV,KAAK,EAAE,SAAS;wBAChB,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;wBAChC,SAAS,EAAE,OAAO,CAAC,SAAS;qBAC7B,CAAC;gBACJ,CAAC;gBAED,2CAA2C;gBAC3C,MAAM,OAAO,CAAC,WAAW,CAAC,cAAc,CAAC,CAAC;gBAE1C,iCAAiC;gBACjC,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;oBAClB,MAAM,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;gBAC/B,CAAC;gBAED,uBAAuB;gBACvB,MAAM,aAAa,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC;gBACnE,IAAI,aAAa,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;oBACjC,MAAM,IAAI,KAAK,CAAC,uBAAuB,aAAa,CAAC,MAAM,EAAE,CAAC,CAAC;gBACjE,CAAC;gBAED,6BAA6B;gBAC7B,MAAM,UAAU,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,KAAK,EAAE;oBACjD,SAAS;oBACT,IAAI;oBACJ,eAAe;iBAChB,CAAC,CAAC;gBACH,IAAI,UAAU,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;oBAC9B,MAAM,IAAI,KAAK,CAAC,6BAA6B,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;gBACpE,CAAC;gBAED,gDAAgD;gBAChD,MAAM,OAAO,CAAC,QAAQ,CAAC,mBAAmB,CAAC,CAAC;gBAC5C,MAAM,aAAa,GAAG,mBAAmB,CACvC,OAAO,CAAC,KAAK,EACb,kBAAkB,EAClB,kBAAkB,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAChD,CAAC;gBACF,MAAM,OAAO,CAAC,QAAQ,CAAC;EAC3B,aAAa;IACX,CAAC,CAAC;gBAEA,kCAAkC;gBAClC,MAAM,iBAAiB,CAAC,OAAO,CAAC,CAAC;gBAEjC,0BAA0B;gBAC1B,MAAM,cAAc,GAAG,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE;;6LAEwI,CAAC;gBAExL
,8DAA8D;gBAC9D,yFAAyF;gBACzF,6FAA6F;gBAC7F,oEAAoE;gBACpE,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC,UAAU,CAC1C,OAAO,EACP;oBACE,MAAM;oBACN,4CAA4C;oBAC5C,QAAQ;oBACR,uBAAuB;oBACvB,cAAc;iBACf,EACD;oBACE,GAAG,EAAE,kBAAkB;wBACrB,CAAC,CAAC;4BACE,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC,MAAM;yBAC1C;wBACH,CAAC,CAAC;4BACE,CAAC,aAAa,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC,MAAM;yBAC7C;iBACN,CACF,CAAC;gBAEF,WAAW,GAAG,WAAW,CAAC,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC;gBAEtD,IAAI,WAAW,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;oBAC/B,kFAAkF;oBAClF,MAAM,UAAU,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACvE,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,WAAW;wBACnB,KAAK,EAAE,UAAU,IAAI,8BAA8B,WAAW,CAAC,QAAQ,EAAE;wBACzE,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;wBAChC,SAAS,EAAE,OAAO,CAAC,SAAS;qBAC7B,CAAC;gBACJ,CAAC;gBAED,mCAAmC;gBACnC,MAAM,OAAO,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;gBAErC,mCAAmC;gBACnC,MAAM,kBAAkB,CAAC,OAAO,CAAC,CAAC;gBAElC,4EAA4E;gBAC5E,MAAM,UAAU,GAAG,2BAA2B,CAAC,WAAW,CAAC,CAAC;gBAE5D,yBAAyB;gBACzB,MAAM,iBAAiB,GAAG,MAAM,aAAa,CAAC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;gBAE9E,0BAA0B;gBAC1B,MAAM,cAAc,GAAG,MAAM,qBAAqB,CAAC,OAAO,CAAC,CAAC;gBAE5D,OAAO;oBACL,OAAO,EAAE,iBAAiB,CAAC,SAAS;oBACpC,MAAM,EAAE,WAAW;oBACnB,UAAU;oBACV,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;oBAChC,UAAU,EAAE,iBAAiB,CAAC,IAAI;oBAClC,cAAc,EAAE,iBAAiB,CAAC,OAAO;oBACzC,SAAS,EAAE,OAAO,CAAC,SAAS;oBAC5B,cAAc;iBACf,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,6BAA6B;gBAC7B,IAAI,OAAO,EAAE,CAAC;oBACZ,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,WAAW;wBACnB,KAAK,EAAE,SAAS;wBAChB,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;wBAChC,SAAS,EAAE,OAAO,EAAE,SAAS;qBAC9B,CAAC;gBACJ,CAAC;gBACD,OAAO;oBACL,OAAO,EAAE,KAAK;oBACd,MAAM,EAAE,WAAW;oBACnB,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;oBAC7D,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;oBAChC,SAAS,EAAE,OAAO,EAAE,SAAS;iBAC9B,CAAC;YACJ,CAAC;oBAAS,CAAC;gBACT,0BAA0B;gBAC1B,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;oBACnB,OAAO,CAAC,MAAM,CA
AC,mBAAmB,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;gBAC5D,CAAC;gBACD,IAAI,OAAO,IAAI,CAAC,cAAc,EAAE,CAAC;oBAC/B,cAAc,GAAG,IAAI,CAAC;oBACtB,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC;gBACvB,CAAC;YACH,CAAC;QACH,CAAC;KACF,CAAC;AACF,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent registry with built-in agents.
|
|
3
|
+
*/
|
|
4
|
+
import { registerAgent, getAgent, listAgents, hasAgent } from './registry.js';
|
|
5
|
+
export { registerAgent, getAgent, listAgents, hasAgent };
|
|
6
|
+
export type { Agent, AgentRunOptions, AgentRunResult } from './types.js';
|
|
7
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/lib/agents/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAW9E,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC;AAGzD,YAAY,EAAE,KAAK,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent registry with built-in agents.
|
|
3
|
+
*/
|
|
4
|
+
import { registerAgent, getAgent, listAgents, hasAgent } from './registry.js';
|
|
5
|
+
import { createClaudeCodeAgent } from './claude-code.js';
|
|
6
|
+
import { createCodexAgent } from './codex.js';
|
|
7
|
+
// Register all agent variants (Vercel AI Gateway + Direct API)
|
|
8
|
+
registerAgent(createClaudeCodeAgent({ useVercelAiGateway: true })); // vercel-ai-gateway/claude-code
|
|
9
|
+
registerAgent(createClaudeCodeAgent({ useVercelAiGateway: false })); // claude-code
|
|
10
|
+
registerAgent(createCodexAgent({ useVercelAiGateway: true })); // vercel-ai-gateway/codex
|
|
11
|
+
registerAgent(createCodexAgent({ useVercelAiGateway: false })); // codex
|
|
12
|
+
// Re-export registry functions
|
|
13
|
+
export { registerAgent, getAgent, listAgents, hasAgent };
|
|
14
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/lib/agents/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9E,OAAO,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAE9C,+DAA+D;AAC/D,aAAa,CAAC,qBAAqB,CAAC,EAAE,kBAAkB,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAG,gCAAgC;AACtG,aAAa,CAAC,qBAAqB,CAAC,EAAE,kBAAkB,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAE,cAAc;AACpF,aAAa,CAAC,gBAAgB,CAAC,EAAE,kBAAkB,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAQ,0BAA0B;AAChG,aAAa,CAAC,gBAAgB,CAAC,EAAE,kBAAkB,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAO,QAAQ;AAE9E,+BAA+B;AAC/B,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent registry for managing available agents.
|
|
3
|
+
*/
|
|
4
|
+
import type { Agent } from './types.js';
|
|
5
|
+
import type { AgentType } from '../types.js';
|
|
6
|
+
/**
|
|
7
|
+
* Register an agent in the registry under its name, replacing any agent already registered with that name.
|
|
8
|
+
*/
|
|
9
|
+
export declare function registerAgent(agent: Agent): void;
|
|
10
|
+
/**
|
|
11
|
+
* Get an agent by name.
|
|
12
|
+
* @throws Error if no agent is registered under the given name.
|
|
13
|
+
*/
|
|
14
|
+
export declare function getAgent(name: AgentType): Agent;
|
|
15
|
+
/**
|
|
16
|
+
* List the names of all registered agents.
|
|
17
|
+
*/
|
|
18
|
+
export declare function listAgents(): string[];
|
|
19
|
+
/**
|
|
20
|
+
* Check whether an agent is registered under the given name.
|
|
21
|
+
*/
|
|
22
|
+
export declare function hasAgent(name: string): boolean;
|
|
23
|
+
//# sourceMappingURL=registry.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../../src/lib/agents/registry.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACxC,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI7C;;GAEG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI,CAEhD;AAED;;;GAGG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,SAAS,GAAG,KAAK,CAO/C;AAED;;GAEG;AACH,wBAAgB,UAAU,IAAI,MAAM,EAAE,CAErC;AAED;;GAEG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAE9C"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent registry for managing available agents.
|
|
3
|
+
*/
|
|
4
|
+
const agents = new Map(); // module-level store of registered agents, keyed by agent.name (see registerAgent)
|
|
5
|
+
/**
|
|
6
|
+
* Register an agent in the registry, keyed by `agent.name`; an agent already registered under the same name is replaced.
|
|
7
|
+
*/
|
|
8
|
+
export function registerAgent(agent) {
|
|
9
|
+
agents.set(agent.name, agent); // Map.set overwrites any existing entry for this name
|
|
10
|
+
}
|
|
11
|
+
/**
|
|
12
|
+
* Get a registered agent by name.
|
|
13
|
+
* @throws Error if no agent is registered under `name`; the message lists the names that are available.
|
|
14
|
+
*/
|
|
15
|
+
export function getAgent(name) {
|
|
16
|
+
const agent = agents.get(name); // undefined when nothing is registered under this name
|
|
17
|
+
if (!agent) {
|
|
18
|
+
const available = Array.from(agents.keys()).join(', '); // comma-separated registered names for the error message
|
|
19
|
+
throw new Error(`Unknown agent: ${name}. Available agents: ${available}`);
|
|
20
|
+
}
|
|
21
|
+
return agent;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* List the names of all registered agents (the registry keys, not the agent objects).
|
|
25
|
+
*/
|
|
26
|
+
export function listAgents() {
|
|
27
|
+
return Array.from(agents.keys()); // fresh array copied from the Map's keys
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Check whether an agent is registered under the given name.
|
|
31
|
+
*/
|
|
32
|
+
export function hasAgent(name) {
|
|
33
|
+
return agents.has(name); // key lookup only; does not throw for unknown names
|
|
34
|
+
}
|
|
35
|
+
//# sourceMappingURL=registry.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry.js","sourceRoot":"","sources":["../../../src/lib/agents/registry.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,MAAM,MAAM,GAAG,IAAI,GAAG,EAAiB,CAAC;AAExC;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,KAAY;IACxC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;AAChC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,QAAQ,CAAC,IAAe;IACtC,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC/B,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,IAAI,KAAK,CAAC,kBAAkB,IAAI,uBAAuB,SAAS,EAAE,CAAC,CAAC;IAC5E,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU;IACxB,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;AACnC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,QAAQ,CAAC,IAAY;IACnC,OAAO,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared utilities for agent implementations.
|
|
3
|
+
*/
|
|
4
|
+
import type { ScriptResult } from './types.js';
|
|
5
|
+
import type { SandboxManager } from '../sandbox.js';
|
|
6
|
+
/**
|
|
7
|
+
* Combined validation results.
|
|
8
|
+
*/
|
|
9
|
+
export interface ValidationResults {
|
|
10
|
+
allPassed: boolean;
|
|
11
|
+
test?: ScriptResult;
|
|
12
|
+
scripts: Record<string, ScriptResult>;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Run validation scripts in the sandbox.
|
|
16
|
+
*/
|
|
17
|
+
export declare function runValidation(sandbox: SandboxManager, scripts: string[]): Promise<ValidationResults>;
|
|
18
|
+
/**
|
|
19
|
+
* Capture source files generated by the agent.
|
|
20
|
+
*/
|
|
21
|
+
export declare function captureGeneratedFiles(sandbox: SandboxManager): Promise<Record<string, string>>;
|
|
22
|
+
/**
|
|
23
|
+
* Create vitest config for running EVAL.ts.
|
|
24
|
+
*/
|
|
25
|
+
export declare function createVitestConfig(sandbox: SandboxManager): Promise<void>;
|
|
26
|
+
/**
|
|
27
|
+
* AI Gateway configuration.
|
|
28
|
+
*/
|
|
29
|
+
export declare const AI_GATEWAY: {
|
|
30
|
+
readonly baseUrl: "https://ai-gateway.vercel.sh";
|
|
31
|
+
readonly openAiBaseUrl: "https://ai-gateway.vercel.sh/v1";
|
|
32
|
+
readonly apiKeyEnvVar: "AI_GATEWAY_API_KEY";
|
|
33
|
+
};
|
|
34
|
+
/**
|
|
35
|
+
* Direct API configuration for Anthropic.
|
|
36
|
+
*/
|
|
37
|
+
export declare const ANTHROPIC_DIRECT: {
|
|
38
|
+
readonly apiKeyEnvVar: "ANTHROPIC_API_KEY";
|
|
39
|
+
};
|
|
40
|
+
/**
|
|
41
|
+
* Direct API configuration for OpenAI.
|
|
42
|
+
*/
|
|
43
|
+
export declare const OPENAI_DIRECT: {
|
|
44
|
+
readonly baseUrl: "https://api.openai.com/v1";
|
|
45
|
+
readonly apiKeyEnvVar: "OPENAI_API_KEY";
|
|
46
|
+
};
|
|
47
|
+
//# sourceMappingURL=shared.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"shared.d.ts","sourceRoot":"","sources":["../../../src/lib/agents/shared.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAC/C,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAEpD;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,SAAS,EAAE,OAAO,CAAC;IACnB,IAAI,CAAC,EAAE,YAAY,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;CACvC;AAED;;GAEG;AACH,wBAAsB,aAAa,CACjC,OAAO,EAAE,cAAc,EACvB,OAAO,EAAE,MAAM,EAAE,GAChB,OAAO,CAAC,iBAAiB,CAAC,CAgC5B;AAED;;GAEG;AACH,wBAAsB,qBAAqB,CAAC,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CA2BpG;AAED;;GAEG;AACH,wBAAsB,kBAAkB,CAAC,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,CAY/E;AAED;;GAEG;AACH,eAAO,MAAM,UAAU;;;;CAIb,CAAC;AAEX;;GAEG;AACH,eAAO,MAAM,gBAAgB;;CAEnB,CAAC;AAEX;;GAEG;AACH,eAAO,MAAM,aAAa;;;CAGhB,CAAC"}
|