@vercel/agent-eval 0.0.5 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -10
- package/dist/cli.js +9 -4
- package/dist/cli.js.map +1 -1
- package/dist/lib/agents/ai-sdk-agent.d.ts +10 -0
- package/dist/lib/agents/ai-sdk-agent.d.ts.map +1 -0
- package/dist/lib/agents/ai-sdk-agent.js +427 -0
- package/dist/lib/agents/ai-sdk-agent.js.map +1 -0
- package/dist/lib/agents/claude-code.d.ts.map +1 -1
- package/dist/lib/agents/claude-code.js +2 -1
- package/dist/lib/agents/claude-code.js.map +1 -1
- package/dist/lib/agents/codex.d.ts.map +1 -1
- package/dist/lib/agents/codex.js +2 -1
- package/dist/lib/agents/codex.js.map +1 -1
- package/dist/lib/agents/index.d.ts.map +1 -1
- package/dist/lib/agents/index.js +2 -0
- package/dist/lib/agents/index.js.map +1 -1
- package/dist/lib/agents/opencode.d.ts.map +1 -1
- package/dist/lib/agents/opencode.js +3 -2
- package/dist/lib/agents/opencode.js.map +1 -1
- package/dist/lib/agents/types.d.ts +3 -1
- package/dist/lib/agents/types.d.ts.map +1 -1
- package/dist/lib/config.d.ts +1 -0
- package/dist/lib/config.d.ts.map +1 -1
- package/dist/lib/config.js +4 -1
- package/dist/lib/config.js.map +1 -1
- package/dist/lib/docker-sandbox.d.ts.map +1 -1
- package/dist/lib/docker-sandbox.js +3 -0
- package/dist/lib/docker-sandbox.js.map +1 -1
- package/dist/lib/init.js +2 -2
- package/dist/lib/runner.d.ts +1 -0
- package/dist/lib/runner.d.ts.map +1 -1
- package/dist/lib/runner.js +41 -8
- package/dist/lib/runner.js.map +1 -1
- package/dist/lib/sandbox.d.ts +2 -3
- package/dist/lib/sandbox.d.ts.map +1 -1
- package/dist/lib/sandbox.js +3 -11
- package/dist/lib/sandbox.js.map +1 -1
- package/dist/lib/types.d.ts +7 -0
- package/dist/lib/types.d.ts.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -208,6 +208,7 @@ Choose your agent and authentication method:
|
|
|
208
208
|
agent: 'vercel-ai-gateway/claude-code' // Claude Code via AI Gateway
|
|
209
209
|
agent: 'vercel-ai-gateway/codex' // OpenAI Codex via AI Gateway
|
|
210
210
|
agent: 'vercel-ai-gateway/opencode' // OpenCode via AI Gateway
|
|
211
|
+
agent: 'vercel-ai-gateway/ai-sdk-harness' // Simple AI SDK harness (any model)
|
|
211
212
|
|
|
212
213
|
// Direct API (uses provider keys directly)
|
|
213
214
|
agent: 'claude-code' // requires ANTHROPIC_API_KEY
|
|
@@ -218,22 +219,28 @@ See the Environment Variables section below for setup instructions.
|
|
|
218
219
|
|
|
219
220
|
### OpenCode Model Configuration
|
|
220
221
|
|
|
221
|
-
OpenCode uses Vercel AI Gateway exclusively. Models
|
|
222
|
+
OpenCode uses Vercel AI Gateway exclusively. Models **must** be specified with the `vercel/{provider}/{model}` format:
|
|
222
223
|
|
|
223
224
|
```typescript
|
|
224
225
|
// Anthropic models
|
|
225
|
-
model: 'anthropic/claude-sonnet-4'
|
|
226
|
-
model: 'anthropic/claude-opus-4'
|
|
226
|
+
model: 'vercel/anthropic/claude-sonnet-4'
|
|
227
|
+
model: 'vercel/anthropic/claude-opus-4'
|
|
228
|
+
|
|
229
|
+
// Minimax models
|
|
230
|
+
model: 'vercel/minimax/minimax-m2.1'
|
|
231
|
+
model: 'vercel/minimax/minimax-m2.1-lightning'
|
|
227
232
|
|
|
228
233
|
// Moonshot AI (Kimi) models
|
|
229
|
-
model: 'moonshotai/kimi-k2'
|
|
230
|
-
model: 'moonshotai/kimi-k2-thinking'
|
|
234
|
+
model: 'vercel/moonshotai/kimi-k2'
|
|
235
|
+
model: 'vercel/moonshotai/kimi-k2-thinking'
|
|
231
236
|
|
|
232
237
|
// OpenAI models
|
|
233
|
-
model: 'openai/gpt-4o'
|
|
234
|
-
model: 'openai/o3'
|
|
238
|
+
model: 'vercel/openai/gpt-4o'
|
|
239
|
+
model: 'vercel/openai/o3'
|
|
235
240
|
```
|
|
236
241
|
|
|
242
|
+
> **Important:** The `vercel/` prefix is required. OpenCode's config sets up a `vercel` provider, so the model string must start with `vercel/` to route through Vercel AI Gateway correctly. Using just `anthropic/claude-sonnet-4` (without the `vercel/` prefix) will fail with a "provider not found" error.
|
|
243
|
+
|
|
237
244
|
Under the hood, the agent creates an `opencode.json` config file that configures the Vercel provider:
|
|
238
245
|
|
|
239
246
|
```json
|
|
@@ -255,6 +262,28 @@ Under the hood, the agent creates an `opencode.json` config file that configures
|
|
|
255
262
|
|
|
256
263
|
And runs: `opencode run "<prompt>" --model {provider}/{model} --format json`
|
|
257
264
|
|
|
265
|
+
### AI SDK Harness Model Configuration
|
|
266
|
+
|
|
267
|
+
The AI SDK harness (`vercel-ai-gateway/ai-sdk-harness`) is a lightweight agent that works with **any model** available on Vercel AI Gateway. Unlike OpenCode, it uses the standard `{provider}/{model}` format without a `vercel/` prefix:
|
|
268
|
+
|
|
269
|
+
```typescript
|
|
270
|
+
// Anthropic models
|
|
271
|
+
model: 'anthropic/claude-sonnet-4'
|
|
272
|
+
model: 'anthropic/claude-opus-4'
|
|
273
|
+
|
|
274
|
+
// Moonshot AI (Kimi) models
|
|
275
|
+
model: 'moonshotai/kimi-k2.5'
|
|
276
|
+
model: 'moonshotai/kimi-k2-thinking'
|
|
277
|
+
|
|
278
|
+
// Minimax models
|
|
279
|
+
model: 'minimax/minimax-m2.1'
|
|
280
|
+
|
|
281
|
+
// OpenAI models
|
|
282
|
+
model: 'openai/gpt-4o'
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
The AI SDK harness includes these tools: `readFile`, `writeFile`, `editFile`, `listFiles`, `glob`, `grep`, and `bash`. It's ideal for evaluating models that may not be fully compatible with OpenCode.
|
|
286
|
+
|
|
258
287
|
### Full Configuration
|
|
259
288
|
|
|
260
289
|
```typescript
|
|
@@ -267,7 +296,8 @@ const config: ExperimentConfig = {
|
|
|
267
296
|
// Model to use (defaults vary by agent)
|
|
268
297
|
// - claude-code: 'opus'
|
|
269
298
|
// - codex: 'openai/gpt-5.2-codex'
|
|
270
|
-
// - opencode: 'anthropic/claude-sonnet-4'
|
|
299
|
+
// - opencode: 'vercel/anthropic/claude-sonnet-4' (note: vercel/ prefix required)
|
|
300
|
+
// - ai-sdk-harness: 'anthropic/claude-sonnet-4' (works with any AI Gateway model)
|
|
271
301
|
model: 'opus',
|
|
272
302
|
|
|
273
303
|
// How many times to run each eval
|
|
@@ -279,8 +309,8 @@ const config: ExperimentConfig = {
|
|
|
279
309
|
// npm scripts that must pass after agent finishes
|
|
280
310
|
scripts: ['build', 'lint'],
|
|
281
311
|
|
|
282
|
-
// Timeout per run in seconds
|
|
283
|
-
timeout:
|
|
312
|
+
// Timeout per run in seconds (default: 600)
|
|
313
|
+
timeout: 600,
|
|
284
314
|
|
|
285
315
|
// Filter which evals to run (pick one)
|
|
286
316
|
evals: '*', // all (default)
|
package/dist/cli.js
CHANGED
|
@@ -5,7 +5,8 @@
|
|
|
5
5
|
import { Command } from 'commander';
|
|
6
6
|
import { config as dotenvConfig } from 'dotenv';
|
|
7
7
|
import { resolve, dirname, basename } from 'path';
|
|
8
|
-
import { existsSync } from 'fs';
|
|
8
|
+
import { existsSync, readFileSync } from 'fs';
|
|
9
|
+
import { fileURLToPath } from 'url';
|
|
9
10
|
import chalk from 'chalk';
|
|
10
11
|
import { loadConfig, resolveEvalNames } from './lib/config.js';
|
|
11
12
|
import { loadAllFixtures } from './lib/fixture.js';
|
|
@@ -13,13 +14,17 @@ import { runExperiment } from './lib/runner.js';
|
|
|
13
14
|
import { initProject, getPostInitInstructions } from './lib/init.js';
|
|
14
15
|
import { getAgent } from './lib/agents/index.js';
|
|
15
16
|
import { getSandboxBackendInfo } from './lib/sandbox.js';
|
|
16
|
-
// Load environment variables
|
|
17
|
+
// Load environment variables (.env.local first, then .env as fallback)
|
|
18
|
+
dotenvConfig({ path: '.env.local' });
|
|
17
19
|
dotenvConfig();
|
|
20
|
+
// Read version from package.json
|
|
21
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
22
|
+
const pkg = JSON.parse(readFileSync(resolve(__dirname, '../package.json'), 'utf-8'));
|
|
18
23
|
const program = new Command();
|
|
19
24
|
program
|
|
20
25
|
.name('agent-eval')
|
|
21
26
|
.description('Framework for testing AI coding agents in isolated sandboxes')
|
|
22
|
-
.version(
|
|
27
|
+
.version(pkg.version);
|
|
23
28
|
/**
|
|
24
29
|
* Resolve config path shorthand.
|
|
25
30
|
* - "cc" -> "experiments/cc.ts"
|
|
@@ -81,7 +86,7 @@ async function runExperimentCommand(configInput, options) {
|
|
|
81
86
|
console.log(chalk.blue(`\nRunning ${evalNames.length} eval(s) x ${config.runs} run(s) = ${evalNames.length * config.runs} total runs`));
|
|
82
87
|
console.log(chalk.blue(`Agent: ${config.agent}, Model: ${config.model}, Timeout: ${config.timeout}s, Early Exit: ${config.earlyExit}`));
|
|
83
88
|
// Show which sandbox backend will be used
|
|
84
|
-
const sandboxInfo = getSandboxBackendInfo();
|
|
89
|
+
const sandboxInfo = getSandboxBackendInfo({ backend: config.sandbox });
|
|
85
90
|
console.log(chalk.blue(`Sandbox: ${sandboxInfo.description}`));
|
|
86
91
|
if (options.dry) {
|
|
87
92
|
console.log(chalk.yellow('\n[DRY RUN] Would execute evals here'));
|
package/dist/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,MAAM,IAAI,YAAY,EAAE,MAAM,QAAQ,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,MAAM,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,MAAM,IAAI,YAAY,EAAE,MAAM,QAAQ,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,MAAM,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAC/D,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnD,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,uBAAuB,EAAE,MAAM,eAAe,CAAC;AACrE,OAAO,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AACjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AAEzD,uEAAuE;AACvE,YAAY,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,CAAC;AACrC,YAAY,EAAE,CAAC;AAEf,iCAAiC;AACjC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC1D,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,OAAO,CAAC,SAAS,EAAE,iBAAiB,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;AAErF,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,YAAY,CAAC;KAClB,WAAW,CAAC,8DAA8D,CAAC;KAC3E,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;AAExB;;;;GAIG;AACH,SAAS,iBAAiB,CAAC,KAAa;IACtC,6DAA6D;IAC7D,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAClG,OAAO,KAAK,CAAC;IACf,CAAC;IACD,6DAA6D;IAC7D,OAAO,eAAe,KAAK,KAAK,CAAC;AACnC,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,oBAAoB,CAAC,WAAmB,EAAE,OAA0B;IACjF,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,iBAAiB,CAAC,WAAW,CAAC,CAAC;QAClD,MAAM,kBAAkB,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,UAAU,CAAC,CAAC;QAE9D,IAAI,CAAC,UAAU,CAAC,kBAAkB,CAAC,EAAE,CAAC;YACpC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,0BAA0B,kBAAkB,EAAE,CAAC,CAAC,CAAC;YACzE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,uBAAuB,UAAU,KAAK,CAAC,CAAC,CAAC;QAChE,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,kBAAkB,CAAC,CAAC;QAEpD,mDAAmD;QACnD,kEAAkE;QAClE,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC,CAAC;QACxD,MAAM,QAAQ,GA
AG,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;QAC9C,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,8BAA8B,QAAQ,EAAE,CAAC,CAAC,CAAC;YACnE,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,yDAAyD,CAAC,CAAC,CAAC;YACrF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,wBAAwB,QAAQ,KAAK,CAAC,CAAC,CAAC;QAC/D,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAC;QAEvD,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,cAAc,MAAM,CAAC,MAAM,sBAAsB,CAAC,CAAC,CAAC;YAC7E,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO,KAAK,CAAC,WAAW,KAAK,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;YAC1E,CAAC;QACH,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,8BAA8B,CAAC,CAAC,CAAC;YACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,6BAA6B;QAC7B,MAAM,cAAc,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACnD,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,KAAK,EAAE,cAAc,CAAC,CAAC;QAEjE,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC,CAAC;YACxD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,WAAW,QAAQ,CAAC,MAAM,+BAA+B,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACvG,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;YAC7B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC;QAC1C,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,aAAa,SAAS,CAAC,MAAM,cAAc,MAAM,CAAC,IAAI,aAAa,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC,IAAI,aAAa,CAAC,CAAC,CAAC;QACxI,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,MAAM,CAAC,KAAK,YAAY,MAAM,CAAC,KAAK,cAAc,MAAM,CAAC,OAAO,kBAAkB,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;QAExI,0CAA0C;QAC1C,MAAM,WAAW,GAAG,qBAAqB,CAAC,EAAE,OAAO,EAAE,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;QACvE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,YAAY,WAAW,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;QAE/D,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;YAChB,OAAO,CAAC,GAAG,CAAC,KAA
K,CAAC,MAAM,CAAC,sCAAsC,CAAC,CAAC,CAAC;YAClE,OAAO;QACT,CAAC;QAED,8CAA8C;QAC9C,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACrC,MAAM,YAAY,GAAG,KAAK,CAAC,eAAe,EAAE,CAAC;QAC7C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;QACzC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,YAAY,mCAAmC,CAAC,CAAC,CAAC;YAC7E,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC,CAAC;YAC7F,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,kDAAkD;QAClD,MAAM,gBAAgB,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAE5E,uCAAuC;QACvC,MAAM,cAAc,GAAG,QAAQ,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACxE,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CAAC,CAAC;QAErD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC,CAAC;QAEpD,qBAAqB;QACrB,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC;YAClC,MAAM;YACN,QAAQ,EAAE,gBAAgB;YAC1B,MAAM;YACN,UAAU;YACV,cAAc;YACd,UAAU,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC;SACtC,CAAC,CAAC;QAEH,6BAA6B;QAC7B,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,KAAK,CAAC,CAAC,SAAS,CAAC,CAAC;QAC3E,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAClC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACtD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC,CAAC;QACxD,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,QAAQ,CAAC,QAAQ,EAAE,+BAA+B,CAAC;KACnD,WAAW,CAAC,iDAAiD,CAAC;KAC9D,MAAM,CAAC,KAAK,EAAE,IAAY,EAAE,EAAE;IAC7B,IAAI,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,8BAA8B,IAAI,EAAE,CAAC,CAAC,CAAC;QAE9D,MAAM,UAAU,GAAG,WAAW,CAAC;YAC7B,IAAI;YACJ,SAAS,EAAE,OAAO,CAAC,GAAG,EAAE;SACzB,CAAC,CAAC;QAEH,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC,CAAC;QAC1D,OAAO,CAAC,GAAG,CAAC,uBAAuB,C
AAC,UAAU,EAAE,IAAI,CAAC,CAAC,CAAC;IACzD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACtD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC,CAAC;QACxD,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL;;;GAGG;AACH,OAAO;KACJ,QAAQ,CAAC,UAAU,EAAE,sCAAsC,CAAC;KAC5D,MAAM,CAAC,OAAO,EAAE,0CAA0C,CAAC;KAC3D,MAAM,CAAC,KAAK,EAAE,WAA+B,EAAE,OAA0B,EAAE,EAAE;IAC5E,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,OAAO,CAAC,IAAI,EAAE,CAAC;QACf,OAAO;IACT,CAAC;IACD,MAAM,oBAAoB,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;AACnD,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,EAAE,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AI SDK Agent - A simple coding agent using the Vercel AI SDK.
|
|
3
|
+
* Works with any model available on Vercel AI Gateway.
|
|
4
|
+
*/
|
|
5
|
+
import type { Agent } from './types.js';
|
|
6
|
+
/**
|
|
7
|
+
* Create AI SDK agent with Vercel AI Gateway authentication.
|
|
8
|
+
*/
|
|
9
|
+
export declare function createAiSdkAgent(): Agent;
|
|
10
|
+
//# sourceMappingURL=ai-sdk-agent.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-sdk-agent.d.ts","sourceRoot":"","sources":["../../../src/lib/agents/ai-sdk-agent.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAmC,MAAM,YAAY,CAAC;AA8QzE;;GAEG;AACH,wBAAgB,gBAAgB,IAAI,KAAK,CAiMxC"}
|
|
@@ -0,0 +1,427 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AI SDK Agent - A simple coding agent using the Vercel AI SDK.
|
|
3
|
+
* Works with any model available on Vercel AI Gateway.
|
|
4
|
+
*/
|
|
5
|
+
import { createSandbox, collectLocalFiles, splitTestFiles, verifyNoTestFiles, } from '../sandbox.js';
|
|
6
|
+
import { runValidation, captureGeneratedFiles, createVitestConfig, AI_GATEWAY, } from './shared.js';
|
|
7
|
+
/**
 * Source code of the agent CLI that runs inside the sandbox.
 *
 * The string is written to the sandbox as `ai-sdk-agent.mjs` and executed with
 * `node ai-sdk-agent.mjs --prompt <prompt> --model <provider/model>`.
 * The script is self-contained and:
 *   - reads the gateway key from the `AI_GATEWAY_API_KEY` environment variable,
 *   - exposes the tools readFile, writeFile, editFile, listFiles, glob, grep
 *     and bash to the model,
 *   - runs `generateText` for at most 100 steps,
 *   - prints every event as one JSON object per line on stdout,
 *   - exits with code 1 when arguments are missing or the run throws.
 *
 * NOTE: this is a template literal, so every backtick and `${` that belongs to
 * the embedded script must stay backslash-escaped, and every backslash the
 * embedded script needs must be doubled.
 */
const CLI_SCRIPT = `
import { generateText, tool, stepCountIs } from 'ai';
import { createGateway } from '@ai-sdk/gateway';
import { z } from 'zod';
import { mkdirSync, readFileSync, readdirSync, writeFileSync } from 'fs';
import { execSync } from 'child_process';
import { dirname } from 'path';

// Parse command line arguments
const args = process.argv.slice(2);
let prompt = '';
let model = '';

for (let i = 0; i < args.length; i++) {
  if (args[i] === '--prompt' && args[i + 1]) {
    prompt = args[++i];
  } else if (args[i] === '--model' && args[i + 1]) {
    model = args[++i];
  }
}

if (!prompt || !model) {
  console.error('Usage: ai-sdk-agent --prompt "..." --model "provider/model"');
  process.exit(1);
}

// Create AI Gateway client
const gateway = createGateway({
  apiKey: process.env.AI_GATEWAY_API_KEY,
});

// Transcript events
const events = [];

// Record an event and emit it as a single JSON line on stdout.
function logEvent(type, data) {
  const event = { type, timestamp: Date.now(), ...data };
  events.push(event);
  console.log(JSON.stringify(event));
}

// Quote a value so the shell passes it through as exactly one literal word.
// The value is wrapped in single quotes; an embedded single quote is handled
// by closing the quote, emitting an escaped quote, and reopening it.
function shellQuote(value) {
  return "'" + String(value).replace(/'/g, "'\\\\''") + "'";
}

// Define coding tools
const tools = {
  readFile: tool({
    description: 'Read the contents of a file at the given path',
    inputSchema: z.object({
      path: z.string().describe('The file path to read'),
    }),
    execute: async ({ path }) => {
      try {
        const content = readFileSync(path, 'utf-8');
        logEvent('tool_result', { tool: 'readFile', path, success: true });
        return content;
      } catch (error) {
        logEvent('tool_result', { tool: 'readFile', path, success: false, error: error.message });
        return \`Error reading file: \${error.message}\`;
      }
    },
  }),

  writeFile: tool({
    description: 'Write content to a file at the given path. Creates directories if needed.',
    inputSchema: z.object({
      path: z.string().describe('The file path to write'),
      content: z.string().describe('The content to write'),
    }),
    execute: async ({ path, content }) => {
      try {
        const dir = dirname(path);
        if (dir && dir !== '.') {
          mkdirSync(dir, { recursive: true });
        }
        writeFileSync(path, content);
        logEvent('tool_result', { tool: 'writeFile', path, success: true });
        return 'File written successfully';
      } catch (error) {
        logEvent('tool_result', { tool: 'writeFile', path, success: false, error: error.message });
        return \`Error writing file: \${error.message}\`;
      }
    },
  }),

  editFile: tool({
    description: 'Edit a file by replacing a specific string with new content',
    inputSchema: z.object({
      path: z.string().describe('The file path to edit'),
      oldString: z.string().describe('The exact string to find and replace'),
      newString: z.string().describe('The replacement string'),
    }),
    execute: async ({ path, oldString, newString }) => {
      try {
        const content = readFileSync(path, 'utf-8');
        if (!content.includes(oldString)) {
          logEvent('tool_result', { tool: 'editFile', path, success: false, error: 'String not found' });
          return 'Error: The specified string was not found in the file';
        }
        // Use a replacer function: a plain replacement string would expand
        // dollar-sign patterns, corrupting code that contains them.
        const newContent = content.replace(oldString, () => newString);
        writeFileSync(path, newContent);
        logEvent('tool_result', { tool: 'editFile', path, success: true });
        return 'File edited successfully';
      } catch (error) {
        logEvent('tool_result', { tool: 'editFile', path, success: false, error: error.message });
        return \`Error editing file: \${error.message}\`;
      }
    },
  }),

  listFiles: tool({
    description: 'List files in a directory. Call with path="." to list current directory.',
    inputSchema: z.object({
      path: z.string().describe('The directory path to list (use "." for current directory)'),
      recursive: z.boolean().describe('Whether to list recursively').optional(),
    }),
    execute: async ({ path, recursive }) => {
      const targetPath = path || '.';
      const isRecursive = recursive || false;
      try {
        if (isRecursive) {
          // Quote the path so spaces and shell metacharacters stay literal.
          const result = execSync(\`find \${shellQuote(targetPath)} -type f | head -100\`, { encoding: 'utf-8' });
          logEvent('tool_result', { tool: 'listFiles', path: targetPath, recursive: isRecursive, success: true });
          return result;
        }
        const files = readdirSync(targetPath);
        logEvent('tool_result', { tool: 'listFiles', path: targetPath, recursive: isRecursive, success: true });
        return files.join('\\n');
      } catch (error) {
        logEvent('tool_result', { tool: 'listFiles', path: targetPath, success: false, error: error.message });
        return \`Error listing files: \${error.message}\`;
      }
    },
  }),

  glob: tool({
    description: 'Find files matching a pattern (e.g., "*.ts" for TypeScript files)',
    inputSchema: z.object({
      pattern: z.string().describe('The file pattern (e.g., "*.ts", "*.js")'),
    }),
    execute: async ({ pattern }) => {
      try {
        // Extract just the file pattern, remove any path prefix
        const filePattern = pattern.replace(/^\\*\\*\\//, '').replace(/^\\.\\//, '');
        // Single-quote the pattern so the shell neither expands nor splits it.
        const result = execSync(\`find . -name \${shellQuote(filePattern)} -type f 2>/dev/null | grep -v node_modules | head -50\`, { encoding: 'utf-8' });
        logEvent('tool_result', { tool: 'glob', pattern, success: true });
        return result.trim() || 'No files found';
      } catch (error) {
        logEvent('tool_result', { tool: 'glob', pattern, success: false, error: error.message });
        return 'No files found';
      }
    },
  }),

  grep: tool({
    description: 'Search for a text pattern in files',
    inputSchema: z.object({
      pattern: z.string().describe('The search pattern'),
      path: z.string().describe('The file or directory to search in').optional(),
    }),
    execute: async ({ pattern, path }) => {
      const targetPath = path || '.';
      try {
        // -e keeps a pattern that starts with "-" from being read as an option,
        // and "--" does the same for the path; both values are shell-quoted.
        const result = execSync(\`grep -rn -e \${shellQuote(pattern)} -- \${shellQuote(targetPath)} 2>/dev/null | grep -v node_modules | head -50\`, { encoding: 'utf-8' });
        logEvent('tool_result', { tool: 'grep', pattern, path: targetPath, success: true });
        return result.trim() || 'No matches found';
      } catch (error) {
        logEvent('tool_result', { tool: 'grep', pattern, path: targetPath, success: false });
        return 'No matches found';
      }
    },
  }),

  bash: tool({
    description: 'Run a bash command',
    inputSchema: z.object({
      command: z.string().describe('The command to run'),
    }),
    execute: async ({ command }) => {
      try {
        const result = execSync(command, { encoding: 'utf-8', timeout: 30000 });
        logEvent('tool_result', { tool: 'bash', command, success: true });
        return result;
      } catch (error) {
        logEvent('tool_result', { tool: 'bash', command, success: false, error: error.message });
        return \`Error: \${error.message}\\n\${error.stdout || ''}\\n\${error.stderr || ''}\`;
      }
    },
  }),
};

// System prompt for the coding agent
const systemPrompt = \`You are an expert coding agent. Your job is to complete programming tasks by reading, writing, and modifying files.

Available tools:
- readFile(path): Read a file's contents
- writeFile(path, content): Write/create a file (creates directories if needed)
- editFile(path, oldString, newString): Replace a specific string in a file
- listFiles(path): List files in a directory (use path="." for current directory)
- glob(pattern): Find files by pattern (e.g., "*.ts")
- grep(pattern, path): Search for text in files
- bash(command): Run shell commands

IMPORTANT WORKFLOW:
1. First, list files to understand the project structure: listFiles(path=".")
2. Read any relevant existing files to understand the context
3. Make the necessary code changes using writeFile or editFile
4. If needed, run build/test commands with bash to verify

RULES:
- Always check what files exist before modifying them
- Create complete, working code - not placeholders
- Put files in the correct directories (e.g., src/ for source files)
- Be thorough but efficient\`;

// Run the agent
async function main() {
  logEvent('start', { model, prompt });

  try {
    const result = await generateText({
      model: gateway(model),
      tools,
      stopWhen: stepCountIs(100), // Allow up to 100 steps
      system: systemPrompt,
      prompt,
      onStepFinish: ({ stepType, text, toolCalls, toolResults }) => {
        logEvent('step', { stepType, text, toolCalls: toolCalls?.length, toolResults: toolResults?.length });
      },
    });

    logEvent('complete', {
      success: true,
      steps: result.steps.length,
      text: result.text,
    });
  } catch (error) {
    logEvent('error', {
      success: false,
      error: error.message,
      name: error.name,
    });
    process.exit(1);
  }
}

main();
`;
|
|
256
|
+
/**
|
|
257
|
+
* Create AI SDK agent with Vercel AI Gateway authentication.
|
|
258
|
+
*/
|
|
259
|
+
export function createAiSdkAgent() {
|
|
260
|
+
return {
|
|
261
|
+
name: 'vercel-ai-gateway/ai-sdk-harness',
|
|
262
|
+
displayName: 'AI SDK Harness (Vercel AI Gateway)',
|
|
263
|
+
getApiKeyEnvVar() {
|
|
264
|
+
return AI_GATEWAY.apiKeyEnvVar;
|
|
265
|
+
},
|
|
266
|
+
getDefaultModel() {
|
|
267
|
+
return 'anthropic/claude-sonnet-4';
|
|
268
|
+
},
|
|
269
|
+
async run(fixturePath, options) {
|
|
270
|
+
const startTime = Date.now();
|
|
271
|
+
let sandbox = null;
|
|
272
|
+
let agentOutput = '';
|
|
273
|
+
let aborted = false;
|
|
274
|
+
let sandboxStopped = false;
|
|
275
|
+
// Handle abort signal
|
|
276
|
+
const abortHandler = () => {
|
|
277
|
+
aborted = true;
|
|
278
|
+
if (sandbox && !sandboxStopped) {
|
|
279
|
+
sandboxStopped = true;
|
|
280
|
+
sandbox.stop().catch(() => { });
|
|
281
|
+
}
|
|
282
|
+
};
|
|
283
|
+
if (options.signal) {
|
|
284
|
+
if (options.signal.aborted) {
|
|
285
|
+
return {
|
|
286
|
+
success: false,
|
|
287
|
+
output: '',
|
|
288
|
+
error: 'Aborted before start',
|
|
289
|
+
duration: 0,
|
|
290
|
+
};
|
|
291
|
+
}
|
|
292
|
+
options.signal.addEventListener('abort', abortHandler);
|
|
293
|
+
}
|
|
294
|
+
try {
|
|
295
|
+
// Collect files from fixture
|
|
296
|
+
const allFiles = await collectLocalFiles(fixturePath);
|
|
297
|
+
const { workspaceFiles, testFiles } = splitTestFiles(allFiles);
|
|
298
|
+
// Check for abort before expensive operations
|
|
299
|
+
if (aborted) {
|
|
300
|
+
return {
|
|
301
|
+
success: false,
|
|
302
|
+
output: '',
|
|
303
|
+
error: 'Aborted',
|
|
304
|
+
duration: Date.now() - startTime,
|
|
305
|
+
};
|
|
306
|
+
}
|
|
307
|
+
// Create sandbox
|
|
308
|
+
sandbox = await createSandbox({
|
|
309
|
+
timeout: options.timeout,
|
|
310
|
+
runtime: 'node24',
|
|
311
|
+
backend: options.sandbox,
|
|
312
|
+
});
|
|
313
|
+
// Check for abort after sandbox creation
|
|
314
|
+
if (aborted) {
|
|
315
|
+
return {
|
|
316
|
+
success: false,
|
|
317
|
+
output: '',
|
|
318
|
+
error: 'Aborted',
|
|
319
|
+
duration: Date.now() - startTime,
|
|
320
|
+
sandboxId: sandbox.sandboxId,
|
|
321
|
+
};
|
|
322
|
+
}
|
|
323
|
+
// Upload workspace files (excluding tests)
|
|
324
|
+
await sandbox.uploadFiles(workspaceFiles);
|
|
325
|
+
// Run setup function if provided
|
|
326
|
+
if (options.setup) {
|
|
327
|
+
await options.setup(sandbox);
|
|
328
|
+
}
|
|
329
|
+
// Install dependencies
|
|
330
|
+
const installResult = await sandbox.runCommand('npm', ['install']);
|
|
331
|
+
if (installResult.exitCode !== 0) {
|
|
332
|
+
throw new Error(`npm install failed: ${installResult.stderr}`);
|
|
333
|
+
}
|
|
334
|
+
// Install AI SDK dependencies
|
|
335
|
+
const aiInstall = await sandbox.runCommand('npm', [
|
|
336
|
+
'install',
|
|
337
|
+
'ai@^5.0.11',
|
|
338
|
+
'@ai-sdk/gateway@^1.0.0',
|
|
339
|
+
'zod@^3.23.8',
|
|
340
|
+
]);
|
|
341
|
+
if (aiInstall.exitCode !== 0) {
|
|
342
|
+
throw new Error(`AI SDK install failed: ${aiInstall.stderr}`);
|
|
343
|
+
}
|
|
344
|
+
// Write the CLI script to the sandbox
|
|
345
|
+
await sandbox.writeFiles({
|
|
346
|
+
'ai-sdk-agent.mjs': CLI_SCRIPT,
|
|
347
|
+
});
|
|
348
|
+
// Verify no test files in sandbox
|
|
349
|
+
await verifyNoTestFiles(sandbox);
|
|
350
|
+
// Run the AI SDK agent
|
|
351
|
+
const agentResult = await sandbox.runCommand('node', [
|
|
352
|
+
'ai-sdk-agent.mjs',
|
|
353
|
+
'--prompt',
|
|
354
|
+
options.prompt,
|
|
355
|
+
'--model',
|
|
356
|
+
options.model,
|
|
357
|
+
], {
|
|
358
|
+
env: {
|
|
359
|
+
[AI_GATEWAY.apiKeyEnvVar]: options.apiKey,
|
|
360
|
+
},
|
|
361
|
+
});
|
|
362
|
+
agentOutput = agentResult.stdout + agentResult.stderr;
|
|
363
|
+
if (agentResult.exitCode !== 0) {
|
|
364
|
+
// Extract meaningful error from output
|
|
365
|
+
const errorLines = agentOutput.trim().split('\n').slice(-5).join('\n');
|
|
366
|
+
return {
|
|
367
|
+
success: false,
|
|
368
|
+
output: agentOutput,
|
|
369
|
+
error: errorLines || `AI SDK agent exited with code ${agentResult.exitCode}`,
|
|
370
|
+
duration: Date.now() - startTime,
|
|
371
|
+
sandboxId: sandbox.sandboxId,
|
|
372
|
+
};
|
|
373
|
+
}
|
|
374
|
+
// Upload test files for validation
|
|
375
|
+
await sandbox.uploadFiles(testFiles);
|
|
376
|
+
// Create vitest config for EVAL.ts/tsx
|
|
377
|
+
await createVitestConfig(sandbox);
|
|
378
|
+
// The agent outputs JSON events, use that as transcript
|
|
379
|
+
const transcript = agentOutput;
|
|
380
|
+
// Run validation scripts
|
|
381
|
+
const validationResults = await runValidation(sandbox, options.scripts ?? []);
|
|
382
|
+
// Capture generated files
|
|
383
|
+
const generatedFiles = await captureGeneratedFiles(sandbox);
|
|
384
|
+
return {
|
|
385
|
+
success: validationResults.allPassed,
|
|
386
|
+
output: agentOutput,
|
|
387
|
+
transcript,
|
|
388
|
+
duration: Date.now() - startTime,
|
|
389
|
+
testResult: validationResults.test,
|
|
390
|
+
scriptsResults: validationResults.scripts,
|
|
391
|
+
sandboxId: sandbox.sandboxId,
|
|
392
|
+
generatedFiles,
|
|
393
|
+
};
|
|
394
|
+
}
|
|
395
|
+
catch (error) {
|
|
396
|
+
// Check if this was an abort
|
|
397
|
+
if (aborted) {
|
|
398
|
+
return {
|
|
399
|
+
success: false,
|
|
400
|
+
output: agentOutput,
|
|
401
|
+
error: 'Aborted',
|
|
402
|
+
duration: Date.now() - startTime,
|
|
403
|
+
sandboxId: sandbox?.sandboxId,
|
|
404
|
+
};
|
|
405
|
+
}
|
|
406
|
+
return {
|
|
407
|
+
success: false,
|
|
408
|
+
output: agentOutput,
|
|
409
|
+
error: error instanceof Error ? error.message : String(error),
|
|
410
|
+
duration: Date.now() - startTime,
|
|
411
|
+
sandboxId: sandbox?.sandboxId,
|
|
412
|
+
};
|
|
413
|
+
}
|
|
414
|
+
finally {
|
|
415
|
+
// Clean up abort listener
|
|
416
|
+
if (options.signal) {
|
|
417
|
+
options.signal.removeEventListener('abort', abortHandler);
|
|
418
|
+
}
|
|
419
|
+
if (sandbox && !sandboxStopped) {
|
|
420
|
+
sandboxStopped = true;
|
|
421
|
+
await sandbox.stop();
|
|
422
|
+
}
|
|
423
|
+
}
|
|
424
|
+
},
|
|
425
|
+
};
|
|
426
|
+
}
|
|
427
|
+
//# sourceMappingURL=ai-sdk-agent.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-sdk-agent.js","sourceRoot":"","sources":["../../../src/lib/agents/ai-sdk-agent.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,OAAO,EACL,aAAa,EACb,iBAAiB,EACjB,cAAc,EACd,iBAAiB,GAElB,MAAM,eAAe,CAAC;AAEvB,OAAO,EACL,aAAa,EACb,qBAAqB,EACrB,kBAAkB,EAClB,UAAU,GACX,MAAM,aAAa,CAAC;AAKrB;;;GAGG;AACH,MAAM,UAAU,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAoPlB,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,gBAAgB;IAC9B,OAAO;QACL,IAAI,EAAE,kCAAkC;QACxC,WAAW,EAAE,oCAAoC;QAEjD,eAAe;YACb,OAAO,UAAU,CAAC,YAAY,CAAC;QACjC,CAAC;QAED,eAAe;YACb,OAAO,2BAA2B,CAAC;QACrC,CAAC;QAED,KAAK,CAAC,GAAG,CAAC,WAAmB,EAAE,OAAwB;YACrD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC7B,IAAI,OAAO,GAAsB,IAAI,CAAC;YACtC,IAAI,WAAW,GAAG,EAAE,CAAC;YACrB,IAAI,OAAO,GAAG,KAAK,CAAC;YACpB,IAAI,cAAc,GAAG,KAAK,CAAC;YAE3B,sBAAsB;YACtB,MAAM,YAAY,GAAG,GAAG,EAAE;gBACxB,OAAO,GAAG,IAAI,CAAC;gBACf,IAAI,OAAO,IAAI,CAAC,cAAc,EAAE,CAAC;oBAC/B,cAAc,GAAG,IAAI,CAAC;oBACtB,OAAO,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;gBACjC,CAAC;YACH,CAAC,CAAC;YAEF,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;gBACnB,IAAI,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;oBAC3B,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,EAAE;wBACV,KAAK,EAAE,sBAAsB;wBAC7B,QAAQ,EAAE,CAAC;qBACZ,CAAC;gBACJ,CAAC;gBACD,OAAO,CAAC,MAAM,CAAC,gBAAgB,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;YACzD,CAAC;YAED,IAAI,CAAC;gBACH,6BAA6B;gBAC7B,MAAM,QAAQ,GAAG,MAAM,iBAAiB,CAAC,WAAW,CAAC,CAAC;gBACtD,MAAM,EAAE,cAAc,EAAE,SAAS,EAAE,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;gBAE/D,8CAA8C;gBAC9C,IAAI,OAAO,EAAE,CAAC;oBACZ,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,EAAE;wBACV,KAAK,EAAE,SAAS;wBAChB,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;qBACjC,CAAC;gBACJ,CAAC;gBAED,iBAAiB;gBACjB,OAAO,GAAG,MAAM,aAAa,CAAC;oBAC5B,OAAO,EAAE,OAAO,CAAC,OAAO;oBACxB,OAAO,EAAE,QAAQ;oBACjB,OAAO,EAAE,OAAO,CAAC,OAAO;iBACzB,CAAC,CAAC;gBAEH,yCAAyC;gBACzC,IA
AI,OAAO,EAAE,CAAC;oBACZ,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,EAAE;wBACV,KAAK,EAAE,SAAS;wBAChB,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;wBAChC,SAAS,EAAE,OAAO,CAAC,SAAS;qBAC7B,CAAC;gBACJ,CAAC;gBAED,2CAA2C;gBAC3C,MAAM,OAAO,CAAC,WAAW,CAAC,cAAc,CAAC,CAAC;gBAE1C,iCAAiC;gBACjC,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;oBAClB,MAAM,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;gBAC/B,CAAC;gBAED,uBAAuB;gBACvB,MAAM,aAAa,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC;gBACnE,IAAI,aAAa,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;oBACjC,MAAM,IAAI,KAAK,CAAC,uBAAuB,aAAa,CAAC,MAAM,EAAE,CAAC,CAAC;gBACjE,CAAC;gBAED,8BAA8B;gBAC9B,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,KAAK,EAAE;oBAChD,SAAS;oBACT,YAAY;oBACZ,wBAAwB;oBACxB,aAAa;iBACd,CAAC,CAAC;gBACH,IAAI,SAAS,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;oBAC7B,MAAM,IAAI,KAAK,CAAC,0BAA0B,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC;gBAChE,CAAC;gBAED,sCAAsC;gBACtC,MAAM,OAAO,CAAC,UAAU,CAAC;oBACvB,kBAAkB,EAAE,UAAU;iBAC/B,CAAC,CAAC;gBAEH,kCAAkC;gBAClC,MAAM,iBAAiB,CAAC,OAAO,CAAC,CAAC;gBAEjC,uBAAuB;gBACvB,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC,UAAU,CAC1C,MAAM,EACN;oBACE,kBAAkB;oBAClB,UAAU;oBACV,OAAO,CAAC,MAAM;oBACd,SAAS;oBACT,OAAO,CAAC,KAAK;iBACd,EACD;oBACE,GAAG,EAAE;wBACH,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC,MAAM;qBAC1C;iBACF,CACF,CAAC;gBAEF,WAAW,GAAG,WAAW,CAAC,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC;gBAEtD,IAAI,WAAW,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;oBAC/B,uCAAuC;oBACvC,MAAM,UAAU,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACvE,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,WAAW;wBACnB,KAAK,EAAE,UAAU,IAAI,iCAAiC,WAAW,CAAC,QAAQ,EAAE;wBAC5E,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;wBAChC,SAAS,EAAE,OAAO,CAAC,SAAS;qBAC7B,CAAC;gBACJ,CAAC;gBAED,mCAAmC;gBACnC,MAAM,OAAO,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;gBAErC,uCAAuC;gBACvC,MAAM,kBAAkB,CAAC,OAAO,CAAC,CAAC;gBAElC,wDAAwD;gBACxD,MAAM,UAAU,GAAG,WAAW,CAAC;gBAE/B,yBAAyB;gBACzB,MAAM,iBAAiB,GAAG,MAAM,aAAa,CAAC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;gBAE9E,0BAA0B;gBAC1B,MAAM,cAAc,GAAG,MAAM,qBAAqB,CAAC,OAAO,CAAC
,CAAC;gBAE5D,OAAO;oBACL,OAAO,EAAE,iBAAiB,CAAC,SAAS;oBACpC,MAAM,EAAE,WAAW;oBACnB,UAAU;oBACV,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;oBAChC,UAAU,EAAE,iBAAiB,CAAC,IAAI;oBAClC,cAAc,EAAE,iBAAiB,CAAC,OAAO;oBACzC,SAAS,EAAE,OAAO,CAAC,SAAS;oBAC5B,cAAc;iBACf,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,6BAA6B;gBAC7B,IAAI,OAAO,EAAE,CAAC;oBACZ,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,WAAW;wBACnB,KAAK,EAAE,SAAS;wBAChB,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;wBAChC,SAAS,EAAE,OAAO,EAAE,SAAS;qBAC9B,CAAC;gBACJ,CAAC;gBACD,OAAO;oBACL,OAAO,EAAE,KAAK;oBACd,MAAM,EAAE,WAAW;oBACnB,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;oBAC7D,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;oBAChC,SAAS,EAAE,OAAO,EAAE,SAAS;iBAC9B,CAAC;YACJ,CAAC;oBAAS,CAAC;gBACT,0BAA0B;gBAC1B,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;oBACnB,OAAO,CAAC,MAAM,CAAC,mBAAmB,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;gBAC5D,CAAC;gBACD,IAAI,OAAO,IAAI,CAAC,cAAc,EAAE,CAAC;oBAC/B,cAAc,GAAG,IAAI,CAAC;oBACtB,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC;gBACvB,CAAC;YACH,CAAC;QACH,CAAC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"claude-code.d.ts","sourceRoot":"","sources":["../../../src/lib/agents/claude-code.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAmC,MAAM,YAAY,CAAC;AAmDzE;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,EAAE,kBAAkB,EAAE,EAAE;IAAE,kBAAkB,EAAE,OAAO,CAAA;CAAE,GAAG,KAAK,
|
|
1
|
+
{"version":3,"file":"claude-code.d.ts","sourceRoot":"","sources":["../../../src/lib/agents/claude-code.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAmC,MAAM,YAAY,CAAC;AAmDzE;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,EAAE,kBAAkB,EAAE,EAAE;IAAE,kBAAkB,EAAE,OAAO,CAAA;CAAE,GAAG,KAAK,CA6LpG"}
|
|
@@ -80,10 +80,11 @@ export function createClaudeCodeAgent({ useVercelAiGateway }) {
|
|
|
80
80
|
duration: Date.now() - startTime,
|
|
81
81
|
};
|
|
82
82
|
}
|
|
83
|
-
// Create sandbox
|
|
83
|
+
// Create sandbox
|
|
84
84
|
sandbox = await createSandbox({
|
|
85
85
|
timeout: options.timeout,
|
|
86
86
|
runtime: 'node24',
|
|
87
|
+
backend: options.sandbox,
|
|
87
88
|
});
|
|
88
89
|
// Check for abort after sandbox creation (abort may have fired during create)
|
|
89
90
|
if (aborted) {
|