@vercel/agent-eval 0.0.5 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/README.md +40 -10
  2. package/dist/cli.js +9 -4
  3. package/dist/cli.js.map +1 -1
  4. package/dist/lib/agents/ai-sdk-agent.d.ts +10 -0
  5. package/dist/lib/agents/ai-sdk-agent.d.ts.map +1 -0
  6. package/dist/lib/agents/ai-sdk-agent.js +427 -0
  7. package/dist/lib/agents/ai-sdk-agent.js.map +1 -0
  8. package/dist/lib/agents/claude-code.d.ts.map +1 -1
  9. package/dist/lib/agents/claude-code.js +2 -1
  10. package/dist/lib/agents/claude-code.js.map +1 -1
  11. package/dist/lib/agents/codex.d.ts.map +1 -1
  12. package/dist/lib/agents/codex.js +2 -1
  13. package/dist/lib/agents/codex.js.map +1 -1
  14. package/dist/lib/agents/index.d.ts.map +1 -1
  15. package/dist/lib/agents/index.js +2 -0
  16. package/dist/lib/agents/index.js.map +1 -1
  17. package/dist/lib/agents/opencode.d.ts.map +1 -1
  18. package/dist/lib/agents/opencode.js +3 -2
  19. package/dist/lib/agents/opencode.js.map +1 -1
  20. package/dist/lib/agents/types.d.ts +3 -1
  21. package/dist/lib/agents/types.d.ts.map +1 -1
  22. package/dist/lib/config.d.ts +1 -0
  23. package/dist/lib/config.d.ts.map +1 -1
  24. package/dist/lib/config.js +4 -1
  25. package/dist/lib/config.js.map +1 -1
  26. package/dist/lib/docker-sandbox.d.ts.map +1 -1
  27. package/dist/lib/docker-sandbox.js +3 -0
  28. package/dist/lib/docker-sandbox.js.map +1 -1
  29. package/dist/lib/init.js +2 -2
  30. package/dist/lib/runner.d.ts +1 -0
  31. package/dist/lib/runner.d.ts.map +1 -1
  32. package/dist/lib/runner.js +41 -8
  33. package/dist/lib/runner.js.map +1 -1
  34. package/dist/lib/sandbox.d.ts +2 -3
  35. package/dist/lib/sandbox.d.ts.map +1 -1
  36. package/dist/lib/sandbox.js +3 -11
  37. package/dist/lib/sandbox.js.map +1 -1
  38. package/dist/lib/types.d.ts +7 -0
  39. package/dist/lib/types.d.ts.map +1 -1
  40. package/dist/lib/types.js.map +1 -1
  41. package/package.json +1 -1
package/README.md CHANGED
@@ -208,6 +208,7 @@ Choose your agent and authentication method:
208
208
  agent: 'vercel-ai-gateway/claude-code' // Claude Code via AI Gateway
209
209
  agent: 'vercel-ai-gateway/codex' // OpenAI Codex via AI Gateway
210
210
  agent: 'vercel-ai-gateway/opencode' // OpenCode via AI Gateway
211
+ agent: 'vercel-ai-gateway/ai-sdk-harness' // Simple AI SDK harness (any model)
211
212
 
212
213
  // Direct API (uses provider keys directly)
213
214
  agent: 'claude-code' // requires ANTHROPIC_API_KEY
@@ -218,22 +219,28 @@ See the Environment Variables section below for setup instructions.
218
219
 
219
220
  ### OpenCode Model Configuration
220
221
 
221
- OpenCode uses Vercel AI Gateway exclusively. Models are specified with the `{provider}/{model}` format:
222
+ OpenCode uses Vercel AI Gateway exclusively. Models **must** be specified with the `vercel/{provider}/{model}` format:
222
223
 
223
224
  ```typescript
224
225
  // Anthropic models
225
- model: 'anthropic/claude-sonnet-4'
226
- model: 'anthropic/claude-opus-4'
226
+ model: 'vercel/anthropic/claude-sonnet-4'
227
+ model: 'vercel/anthropic/claude-opus-4'
228
+
229
+ // Minimax models
230
+ model: 'vercel/minimax/minimax-m2.1'
231
+ model: 'vercel/minimax/minimax-m2.1-lightning'
227
232
 
228
233
  // Moonshot AI (Kimi) models
229
- model: 'moonshotai/kimi-k2'
230
- model: 'moonshotai/kimi-k2-thinking'
234
+ model: 'vercel/moonshotai/kimi-k2'
235
+ model: 'vercel/moonshotai/kimi-k2-thinking'
231
236
 
232
237
  // OpenAI models
233
- model: 'openai/gpt-4o'
234
- model: 'openai/o3'
238
+ model: 'vercel/openai/gpt-4o'
239
+ model: 'vercel/openai/o3'
235
240
  ```
236
241
 
242
+ > **Important:** The `vercel/` prefix is required. OpenCode's config sets up a `vercel` provider, so the model string must start with `vercel/` to route through Vercel AI Gateway correctly. Using just `anthropic/claude-sonnet-4` (without the `vercel/` prefix) will fail with a "provider not found" error.
243
+
237
244
  Under the hood, the agent creates an `opencode.json` config file that configures the Vercel provider:
238
245
 
239
246
  ```json
@@ -255,6 +262,28 @@ Under the hood, the agent creates an `opencode.json` config file that configures
255
262
 
256
263
  And runs: `opencode run "<prompt>" --model vercel/{provider}/{model} --format json`
257
264
 
265
+ ### AI SDK Harness Model Configuration
266
+
267
+ The AI SDK harness (`vercel-ai-gateway/ai-sdk-harness`) is a lightweight agent that works with **any model** available on Vercel AI Gateway. Unlike OpenCode, it uses the standard `{provider}/{model}` format without a `vercel/` prefix:
268
+
269
+ ```typescript
270
+ // Anthropic models
271
+ model: 'anthropic/claude-sonnet-4'
272
+ model: 'anthropic/claude-opus-4'
273
+
274
+ // Moonshot AI (Kimi) models
275
+ model: 'moonshotai/kimi-k2.5'
276
+ model: 'moonshotai/kimi-k2-thinking'
277
+
278
+ // Minimax models
279
+ model: 'minimax/minimax-m2.1'
280
+
281
+ // OpenAI models
282
+ model: 'openai/gpt-4o'
283
+ ```
284
+
285
+ The AI SDK harness includes these tools: `readFile`, `writeFile`, `editFile`, `listFiles`, `glob`, `grep`, and `bash`. It's ideal for evaluating models that may not be fully compatible with OpenCode.
286
+
258
287
  ### Full Configuration
259
288
 
260
289
  ```typescript
@@ -267,7 +296,8 @@ const config: ExperimentConfig = {
267
296
  // Model to use (defaults vary by agent)
268
297
  // - claude-code: 'opus'
269
298
  // - codex: 'openai/gpt-5.2-codex'
270
- // - opencode: 'anthropic/claude-sonnet-4'
299
+ // - opencode: 'vercel/anthropic/claude-sonnet-4' (note: vercel/ prefix required)
300
+ // - ai-sdk-harness: 'anthropic/claude-sonnet-4' (works with any AI Gateway model)
271
301
  model: 'opus',
272
302
 
273
303
  // How many times to run each eval
@@ -279,8 +309,8 @@ const config: ExperimentConfig = {
279
309
  // npm scripts that must pass after agent finishes
280
310
  scripts: ['build', 'lint'],
281
311
 
282
- // Timeout per run in seconds
283
- timeout: 300,
312
+ // Timeout per run in seconds (default: 600)
313
+ timeout: 600,
284
314
 
285
315
  // Filter which evals to run (pick one)
286
316
  evals: '*', // all (default)
package/dist/cli.js CHANGED
@@ -5,7 +5,8 @@
5
5
  import { Command } from 'commander';
6
6
  import { config as dotenvConfig } from 'dotenv';
7
7
  import { resolve, dirname, basename } from 'path';
8
- import { existsSync } from 'fs';
8
+ import { existsSync, readFileSync } from 'fs';
9
+ import { fileURLToPath } from 'url';
9
10
  import chalk from 'chalk';
10
11
  import { loadConfig, resolveEvalNames } from './lib/config.js';
11
12
  import { loadAllFixtures } from './lib/fixture.js';
@@ -13,13 +14,17 @@ import { runExperiment } from './lib/runner.js';
13
14
  import { initProject, getPostInitInstructions } from './lib/init.js';
14
15
  import { getAgent } from './lib/agents/index.js';
15
16
  import { getSandboxBackendInfo } from './lib/sandbox.js';
16
- // Load environment variables
17
+ // Load environment variables (.env.local first, then .env as fallback)
18
+ dotenvConfig({ path: '.env.local' });
17
19
  dotenvConfig();
20
+ // Read version from package.json
21
+ const __dirname = dirname(fileURLToPath(import.meta.url));
22
+ const pkg = JSON.parse(readFileSync(resolve(__dirname, '../package.json'), 'utf-8'));
18
23
  const program = new Command();
19
24
  program
20
25
  .name('agent-eval')
21
26
  .description('Framework for testing AI coding agents in isolated sandboxes')
22
- .version('0.0.4');
27
+ .version(pkg.version);
23
28
  /**
24
29
  * Resolve config path shorthand.
25
30
  * - "cc" -> "experiments/cc.ts"
@@ -81,7 +86,7 @@ async function runExperimentCommand(configInput, options) {
81
86
  console.log(chalk.blue(`\nRunning ${evalNames.length} eval(s) x ${config.runs} run(s) = ${evalNames.length * config.runs} total runs`));
82
87
  console.log(chalk.blue(`Agent: ${config.agent}, Model: ${config.model}, Timeout: ${config.timeout}s, Early Exit: ${config.earlyExit}`));
83
88
  // Show which sandbox backend will be used
84
- const sandboxInfo = getSandboxBackendInfo();
89
+ const sandboxInfo = getSandboxBackendInfo({ backend: config.sandbox });
85
90
  console.log(chalk.blue(`Sandbox: ${sandboxInfo.description}`));
86
91
  if (options.dry) {
87
92
  console.log(chalk.yellow('\n[DRY RUN] Would execute evals here'));
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,MAAM,IAAI,YAAY,EAAE,MAAM,QAAQ,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,MAAM,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAChC,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAC/D,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnD,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,uBAAuB,EAAE,MAAM,eAAe,CAAC;AACrE,OAAO,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AACjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AAEzD,6BAA6B;AAC7B,YAAY,EAAE,CAAC;AAEf,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,YAAY,CAAC;KAClB,WAAW,CAAC,8DAA8D,CAAC;KAC3E,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB;;;;GAIG;AACH,SAAS,iBAAiB,CAAC,KAAa;IACtC,6DAA6D;IAC7D,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAClG,OAAO,KAAK,CAAC;IACf,CAAC;IACD,6DAA6D;IAC7D,OAAO,eAAe,KAAK,KAAK,CAAC;AACnC,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,oBAAoB,CAAC,WAAmB,EAAE,OAA0B;IACjF,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,iBAAiB,CAAC,WAAW,CAAC,CAAC;QAClD,MAAM,kBAAkB,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,UAAU,CAAC,CAAC;QAE9D,IAAI,CAAC,UAAU,CAAC,kBAAkB,CAAC,EAAE,CAAC;YACpC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,0BAA0B,kBAAkB,EAAE,CAAC,CAAC,CAAC;YACzE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,uBAAuB,UAAU,KAAK,CAAC,CAAC,CAAC;QAChE,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,kBAAkB,CAAC,CAAC;QAEpD,mDAAmD;QACnD,kEAAkE;QAClE,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC,CAAC;QACxD,MAAM,QAAQ,GAAG,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;QAC9C,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,8BAA8B,QAAQ,EAAE,CAAC,CAAC,CAAC;YACnE,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,yDAAyD,CAAC,CAAC,CAAC;YACrF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,
CAAC,IAAI,CAAC,wBAAwB,QAAQ,KAAK,CAAC,CAAC,CAAC;QAC/D,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAC;QAEvD,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,cAAc,MAAM,CAAC,MAAM,sBAAsB,CAAC,CAAC,CAAC;YAC7E,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO,KAAK,CAAC,WAAW,KAAK,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;YAC1E,CAAC;QACH,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,8BAA8B,CAAC,CAAC,CAAC;YACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,6BAA6B;QAC7B,MAAM,cAAc,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACnD,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,KAAK,EAAE,cAAc,CAAC,CAAC;QAEjE,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC,CAAC;YACxD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,WAAW,QAAQ,CAAC,MAAM,+BAA+B,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACvG,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;YAC7B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC;QAC1C,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,aAAa,SAAS,CAAC,MAAM,cAAc,MAAM,CAAC,IAAI,aAAa,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC,IAAI,aAAa,CAAC,CAAC,CAAC;QACxI,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,MAAM,CAAC,KAAK,YAAY,MAAM,CAAC,KAAK,cAAc,MAAM,CAAC,OAAO,kBAAkB,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;QAExI,0CAA0C;QAC1C,MAAM,WAAW,GAAG,qBAAqB,EAAE,CAAC;QAC5C,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,YAAY,WAAW,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;QAE/D,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;YAChB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,sCAAsC,CAAC,CAAC,CAAC;YAClE,OAAO;QACT,CAAC;QAED,8CAA8C;QAC9C,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACrC,MAAM,YAAY,GAAG,KAAK,CAAC,eAAe,EAAE,CAAC;QAC7C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;QACzC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,YAAY,mCAAmC,CAAC,CAAC,CAAC;YAC7E,OAAO,CAA
C,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC,CAAC;YAC7F,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,kDAAkD;QAClD,MAAM,gBAAgB,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAE5E,uCAAuC;QACvC,MAAM,cAAc,GAAG,QAAQ,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACxE,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CAAC,CAAC;QAErD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC,CAAC;QAEpD,qBAAqB;QACrB,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC;YAClC,MAAM;YACN,QAAQ,EAAE,gBAAgB;YAC1B,MAAM;YACN,UAAU;YACV,cAAc;YACd,UAAU,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC;SACtC,CAAC,CAAC;QAEH,6BAA6B;QAC7B,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,KAAK,CAAC,CAAC,SAAS,CAAC,CAAC;QAC3E,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAClC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACtD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC,CAAC;QACxD,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,QAAQ,CAAC,QAAQ,EAAE,+BAA+B,CAAC;KACnD,WAAW,CAAC,iDAAiD,CAAC;KAC9D,MAAM,CAAC,KAAK,EAAE,IAAY,EAAE,EAAE;IAC7B,IAAI,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,8BAA8B,IAAI,EAAE,CAAC,CAAC,CAAC;QAE9D,MAAM,UAAU,GAAG,WAAW,CAAC;YAC7B,IAAI;YACJ,SAAS,EAAE,OAAO,CAAC,GAAG,EAAE;SACzB,CAAC,CAAC;QAEH,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC,CAAC;QAC1D,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC,CAAC;IACzD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACtD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC,CAAC;QACxD,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,C
AAC,CAAC;AAEL;;;GAGG;AACH,OAAO;KACJ,QAAQ,CAAC,UAAU,EAAE,sCAAsC,CAAC;KAC5D,MAAM,CAAC,OAAO,EAAE,0CAA0C,CAAC;KAC3D,MAAM,CAAC,KAAK,EAAE,WAA+B,EAAE,OAA0B,EAAE,EAAE;IAC5E,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,OAAO,CAAC,IAAI,EAAE,CAAC;QACf,OAAO;IACT,CAAC;IACD,MAAM,oBAAoB,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;AACnD,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,EAAE,CAAC"}
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,MAAM,IAAI,YAAY,EAAE,MAAM,QAAQ,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,MAAM,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAC/D,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnD,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,uBAAuB,EAAE,MAAM,eAAe,CAAC;AACrE,OAAO,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AACjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AAEzD,uEAAuE;AACvE,YAAY,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,CAAC;AACrC,YAAY,EAAE,CAAC;AAEf,iCAAiC;AACjC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC1D,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,OAAO,CAAC,SAAS,EAAE,iBAAiB,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;AAErF,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,YAAY,CAAC;KAClB,WAAW,CAAC,8DAA8D,CAAC;KAC3E,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;AAExB;;;;GAIG;AACH,SAAS,iBAAiB,CAAC,KAAa;IACtC,6DAA6D;IAC7D,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAClG,OAAO,KAAK,CAAC;IACf,CAAC;IACD,6DAA6D;IAC7D,OAAO,eAAe,KAAK,KAAK,CAAC;AACnC,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,oBAAoB,CAAC,WAAmB,EAAE,OAA0B;IACjF,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,iBAAiB,CAAC,WAAW,CAAC,CAAC;QAClD,MAAM,kBAAkB,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,UAAU,CAAC,CAAC;QAE9D,IAAI,CAAC,UAAU,CAAC,kBAAkB,CAAC,EAAE,CAAC;YACpC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,0BAA0B,kBAAkB,EAAE,CAAC,CAAC,CAAC;YACzE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,uBAAuB,UAAU,KAAK,CAAC,CAAC,CAAC;QAChE,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,kBAAkB,CAAC,CAAC;QAEpD,mDAAmD;QACnD,kEAAkE;QAClE,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC,CAAC;QACxD,MAAM,QAAQ,
GAAG,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;QAC9C,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,8BAA8B,QAAQ,EAAE,CAAC,CAAC,CAAC;YACnE,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,yDAAyD,CAAC,CAAC,CAAC;YACrF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,wBAAwB,QAAQ,KAAK,CAAC,CAAC,CAAC;QAC/D,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAC;QAEvD,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,cAAc,MAAM,CAAC,MAAM,sBAAsB,CAAC,CAAC,CAAC;YAC7E,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO,KAAK,CAAC,WAAW,KAAK,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;YAC1E,CAAC;QACH,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,8BAA8B,CAAC,CAAC,CAAC;YACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,6BAA6B;QAC7B,MAAM,cAAc,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACnD,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,KAAK,EAAE,cAAc,CAAC,CAAC;QAEjE,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC,CAAC;YACxD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,WAAW,QAAQ,CAAC,MAAM,+BAA+B,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACvG,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;YAC7B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC;QAC1C,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,aAAa,SAAS,CAAC,MAAM,cAAc,MAAM,CAAC,IAAI,aAAa,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC,IAAI,aAAa,CAAC,CAAC,CAAC;QACxI,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,MAAM,CAAC,KAAK,YAAY,MAAM,CAAC,KAAK,cAAc,MAAM,CAAC,OAAO,kBAAkB,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;QAExI,0CAA0C;QAC1C,MAAM,WAAW,GAAG,qBAAqB,CAAC,EAAE,OAAO,EAAE,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;QACvE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,YAAY,WAAW,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;QAE/D,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;YAChB,OAAO,CAAC,GAAG,CAAC,K
AAK,CAAC,MAAM,CAAC,sCAAsC,CAAC,CAAC,CAAC;YAClE,OAAO;QACT,CAAC;QAED,8CAA8C;QAC9C,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACrC,MAAM,YAAY,GAAG,KAAK,CAAC,eAAe,EAAE,CAAC;QAC7C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;QACzC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,YAAY,mCAAmC,CAAC,CAAC,CAAC;YAC7E,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC,CAAC;YAC7F,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,kDAAkD;QAClD,MAAM,gBAAgB,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAE5E,uCAAuC;QACvC,MAAM,cAAc,GAAG,QAAQ,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACxE,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CAAC,CAAC;QAErD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC,CAAC;QAEpD,qBAAqB;QACrB,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC;YAClC,MAAM;YACN,QAAQ,EAAE,gBAAgB;YAC1B,MAAM;YACN,UAAU;YACV,cAAc;YACd,UAAU,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC;SACtC,CAAC,CAAC;QAEH,6BAA6B;QAC7B,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,KAAK,CAAC,CAAC,SAAS,CAAC,CAAC;QAC3E,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAClC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACtD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC,CAAC;QACxD,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,QAAQ,CAAC,QAAQ,EAAE,+BAA+B,CAAC;KACnD,WAAW,CAAC,iDAAiD,CAAC;KAC9D,MAAM,CAAC,KAAK,EAAE,IAAY,EAAE,EAAE;IAC7B,IAAI,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,8BAA8B,IAAI,EAAE,CAAC,CAAC,CAAC;QAE9D,MAAM,UAAU,GAAG,WAAW,CAAC;YAC7B,IAAI;YACJ,SAAS,EAAE,OAAO,CAAC,GAAG,EAAE;SACzB,CAAC,CAAC;QAEH,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC,CAAC;QAC1D,OAAO,CAAC,GAAG,CAAC,uBAAuB
,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC,CAAC;IACzD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACtD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC,CAAC;QACxD,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL;;;GAGG;AACH,OAAO;KACJ,QAAQ,CAAC,UAAU,EAAE,sCAAsC,CAAC;KAC5D,MAAM,CAAC,OAAO,EAAE,0CAA0C,CAAC;KAC3D,MAAM,CAAC,KAAK,EAAE,WAA+B,EAAE,OAA0B,EAAE,EAAE;IAC5E,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,OAAO,CAAC,IAAI,EAAE,CAAC;QACf,OAAO;IACT,CAAC;IACD,MAAM,oBAAoB,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;AACnD,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,EAAE,CAAC"}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * AI SDK Agent - A simple coding agent using the Vercel AI SDK.
3
+ * Works with any model available on Vercel AI Gateway.
4
+ */
5
+ import type { Agent } from './types.js';
6
+ /**
7
+ * Create AI SDK agent with Vercel AI Gateway authentication.
8
+ */
9
+ export declare function createAiSdkAgent(): Agent;
10
+ //# sourceMappingURL=ai-sdk-agent.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-sdk-agent.d.ts","sourceRoot":"","sources":["../../../src/lib/agents/ai-sdk-agent.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAmC,MAAM,YAAY,CAAC;AA8QzE;;GAEG;AACH,wBAAgB,gBAAgB,IAAI,KAAK,CAiMxC"}
@@ -0,0 +1,427 @@
1
+ /**
2
+ * AI SDK Agent - A simple coding agent using the Vercel AI SDK.
3
+ * Works with any model available on Vercel AI Gateway.
4
+ */
5
+ import { createSandbox, collectLocalFiles, splitTestFiles, verifyNoTestFiles, } from '../sandbox.js';
6
+ import { runValidation, captureGeneratedFiles, createVitestConfig, AI_GATEWAY, } from './shared.js';
7
/**
 * Source code of the CLI script that runs inside the sandbox.
 *
 * The script is self-contained: it reads `--prompt` and `--model` from argv,
 * creates an AI Gateway client from the AI_GATEWAY_API_KEY environment
 * variable, exposes file and shell tools to the model through the AI SDK, and
 * prints every transcript event to stdout as one JSON object per line.
 *
 * This is a template literal, so backticks, `${` sequences and backslashes
 * that belong to the embedded script are escaped below.
 */
const CLI_SCRIPT = `
import { generateText, tool, stepCountIs } from 'ai';
import { createGateway } from '@ai-sdk/gateway';
import { z } from 'zod';
import { readFileSync, writeFileSync, readdirSync, existsSync, statSync } from 'fs';
import { execSync } from 'child_process';
import { join, dirname } from 'path';
import { mkdirSync } from 'fs';

// Parse command line arguments
const args = process.argv.slice(2);
let prompt = '';
let model = '';

for (let i = 0; i < args.length; i++) {
  if (args[i] === '--prompt' && args[i + 1]) {
    prompt = args[++i];
  } else if (args[i] === '--model' && args[i + 1]) {
    model = args[++i];
  }
}

if (!prompt || !model) {
  console.error('Usage: ai-sdk-agent --prompt "..." --model "provider/model"');
  process.exit(1);
}

// Create AI Gateway client
const gateway = createGateway({
  apiKey: process.env.AI_GATEWAY_API_KEY,
});

// Transcript events
const events = [];

// Record an event and emit it on stdout as a single JSON line.
function logEvent(type, data) {
  const event = { type, timestamp: Date.now(), ...data };
  events.push(event);
  console.log(JSON.stringify(event));
}

// Wrap a model-supplied value in single quotes for the shell, escaping any
// embedded single quote, so it always reaches the command as one literal argument.
function shellQuote(value) {
  return "'" + String(value).replace(/'/g, "'\\\\''") + "'";
}

// Define coding tools
const tools = {
  readFile: tool({
    description: 'Read the contents of a file at the given path',
    inputSchema: z.object({
      path: z.string().describe('The file path to read'),
    }),
    execute: async ({ path }) => {
      try {
        const content = readFileSync(path, 'utf-8');
        logEvent('tool_result', { tool: 'readFile', path, success: true });
        return content;
      } catch (error) {
        logEvent('tool_result', { tool: 'readFile', path, success: false, error: error.message });
        return \`Error reading file: \${error.message}\`;
      }
    },
  }),

  writeFile: tool({
    description: 'Write content to a file at the given path. Creates directories if needed.',
    inputSchema: z.object({
      path: z.string().describe('The file path to write'),
      content: z.string().describe('The content to write'),
    }),
    execute: async ({ path, content }) => {
      try {
        const dir = dirname(path);
        if (dir && dir !== '.') {
          mkdirSync(dir, { recursive: true });
        }
        writeFileSync(path, content);
        logEvent('tool_result', { tool: 'writeFile', path, success: true });
        return 'File written successfully';
      } catch (error) {
        logEvent('tool_result', { tool: 'writeFile', path, success: false, error: error.message });
        return \`Error writing file: \${error.message}\`;
      }
    },
  }),

  editFile: tool({
    description: 'Edit a file by replacing a specific string with new content',
    inputSchema: z.object({
      path: z.string().describe('The file path to edit'),
      oldString: z.string().describe('The exact string to find and replace'),
      newString: z.string().describe('The replacement string'),
    }),
    execute: async ({ path, oldString, newString }) => {
      try {
        const content = readFileSync(path, 'utf-8');
        if (!content.includes(oldString)) {
          logEvent('tool_result', { tool: 'editFile', path, success: false, error: 'String not found' });
          return 'Error: The specified string was not found in the file';
        }
        // A function replacer inserts newString verbatim. A plain string
        // replacement would expand sequences such as $& or $1 and corrupt
        // code that happens to contain them. Only the first match is replaced.
        const newContent = content.replace(oldString, () => newString);
        writeFileSync(path, newContent);
        logEvent('tool_result', { tool: 'editFile', path, success: true });
        return 'File edited successfully';
      } catch (error) {
        logEvent('tool_result', { tool: 'editFile', path, success: false, error: error.message });
        return \`Error editing file: \${error.message}\`;
      }
    },
  }),

  listFiles: tool({
    description: 'List files in a directory. Call with path="." to list current directory.',
    inputSchema: z.object({
      path: z.string().describe('The directory path to list (use "." for current directory)'),
      recursive: z.boolean().describe('Whether to list recursively').optional(),
    }),
    execute: async ({ path, recursive }) => {
      const targetPath = path || '.';
      const isRecursive = recursive || false;
      try {
        if (isRecursive) {
          // Quote the path so spaces and shell metacharacters stay literal.
          const result = execSync(\`find \${shellQuote(targetPath)} -type f | head -100\`, { encoding: 'utf-8' });
          logEvent('tool_result', { tool: 'listFiles', path: targetPath, recursive: isRecursive, success: true });
          return result;
        }
        const files = readdirSync(targetPath);
        logEvent('tool_result', { tool: 'listFiles', path: targetPath, recursive: isRecursive, success: true });
        return files.join('\\n');
      } catch (error) {
        logEvent('tool_result', { tool: 'listFiles', path: targetPath, success: false, error: error.message });
        return \`Error listing files: \${error.message}\`;
      }
    },
  }),

  glob: tool({
    description: 'Find files matching a pattern (e.g., "*.ts" for TypeScript files)',
    inputSchema: z.object({
      pattern: z.string().describe('The file pattern (e.g., "*.ts", "*.js")'),
    }),
    execute: async ({ pattern }) => {
      try {
        // Extract just the file pattern, remove any path prefix
        const filePattern = pattern.replace(/^\\*\\*\\//, '').replace(/^\\.\\//, '');
        // Quote the pattern so find receives it as one literal -name argument.
        const result = execSync(\`find . -name \${shellQuote(filePattern)} -type f 2>/dev/null | grep -v node_modules | head -50\`, { encoding: 'utf-8' });
        logEvent('tool_result', { tool: 'glob', pattern, success: true });
        return result.trim() || 'No files found';
      } catch (error) {
        logEvent('tool_result', { tool: 'glob', pattern, success: false, error: error.message });
        return 'No files found';
      }
    },
  }),

  grep: tool({
    description: 'Search for a text pattern in files',
    inputSchema: z.object({
      pattern: z.string().describe('The search pattern'),
      path: z.string().describe('The file or directory to search in').optional(),
    }),
    execute: async ({ pattern, path }) => {
      const targetPath = path || '.';
      try {
        // -e keeps a pattern that starts with "-" from being read as an option,
        // and "--" does the same for the path; both values are shell-quoted.
        const result = execSync(\`grep -rn -e \${shellQuote(pattern)} -- \${shellQuote(targetPath)} 2>/dev/null | grep -v node_modules | head -50\`, { encoding: 'utf-8' });
        logEvent('tool_result', { tool: 'grep', pattern, path: targetPath, success: true });
        return result.trim() || 'No matches found';
      } catch (error) {
        logEvent('tool_result', { tool: 'grep', pattern, path: targetPath, success: false });
        return 'No matches found';
      }
    },
  }),

  bash: tool({
    description: 'Run a bash command',
    inputSchema: z.object({
      command: z.string().describe('The command to run'),
    }),
    execute: async ({ command }) => {
      try {
        // The command is run as given, with a 30 second timeout.
        const result = execSync(command, { encoding: 'utf-8', timeout: 30000 });
        logEvent('tool_result', { tool: 'bash', command, success: true });
        return result;
      } catch (error) {
        logEvent('tool_result', { tool: 'bash', command, success: false, error: error.message });
        return \`Error: \${error.message}\\n\${error.stdout || ''}\\n\${error.stderr || ''}\`;
      }
    },
  }),
};

// System prompt for the coding agent
const systemPrompt = \`You are an expert coding agent. Your job is to complete programming tasks by reading, writing, and modifying files.

Available tools:
- readFile(path): Read a file's contents
- writeFile(path, content): Write/create a file (creates directories if needed)
- editFile(path, oldString, newString): Replace a specific string in a file
- listFiles(path): List files in a directory (use path="." for current directory)
- glob(pattern): Find files by pattern (e.g., "*.ts")
- grep(pattern, path): Search for text in files
- bash(command): Run shell commands

IMPORTANT WORKFLOW:
1. First, list files to understand the project structure: listFiles(path=".")
2. Read any relevant existing files to understand the context
3. Make the necessary code changes using writeFile or editFile
4. If needed, run build/test commands with bash to verify

RULES:
- Always check what files exist before modifying them
- Create complete, working code - not placeholders
- Put files in the correct directories (e.g., src/ for source files)
- Be thorough but efficient\`;

// Run the agent
async function main() {
  logEvent('start', { model, prompt });

  try {
    const result = await generateText({
      model: gateway(model),
      tools,
      stopWhen: stepCountIs(100), // Allow up to 100 steps
      system: systemPrompt,
      prompt,
      onStepFinish: ({ stepType, text, toolCalls, toolResults }) => {
        logEvent('step', { stepType, text, toolCalls: toolCalls?.length, toolResults: toolResults?.length });
      },
    });

    logEvent('complete', {
      success: true,
      steps: result.steps.length,
      text: result.text,
    });
  } catch (error) {
    logEvent('error', {
      success: false,
      error: error.message,
      name: error.name,
    });
    // Exit non-zero so a failed generation is visible through the exit code.
    process.exit(1);
  }
}

main();
`;
256
+ /**
257
+ * Create AI SDK agent with Vercel AI Gateway authentication.
258
+ */
259
+ export function createAiSdkAgent() {
260
+ return {
261
+ name: 'vercel-ai-gateway/ai-sdk-harness',
262
+ displayName: 'AI SDK Harness (Vercel AI Gateway)',
263
+ getApiKeyEnvVar() {
264
+ return AI_GATEWAY.apiKeyEnvVar;
265
+ },
266
+ getDefaultModel() {
267
+ return 'anthropic/claude-sonnet-4';
268
+ },
269
+ async run(fixturePath, options) {
270
+ const startTime = Date.now();
271
+ let sandbox = null;
272
+ let agentOutput = '';
273
+ let aborted = false;
274
+ let sandboxStopped = false;
275
+ // Handle abort signal
276
+ const abortHandler = () => {
277
+ aborted = true;
278
+ if (sandbox && !sandboxStopped) {
279
+ sandboxStopped = true;
280
+ sandbox.stop().catch(() => { });
281
+ }
282
+ };
283
+ if (options.signal) {
284
+ if (options.signal.aborted) {
285
+ return {
286
+ success: false,
287
+ output: '',
288
+ error: 'Aborted before start',
289
+ duration: 0,
290
+ };
291
+ }
292
+ options.signal.addEventListener('abort', abortHandler);
293
+ }
294
+ try {
295
+ // Collect files from fixture
296
+ const allFiles = await collectLocalFiles(fixturePath);
297
+ const { workspaceFiles, testFiles } = splitTestFiles(allFiles);
298
+ // Check for abort before expensive operations
299
+ if (aborted) {
300
+ return {
301
+ success: false,
302
+ output: '',
303
+ error: 'Aborted',
304
+ duration: Date.now() - startTime,
305
+ };
306
+ }
307
+ // Create sandbox
308
+ sandbox = await createSandbox({
309
+ timeout: options.timeout,
310
+ runtime: 'node24',
311
+ backend: options.sandbox,
312
+ });
313
+ // Check for abort after sandbox creation
314
+ if (aborted) {
315
+ return {
316
+ success: false,
317
+ output: '',
318
+ error: 'Aborted',
319
+ duration: Date.now() - startTime,
320
+ sandboxId: sandbox.sandboxId,
321
+ };
322
+ }
323
+ // Upload workspace files (excluding tests)
324
+ await sandbox.uploadFiles(workspaceFiles);
325
+ // Run setup function if provided
326
+ if (options.setup) {
327
+ await options.setup(sandbox);
328
+ }
329
+ // Install dependencies
330
+ const installResult = await sandbox.runCommand('npm', ['install']);
331
+ if (installResult.exitCode !== 0) {
332
+ throw new Error(`npm install failed: ${installResult.stderr}`);
333
+ }
334
+ // Install AI SDK dependencies
335
+ const aiInstall = await sandbox.runCommand('npm', [
336
+ 'install',
337
+ 'ai@^5.0.11',
338
+ '@ai-sdk/gateway@^1.0.0',
339
+ 'zod@^3.23.8',
340
+ ]);
341
+ if (aiInstall.exitCode !== 0) {
342
+ throw new Error(`AI SDK install failed: ${aiInstall.stderr}`);
343
+ }
344
+ // Write the CLI script to the sandbox
345
+ await sandbox.writeFiles({
346
+ 'ai-sdk-agent.mjs': CLI_SCRIPT,
347
+ });
348
+ // Verify no test files in sandbox
349
+ await verifyNoTestFiles(sandbox);
350
+ // Run the AI SDK agent
351
+ const agentResult = await sandbox.runCommand('node', [
352
+ 'ai-sdk-agent.mjs',
353
+ '--prompt',
354
+ options.prompt,
355
+ '--model',
356
+ options.model,
357
+ ], {
358
+ env: {
359
+ [AI_GATEWAY.apiKeyEnvVar]: options.apiKey,
360
+ },
361
+ });
362
+ agentOutput = agentResult.stdout + agentResult.stderr;
363
+ if (agentResult.exitCode !== 0) {
364
+ // Extract meaningful error from output
365
+ const errorLines = agentOutput.trim().split('\n').slice(-5).join('\n');
366
+ return {
367
+ success: false,
368
+ output: agentOutput,
369
+ error: errorLines || `AI SDK agent exited with code ${agentResult.exitCode}`,
370
+ duration: Date.now() - startTime,
371
+ sandboxId: sandbox.sandboxId,
372
+ };
373
+ }
374
+ // Upload test files for validation
375
+ await sandbox.uploadFiles(testFiles);
376
+ // Create vitest config for EVAL.ts/tsx
377
+ await createVitestConfig(sandbox);
378
+ // The agent outputs JSON events, use that as transcript
379
+ const transcript = agentOutput;
380
+ // Run validation scripts
381
+ const validationResults = await runValidation(sandbox, options.scripts ?? []);
382
+ // Capture generated files
383
+ const generatedFiles = await captureGeneratedFiles(sandbox);
384
+ return {
385
+ success: validationResults.allPassed,
386
+ output: agentOutput,
387
+ transcript,
388
+ duration: Date.now() - startTime,
389
+ testResult: validationResults.test,
390
+ scriptsResults: validationResults.scripts,
391
+ sandboxId: sandbox.sandboxId,
392
+ generatedFiles,
393
+ };
394
+ }
395
+ catch (error) {
396
+ // Check if this was an abort
397
+ if (aborted) {
398
+ return {
399
+ success: false,
400
+ output: agentOutput,
401
+ error: 'Aborted',
402
+ duration: Date.now() - startTime,
403
+ sandboxId: sandbox?.sandboxId,
404
+ };
405
+ }
406
+ return {
407
+ success: false,
408
+ output: agentOutput,
409
+ error: error instanceof Error ? error.message : String(error),
410
+ duration: Date.now() - startTime,
411
+ sandboxId: sandbox?.sandboxId,
412
+ };
413
+ }
414
+ finally {
415
+ // Clean up abort listener
416
+ if (options.signal) {
417
+ options.signal.removeEventListener('abort', abortHandler);
418
+ }
419
+ if (sandbox && !sandboxStopped) {
420
+ sandboxStopped = true;
421
+ await sandbox.stop();
422
+ }
423
+ }
424
+ },
425
+ };
426
+ }
427
+ //# sourceMappingURL=ai-sdk-agent.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-sdk-agent.js","sourceRoot":"","sources":["../../../src/lib/agents/ai-sdk-agent.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,OAAO,EACL,aAAa,EACb,iBAAiB,EACjB,cAAc,EACd,iBAAiB,GAElB,MAAM,eAAe,CAAC;AAEvB,OAAO,EACL,aAAa,EACb,qBAAqB,EACrB,kBAAkB,EAClB,UAAU,GACX,MAAM,aAAa,CAAC;AAKrB;;;GAGG;AACH,MAAM,UAAU,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAoPlB,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,gBAAgB;IAC9B,OAAO;QACL,IAAI,EAAE,kCAAkC;QACxC,WAAW,EAAE,oCAAoC;QAEjD,eAAe;YACb,OAAO,UAAU,CAAC,YAAY,CAAC;QACjC,CAAC;QAED,eAAe;YACb,OAAO,2BAA2B,CAAC;QACrC,CAAC;QAED,KAAK,CAAC,GAAG,CAAC,WAAmB,EAAE,OAAwB;YACrD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC7B,IAAI,OAAO,GAAsB,IAAI,CAAC;YACtC,IAAI,WAAW,GAAG,EAAE,CAAC;YACrB,IAAI,OAAO,GAAG,KAAK,CAAC;YACpB,IAAI,cAAc,GAAG,KAAK,CAAC;YAE3B,sBAAsB;YACtB,MAAM,YAAY,GAAG,GAAG,EAAE;gBACxB,OAAO,GAAG,IAAI,CAAC;gBACf,IAAI,OAAO,IAAI,CAAC,cAAc,EAAE,CAAC;oBAC/B,cAAc,GAAG,IAAI,CAAC;oBACtB,OAAO,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;gBACjC,CAAC;YACH,CAAC,CAAC;YAEF,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;gBACnB,IAAI,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;oBAC3B,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,EAAE;wBACV,KAAK,EAAE,sBAAsB;wBAC7B,QAAQ,EAAE,CAAC;qBACZ,CAAC;gBACJ,CAAC;gBACD,OAAO,CAAC,MAAM,CAAC,gBAAgB,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;YACzD,CAAC;YAED,IAAI,CAAC;gBACH,6BAA6B;gBAC7B,MAAM,QAAQ,GAAG,MAAM,iBAAiB,CAAC,WAAW,CAAC,CAAC;gBACtD,MAAM,EAAE,cAAc,EAAE,SAAS,EAAE,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;gBAE/D,8CAA8C;gBAC9C,IAAI,OAAO,EAAE,CAAC;oBACZ,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,EAAE;wBACV,KAAK,EAAE,SAAS;wBAChB,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;qBACjC,CAAC;gBACJ,CAAC;gBAED,iBAAiB;gBACjB,OAAO,GAAG,MAAM,aAAa,CAAC;oBAC5B,OAAO,EAAE,OAAO,CAAC,OAAO;oBACxB,OAAO,EAAE,QAAQ;oBACjB,OAAO,EAAE,OAAO,CAAC,OAAO;iBACzB,CAAC,CAAC;gBAEH,yCAAyC;gBACzC,
IAAI,OAAO,EAAE,CAAC;oBACZ,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,EAAE;wBACV,KAAK,EAAE,SAAS;wBAChB,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;wBAChC,SAAS,EAAE,OAAO,CAAC,SAAS;qBAC7B,CAAC;gBACJ,CAAC;gBAED,2CAA2C;gBAC3C,MAAM,OAAO,CAAC,WAAW,CAAC,cAAc,CAAC,CAAC;gBAE1C,iCAAiC;gBACjC,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;oBAClB,MAAM,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;gBAC/B,CAAC;gBAED,uBAAuB;gBACvB,MAAM,aAAa,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC;gBACnE,IAAI,aAAa,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;oBACjC,MAAM,IAAI,KAAK,CAAC,uBAAuB,aAAa,CAAC,MAAM,EAAE,CAAC,CAAC;gBACjE,CAAC;gBAED,8BAA8B;gBAC9B,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,KAAK,EAAE;oBAChD,SAAS;oBACT,YAAY;oBACZ,wBAAwB;oBACxB,aAAa;iBACd,CAAC,CAAC;gBACH,IAAI,SAAS,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;oBAC7B,MAAM,IAAI,KAAK,CAAC,0BAA0B,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC;gBAChE,CAAC;gBAED,sCAAsC;gBACtC,MAAM,OAAO,CAAC,UAAU,CAAC;oBACvB,kBAAkB,EAAE,UAAU;iBAC/B,CAAC,CAAC;gBAEH,kCAAkC;gBAClC,MAAM,iBAAiB,CAAC,OAAO,CAAC,CAAC;gBAEjC,uBAAuB;gBACvB,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC,UAAU,CAC1C,MAAM,EACN;oBACE,kBAAkB;oBAClB,UAAU;oBACV,OAAO,CAAC,MAAM;oBACd,SAAS;oBACT,OAAO,CAAC,KAAK;iBACd,EACD;oBACE,GAAG,EAAE;wBACH,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC,MAAM;qBAC1C;iBACF,CACF,CAAC;gBAEF,WAAW,GAAG,WAAW,CAAC,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC;gBAEtD,IAAI,WAAW,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;oBAC/B,uCAAuC;oBACvC,MAAM,UAAU,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACvE,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,WAAW;wBACnB,KAAK,EAAE,UAAU,IAAI,iCAAiC,WAAW,CAAC,QAAQ,EAAE;wBAC5E,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;wBAChC,SAAS,EAAE,OAAO,CAAC,SAAS;qBAC7B,CAAC;gBACJ,CAAC;gBAED,mCAAmC;gBACnC,MAAM,OAAO,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;gBAErC,uCAAuC;gBACvC,MAAM,kBAAkB,CAAC,OAAO,CAAC,CAAC;gBAElC,wDAAwD;gBACxD,MAAM,UAAU,GAAG,WAAW,CAAC;gBAE/B,yBAAyB;gBACzB,MAAM,iBAAiB,GAAG,MAAM,aAAa,CAAC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;gBAE9E,0BAA0B;gBAC1B,MAAM,cAAc,GAAG,MAAM,qBAAqB,CAAC,OAAO,CA
AC,CAAC;gBAE5D,OAAO;oBACL,OAAO,EAAE,iBAAiB,CAAC,SAAS;oBACpC,MAAM,EAAE,WAAW;oBACnB,UAAU;oBACV,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;oBAChC,UAAU,EAAE,iBAAiB,CAAC,IAAI;oBAClC,cAAc,EAAE,iBAAiB,CAAC,OAAO;oBACzC,SAAS,EAAE,OAAO,CAAC,SAAS;oBAC5B,cAAc;iBACf,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,6BAA6B;gBAC7B,IAAI,OAAO,EAAE,CAAC;oBACZ,OAAO;wBACL,OAAO,EAAE,KAAK;wBACd,MAAM,EAAE,WAAW;wBACnB,KAAK,EAAE,SAAS;wBAChB,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;wBAChC,SAAS,EAAE,OAAO,EAAE,SAAS;qBAC9B,CAAC;gBACJ,CAAC;gBACD,OAAO;oBACL,OAAO,EAAE,KAAK;oBACd,MAAM,EAAE,WAAW;oBACnB,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;oBAC7D,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;oBAChC,SAAS,EAAE,OAAO,EAAE,SAAS;iBAC9B,CAAC;YACJ,CAAC;oBAAS,CAAC;gBACT,0BAA0B;gBAC1B,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;oBACnB,OAAO,CAAC,MAAM,CAAC,mBAAmB,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;gBAC5D,CAAC;gBACD,IAAI,OAAO,IAAI,CAAC,cAAc,EAAE,CAAC;oBAC/B,cAAc,GAAG,IAAI,CAAC;oBACtB,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC;gBACvB,CAAC;YACH,CAAC;QACH,CAAC;KACF,CAAC;AACJ,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"claude-code.d.ts","sourceRoot":"","sources":["../../../src/lib/agents/claude-code.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAmC,MAAM,YAAY,CAAC;AAmDzE;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,EAAE,kBAAkB,EAAE,EAAE;IAAE,kBAAkB,EAAE,OAAO,CAAA;CAAE,GAAG,KAAK,CA4LpG"}
1
+ {"version":3,"file":"claude-code.d.ts","sourceRoot":"","sources":["../../../src/lib/agents/claude-code.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAmC,MAAM,YAAY,CAAC;AAmDzE;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,EAAE,kBAAkB,EAAE,EAAE;IAAE,kBAAkB,EAAE,OAAO,CAAA;CAAE,GAAG,KAAK,CA6LpG"}
@@ -80,10 +80,11 @@ export function createClaudeCodeAgent({ useVercelAiGateway }) {
80
80
  duration: Date.now() - startTime,
81
81
  };
82
82
  }
83
- // Create sandbox (auto-detects backend based on env)
83
+ // Create sandbox
84
84
  sandbox = await createSandbox({
85
85
  timeout: options.timeout,
86
86
  runtime: 'node24',
87
+ backend: options.sandbox,
87
88
  });
88
89
  // Check for abort after sandbox creation (abort may have fired during create)
89
90
  if (aborted) {