@vercel/agent-eval 0.0.13 → 0.0.15
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/lib/init.d.ts.map +1 -1
- package/dist/lib/init.js +63 -1
- package/dist/lib/init.js.map +1 -1
- package/package.json +1 -1
package/dist/lib/init.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"init.d.ts","sourceRoot":"","sources":["../../src/lib/init.ts"],"names":[],"mappings":"AAAA;;GAEG;
|
|
1
|
+
{"version":3,"file":"init.d.ts","sourceRoot":"","sources":["../../src/lib/init.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,mBAAmB;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,+DAA+D;IAC/D,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AA+SD;;GAEG;AACH,wBAAgB,WAAW,CAAC,OAAO,EAAE,WAAW,GAAG,MAAM,CA0BxD;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CAAC,UAAU,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,GAAG,MAAM,CAavF"}
|
package/dist/lib/init.js
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import { mkdirSync, writeFileSync, existsSync } from 'fs';
|
|
5
5
|
import { join, dirname } from 'path';
|
|
6
|
+
import pkg from '../../package.json' with { type: 'json' };
|
|
6
7
|
/**
|
|
7
8
|
* Get the package.json template.
|
|
8
9
|
*/
|
|
@@ -13,7 +14,7 @@ function getPackageJson(projectName) {
|
|
|
13
14
|
private: true,
|
|
14
15
|
type: 'module',
|
|
15
16
|
devDependencies: {
|
|
16
|
-
'agent-eval':
|
|
17
|
+
'@vercel/agent-eval': `^${pkg.version}`,
|
|
17
18
|
'@types/node': '^22.0.0',
|
|
18
19
|
typescript: '^5.6.0',
|
|
19
20
|
vitest: '^2.1.0',
|
|
@@ -51,6 +52,66 @@ dist/
|
|
|
51
52
|
results/
|
|
52
53
|
*.log
|
|
53
54
|
.DS_Store
|
|
55
|
+
`;
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Get the README.md template.
|
|
59
|
+
*/
|
|
60
|
+
function getReadme() {
|
|
61
|
+
return `# Agent Evaluation Suite
|
|
62
|
+
|
|
63
|
+
Test AI coding agents to measure what actually works.
|
|
64
|
+
|
|
65
|
+
## Setup
|
|
66
|
+
|
|
67
|
+
1. **Install dependencies:**
|
|
68
|
+
\`\`\`bash
|
|
69
|
+
npm install
|
|
70
|
+
\`\`\`
|
|
71
|
+
|
|
72
|
+
2. **Configure environment variables:**
|
|
73
|
+
\`\`\`bash
|
|
74
|
+
cp .env.example .env.local
|
|
75
|
+
\`\`\`
|
|
76
|
+
|
|
77
|
+
Edit \`.env.local\` and add your API keys:
|
|
78
|
+
- \`AI_GATEWAY_API_KEY\` - Vercel AI Gateway API key ([get yours](https://vercel.com/dashboard))
|
|
79
|
+
- \`VERCEL_TOKEN\` - Vercel personal access token ([create one](https://vercel.com/account/tokens))
|
|
80
|
+
|
|
81
|
+
## Running Evals
|
|
82
|
+
|
|
83
|
+
### Preview (no cost)
|
|
84
|
+
|
|
85
|
+
See what will run without making API calls:
|
|
86
|
+
|
|
87
|
+
\`\`\`bash
|
|
88
|
+
npx @vercel/agent-eval cc --dry
|
|
89
|
+
\`\`\`
|
|
90
|
+
|
|
91
|
+
### Run Experiments
|
|
92
|
+
|
|
93
|
+
Run the Claude Code experiment:
|
|
94
|
+
|
|
95
|
+
\`\`\`bash
|
|
96
|
+
npx @vercel/agent-eval cc
|
|
97
|
+
\`\`\`
|
|
98
|
+
|
|
99
|
+
Run the Codex experiment:
|
|
100
|
+
|
|
101
|
+
\`\`\`bash
|
|
102
|
+
npx @vercel/agent-eval codex
|
|
103
|
+
\`\`\`
|
|
104
|
+
|
|
105
|
+
### View Results
|
|
106
|
+
|
|
107
|
+
Launch the web-based results viewer:
|
|
108
|
+
|
|
109
|
+
\`\`\`bash
|
|
110
|
+
npx @vercel/agent-eval playground
|
|
111
|
+
\`\`\`
|
|
112
|
+
|
|
113
|
+
Open [http://localhost:3000](http://localhost:3000) to browse results.
|
|
114
|
+
|
|
54
115
|
`;
|
|
55
116
|
}
|
|
56
117
|
/**
|
|
@@ -197,6 +258,7 @@ function getTemplateFiles(projectName) {
|
|
|
197
258
|
{ path: 'tsconfig.json', content: getRootTsconfig() },
|
|
198
259
|
{ path: '.env.example', content: getEnvExample() },
|
|
199
260
|
{ path: '.gitignore', content: getGitignore() },
|
|
261
|
+
{ path: 'README.md', content: getReadme() },
|
|
200
262
|
{ path: 'experiments/cc.ts', content: getCCExperiment() },
|
|
201
263
|
{ path: 'experiments/codex.ts', content: getCodexExperiment() },
|
|
202
264
|
{ path: 'evals/add-greeting/PROMPT.md', content: getExamplePrompt() },
|
package/dist/lib/init.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"init.js","sourceRoot":"","sources":["../../src/lib/init.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAC1D,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"init.js","sourceRoot":"","sources":["../../src/lib/init.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAC1D,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AACrC,OAAO,GAAG,MAAM,oBAAoB,CAAC,OAAO,IAAI,EAAE,MAAM,EAAE,CAAC;AAoB3D;;GAEG;AACH,SAAS,cAAc,CAAC,WAAmB;IACzC,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,IAAI,EAAE,WAAW;QACjB,OAAO,EAAE,OAAO;QAChB,OAAO,EAAE,IAAI;QACb,IAAI,EAAE,QAAQ;QACd,eAAe,EAAE;YACf,oBAAoB,EAAE,IAAI,GAAG,CAAC,OAAO,EAAE;YACvC,aAAa,EAAE,SAAS;YACxB,UAAU,EAAE,QAAQ;YACpB,MAAM,EAAE,QAAQ;SACjB;KACF,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,aAAa;IACpB,OAAO;;;;;;;;;;;;;;CAcR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,YAAY;IACnB,OAAO;;;;;;;CAOR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,SAAS;IAChB,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsDR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,eAAe;IACtB,OAAO;;;;;;;;;;;CAWR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB;IACzB,OAAO;;;;;;;;;;;CAWR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB;IACvB,OAAO;;;;;;CAMR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,cAAc;IACrB,OAAO;;;;;;;;;;;;;CAaR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,qBAAqB;IAC5B,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,IAAI,EAAE,cAAc;QACpB,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE;YACP,KAAK,EAAE,KAAK;SACb;QACD,YAAY,EAAE;YACZ,KAAK,EAAE,SAAS;SACjB;QACD,eAAe,EAAE;YACf,cAAc,EAAE,SAAS;YACzB,UAAU,EAAE,QAAQ;YACpB,MAAM,EAAE,QAAQ;SACjB;KACF,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,eAAe;IACtB,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,eAAe,EAAE;YACf,MAAM,EAAE,QAAQ;YAChB,MAAM,EAAE,UAAU;YAClB,gBAAgB,EAAE,UAAU;YAC5B,MAAM,EAAE,IAAI;YACZ,YAAY,EAAE,IAAI;YAClB,MAAM,EAAE,IAAI;YACZ,GAAG,EAAE,CAAC,QAAQ,CAAC;SAChB;QACD,OAAO,EAAE,CAAC,aAAa,CAAC;KACzB,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB;IACzB,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,eAAe,EAAE;YACf,MAAM,EAAE,QAAQ;YAChB,MAAM,EAAE,QAAQ;YAChB,gBAAgB,EAAE,SAAS;YAC3B,GAAG,EAAE,WAAW;YAChB,MAAM,EAAE,IAAI;YACZ,MAAM,EAAE,MAAM;YACd,YAAY,EAAE,IAAI;SACnB;QACD,OAAO,EAAE,CAAC,KAAK,
CAAC;KACjB,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,aAAa;IACpB,OAAO;;;;;;;;;;CAUR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,WAAmB;IAC3C,OAAO;QACL,EAAE,IAAI,EAAE,cAAc,EAAE,OAAO,EAAE,cAAc,CAAC,WAAW,CAAC,EAAE;QAC9D,EAAE,IAAI,EAAE,eAAe,EAAE,OAAO,EAAE,eAAe,EAAE,EAAE;QACrD,EAAE,IAAI,EAAE,cAAc,EAAE,OAAO,EAAE,aAAa,EAAE,EAAE;QAClD,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,YAAY,EAAE,EAAE;QAC/C,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE,EAAE;QAC3C,EAAE,IAAI,EAAE,mBAAmB,EAAE,OAAO,EAAE,eAAe,EAAE,EAAE;QACzD,EAAE,IAAI,EAAE,sBAAsB,EAAE,OAAO,EAAE,kBAAkB,EAAE,EAAE;QAC/D,EAAE,IAAI,EAAE,8BAA8B,EAAE,OAAO,EAAE,gBAAgB,EAAE,EAAE;QACrE,EAAE,IAAI,EAAE,4BAA4B,EAAE,OAAO,EAAE,cAAc,EAAE,EAAE;QACjE,EAAE,IAAI,EAAE,iCAAiC,EAAE,OAAO,EAAE,qBAAqB,EAAE,EAAE;QAC7E,EAAE,IAAI,EAAE,kCAAkC,EAAE,OAAO,EAAE,kBAAkB,EAAE,EAAE;QAC3E,EAAE,IAAI,EAAE,gCAAgC,EAAE,OAAO,EAAE,aAAa,EAAE,EAAE;KACrE,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,OAAoB;IAC9C,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IACrD,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;IAEjD,oCAAoC;IACpC,IAAI,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,KAAK,CAAC,6BAA6B,UAAU,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,2BAA2B;IAC3B,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE3C,2BAA2B;IAC3B,MAAM,KAAK,GAAG,gBAAgB,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAC7C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7C,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;QAElC,4BAA4B;QAC5B,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAExC,aAAa;QACb,aAAa,CAAC,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;IACxC,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,uBAAuB,CAAC,UAAkB,EAAE,WAAmB;IAC7E,OAAO;sBACa,UAAU;;;UAGtB,WAAW;;;;;;;CAOpB,CAAC;AACF,CAAC"}
|