@vercel/agent-eval 0.0.9 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +69 -13
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/lib/agents/index.d.ts.map +1 -1
- package/dist/lib/agents/index.js +0 -2
- package/dist/lib/agents/index.js.map +1 -1
- package/dist/lib/config.d.ts.map +1 -1
- package/dist/lib/config.js +3 -1
- package/dist/lib/config.js.map +1 -1
- package/dist/lib/o11y/index.d.ts +11 -0
- package/dist/lib/o11y/index.d.ts.map +1 -0
- package/dist/lib/o11y/index.js +11 -0
- package/dist/lib/o11y/index.js.map +1 -0
- package/dist/lib/o11y/parsers/claude-code.d.ts +18 -0
- package/dist/lib/o11y/parsers/claude-code.d.ts.map +1 -0
- package/dist/lib/o11y/parsers/claude-code.js +343 -0
- package/dist/lib/o11y/parsers/claude-code.js.map +1 -0
- package/dist/lib/o11y/parsers/codex.d.ts +17 -0
- package/dist/lib/o11y/parsers/codex.d.ts.map +1 -0
- package/dist/lib/o11y/parsers/codex.js +296 -0
- package/dist/lib/o11y/parsers/codex.js.map +1 -0
- package/dist/lib/o11y/parsers/index.d.ts +51 -0
- package/dist/lib/o11y/parsers/index.d.ts.map +1 -0
- package/dist/lib/o11y/parsers/index.js +280 -0
- package/dist/lib/o11y/parsers/index.js.map +1 -0
- package/dist/lib/o11y/parsers/opencode.d.ts +17 -0
- package/dist/lib/o11y/parsers/opencode.d.ts.map +1 -0
- package/dist/lib/o11y/parsers/opencode.js +313 -0
- package/dist/lib/o11y/parsers/opencode.js.map +1 -0
- package/dist/lib/o11y/types.d.ts +113 -0
- package/dist/lib/o11y/types.d.ts.map +1 -0
- package/dist/lib/o11y/types.js +6 -0
- package/dist/lib/o11y/types.js.map +1 -0
- package/dist/lib/results.d.ts +2 -1
- package/dist/lib/results.d.ts.map +1 -1
- package/dist/lib/results.js +23 -7
- package/dist/lib/results.js.map +1 -1
- package/dist/lib/runner.d.ts +6 -5
- package/dist/lib/runner.d.ts.map +1 -1
- package/dist/lib/runner.js +21 -11
- package/dist/lib/runner.js.map +1 -1
- package/dist/lib/types.d.ts +25 -3
- package/dist/lib/types.d.ts.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/package.json +5 -3
- package/README.md +0 -474
- package/dist/lib/agents/ai-sdk-agent.d.ts +0 -10
- package/dist/lib/agents/ai-sdk-agent.d.ts.map +0 -1
- package/dist/lib/agents/ai-sdk-agent.js +0 -427
- package/dist/lib/agents/ai-sdk-agent.js.map +0 -1
package/dist/cli.js
CHANGED
|
@@ -14,6 +14,7 @@ import { runExperiment } from './lib/runner.js';
|
|
|
14
14
|
import { initProject, getPostInitInstructions } from './lib/init.js';
|
|
15
15
|
import { getAgent } from './lib/agents/index.js';
|
|
16
16
|
import { getSandboxBackendInfo } from './lib/sandbox.js';
|
|
17
|
+
import { spawnSync } from 'child_process';
|
|
17
18
|
// Load environment variables (.env.local first, then .env as fallback)
|
|
18
19
|
dotenvConfig({ path: '.env.local' });
|
|
19
20
|
dotenvConfig();
|
|
@@ -83,8 +84,18 @@ async function runExperimentCommand(configInput, options) {
|
|
|
83
84
|
for (const name of evalNames) {
|
|
84
85
|
console.log(chalk.green(` - ${name}`));
|
|
85
86
|
}
|
|
86
|
-
|
|
87
|
-
|
|
87
|
+
const models = Array.isArray(config.model) ? config.model : [config.model];
|
|
88
|
+
// Show info for all models
|
|
89
|
+
const totalRunsPerModel = evalNames.length * config.runs;
|
|
90
|
+
const totalRuns = totalRunsPerModel * models.length;
|
|
91
|
+
if (models.length > 1) {
|
|
92
|
+
console.log(chalk.blue(`\nRunning ${evalNames.length} eval(s) x ${config.runs} run(s) x ${models.length} model(s) = ${totalRuns} total runs`));
|
|
93
|
+
console.log(chalk.blue(`Agent: ${config.agent}, Models: ${models.join(', ')}, Timeout: ${config.timeout}s, Early Exit: ${config.earlyExit}`));
|
|
94
|
+
}
|
|
95
|
+
else {
|
|
96
|
+
console.log(chalk.blue(`\nRunning ${evalNames.length} eval(s) x ${config.runs} run(s) = ${totalRuns} total runs`));
|
|
97
|
+
console.log(chalk.blue(`Agent: ${config.agent}, Model: ${models[0]}, Timeout: ${config.timeout}s, Early Exit: ${config.earlyExit}`));
|
|
98
|
+
}
|
|
88
99
|
// Show which sandbox backend will be used
|
|
89
100
|
const sandboxInfo = getSandboxBackendInfo({ backend: config.sandbox });
|
|
90
101
|
console.log(chalk.blue(`Sandbox: ${sandboxInfo.description}`));
|
|
@@ -104,20 +115,37 @@ async function runExperimentCommand(configInput, options) {
|
|
|
104
115
|
// Filter fixtures to only the ones we want to run
|
|
105
116
|
const selectedFixtures = fixtures.filter((f) => evalNames.includes(f.name));
|
|
106
117
|
// Get experiment name from config file
|
|
107
|
-
const
|
|
118
|
+
const baseExperimentName = basename(configPath, '.ts').replace(/\.js$/, '');
|
|
108
119
|
const resultsDir = resolve(process.cwd(), 'results');
|
|
109
120
|
console.log(chalk.blue('\nStarting experiment...'));
|
|
110
|
-
// Run
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
experimentName
|
|
117
|
-
|
|
118
|
-
|
|
121
|
+
// Run experiments for each model
|
|
122
|
+
let allPassed = true;
|
|
123
|
+
for (const model of models) {
|
|
124
|
+
// Create a config for this specific model
|
|
125
|
+
const modelConfig = { ...config, model };
|
|
126
|
+
// Include model in experiment name when multiple models are specified
|
|
127
|
+
const experimentName = models.length > 1
|
|
128
|
+
? `${baseExperimentName}/${model}`
|
|
129
|
+
: baseExperimentName;
|
|
130
|
+
if (models.length > 1) {
|
|
131
|
+
console.log(chalk.blue(`\n--- Running with model: ${model} ---`));
|
|
132
|
+
}
|
|
133
|
+
// Run the experiment
|
|
134
|
+
const results = await runExperiment({
|
|
135
|
+
config: modelConfig,
|
|
136
|
+
fixtures: selectedFixtures,
|
|
137
|
+
apiKey,
|
|
138
|
+
resultsDir,
|
|
139
|
+
experimentName,
|
|
140
|
+
onProgress: (msg) => console.log(msg),
|
|
141
|
+
});
|
|
142
|
+
// Check if this experiment passed
|
|
143
|
+
const experimentPassed = results.evals.every((e) => e.passedRuns === e.totalRuns);
|
|
144
|
+
if (!experimentPassed) {
|
|
145
|
+
allPassed = false;
|
|
146
|
+
}
|
|
147
|
+
}
|
|
119
148
|
// Exit with appropriate code
|
|
120
|
-
const allPassed = results.evals.every((e) => e.passedRuns === e.totalRuns);
|
|
121
149
|
process.exit(allPassed ? 0 : 1);
|
|
122
150
|
}
|
|
123
151
|
catch (error) {
|
|
@@ -157,6 +185,34 @@ program
|
|
|
157
185
|
process.exit(1);
|
|
158
186
|
}
|
|
159
187
|
});
|
|
188
|
+
/**
|
|
189
|
+
* playground command - Launch the web-based results viewer
|
|
190
|
+
* Spawns @vercel/agent-eval-playground (downloaded on-demand via npx if not installed)
|
|
191
|
+
*/
|
|
192
|
+
program
|
|
193
|
+
.command('playground')
|
|
194
|
+
.description('Launch the web-based playground for browsing experiment results')
|
|
195
|
+
.option('--port <port>', 'HTTP server port', '3000')
|
|
196
|
+
.option('--results-dir <dir>', 'Path to results directory', './results')
|
|
197
|
+
.option('--evals-dir <dir>', 'Path to evals directory', './evals')
|
|
198
|
+
.option('--watch', 'Enable live mode — watch results directory for changes')
|
|
199
|
+
.action(async (options) => {
|
|
200
|
+
const resultsDir = resolve(process.cwd(), options.resultsDir);
|
|
201
|
+
const evalsDir = resolve(process.cwd(), options.evalsDir);
|
|
202
|
+
console.log(chalk.blue('Starting Agent Eval Playground...'));
|
|
203
|
+
// Build args for the playground CLI
|
|
204
|
+
const playgroundArgs = [
|
|
205
|
+
'--results-dir', resultsDir,
|
|
206
|
+
'--evals-dir', evalsDir,
|
|
207
|
+
'--port', options.port,
|
|
208
|
+
];
|
|
209
|
+
if (options.watch) {
|
|
210
|
+
playgroundArgs.push('--watch');
|
|
211
|
+
}
|
|
212
|
+
// Try to run the playground package directly, fall back to npx
|
|
213
|
+
const result = spawnSync('npx', ['@vercel/agent-eval-playground', ...playgroundArgs], { stdio: 'inherit', cwd: process.cwd() });
|
|
214
|
+
process.exit(result.status ?? 1);
|
|
215
|
+
});
|
|
160
216
|
/**
|
|
161
217
|
* Default command - run experiment (no subcommand needed)
|
|
162
218
|
* Usage: agent-eval cc --dry
|
package/dist/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,MAAM,IAAI,YAAY,EAAE,MAAM,QAAQ,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,MAAM,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAC/D,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnD,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,uBAAuB,EAAE,MAAM,eAAe,CAAC;AACrE,OAAO,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AACjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,MAAM,IAAI,YAAY,EAAE,MAAM,QAAQ,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,MAAM,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAC/D,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnD,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,uBAAuB,EAAE,MAAM,eAAe,CAAC;AACrE,OAAO,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AACjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AACzD,OAAO,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAE1C,uEAAuE;AACvE,YAAY,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,CAAC;AACrC,YAAY,EAAE,CAAC;AAEf,iCAAiC;AACjC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC1D,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,OAAO,CAAC,SAAS,EAAE,iBAAiB,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;AAErF,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,YAAY,CAAC;KAClB,WAAW,CAAC,8DAA8D,CAAC;KAC3E,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;AAExB;;;;GAIG;AACH,SAAS,iBAAiB,CAAC,KAAa;IACtC,6DAA6D;IAC7D,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAClG,OAAO,KAAK,CAAC;IACf,CAAC;IACD,6DAA6D;IAC7D,OAAO,eAAe,KAAK,KAAK,CAAC;AACnC,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,oBAAoB,CAAC,WAAmB,EAAE,OAA0B;IACjF,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,iBAAiB,CAAC,WAAW,CAAC,CAAC;QAClD,MAAM,kBAAkB,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,UAAU,CAAC,CAAC;QAE9D,IAAI,CAAC,UAAU,CAAC,kBAAkB,CAAC,EAAE,CAAC;YACpC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,0BAA0B,kBAAkB,EAAE,CAAC,CAAC,CAAC;YACzE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,uBAAuB,UAAU,KAAK,CAAC,CAAC,CAAC;QAChE,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,kBAAkB,CAAC,CAAC;QAEpD,mDAAmD;QACnD,kEAAkE;QAClE,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC,CAAC;QACxD,MAAM,QAAQ,GAAG,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;QAC9C,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,8BAA8B,QAAQ,EAAE,CAAC,CAAC,CAAC;YACnE,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,yDAAyD,CAAC,CAAC,CAAC;YACrF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,wBAAwB,QAAQ,KAAK,CAAC,CAAC,CAAC;QAC/D,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAC;QAEvD,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,cAAc,MAAM,CAAC,MAAM,sBAAsB,CAAC,CAAC,CAAC;YAC7E,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO,KAAK,CAAC,WAAW,KAAK,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;YAC1E,CAAC;QACH,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,8BAA8B,CAAC,CAAC,CAAC;YACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,6BAA6B;QAC7B,MAAM,cAAc,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACnD,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,KAAK,EAAE,cAAc,CAAC,CAAC;QAEjE,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC,CAAC;YACxD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,WAAW,QAAQ,CAAC,MAAM,+BAA+B,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACvG,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;YAC7B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC;QAC1C,CAAC;QAEJ,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAExE,2BAA2B;QAC3B,MAAM,iBAAiB,GAAG,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC;QACzD,MAAM,SAAS,GAAG,iBAAiB,GAAG,MAAM,CAAC,MAAM,CAAC;QAEpD,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,aAAa,SAAS,CAAC,MAAM,cAAc,MAAM,CAAC,IAAI,aAAa,MAAM,CAAC,MAAM,eAAe,SAAS,aAAa,CAAC,CAAC,CAAC;YAC/I,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,MAAM,CAAC,KAAK,aAAa,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,MAAM,CAAC,OAAO,kBAAkB,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;QAChJ,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,aAAa,SAAS,CAAC,MAAM,cAAc,MAAM,CAAC,IAAI,aAAa,SAAS,aAAa,CAAC,CAAC,CAAC;YACnH,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,MAAM,CAAC,KAAK,YAAY,MAAM,CAAC,CAAC,CAAC,cAAc,MAAM,CAAC,OAAO,kBAAkB,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;QACvI,CAAC;QAED,0CAA0C;QAC1C,MAAM,WAAW,GAAG,qBAAqB,CAAC,EAAE,OAAO,EAAE,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;QACvE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,YAAY,WAAW,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;QAE/D,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;YAChB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,sCAAsC,CAAC,CAAC,CAAC;YAClE,OAAO;QACT,CAAC;QAED,8CAA8C;QAC9C,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACrC,MAAM,YAAY,GAAG,KAAK,CAAC,eAAe,EAAE,CAAC;QAC7C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;QACzC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,YAAY,mCAAmC,CAAC,CAAC,CAAC;YAC7E,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC,CAAC;YAC7F,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,kDAAkD;QAClD,MAAM,gBAAgB,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAE5E,uCAAuC;QACvC,MAAM,kBAAkB,GAAG,QAAQ,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QAC5E,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CAAC,CAAC;QAErD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC,CAAC;QAEpD,iCAAiC;QACjC,IAAI,SAAS,GAAG,IAAI,CAAC;QACrB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,0CAA0C;YAC1C,MAAM,WAAW,GAAG,EAAE,GAAG,MAAM,EAAE,KAAK,EAAE,CAAC;YAEzC,sEAAsE;YACtE,MAAM,cAAc,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC;gBACtC,CAAC,CAAC,GAAG,kBAAkB,IAAI,KAAK,EAAE;gBAClC,CAAC,CAAC,kBAAkB,CAAC;YAEvB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,6BAA6B,KAAK,MAAM,CAAC,CAAC,CAAC;YACpE,CAAC;YAED,qBAAqB;YACrB,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC;gBAClC,MAAM,EAAE,WAAW;gBACnB,QAAQ,EAAE,gBAAgB;gBAC1B,MAAM;gBACN,UAAU;gBACV,cAAc;gBACd,UAAU,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC;aACtC,CAAC,CAAC;YAEH,kCAAkC;YAClC,MAAM,gBAAgB,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,KAAK,CAAC,CAAC,SAAS,CAAC,CAAC;YAClF,IAAI,CAAC,gBAAgB,EAAE,CAAC;gBACtB,SAAS,GAAG,KAAK,CAAC;YACpB,CAAC;QACH,CAAC;QAED,6BAA6B;QAC7B,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAClC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACtD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC,CAAC;QACxD,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,QAAQ,CAAC,QAAQ,EAAE,+BAA+B,CAAC;KACnD,WAAW,CAAC,iDAAiD,CAAC;KAC9D,MAAM,CAAC,KAAK,EAAE,IAAY,EAAE,EAAE;IAC7B,IAAI,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,8BAA8B,IAAI,EAAE,CAAC,CAAC,CAAC;QAE9D,MAAM,UAAU,GAAG,WAAW,CAAC;YAC7B,IAAI;YACJ,SAAS,EAAE,OAAO,CAAC,GAAG,EAAE;SACzB,CAAC,CAAC;QAEH,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC,CAAC;QAC1D,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC,CAAC;IACzD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACtD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC,CAAC;QACxD,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL;;;GAGG;AACH,OAAO;KACJ,OAAO,CAAC,YAAY,CAAC;KACrB,WAAW,CAAC,iEAAiE,CAAC;KAC9E,MAAM,CAAC,eAAe,EAAE,kBAAkB,EAAE,MAAM,CAAC;KACnD,MAAM,CAAC,qBAAqB,EAAE,2BAA2B,EAAE,WAAW,CAAC;KACvE,MAAM,CAAC,mBAAmB,EAAE,yBAAyB,EAAE,SAAS,CAAC;KACjE,MAAM,CAAC,SAAS,EAAE,wDAAwD,CAAC;KAC3E,MAAM,CAAC,KAAK,EAAE,OAAgF,EAAE,EAAE;IACjG,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC;IAC9D,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IAE1D,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC,CAAC;IAE7D,oCAAoC;IACpC,MAAM,cAAc,GAAG;QACrB,eAAe,EAAE,UAAU;QAC3B,aAAa,EAAE,QAAQ;QACvB,QAAQ,EAAE,OAAO,CAAC,IAAI;KACvB,CAAC;IACF,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QAClB,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACjC,CAAC;IAED,+DAA+D;IAC/D,MAAM,MAAM,GAAG,SAAS,CACtB,KAAK,EACL,CAAC,+BAA+B,EAAE,GAAG,cAAc,CAAC,EACpD,EAAE,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE,EAAE,CACzC,CAAC;IAEF,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC;AACnC,CAAC,CAAC,CAAC;AAEL;;;GAGG;AACH,OAAO;KACJ,QAAQ,CAAC,UAAU,EAAE,sCAAsC,CAAC;KAC5D,MAAM,CAAC,OAAO,EAAE,0CAA0C,CAAC;KAC3D,MAAM,CAAC,KAAK,EAAE,WAA+B,EAAE,OAA0B,EAAE,EAAE;IAC5E,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,OAAO,CAAC,IAAI,EAAE,CAAC;QACf,OAAO;IACT,CAAC;IACD,MAAM,oBAAoB,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;AACnD,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,EAAE,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -20,4 +20,6 @@ export type { RunExperimentOptions } from './lib/runner.js';
|
|
|
20
20
|
export { runExperiment, runSingleEval } from './lib/runner.js';
|
|
21
21
|
export type { InitOptions } from './lib/init.js';
|
|
22
22
|
export { initProject, getPostInitInstructions } from './lib/init.js';
|
|
23
|
+
export type { ToolName, TranscriptEvent, WebFetchInfo, FileOperationInfo, ShellCommandInfo, TranscriptSummary, Transcript, ParseableAgent, } from './lib/o11y/index.js';
|
|
24
|
+
export { parseTranscript, parseTranscriptSummary, loadTranscript, SUPPORTED_AGENTS, parseClaudeCodeTranscript, parseCodexTranscript, parseOpenCodeTranscript, } from './lib/o11y/index.js';
|
|
23
25
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,YAAY,EACV,SAAS,EACT,SAAS,EACT,UAAU,EACV,OAAO,EACP,aAAa,EACb,gBAAgB,EAChB,wBAAwB,EACxB,WAAW,EACX,aAAa,EACb,WAAW,EACX,WAAW,EACX,iBAAiB,GAClB,MAAM,gBAAgB,CAAC;AAGxB,OAAO,EAAE,mBAAmB,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAGrE,OAAO,EACL,eAAe,EACf,cAAc,EACd,aAAa,EACb,UAAU,EACV,gBAAgB,GACjB,MAAM,iBAAiB,CAAC;AAGzB,OAAO,EACL,sBAAsB,EACtB,gBAAgB,EAChB,oBAAoB,EACpB,mBAAmB,EACnB,WAAW,EACX,eAAe,EACf,eAAe,EACf,gBAAgB,GACjB,MAAM,kBAAkB,CAAC;AAG1B,YAAY,EACV,cAAc,EACd,aAAa,EACb,WAAW,EACX,cAAc,EACd,kBAAkB,GACnB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EACL,cAAc,EACd,uBAAuB,EACvB,gBAAgB,EAChB,kBAAkB,EAClB,iBAAiB,EACjB,cAAc,EACd,iBAAiB,EACjB,aAAa,EACb,cAAc,EACd,qBAAqB,GACtB,MAAM,kBAAkB,CAAC;AAG1B,YAAY,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAC;AAG/D,YAAY,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAG7E,YAAY,EAAE,KAAK,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACjE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAG5E,YAAY,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAC3D,OAAO,EACL,wBAAwB,EACxB,iBAAiB,EACjB,uBAAuB,EACvB,WAAW,EACX,kBAAkB,EAClB,eAAe,EACf,qBAAqB,GACtB,MAAM,kBAAkB,CAAC;AAG1B,YAAY,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAG/D,YAAY,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AACjD,OAAO,EAAE,WAAW,EAAE,uBAAuB,EAAE,MAAM,eAAe,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,YAAY,EACV,SAAS,EACT,SAAS,EACT,UAAU,EACV,OAAO,EACP,aAAa,EACb,gBAAgB,EAChB,wBAAwB,EACxB,WAAW,EACX,aAAa,EACb,WAAW,EACX,WAAW,EACX,iBAAiB,GAClB,MAAM,gBAAgB,CAAC;AAGxB,OAAO,EAAE,mBAAmB,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAGrE,OAAO,EACL,eAAe,EACf,cAAc,EACd,aAAa,EACb,UAAU,EACV,gBAAgB,GACjB,MAAM,iBAAiB,CAAC;AAGzB,OAAO,EACL,sBAAsB,EACtB,gBAAgB,EAChB,oBAAoB,EACpB,mBAAmB,EACnB,WAAW,EACX,eAAe,EACf,eAAe,EACf,gBAAgB,GACjB,MAAM,kBAAkB,CAAC;AAG1B,YAAY,EACV,cAAc,EACd,aAAa,EACb,WAAW,EACX,cAAc,EACd,kBAAkB,GACnB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EACL,cAAc,EACd,uBAAuB,EACvB,gBAAgB,EAChB,kBAAkB,EAClB,iBAAiB,EACjB,cAAc,EACd,iBAAiB,EACjB,aAAa,EACb,cAAc,EACd,qBAAqB,GACtB,MAAM,kBAAkB,CAAC;AAG1B,YAAY,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAC;AAG/D,YAAY,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAG7E,YAAY,EAAE,KAAK,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACjE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAG5E,YAAY,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAC3D,OAAO,EACL,wBAAwB,EACxB,iBAAiB,EACjB,uBAAuB,EACvB,WAAW,EACX,kBAAkB,EAClB,eAAe,EACf,qBAAqB,GACtB,MAAM,kBAAkB,CAAC;AAG1B,YAAY,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAG/D,YAAY,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AACjD,OAAO,EAAE,WAAW,EAAE,uBAAuB,EAAE,MAAM,eAAe,CAAC;AAGrE,YAAY,EACV,QAAQ,EACR,eAAe,EACf,YAAY,EACZ,iBAAiB,EACjB,gBAAgB,EAChB,iBAAiB,EACjB,UAAU,EACV,cAAc,GACf,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EACL,eAAe,EACf,sBAAsB,EACtB,cAAc,EACd,gBAAgB,EAChB,yBAAyB,EACzB,oBAAoB,EACpB,uBAAuB,GACxB,MAAM,qBAAqB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -15,4 +15,5 @@ export { getAgent, listAgents, registerAgent } from './lib/agents/index.js';
|
|
|
15
15
|
export { agentResultToEvalRunData, createEvalSummary, createExperimentResults, saveResults, formatResultsTable, formatRunResult, createProgressDisplay, } from './lib/results.js';
|
|
16
16
|
export { runExperiment, runSingleEval } from './lib/runner.js';
|
|
17
17
|
export { initProject, getPostInitInstructions } from './lib/init.js';
|
|
18
|
+
export { parseTranscript, parseTranscriptSummary, loadTranscript, SUPPORTED_AGENTS, parseClaudeCodeTranscript, parseCodexTranscript, parseOpenCodeTranscript, } from './lib/o11y/index.js';
|
|
18
19
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAkBH,sBAAsB;AACtB,OAAO,EAAE,mBAAmB,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAErE,6BAA6B;AAC7B,OAAO,EACL,eAAe,EACf,cAAc,EACd,aAAa,EACb,UAAU,EACV,gBAAgB,GACjB,MAAM,iBAAiB,CAAC;AAEzB,8BAA8B;AAC9B,OAAO,EACL,sBAAsB,EACtB,gBAAgB,EAChB,oBAAoB,EACpB,mBAAmB,EACnB,WAAW,EACX,eAAe,EACf,eAAe,EACf,gBAAgB,GACjB,MAAM,kBAAkB,CAAC;AAU1B,OAAO,EACL,cAAc,EACd,uBAAuB,EACvB,gBAAgB,EAChB,kBAAkB,EAClB,iBAAiB,EACjB,cAAc,EACd,iBAAiB,EACjB,aAAa,EACb,cAAc,EACd,qBAAqB,GACtB,MAAM,kBAAkB,CAAC;AAI1B,OAAO,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAC;AAO/D,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAI5E,OAAO,EACL,wBAAwB,EACxB,iBAAiB,EACjB,uBAAuB,EACvB,WAAW,EACX,kBAAkB,EAClB,eAAe,EACf,qBAAqB,GACtB,MAAM,kBAAkB,CAAC;AAI1B,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAI/D,OAAO,EAAE,WAAW,EAAE,uBAAuB,EAAE,MAAM,eAAe,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAkBH,sBAAsB;AACtB,OAAO,EAAE,mBAAmB,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAErE,6BAA6B;AAC7B,OAAO,EACL,eAAe,EACf,cAAc,EACd,aAAa,EACb,UAAU,EACV,gBAAgB,GACjB,MAAM,iBAAiB,CAAC;AAEzB,8BAA8B;AAC9B,OAAO,EACL,sBAAsB,EACtB,gBAAgB,EAChB,oBAAoB,EACpB,mBAAmB,EACnB,WAAW,EACX,eAAe,EACf,eAAe,EACf,gBAAgB,GACjB,MAAM,kBAAkB,CAAC;AAU1B,OAAO,EACL,cAAc,EACd,uBAAuB,EACvB,gBAAgB,EAChB,kBAAkB,EAClB,iBAAiB,EACjB,cAAc,EACd,iBAAiB,EACjB,aAAa,EACb,cAAc,EACd,qBAAqB,GACtB,MAAM,kBAAkB,CAAC;AAI1B,OAAO,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAC;AAO/D,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAI5E,OAAO,EACL,wBAAwB,EACxB,iBAAiB,EACjB,uBAAuB,EACvB,WAAW,EACX,kBAAkB,EAClB,eAAe,EACf,qBAAqB,GACtB,MAAM,kBAAkB,CAAC;AAI1B,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAI/D,OAAO,EAAE,WAAW,EAAE,uBAAuB,EAAE,MAAM,eAAe,CAAC;AAarE,OAAO,EACL,eAAe,EACf,sBAAsB,EACtB,cAAc,EACd,gBAAgB,EAChB,yBAAyB,EACzB,oBAAoB,EACpB,uBAAuB,GACxB,MAAM,qBAAqB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/lib/agents/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/lib/agents/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAa9E,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC;AAGzD,YAAY,EAAE,KAAK,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC"}
|
package/dist/lib/agents/index.js
CHANGED
|
@@ -5,14 +5,12 @@ import { registerAgent, getAgent, listAgents, hasAgent } from './registry.js';
|
|
|
5
5
|
import { createClaudeCodeAgent } from './claude-code.js';
|
|
6
6
|
import { createCodexAgent } from './codex.js';
|
|
7
7
|
import { createOpenCodeAgent } from './opencode.js';
|
|
8
|
-
import { createAiSdkAgent } from './ai-sdk-agent.js';
|
|
9
8
|
// Register all agent variants (Vercel AI Gateway + Direct API)
|
|
10
9
|
registerAgent(createClaudeCodeAgent({ useVercelAiGateway: true })); // vercel-ai-gateway/claude-code
|
|
11
10
|
registerAgent(createClaudeCodeAgent({ useVercelAiGateway: false })); // claude-code
|
|
12
11
|
registerAgent(createCodexAgent({ useVercelAiGateway: true })); // vercel-ai-gateway/codex
|
|
13
12
|
registerAgent(createCodexAgent({ useVercelAiGateway: false })); // codex
|
|
14
13
|
registerAgent(createOpenCodeAgent()); // vercel-ai-gateway/opencode
|
|
15
|
-
registerAgent(createAiSdkAgent()); // vercel-ai-gateway/ai-sdk-harness
|
|
16
14
|
// Re-export registry functions
|
|
17
15
|
export { registerAgent, getAgent, listAgents, hasAgent };
|
|
18
16
|
//# sourceMappingURL=index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/lib/agents/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9E,OAAO,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAC9C,OAAO,EAAE,mBAAmB,EAAE,MAAM,eAAe,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/lib/agents/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9E,OAAO,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAC9C,OAAO,EAAE,mBAAmB,EAAE,MAAM,eAAe,CAAC;AAEpD,+DAA+D;AAC/D,aAAa,CAAC,qBAAqB,CAAC,EAAE,kBAAkB,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAG,gCAAgC;AACtG,aAAa,CAAC,qBAAqB,CAAC,EAAE,kBAAkB,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAE,cAAc;AACpF,aAAa,CAAC,gBAAgB,CAAC,EAAE,kBAAkB,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAQ,0BAA0B;AAChG,aAAa,CAAC,gBAAgB,CAAC,EAAE,kBAAkB,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAO,QAAQ;AAC9E,aAAa,CAAC,mBAAmB,EAAE,CAAC,CAAC,CAAiC,6BAA6B;AAEnG,+BAA+B;AAC/B,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC"}
|
package/dist/lib/config.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../src/lib/config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EACV,gBAAgB,EAChB,wBAAwB,EACxB,UAAU,EACX,MAAM,YAAY,CAAC;AAGpB;;GAEG;AACH,eAAO,MAAM,eAAe;;;;;aAKX,MAAM,EAAE;;;CAGxB,CAAC;
|
|
1
|
+
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../src/lib/config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EACV,gBAAgB,EAChB,wBAAwB,EACxB,UAAU,EACX,MAAM,YAAY,CAAC;AAGpB;;GAEG;AACH,eAAO,MAAM,eAAe;;;;;aAKX,MAAM,EAAE;;;CAGxB,CAAC;AA0BF;;;GAGG;AACH,wBAAgB,cAAc,CAAC,MAAM,EAAE,OAAO,GAAG,gBAAgB,CAWhE;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE,gBAAgB,GAAG,wBAAwB,CAmBhF;AAED;;;GAGG;AACH,wBAAsB,UAAU,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,wBAAwB,CAAC,CAkBtF;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,UAAU,EACtC,cAAc,EAAE,MAAM,EAAE,GACvB,MAAM,EAAE,CAyBV"}
|
package/dist/lib/config.js
CHANGED
|
@@ -26,7 +26,7 @@ const experimentConfigSchema = z.object({
|
|
|
26
26
|
'codex',
|
|
27
27
|
'vercel-ai-gateway/opencode',
|
|
28
28
|
]),
|
|
29
|
-
model: z.string().optional(),
|
|
29
|
+
model: z.union([z.string(), z.array(z.string())]).optional(),
|
|
30
30
|
evals: z
|
|
31
31
|
.union([z.string(), z.array(z.string()), z.function().args(z.string()).returns(z.boolean())])
|
|
32
32
|
.optional(),
|
|
@@ -36,6 +36,7 @@ const experimentConfigSchema = z.object({
|
|
|
36
36
|
timeout: z.number().positive().optional(),
|
|
37
37
|
setup: z.function().optional(),
|
|
38
38
|
sandbox: z.enum(['vercel', 'docker', 'auto']).optional(),
|
|
39
|
+
editPrompt: z.function().args(z.string()).returns(z.string()).optional(),
|
|
39
40
|
});
|
|
40
41
|
/**
|
|
41
42
|
* Validates an experiment configuration object.
|
|
@@ -69,6 +70,7 @@ export function resolveConfig(config) {
|
|
|
69
70
|
timeout: config.timeout ?? CONFIG_DEFAULTS.timeout,
|
|
70
71
|
setup: config.setup,
|
|
71
72
|
sandbox: config.sandbox ?? CONFIG_DEFAULTS.sandbox,
|
|
73
|
+
editPrompt: config.editPrompt,
|
|
72
74
|
};
|
|
73
75
|
}
|
|
74
76
|
/**
|
package/dist/lib/config.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/lib/config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAMxB,OAAO,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAE7C;;GAEG;AACH,MAAM,CAAC,MAAM,eAAe,GAAG;IAC7B,KAAK,EAAE,MAAe;IACtB,KAAK,EAAE,GAAY;IACnB,IAAI,EAAE,CAAC;IACP,SAAS,EAAE,IAAI;IACf,OAAO,EAAE,EAAc;IACvB,OAAO,EAAE,GAAG,EAAE,aAAa;IAC3B,OAAO,EAAE,MAAe;CACzB,CAAC;AAEF;;GAEG;AACH,MAAM,sBAAsB,GAAG,CAAC,CAAC,MAAM,CAAC;IACtC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC;QACZ,+BAA+B;QAC/B,aAAa;QACb,yBAAyB;QACzB,OAAO;QACP,4BAA4B;KAC7B,CAAC;IACF,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/lib/config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAMxB,OAAO,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAE7C;;GAEG;AACH,MAAM,CAAC,MAAM,eAAe,GAAG;IAC7B,KAAK,EAAE,MAAe;IACtB,KAAK,EAAE,GAAY;IACnB,IAAI,EAAE,CAAC;IACP,SAAS,EAAE,IAAI;IACf,OAAO,EAAE,EAAc;IACvB,OAAO,EAAE,GAAG,EAAE,aAAa;IAC3B,OAAO,EAAE,MAAe;CACzB,CAAC;AAEF;;GAEG;AACH,MAAM,sBAAsB,GAAG,CAAC,CAAC,MAAM,CAAC;IACtC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC;QACZ,+BAA+B;QAC/B,aAAa;QACb,yBAAyB;QACzB,OAAO;QACP,4BAA4B;KAC7B,CAAC;IACF,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE;IAC5D,KAAK,EAAE,CAAC;SACL,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;SAC5F,QAAQ,EAAE;IACb,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;IAC5C,SAAS,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;IACjC,OAAO,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;IACvC,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;IACzC,KAAK,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;IAC9B,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC,CAAC,QAAQ,EAAE;IACxD,UAAU,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;CACzE,CAAC,CAAC;AAEH;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,MAAe;IAC5C,MAAM,MAAM,GAAG,sBAAsB,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;IAExD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM;aAC/B,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,KAAK,CAAC,OAAO,EAAE,CAAC;aAC/D,IAAI,CAAC,IAAI,CAAC,CAAC;QACd,MAAM,IAAI,KAAK,CAAC,sCAAsC,MAAM,EAAE,CAAC,CAAC;IAClE,CAAC;IAED,OAAO,MAAM,CAAC,IAAwB,CAAC;AACzC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,MAAwB;IACpD,wBAAwB;IACxB,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAErC,gDAAgD;IAChD,MAAM,YAAY,GAAG,MAAM,CAAC,KAAK,IAAI,KAAK,CAAC,eAAe,EAAE,CAAC;IAE7D,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,KAAK,EAAE,YAAY;QACnB,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,GAAG;QAC1B,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,eAAe,CAAC,IAAI;QACzC,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,eAAe,CAAC,SAAS;QACxD,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,eAAe,CAAC,OAAO;QAClD,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,eAAe,CAAC,OAAO;QAClD,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,eAAe,CAAC,OAAO;QAClD,UAAU,EAAE,MAAM,CAAC,UAAU;KAC9B,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,UAAkB;IACjD,IAAI,CAAC;QACH,oCAAoC;QACpC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,UAAU,CAAC,CAAC;QACxC,MAAM,SAAS,GAAG,MAAM,CAAC,OAAO,CAAC;QAEjC,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;QAC5D,CAAC;QAED,MAAM,MAAM,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;QACzC,OAAO,aAAa,CAAC,MAAM,CAAC,CAAC;IAC/B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,MAAM,IAAI,KAAK,CAAC,8BAA8B,UAAU,KAAK,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAChF,CAAC;QACD,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAC9B,MAAsC,EACtC,cAAwB;IAExB,mBAAmB;IACnB,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QAC/B,IAAI,MAAM,KAAK,GAAG,EAAE,CAAC;YACnB,OAAO,cAAc,CAAC;QACxB,CAAC;QACD,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YACrC,MAAM,IAAI,KAAK,CAAC,SAAS,MAAM,iCAAiC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC/F,CAAC;QACD,OAAO,CAAC,MAAM,CAAC,CAAC;IAClB,CAAC;IAED,sBAAsB;IACtB,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAC1B,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,cAAc,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;QACxE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CACb,oBAAoB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,sBAAsB,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CACxF,CAAC;QACJ,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,kBAAkB;IAClB,OAAO,cAAc,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;AACvC,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Observability module for agent-eval.
|
|
3
|
+
* Provides transcript parsing and analysis across all agents.
|
|
4
|
+
*/
|
|
5
|
+
export type { ToolName, TranscriptEvent, WebFetchInfo, FileOperationInfo, ShellCommandInfo, TranscriptSummary, Transcript, } from './types.js';
|
|
6
|
+
export { parseTranscript, parseTranscriptSummary, loadTranscript, SUPPORTED_AGENTS } from './parsers/index.js';
|
|
7
|
+
export type { ParseableAgent } from './parsers/index.js';
|
|
8
|
+
export { parseClaudeCodeTranscript } from './parsers/claude-code.js';
|
|
9
|
+
export { parseCodexTranscript } from './parsers/codex.js';
|
|
10
|
+
export { parseOpenCodeTranscript } from './parsers/opencode.js';
|
|
11
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/lib/o11y/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,YAAY,EACV,QAAQ,EACR,eAAe,EACf,YAAY,EACZ,iBAAiB,EACjB,gBAAgB,EAChB,iBAAiB,EACjB,UAAU,GACX,MAAM,YAAY,CAAC;AAGpB,OAAO,EAAE,eAAe,EAAE,sBAAsB,EAAE,cAAc,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAC/G,YAAY,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAGzD,OAAO,EAAE,yBAAyB,EAAE,MAAM,0BAA0B,CAAC;AACrE,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Observability module for agent-eval.
|
|
3
|
+
* Provides transcript parsing and analysis across all agents.
|
|
4
|
+
*/
|
|
5
|
+
// Main parsing functions
|
|
6
|
+
export { parseTranscript, parseTranscriptSummary, loadTranscript, SUPPORTED_AGENTS } from './parsers/index.js';
|
|
7
|
+
// Individual parsers (for advanced use)
|
|
8
|
+
export { parseClaudeCodeTranscript } from './parsers/claude-code.js';
|
|
9
|
+
export { parseCodexTranscript } from './parsers/codex.js';
|
|
10
|
+
export { parseOpenCodeTranscript } from './parsers/opencode.js';
|
|
11
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/lib/o11y/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAaH,yBAAyB;AACzB,OAAO,EAAE,eAAe,EAAE,sBAAsB,EAAE,cAAc,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAG/G,wCAAwC;AACxC,OAAO,EAAE,yBAAyB,EAAE,MAAM,0BAA0B,CAAC;AACrE,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parser for Claude Code transcript format.
|
|
3
|
+
* Claude Code stores transcripts as JSONL at ~/.claude/projects/{path}/{session}.jsonl
|
|
4
|
+
*
|
|
5
|
+
* Format reference (based on Claude Code CLI output):
|
|
6
|
+
* - Messages have type: "user" | "assistant"
|
|
7
|
+
* - Tool use appears in assistant messages with tool_use blocks
|
|
8
|
+
* - Tool results appear as separate messages with type: "tool_result"
|
|
9
|
+
*/
|
|
10
|
+
import type { TranscriptEvent } from '../types.js';
|
|
11
|
+
/**
|
|
12
|
+
* Parse Claude Code JSONL transcript into events.
|
|
13
|
+
*/
|
|
14
|
+
export declare function parseClaudeCodeTranscript(raw: string): {
|
|
15
|
+
events: TranscriptEvent[];
|
|
16
|
+
errors: string[];
|
|
17
|
+
};
|
|
18
|
+
//# sourceMappingURL=claude-code.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"claude-code.d.ts","sourceRoot":"","sources":["../../../../src/lib/o11y/parsers/claude-code.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAY,MAAM,aAAa,CAAC;AA8T7D;;GAEG;AACH,wBAAgB,yBAAyB,CAAC,GAAG,EAAE,MAAM,GAAG;IACtD,MAAM,EAAE,eAAe,EAAE,CAAC;IAC1B,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB,CAgDA"}
|