agentv 2.8.0-next.1 → 2.9.0-next.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -2
- package/dist/{chunk-RCFB5QFS.js → chunk-3INJ7ISP.js} +46 -85
- package/dist/chunk-3INJ7ISP.js.map +1 -0
- package/dist/{chunk-2SXGPQVR.js → chunk-PC3FAOHT.js} +4 -4
- package/dist/chunk-PC3FAOHT.js.map +1 -0
- package/dist/{chunk-DJCMBVB3.js → chunk-RJWTL3VS.js} +166 -75
- package/dist/chunk-RJWTL3VS.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-T7REAXNS.js → dist-BGRU67HI.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-TE5SJPJW.js → interactive-7KFUCBIP.js} +3 -3
- package/dist/templates/.agents/skills/agentv-eval-builder/SKILL.md +1 -1
- package/dist/templates/.agents/skills/agentv-eval-builder/references/custom-evaluators.md +8 -8
- package/dist/templates/.agents/skills/agentv-eval-builder/references/eval-schema.json +12683 -237
- package/dist/templates/.agentv/config.yaml +1 -1
- package/dist/templates/.agentv/targets.yaml +10 -13
- package/package.json +2 -2
- package/dist/chunk-2SXGPQVR.js.map +0 -1
- package/dist/chunk-DJCMBVB3.js.map +0 -1
- package/dist/chunk-RCFB5QFS.js.map +0 -1
- package/dist/templates/.claude/skills/agentv-eval-builder/SKILL.md +0 -202
- package/dist/templates/.claude/skills/agentv-eval-builder/references/batch-cli-evaluator.md +0 -316
- package/dist/templates/.claude/skills/agentv-eval-builder/references/compare-command.md +0 -137
- package/dist/templates/.claude/skills/agentv-eval-builder/references/composite-evaluator.md +0 -215
- package/dist/templates/.claude/skills/agentv-eval-builder/references/config-schema.json +0 -27
- package/dist/templates/.claude/skills/agentv-eval-builder/references/custom-evaluators.md +0 -118
- package/dist/templates/.claude/skills/agentv-eval-builder/references/eval-schema.json +0 -278
- package/dist/templates/.claude/skills/agentv-eval-builder/references/example-evals.md +0 -333
- package/dist/templates/.claude/skills/agentv-eval-builder/references/rubric-evaluator.md +0 -77
- package/dist/templates/.claude/skills/agentv-eval-builder/references/structured-data-evaluators.md +0 -121
- package/dist/templates/.claude/skills/agentv-eval-builder/references/tool-trajectory-evaluator.md +0 -298
- package/dist/templates/.claude/skills/agentv-prompt-optimizer/SKILL.md +0 -78
- package/dist/templates/.github/prompts/agentv-eval-build.prompt.md +0 -5
- package/dist/templates/.github/prompts/agentv-optimize.prompt.md +0 -4
- /package/dist/{dist-T7REAXNS.js.map → dist-BGRU67HI.js.map} +0 -0
- /package/dist/{interactive-TE5SJPJW.js.map → interactive-7KFUCBIP.js.map} +0 -0
package/README.md
CHANGED
|
@@ -6,6 +6,29 @@ AgentV evaluates your agents locally with multi-objective scoring (correctness,
|
|
|
6
6
|
|
|
7
7
|
## Installation
|
|
8
8
|
|
|
9
|
+
### All Agents Plugin Manager
|
|
10
|
+
|
|
11
|
+
**1. Add AgentV marketplace source:**
|
|
12
|
+
```bash
|
|
13
|
+
npx allagents plugin marketplace add EntityProcess/agentv
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
**2. Ask Claude to set up AgentV in your current repository**
|
|
17
|
+
Example prompt:
|
|
18
|
+
```text
|
|
19
|
+
Set up AgentV in this repo.
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
The `agentv-onboarding` skill bootstraps setup automatically:
|
|
23
|
+
- verifies `agentv` CLI availability
|
|
24
|
+
- installs the CLI if needed
|
|
25
|
+
- runs `agentv init`
|
|
26
|
+
- verifies setup artifacts
|
|
27
|
+
|
|
28
|
+
### CLI-Only Setup (Fallback)
|
|
29
|
+
|
|
30
|
+
If you are not using Claude plugins, use the CLI directly.
|
|
31
|
+
|
|
9
32
|
**1. Install:**
|
|
10
33
|
```bash
|
|
11
34
|
npm install -g agentv
|
|
@@ -54,7 +77,7 @@ Learn more in the [examples/](examples/README.md) directory. For a detailed comp
|
|
|
54
77
|
|
|
55
78
|
| Feature | AgentV | [LangWatch](https://github.com/langwatch/langwatch) | [LangSmith](https://github.com/langchain-ai/langsmith-sdk) | [LangFuse](https://github.com/langfuse/langfuse) |
|
|
56
79
|
|---------|--------|-----------|-----------|----------|
|
|
57
|
-
| **Setup** | `npm install` | Cloud account + API key | Cloud account + API key | Cloud account + API key |
|
|
80
|
+
| **Setup** | `npm install agentv` | Cloud account + API key | Cloud account + API key | Cloud account + API key |
|
|
58
81
|
| **Server** | None (local) | Managed cloud | Managed cloud | Managed cloud |
|
|
59
82
|
| **Privacy** | All local | Cloud-hosted | Cloud-hosted | Cloud-hosted |
|
|
60
83
|
| **CLI-first** | ✓ | ✗ | Limited | Limited |
|
|
@@ -132,7 +155,10 @@ description: Math evaluation dataset
|
|
|
132
155
|
dataset: math-tests
|
|
133
156
|
execution:
|
|
134
157
|
target: azure_base
|
|
135
|
-
|
|
158
|
+
assert:
|
|
159
|
+
- name: correctness
|
|
160
|
+
type: llm_judge
|
|
161
|
+
prompt: ./judges/correctness.md
|
|
136
162
|
```
|
|
137
163
|
|
|
138
164
|
Benefits: Streaming-friendly, Git-friendly diffs, programmatic generation, industry standard (DeepEval, LangWatch, Hugging Face).
|
|
@@ -10,7 +10,7 @@ import {
|
|
|
10
10
|
validateEvalFile,
|
|
11
11
|
validateFileReferences,
|
|
12
12
|
validateTargetsFile
|
|
13
|
-
} from "./chunk-2SXGPQVR.js";
|
|
13
|
+
} from "./chunk-PC3FAOHT.js";
|
|
14
14
|
import {
|
|
15
15
|
assembleLlmJudgePrompt,
|
|
16
16
|
buildPromptInputs,
|
|
@@ -24,7 +24,7 @@ import {
|
|
|
24
24
|
toCamelCaseDeep,
|
|
25
25
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
26
26
|
trimBaselineResult
|
|
27
|
-
} from "./chunk-DJCMBVB3.js";
|
|
27
|
+
} from "./chunk-RJWTL3VS.js";
|
|
28
28
|
import {
|
|
29
29
|
__commonJS,
|
|
30
30
|
__esm,
|
|
@@ -2877,7 +2877,7 @@ function oneOf(literals) {
|
|
|
2877
2877
|
// package.json
|
|
2878
2878
|
var package_default = {
|
|
2879
2879
|
name: "agentv",
|
|
2880
|
-
version: "2.8.0-next.1",
|
|
2880
|
+
version: "2.9.0-next.2",
|
|
2881
2881
|
description: "CLI entry point for AgentV",
|
|
2882
2882
|
type: "module",
|
|
2883
2883
|
repository: {
|
|
@@ -4042,7 +4042,7 @@ var evalRunCommand = command({
|
|
|
4042
4042
|
},
|
|
4043
4043
|
handler: async (args) => {
|
|
4044
4044
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4045
|
-
const { launchInteractiveWizard } = await import("./interactive-TE5SJPJW.js");
|
|
4045
|
+
const { launchInteractiveWizard } = await import("./interactive-7KFUCBIP.js");
|
|
4046
4046
|
await launchInteractiveWizard();
|
|
4047
4047
|
return;
|
|
4048
4048
|
}
|
|
@@ -4271,26 +4271,6 @@ import { fileURLToPath } from "node:url";
|
|
|
4271
4271
|
function getAgentvTemplates() {
|
|
4272
4272
|
return getTemplatesFromDir(".agentv");
|
|
4273
4273
|
}
|
|
4274
|
-
function getAgentsTemplates() {
|
|
4275
|
-
if (isDistRuntime()) {
|
|
4276
|
-
return getTemplatesFromDir(".agents");
|
|
4277
|
-
}
|
|
4278
|
-
const repoRoot = getRepoRootFromDev();
|
|
4279
|
-
const skillsRoot = path4.join(repoRoot, "plugins", "agentv-dev", "skills");
|
|
4280
|
-
const skillsToInclude = [
|
|
4281
|
-
"agentv-chat-to-eval",
|
|
4282
|
-
"agentv-eval-builder",
|
|
4283
|
-
"agentv-eval-orchestrator",
|
|
4284
|
-
"agentv-prompt-optimizer"
|
|
4285
|
-
];
|
|
4286
|
-
const templates = [];
|
|
4287
|
-
for (const skill of skillsToInclude) {
|
|
4288
|
-
const skillDir = path4.join(skillsRoot, skill);
|
|
4289
|
-
const skillTemplates = readTemplatesRecursively(skillDir, path4.join("skills", skill));
|
|
4290
|
-
templates.push(...skillTemplates);
|
|
4291
|
-
}
|
|
4292
|
-
return templates;
|
|
4293
|
-
}
|
|
4294
4274
|
function getTemplatesFromDir(subdir) {
|
|
4295
4275
|
const currentDir = path4.dirname(fileURLToPath(import.meta.url));
|
|
4296
4276
|
let templatesDir;
|
|
@@ -4301,14 +4281,6 @@ function getTemplatesFromDir(subdir) {
|
|
|
4301
4281
|
}
|
|
4302
4282
|
return readTemplatesRecursively(templatesDir, "");
|
|
4303
4283
|
}
|
|
4304
|
-
function isDistRuntime() {
|
|
4305
|
-
const currentDir = path4.dirname(fileURLToPath(import.meta.url));
|
|
4306
|
-
return currentDir.includes(`${path4.sep}dist`);
|
|
4307
|
-
}
|
|
4308
|
-
function getRepoRootFromDev() {
|
|
4309
|
-
const currentDir = path4.dirname(fileURLToPath(import.meta.url));
|
|
4310
|
-
return path4.resolve(currentDir, "..", "..", "..", "..");
|
|
4311
|
-
}
|
|
4312
4284
|
function readTemplatesRecursively(dir, relativePath) {
|
|
4313
4285
|
const templates = [];
|
|
4314
4286
|
const entries2 = readdirSync(dir);
|
|
@@ -4331,6 +4303,12 @@ function readTemplatesRecursively(dir, relativePath) {
|
|
|
4331
4303
|
}
|
|
4332
4304
|
|
|
4333
4305
|
// src/commands/init/index.ts
|
|
4306
|
+
function printSkillFirstInstructions() {
|
|
4307
|
+
console.log("\nAI-skills-first setup (recommended):");
|
|
4308
|
+
console.log(" npx allagents plugin marketplace add EntityProcess/agentv");
|
|
4309
|
+
console.log(" npx allagents plugin install agentv-dev@agentv");
|
|
4310
|
+
console.log(' Then ask your agent: "Set up AgentV in this repo."');
|
|
4311
|
+
}
|
|
4334
4312
|
async function promptYesNo(message) {
|
|
4335
4313
|
const rl = readline.createInterface({
|
|
4336
4314
|
input: process.stdin,
|
|
@@ -4346,9 +4324,7 @@ async function promptYesNo(message) {
|
|
|
4346
4324
|
async function initCommand(options = {}) {
|
|
4347
4325
|
const targetPath = path5.resolve(options.targetPath ?? ".");
|
|
4348
4326
|
const agentvDir = path5.join(targetPath, ".agentv");
|
|
4349
|
-
const agentsDir = path5.join(targetPath, ".agents");
|
|
4350
4327
|
const agentvTemplates = getAgentvTemplates();
|
|
4351
|
-
const agentsTemplates = getAgentsTemplates();
|
|
4352
4328
|
const envTemplate = agentvTemplates.find((t) => t.path === ".env.example");
|
|
4353
4329
|
const otherAgentvTemplates = agentvTemplates.filter((t) => t.path !== ".env.example");
|
|
4354
4330
|
const existingFiles = [];
|
|
@@ -4366,14 +4342,6 @@ async function initCommand(options = {}) {
|
|
|
4366
4342
|
}
|
|
4367
4343
|
}
|
|
4368
4344
|
}
|
|
4369
|
-
if (existsSync(agentsDir)) {
|
|
4370
|
-
for (const template of agentsTemplates) {
|
|
4371
|
-
const targetFilePath = path5.join(agentsDir, template.path);
|
|
4372
|
-
if (existsSync(targetFilePath)) {
|
|
4373
|
-
existingFiles.push(path5.relative(targetPath, targetFilePath));
|
|
4374
|
-
}
|
|
4375
|
-
}
|
|
4376
|
-
}
|
|
4377
4345
|
if (existingFiles.length > 0) {
|
|
4378
4346
|
console.log("We detected an existing setup:");
|
|
4379
4347
|
for (const file of existingFiles) {
|
|
@@ -4383,6 +4351,7 @@ async function initCommand(options = {}) {
|
|
|
4383
4351
|
const shouldReplace = await promptYesNo("Do you want to replace these files?");
|
|
4384
4352
|
if (!shouldReplace) {
|
|
4385
4353
|
console.log("\nInit cancelled. No files were changed.");
|
|
4354
|
+
printSkillFirstInstructions();
|
|
4386
4355
|
return;
|
|
4387
4356
|
}
|
|
4388
4357
|
console.log();
|
|
@@ -4390,9 +4359,6 @@ async function initCommand(options = {}) {
|
|
|
4390
4359
|
if (!existsSync(agentvDir)) {
|
|
4391
4360
|
mkdirSync(agentvDir, { recursive: true });
|
|
4392
4361
|
}
|
|
4393
|
-
if (!existsSync(agentsDir)) {
|
|
4394
|
-
mkdirSync(agentsDir, { recursive: true });
|
|
4395
|
-
}
|
|
4396
4362
|
if (envTemplate) {
|
|
4397
4363
|
const envFilePath = path5.join(targetPath, ".env.example");
|
|
4398
4364
|
writeFileSync2(envFilePath, envTemplate.content, "utf-8");
|
|
@@ -4407,15 +4373,6 @@ async function initCommand(options = {}) {
|
|
|
4407
4373
|
writeFileSync2(targetFilePath, template.content, "utf-8");
|
|
4408
4374
|
console.log(`Created ${path5.relative(targetPath, targetFilePath)}`);
|
|
4409
4375
|
}
|
|
4410
|
-
for (const template of agentsTemplates) {
|
|
4411
|
-
const targetFilePath = path5.join(agentsDir, template.path);
|
|
4412
|
-
const targetDirPath = path5.dirname(targetFilePath);
|
|
4413
|
-
if (!existsSync(targetDirPath)) {
|
|
4414
|
-
mkdirSync(targetDirPath, { recursive: true });
|
|
4415
|
-
}
|
|
4416
|
-
writeFileSync2(targetFilePath, template.content, "utf-8");
|
|
4417
|
-
console.log(`Created ${path5.relative(targetPath, targetFilePath)}`);
|
|
4418
|
-
}
|
|
4419
4376
|
console.log("\nAgentV initialized successfully!");
|
|
4420
4377
|
console.log("\nFiles installed to root:");
|
|
4421
4378
|
if (envTemplate) {
|
|
@@ -4426,19 +4383,15 @@ Files installed to ${path5.relative(targetPath, agentvDir)}:`);
|
|
|
4426
4383
|
for (const t of otherAgentvTemplates) {
|
|
4427
4384
|
console.log(` - ${t.path}`);
|
|
4428
4385
|
}
|
|
4429
|
-
console.log(`
|
|
4430
|
-
Files installed to ${path5.relative(targetPath, agentsDir)}:`);
|
|
4431
|
-
for (const t of agentsTemplates) {
|
|
4432
|
-
console.log(` - ${t.path}`);
|
|
4433
|
-
}
|
|
4434
4386
|
console.log("\nYou can now:");
|
|
4435
4387
|
console.log(" 1. Copy .env.example to .env and add your API credentials");
|
|
4436
4388
|
console.log(" 2. Configure targets in .agentv/targets.yaml");
|
|
4437
|
-
console.log(" 3.
|
|
4389
|
+
console.log(" 3. Use AI skills to create and run evals");
|
|
4390
|
+
printSkillFirstInstructions();
|
|
4438
4391
|
}
|
|
4439
4392
|
var initCmdTsCommand = command({
|
|
4440
4393
|
name: "init",
|
|
4441
|
-
description: "Initialize AgentV in your project
|
|
4394
|
+
description: "Initialize AgentV bootstrap files in your project",
|
|
4442
4395
|
args: {
|
|
4443
4396
|
path: option({
|
|
4444
4397
|
type: optional(string),
|
|
@@ -4469,7 +4422,7 @@ function detectPackageManager() {
|
|
|
4469
4422
|
}
|
|
4470
4423
|
function runCommand(cmd, args) {
|
|
4471
4424
|
return new Promise((resolve, reject) => {
|
|
4472
|
-
const child = spawn(cmd, args, { stdio: ["inherit", "pipe", "inherit"] });
|
|
4425
|
+
const child = spawn(cmd, args, { stdio: ["inherit", "pipe", "inherit"], shell: true });
|
|
4473
4426
|
let stdout = "";
|
|
4474
4427
|
child.stdout?.on("data", (data) => {
|
|
4475
4428
|
process.stdout.write(data);
|
|
@@ -4850,7 +4803,12 @@ async function runScore(results, evaluatorConfig, testIdFilter) {
|
|
|
4850
4803
|
promptInputs: { question: "", guidelines: "" },
|
|
4851
4804
|
now: /* @__PURE__ */ new Date(),
|
|
4852
4805
|
output: Array.isArray(output) ? output : void 0,
|
|
4853
|
-
trace
|
|
4806
|
+
trace,
|
|
4807
|
+
tokenUsage: raw.token_usage ? toCamelCaseDeep(raw.token_usage) : void 0,
|
|
4808
|
+
costUsd: raw.cost_usd,
|
|
4809
|
+
durationMs: raw.duration_ms,
|
|
4810
|
+
startTime: raw.start_time,
|
|
4811
|
+
endTime: raw.end_time
|
|
4854
4812
|
};
|
|
4855
4813
|
const score = await evaluator.evaluate(evalContext);
|
|
4856
4814
|
scored.push({
|
|
@@ -4951,7 +4909,9 @@ var traceScoreCommand = command({
|
|
|
4951
4909
|
evaluatorConfig.type
|
|
4952
4910
|
);
|
|
4953
4911
|
if (traceRequired) {
|
|
4954
|
-
const hasTrace = results.some(
|
|
4912
|
+
const hasTrace = results.some(
|
|
4913
|
+
(r) => r.trace || r.cost_usd !== void 0 || r.duration_ms !== void 0 || r.token_usage !== void 0
|
|
4914
|
+
);
|
|
4955
4915
|
if (!hasTrace) {
|
|
4956
4916
|
console.error(
|
|
4957
4917
|
`${c2.red}Error:${c2.reset} Result file lacks trace data. Re-run eval with ${c2.bold}--trace${c2.reset} to capture trace summaries.`
|
|
@@ -4986,26 +4946,27 @@ var traceScoreCommand = command({
|
|
|
4986
4946
|
});
|
|
4987
4947
|
|
|
4988
4948
|
// src/commands/trace/show.ts
|
|
4989
|
-
function renderFlatTrace(
|
|
4949
|
+
function renderFlatTrace(result) {
|
|
4950
|
+
const trace = result.trace;
|
|
4990
4951
|
const parts = [];
|
|
4991
|
-
if (trace
|
|
4952
|
+
if (trace?.tool_names && trace.tool_names.length > 0) {
|
|
4992
4953
|
const toolParts = trace.tool_names.map((name) => {
|
|
4993
4954
|
const count = trace.tool_calls_by_name?.[name] ?? 0;
|
|
4994
4955
|
return count > 1 ? `${name} \xD7${count}` : name;
|
|
4995
4956
|
});
|
|
4996
4957
|
parts.push(`Tools: ${toolParts.join(", ")}`);
|
|
4997
4958
|
}
|
|
4998
|
-
if (
|
|
4999
|
-
parts.push(`Duration: ${formatDuration(
|
|
4959
|
+
if (result.duration_ms !== void 0) {
|
|
4960
|
+
parts.push(`Duration: ${formatDuration(result.duration_ms)}`);
|
|
5000
4961
|
}
|
|
5001
|
-
if (
|
|
5002
|
-
const total =
|
|
4962
|
+
if (result.token_usage) {
|
|
4963
|
+
const total = result.token_usage.input + result.token_usage.output;
|
|
5003
4964
|
parts.push(`Tokens: ${formatNumber(total)}`);
|
|
5004
4965
|
}
|
|
5005
|
-
if (
|
|
5006
|
-
parts.push(`Cost: ${formatCost(
|
|
4966
|
+
if (result.cost_usd !== void 0) {
|
|
4967
|
+
parts.push(`Cost: ${formatCost(result.cost_usd)}`);
|
|
5007
4968
|
}
|
|
5008
|
-
if (trace
|
|
4969
|
+
if (trace?.llm_call_count !== void 0) {
|
|
5009
4970
|
parts.push(`LLM calls: ${trace.llm_call_count}`);
|
|
5010
4971
|
}
|
|
5011
4972
|
return parts.join(" | ");
|
|
@@ -5019,19 +4980,19 @@ function renderScores(scores) {
|
|
|
5019
4980
|
function renderTree(result) {
|
|
5020
4981
|
const messages = result.output;
|
|
5021
4982
|
if (!messages || messages.length === 0) {
|
|
5022
|
-
if (result.trace) {
|
|
5023
|
-
return renderFlatTrace(result
|
|
4983
|
+
if (result.trace || result.duration_ms !== void 0 || result.cost_usd !== void 0) {
|
|
4984
|
+
return renderFlatTrace(result);
|
|
5024
4985
|
}
|
|
5025
4986
|
return `${c2.dim}No trace data available${c2.reset}`;
|
|
5026
4987
|
}
|
|
5027
4988
|
const lines = [];
|
|
5028
4989
|
const testId = result.test_id ?? result.eval_id ?? "unknown";
|
|
5029
|
-
const totalDuration = result.
|
|
5030
|
-
const totalTokens = result.
|
|
4990
|
+
const totalDuration = result.duration_ms;
|
|
4991
|
+
const totalTokens = result.token_usage ? result.token_usage.input + result.token_usage.output : void 0;
|
|
5031
4992
|
const rootParts = [testId];
|
|
5032
4993
|
if (totalDuration !== void 0) rootParts.push(formatDuration(totalDuration));
|
|
5033
4994
|
if (totalTokens !== void 0) rootParts.push(`${formatNumber(totalTokens)} tok`);
|
|
5034
|
-
if (result.
|
|
4995
|
+
if (result.cost_usd !== void 0) rootParts.push(formatCost(result.cost_usd));
|
|
5035
4996
|
lines.push(`${c2.bold}${rootParts.join(", ")}${c2.reset}`);
|
|
5036
4997
|
const steps = [];
|
|
5037
4998
|
for (let i = 0; i < messages.length; i++) {
|
|
@@ -5108,8 +5069,8 @@ function formatResultDetail(result, index, tree) {
|
|
|
5108
5069
|
if (result.scores && result.scores.length > 0) {
|
|
5109
5070
|
lines.push(` ${c2.dim}Scores:${c2.reset} ${renderScores(result.scores)}`);
|
|
5110
5071
|
}
|
|
5111
|
-
if (result.trace) {
|
|
5112
|
-
lines.push(` ${c2.dim}Trace:${c2.reset} ${renderFlatTrace(result
|
|
5072
|
+
if (result.trace || result.duration_ms !== void 0 || result.cost_usd !== void 0) {
|
|
5073
|
+
lines.push(` ${c2.dim}Trace:${c2.reset} ${renderFlatTrace(result)}`);
|
|
5113
5074
|
}
|
|
5114
5075
|
if (result.reasoning) {
|
|
5115
5076
|
const maxLen = 200;
|
|
@@ -5216,7 +5177,7 @@ function collectMetrics(results) {
|
|
|
5216
5177
|
if (scores.length > 0) {
|
|
5217
5178
|
rows.push({ name: "score", values: scores, formatter: (n) => n.toFixed(2) });
|
|
5218
5179
|
}
|
|
5219
|
-
const latencies = results.map((r) => r.
|
|
5180
|
+
const latencies = results.map((r) => r.duration_ms).filter((v) => v !== void 0);
|
|
5220
5181
|
if (latencies.length > 0) {
|
|
5221
5182
|
rows.push({
|
|
5222
5183
|
name: "latency_s",
|
|
@@ -5224,13 +5185,13 @@ function collectMetrics(results) {
|
|
|
5224
5185
|
formatter: (n) => n.toFixed(1)
|
|
5225
5186
|
});
|
|
5226
5187
|
}
|
|
5227
|
-
const costs = results.map((r) => r.
|
|
5188
|
+
const costs = results.map((r) => r.cost_usd).filter((v) => v !== void 0);
|
|
5228
5189
|
if (costs.length > 0) {
|
|
5229
5190
|
rows.push({ name: "cost_usd", values: costs, formatter: (n) => formatCost(n) });
|
|
5230
5191
|
}
|
|
5231
5192
|
const tokens = results.map((r) => {
|
|
5232
|
-
if (!r.
|
|
5233
|
-
return r.
|
|
5193
|
+
if (!r.token_usage) return void 0;
|
|
5194
|
+
return r.token_usage.input + r.token_usage.output;
|
|
5234
5195
|
}).filter((v) => v !== void 0);
|
|
5235
5196
|
if (tokens.length > 0) {
|
|
5236
5197
|
rows.push({
|
|
@@ -5688,4 +5649,4 @@ export {
|
|
|
5688
5649
|
preprocessArgv,
|
|
5689
5650
|
runCli
|
|
5690
5651
|
};
|
|
5691
|
-
//# sourceMappingURL=chunk-RCFB5QFS.js.map
|
|
5652
|
+
//# sourceMappingURL=chunk-3INJ7ISP.js.map
|