agentv 3.7.0 → 3.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/{chunk-7YS6YNJZ.js → chunk-GC5P5HHZ.js} +127 -46
- package/dist/chunk-GC5P5HHZ.js.map +1 -0
- package/dist/{chunk-TR6H437M.js → chunk-Q2YWV4QM.js} +21 -21
- package/dist/chunk-Q2YWV4QM.js.map +1 -0
- package/dist/{chunk-XGG64VIY.js → chunk-TXDPYXHY.js} +636 -892
- package/dist/chunk-TXDPYXHY.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-VP6AXX6B.js → dist-PIOSPBKX.js} +2 -4
- package/dist/index.js +3 -3
- package/dist/{interactive-F6XECJ33.js → interactive-3VTDK5NX.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-7YS6YNJZ.js.map +0 -1
- package/dist/chunk-TR6H437M.js.map +0 -1
- package/dist/chunk-XGG64VIY.js.map +0 -1
- /package/dist/{dist-VP6AXX6B.js.map → dist-PIOSPBKX.js.map} +0 -0
- /package/dist/{interactive-F6XECJ33.js.map → interactive-3VTDK5NX.js.map} +0 -0
package/README.md
CHANGED
|
@@ -164,7 +164,7 @@ For large-scale evaluations, AgentV supports JSONL (JSON Lines) format as an alt
|
|
|
164
164
|
Optional sidecar YAML metadata file (`dataset.eval.yaml` alongside `dataset.jsonl`):
|
|
165
165
|
```yaml
|
|
166
166
|
description: Math evaluation dataset
|
|
167
|
-
|
|
167
|
+
name: math-tests
|
|
168
168
|
execution:
|
|
169
169
|
target: azure-llm
|
|
170
170
|
assertions:
|
|
package/dist/{chunk-7YS6YNJZ.js → chunk-GC5P5HHZ.js}
CHANGED
|
@@ -8,8 +8,10 @@ import {
|
|
|
8
8
|
buildSearchRoots,
|
|
9
9
|
ensureVSCodeSubagents,
|
|
10
10
|
findGitRoot,
|
|
11
|
+
interpolateEnv,
|
|
11
12
|
isEvaluatorKind,
|
|
12
13
|
listTargetNames,
|
|
14
|
+
loadCasesFromFile,
|
|
13
15
|
loadConfig,
|
|
14
16
|
loadTestSuite,
|
|
15
17
|
loadTsConfig,
|
|
@@ -25,12 +27,12 @@ import {
|
|
|
25
27
|
subscribeToCopilotCliLogEntries,
|
|
26
28
|
subscribeToCopilotSdkLogEntries,
|
|
27
29
|
subscribeToPiLogEntries
|
|
28
|
-
} from "./chunk-XGG64VIY.js";
|
|
30
|
+
} from "./chunk-TXDPYXHY.js";
|
|
29
31
|
|
|
30
32
|
// package.json
|
|
31
33
|
var package_default = {
|
|
32
34
|
name: "agentv",
|
|
33
|
-
version: "3.7.0",
|
|
35
|
+
version: "3.9.0",
|
|
34
36
|
description: "CLI entry point for AgentV",
|
|
35
37
|
type: "module",
|
|
36
38
|
repository: {
|
|
@@ -1326,9 +1328,9 @@ var SCRIPT = `
|
|
|
1326
1328
|
/* input / output */
|
|
1327
1329
|
h+='<div class="detail-grid">';
|
|
1328
1330
|
if(r.input!=null){
|
|
1329
|
-
h+='<div class="detail-block"><h4>Input</h4><pre class="detail-pre">'+esc(
|
|
1331
|
+
h+='<div class="detail-block"><h4>Input</h4><pre class="detail-pre">'+esc(JSON.stringify(r.input,null,2))+"</pre></div>";
|
|
1330
1332
|
}
|
|
1331
|
-
h+='<div class="detail-block"><h4>Output</h4><pre class="detail-pre">'+esc(r.
|
|
1333
|
+
h+='<div class="detail-block"><h4>Output</h4><pre class="detail-pre">'+esc(r.output?JSON.stringify(r.output,null,2):"")+"</pre></div>";
|
|
1332
1334
|
h+="</div>";
|
|
1333
1335
|
|
|
1334
1336
|
/* evaluator results */
|
|
@@ -1522,7 +1524,7 @@ var JunitWriter = class _JunitWriter {
|
|
|
1522
1524
|
this.closed = true;
|
|
1523
1525
|
const grouped = /* @__PURE__ */ new Map();
|
|
1524
1526
|
for (const result of this.results) {
|
|
1525
|
-
const suite = result.
|
|
1527
|
+
const suite = result.eval_set ?? "default";
|
|
1526
1528
|
const existing = grouped.get(suite);
|
|
1527
1529
|
if (existing) {
|
|
1528
1530
|
existing.push(result);
|
|
@@ -2186,7 +2188,7 @@ async function validateEvalFile(filePath) {
|
|
|
2186
2188
|
let parsed;
|
|
2187
2189
|
try {
|
|
2188
2190
|
const content = await readFile22(absolutePath, "utf8");
|
|
2189
|
-
parsed = parse2(content);
|
|
2191
|
+
parsed = interpolateEnv(parse2(content), process.env);
|
|
2190
2192
|
} catch (error) {
|
|
2191
2193
|
errors.push({
|
|
2192
2194
|
severity: "error",
|
|
@@ -2249,6 +2251,31 @@ async function validateEvalFile(filePath) {
|
|
|
2249
2251
|
}
|
|
2250
2252
|
if (typeof cases === "string") {
|
|
2251
2253
|
validateTestsStringPath(cases, absolutePath, errors);
|
|
2254
|
+
await validateWorkspaceConfig(parsed.workspace, absolutePath, errors, "workspace");
|
|
2255
|
+
const ext = path22.extname(cases).toLowerCase();
|
|
2256
|
+
if (VALID_TEST_FILE_EXTENSIONS.has(ext)) {
|
|
2257
|
+
const externalCasesPath = path22.resolve(path22.dirname(absolutePath), cases);
|
|
2258
|
+
try {
|
|
2259
|
+
const externalCases = await loadCasesFromFile(externalCasesPath);
|
|
2260
|
+
for (let i = 0; i < externalCases.length; i++) {
|
|
2261
|
+
const externalCase = externalCases[i];
|
|
2262
|
+
await validateWorkspaceConfig(
|
|
2263
|
+
externalCase.workspace,
|
|
2264
|
+
absolutePath,
|
|
2265
|
+
errors,
|
|
2266
|
+
`tests[${i}].workspace`
|
|
2267
|
+
);
|
|
2268
|
+
}
|
|
2269
|
+
} catch (error) {
|
|
2270
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
2271
|
+
errors.push({
|
|
2272
|
+
severity: "error",
|
|
2273
|
+
filePath: absolutePath,
|
|
2274
|
+
location: "tests",
|
|
2275
|
+
message
|
|
2276
|
+
});
|
|
2277
|
+
}
|
|
2278
|
+
}
|
|
2252
2279
|
return {
|
|
2253
2280
|
valid: errors.filter((e) => e.severity === "error").length === 0,
|
|
2254
2281
|
filePath: absolutePath,
|
|
@@ -2356,10 +2383,14 @@ async function validateEvalFile(filePath) {
|
|
|
2356
2383
|
if (assertField !== void 0) {
|
|
2357
2384
|
validateAssertArray(assertField, location, absolutePath, errors);
|
|
2358
2385
|
}
|
|
2386
|
+
await validateWorkspaceConfig(
|
|
2387
|
+
evalCase.workspace,
|
|
2388
|
+
absolutePath,
|
|
2389
|
+
errors,
|
|
2390
|
+
`${location}.workspace`
|
|
2391
|
+
);
|
|
2359
2392
|
}
|
|
2360
|
-
|
|
2361
|
-
validateWorkspaceRepoConfig(parsed.workspace, absolutePath, errors);
|
|
2362
|
-
}
|
|
2393
|
+
await validateWorkspaceConfig(parsed.workspace, absolutePath, errors, "workspace");
|
|
2363
2394
|
return {
|
|
2364
2395
|
valid: errors.filter((e) => e.severity === "error").length === 0,
|
|
2365
2396
|
filePath: absolutePath,
|
|
@@ -2367,6 +2398,41 @@ async function validateEvalFile(filePath) {
|
|
|
2367
2398
|
errors
|
|
2368
2399
|
};
|
|
2369
2400
|
}
|
|
2401
|
+
async function validateWorkspaceConfig(workspace, evalFilePath, errors, location) {
|
|
2402
|
+
if (workspace === void 0) {
|
|
2403
|
+
return;
|
|
2404
|
+
}
|
|
2405
|
+
if (isObject(workspace)) {
|
|
2406
|
+
validateWorkspaceRepoConfig(workspace, evalFilePath, errors);
|
|
2407
|
+
return;
|
|
2408
|
+
}
|
|
2409
|
+
if (typeof workspace !== "string") {
|
|
2410
|
+
return;
|
|
2411
|
+
}
|
|
2412
|
+
const workspacePath = path22.resolve(path22.dirname(evalFilePath), workspace);
|
|
2413
|
+
try {
|
|
2414
|
+
const workspaceContent = await readFile22(workspacePath, "utf8");
|
|
2415
|
+
const parsedWorkspace = interpolateEnv(parse2(workspaceContent), process.env);
|
|
2416
|
+
if (!isObject(parsedWorkspace)) {
|
|
2417
|
+
errors.push({
|
|
2418
|
+
severity: "error",
|
|
2419
|
+
filePath: evalFilePath,
|
|
2420
|
+
location,
|
|
2421
|
+
message: `External workspace file must contain a YAML object: ${workspace}`
|
|
2422
|
+
});
|
|
2423
|
+
return;
|
|
2424
|
+
}
|
|
2425
|
+
validateWorkspaceRepoConfig(parsedWorkspace, workspacePath, errors);
|
|
2426
|
+
} catch (error) {
|
|
2427
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
2428
|
+
errors.push({
|
|
2429
|
+
severity: "error",
|
|
2430
|
+
filePath: evalFilePath,
|
|
2431
|
+
location,
|
|
2432
|
+
message: `Failed to load external workspace file '${workspace}': ${message}`
|
|
2433
|
+
});
|
|
2434
|
+
}
|
|
2435
|
+
}
|
|
2370
2436
|
function validateWorkspaceRepoConfig(workspace, filePath, errors) {
|
|
2371
2437
|
const repos = workspace.repos;
|
|
2372
2438
|
const hooks = workspace.hooks;
|
|
@@ -2375,8 +2441,21 @@ function validateWorkspaceRepoConfig(workspace, filePath, errors) {
|
|
|
2375
2441
|
if (Array.isArray(repos)) {
|
|
2376
2442
|
for (const repo of repos) {
|
|
2377
2443
|
if (!isObject(repo)) continue;
|
|
2444
|
+
const source = repo.source;
|
|
2378
2445
|
const checkout = repo.checkout;
|
|
2379
2446
|
const clone = repo.clone;
|
|
2447
|
+
if (isObject(source) && isObject(checkout)) {
|
|
2448
|
+
const sourceType = source.type;
|
|
2449
|
+
const resolve = checkout.resolve;
|
|
2450
|
+
if (sourceType === "local" && typeof resolve === "string") {
|
|
2451
|
+
errors.push({
|
|
2452
|
+
severity: "warning",
|
|
2453
|
+
filePath,
|
|
2454
|
+
location: `workspace.repos[path=${repo.path}]`,
|
|
2455
|
+
message: "checkout.resolve has no effect for a local source. Use source.type to choose where the repo comes from; keep checkout.ref or checkout.ancestor only when pinning a local source."
|
|
2456
|
+
});
|
|
2457
|
+
}
|
|
2458
|
+
}
|
|
2380
2459
|
if (isObject(checkout) && isObject(clone)) {
|
|
2381
2460
|
const ancestor = checkout.ancestor;
|
|
2382
2461
|
const depth = clone.depth;
|
|
@@ -3141,31 +3220,6 @@ async function validateConfigFile(filePath) {
|
|
|
3141
3220
|
return { valid: false, filePath, fileType: "config", errors };
|
|
3142
3221
|
}
|
|
3143
3222
|
const config = parsed;
|
|
3144
|
-
const guidelinePatterns = config.guideline_patterns;
|
|
3145
|
-
if (guidelinePatterns !== void 0) {
|
|
3146
|
-
if (!Array.isArray(guidelinePatterns)) {
|
|
3147
|
-
errors.push({
|
|
3148
|
-
severity: "error",
|
|
3149
|
-
filePath,
|
|
3150
|
-
location: "guideline_patterns",
|
|
3151
|
-
message: "Field 'guideline_patterns' must be an array"
|
|
3152
|
-
});
|
|
3153
|
-
} else if (!guidelinePatterns.every((p) => typeof p === "string")) {
|
|
3154
|
-
errors.push({
|
|
3155
|
-
severity: "error",
|
|
3156
|
-
filePath,
|
|
3157
|
-
location: "guideline_patterns",
|
|
3158
|
-
message: "All entries in 'guideline_patterns' must be strings"
|
|
3159
|
-
});
|
|
3160
|
-
} else if (guidelinePatterns.length === 0) {
|
|
3161
|
-
errors.push({
|
|
3162
|
-
severity: "warning",
|
|
3163
|
-
filePath,
|
|
3164
|
-
location: "guideline_patterns",
|
|
3165
|
-
message: "Field 'guideline_patterns' is empty. Consider removing it or adding patterns."
|
|
3166
|
-
});
|
|
3167
|
-
}
|
|
3168
|
-
}
|
|
3169
3223
|
const evalPatterns = config.eval_patterns;
|
|
3170
3224
|
if (evalPatterns !== void 0) {
|
|
3171
3225
|
if (!Array.isArray(evalPatterns)) {
|
|
@@ -3202,13 +3256,7 @@ async function validateConfigFile(filePath) {
|
|
|
3202
3256
|
});
|
|
3203
3257
|
}
|
|
3204
3258
|
}
|
|
3205
|
-
const allowedFields = /* @__PURE__ */ new Set([
|
|
3206
|
-
"$schema",
|
|
3207
|
-
"guideline_patterns",
|
|
3208
|
-
"eval_patterns",
|
|
3209
|
-
"required_version",
|
|
3210
|
-
"execution"
|
|
3211
|
-
]);
|
|
3259
|
+
const allowedFields = /* @__PURE__ */ new Set(["$schema", "eval_patterns", "required_version", "execution"]);
|
|
3212
3260
|
const unexpectedFields = Object.keys(config).filter((key) => !allowedFields.has(key));
|
|
3213
3261
|
if (unexpectedFields.length > 0) {
|
|
3214
3262
|
errors.push({
|
|
@@ -3614,6 +3662,34 @@ function normalizeOptionalNumber(value) {
|
|
|
3614
3662
|
function normalizeWorkspaceMode(value) {
|
|
3615
3663
|
return value === "pooled" || value === "temp" || value === "static" ? value : void 0;
|
|
3616
3664
|
}
|
|
3665
|
+
function normalizeOutputMessages(cliValue) {
|
|
3666
|
+
if (cliValue === void 0) {
|
|
3667
|
+
return 1;
|
|
3668
|
+
}
|
|
3669
|
+
if (cliValue === "all") {
|
|
3670
|
+
return "all";
|
|
3671
|
+
}
|
|
3672
|
+
const parsed = Number.parseInt(cliValue, 10);
|
|
3673
|
+
if (Number.isNaN(parsed) || !Number.isInteger(parsed) || parsed < 1) {
|
|
3674
|
+
console.warn(
|
|
3675
|
+
`Warning: Invalid --output-messages value '${cliValue}'. Must be a positive integer or 'all'. Defaulting to 1.`
|
|
3676
|
+
);
|
|
3677
|
+
return 1;
|
|
3678
|
+
}
|
|
3679
|
+
return parsed;
|
|
3680
|
+
}
|
|
3681
|
+
function trimOutputMessages(output, outputMessages) {
|
|
3682
|
+
const messages = output ?? [];
|
|
3683
|
+
if (outputMessages === "all") {
|
|
3684
|
+
return messages.map((m) => ({ role: m.role, content: m.content }));
|
|
3685
|
+
}
|
|
3686
|
+
if (outputMessages === 1) {
|
|
3687
|
+
const lastAssistant = messages.filter((m) => m.role === "assistant").at(-1);
|
|
3688
|
+
return lastAssistant ? [{ role: lastAssistant.role, content: lastAssistant.content }] : [];
|
|
3689
|
+
}
|
|
3690
|
+
const sliced = messages.slice(-outputMessages);
|
|
3691
|
+
return sliced.map((m) => ({ role: m.role, content: m.content }));
|
|
3692
|
+
}
|
|
3617
3693
|
function normalizeOptions(rawOptions, config, yamlExecution) {
|
|
3618
3694
|
const cliFormat = normalizeString(rawOptions.outputFormat);
|
|
3619
3695
|
const configFormat = config?.output?.format;
|
|
@@ -3693,7 +3769,8 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
|
|
|
3693
3769
|
benchmarkJson: normalizeString(rawOptions.benchmarkJson),
|
|
3694
3770
|
artifacts: normalizeString(rawOptions.artifacts),
|
|
3695
3771
|
graderTarget: normalizeString(rawOptions.graderTarget),
|
|
3696
|
-
model: normalizeString(rawOptions.model)
|
|
3772
|
+
model: normalizeString(rawOptions.model),
|
|
3773
|
+
outputMessages: normalizeOutputMessages(normalizeString(rawOptions.outputMessages))
|
|
3697
3774
|
};
|
|
3698
3775
|
}
|
|
3699
3776
|
async function ensureFileExists(filePath, description) {
|
|
@@ -3942,8 +4019,12 @@ async function runSingleEvalFile(params) {
|
|
|
3942
4019
|
streamCallbacks: streamingObserver?.getStreamCallbacks(),
|
|
3943
4020
|
onResult: async (result) => {
|
|
3944
4021
|
streamingObserver?.finalizeEvalCase(result.score, result.error);
|
|
3945
|
-
const
|
|
3946
|
-
|
|
4022
|
+
const trimmedOutput = trimOutputMessages(result.output, options.outputMessages);
|
|
4023
|
+
const trimmedResult = {
|
|
4024
|
+
...result,
|
|
4025
|
+
output: trimmedOutput
|
|
4026
|
+
};
|
|
4027
|
+
await outputWriter.append(trimmedResult);
|
|
3947
4028
|
if (otelExporter && !streamingObserver) {
|
|
3948
4029
|
try {
|
|
3949
4030
|
await otelExporter.exportResult(result);
|
|
@@ -4044,7 +4125,7 @@ async function runEvalCommand(input) {
|
|
|
4044
4125
|
const useFileExport = !!(options.otelFile || options.traceFile);
|
|
4045
4126
|
if (options.exportOtel || useFileExport) {
|
|
4046
4127
|
try {
|
|
4047
|
-
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-VP6AXX6B.js");
|
|
4128
|
+
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-PIOSPBKX.js");
|
|
4048
4129
|
let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
|
|
4049
4130
|
let headers = {};
|
|
4050
4131
|
if (options.otelBackend) {
|
|
@@ -4386,4 +4467,4 @@ export {
|
|
|
4386
4467
|
selectTarget,
|
|
4387
4468
|
runEvalCommand
|
|
4388
4469
|
};
|
|
4389
|
-
//# sourceMappingURL=chunk-7YS6YNJZ.js.map
|
|
4470
|
+
//# sourceMappingURL=chunk-GC5P5HHZ.js.map
|