agentv 3.7.0 → 3.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-7YS6YNJZ.js → chunk-ASYRKFAI.js} +125 -13
- package/dist/chunk-ASYRKFAI.js.map +1 -0
- package/dist/{chunk-XGG64VIY.js → chunk-F4UDJ7LG.js} +549 -545
- package/dist/chunk-F4UDJ7LG.js.map +1 -0
- package/dist/{chunk-TR6H437M.js → chunk-YZRGQ6ZS.js} +13 -9
- package/dist/chunk-YZRGQ6ZS.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-VP6AXX6B.js → dist-4AQUJJAP.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-F6XECJ33.js → interactive-OPQGDF77.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-7YS6YNJZ.js.map +0 -1
- package/dist/chunk-TR6H437M.js.map +0 -1
- package/dist/chunk-XGG64VIY.js.map +0 -1
- /package/dist/{dist-VP6AXX6B.js.map → dist-4AQUJJAP.js.map} +0 -0
- /package/dist/{interactive-F6XECJ33.js.map → interactive-OPQGDF77.js.map} +0 -0
|
@@ -8,8 +8,10 @@ import {
|
|
|
8
8
|
buildSearchRoots,
|
|
9
9
|
ensureVSCodeSubagents,
|
|
10
10
|
findGitRoot,
|
|
11
|
+
interpolateEnv,
|
|
11
12
|
isEvaluatorKind,
|
|
12
13
|
listTargetNames,
|
|
14
|
+
loadCasesFromFile,
|
|
13
15
|
loadConfig,
|
|
14
16
|
loadTestSuite,
|
|
15
17
|
loadTsConfig,
|
|
@@ -25,12 +27,12 @@ import {
|
|
|
25
27
|
subscribeToCopilotCliLogEntries,
|
|
26
28
|
subscribeToCopilotSdkLogEntries,
|
|
27
29
|
subscribeToPiLogEntries
|
|
28
|
-
} from "./chunk-XGG64VIY.js";
|
|
30
|
+
} from "./chunk-F4UDJ7LG.js";
|
|
29
31
|
|
|
30
32
|
// package.json
|
|
31
33
|
var package_default = {
|
|
32
34
|
name: "agentv",
|
|
33
|
-
version: "3.7.0",
|
|
35
|
+
version: "3.8.0",
|
|
34
36
|
description: "CLI entry point for AgentV",
|
|
35
37
|
type: "module",
|
|
36
38
|
repository: {
|
|
@@ -1326,9 +1328,9 @@ var SCRIPT = `
|
|
|
1326
1328
|
/* input / output */
|
|
1327
1329
|
h+='<div class="detail-grid">';
|
|
1328
1330
|
if(r.input!=null){
|
|
1329
|
-
h+='<div class="detail-block"><h4>Input</h4><pre class="detail-pre">'+esc(
|
|
1331
|
+
h+='<div class="detail-block"><h4>Input</h4><pre class="detail-pre">'+esc(JSON.stringify(r.input,null,2))+"</pre></div>";
|
|
1330
1332
|
}
|
|
1331
|
-
h+='<div class="detail-block"><h4>Output</h4><pre class="detail-pre">'+esc(r.
|
|
1333
|
+
h+='<div class="detail-block"><h4>Output</h4><pre class="detail-pre">'+esc(r.output?JSON.stringify(r.output,null,2):"")+"</pre></div>";
|
|
1332
1334
|
h+="</div>";
|
|
1333
1335
|
|
|
1334
1336
|
/* evaluator results */
|
|
@@ -2186,7 +2188,7 @@ async function validateEvalFile(filePath) {
|
|
|
2186
2188
|
let parsed;
|
|
2187
2189
|
try {
|
|
2188
2190
|
const content = await readFile22(absolutePath, "utf8");
|
|
2189
|
-
parsed = parse2(content);
|
|
2191
|
+
parsed = interpolateEnv(parse2(content), process.env);
|
|
2190
2192
|
} catch (error) {
|
|
2191
2193
|
errors.push({
|
|
2192
2194
|
severity: "error",
|
|
@@ -2249,6 +2251,31 @@ async function validateEvalFile(filePath) {
|
|
|
2249
2251
|
}
|
|
2250
2252
|
if (typeof cases === "string") {
|
|
2251
2253
|
validateTestsStringPath(cases, absolutePath, errors);
|
|
2254
|
+
await validateWorkspaceConfig(parsed.workspace, absolutePath, errors, "workspace");
|
|
2255
|
+
const ext = path22.extname(cases).toLowerCase();
|
|
2256
|
+
if (VALID_TEST_FILE_EXTENSIONS.has(ext)) {
|
|
2257
|
+
const externalCasesPath = path22.resolve(path22.dirname(absolutePath), cases);
|
|
2258
|
+
try {
|
|
2259
|
+
const externalCases = await loadCasesFromFile(externalCasesPath);
|
|
2260
|
+
for (let i = 0; i < externalCases.length; i++) {
|
|
2261
|
+
const externalCase = externalCases[i];
|
|
2262
|
+
await validateWorkspaceConfig(
|
|
2263
|
+
externalCase.workspace,
|
|
2264
|
+
absolutePath,
|
|
2265
|
+
errors,
|
|
2266
|
+
`tests[${i}].workspace`
|
|
2267
|
+
);
|
|
2268
|
+
}
|
|
2269
|
+
} catch (error) {
|
|
2270
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
2271
|
+
errors.push({
|
|
2272
|
+
severity: "error",
|
|
2273
|
+
filePath: absolutePath,
|
|
2274
|
+
location: "tests",
|
|
2275
|
+
message
|
|
2276
|
+
});
|
|
2277
|
+
}
|
|
2278
|
+
}
|
|
2252
2279
|
return {
|
|
2253
2280
|
valid: errors.filter((e) => e.severity === "error").length === 0,
|
|
2254
2281
|
filePath: absolutePath,
|
|
@@ -2356,10 +2383,14 @@ async function validateEvalFile(filePath) {
|
|
|
2356
2383
|
if (assertField !== void 0) {
|
|
2357
2384
|
validateAssertArray(assertField, location, absolutePath, errors);
|
|
2358
2385
|
}
|
|
2386
|
+
await validateWorkspaceConfig(
|
|
2387
|
+
evalCase.workspace,
|
|
2388
|
+
absolutePath,
|
|
2389
|
+
errors,
|
|
2390
|
+
`${location}.workspace`
|
|
2391
|
+
);
|
|
2359
2392
|
}
|
|
2360
|
-
|
|
2361
|
-
validateWorkspaceRepoConfig(parsed.workspace, absolutePath, errors);
|
|
2362
|
-
}
|
|
2393
|
+
await validateWorkspaceConfig(parsed.workspace, absolutePath, errors, "workspace");
|
|
2363
2394
|
return {
|
|
2364
2395
|
valid: errors.filter((e) => e.severity === "error").length === 0,
|
|
2365
2396
|
filePath: absolutePath,
|
|
@@ -2367,6 +2398,41 @@ async function validateEvalFile(filePath) {
|
|
|
2367
2398
|
errors
|
|
2368
2399
|
};
|
|
2369
2400
|
}
|
|
2401
|
+
/**
 * Validates a `workspace` entry taken from an eval file (suite-level or per test case).
 *
 * - `undefined`: nothing to validate.
 * - object: validated in place via `validateWorkspaceRepoConfig`, attributed to the eval file.
 * - string: treated as a path relative to the eval file's directory; the file is read,
 *   parsed, env-interpolated and then validated, attributed to the resolved workspace path.
 * - anything else: ignored.
 *
 * Problems are appended to `errors`; the function itself resolves to `undefined`.
 *
 * @param {unknown} workspace - Raw `workspace` value from the parsed eval file.
 * @param {string} evalFilePath - Absolute path of the eval file that referenced the workspace.
 * @param {Array<object>} errors - Mutable list that receives validation entries.
 * @param {string} location - Location label used for errors raised while loading an external file.
 * @returns {Promise<void>}
 */
async function validateWorkspaceConfig(workspace, evalFilePath, errors, location) {
  if (workspace === void 0) return;

  if (isObject(workspace)) {
    validateWorkspaceRepoConfig(workspace, evalFilePath, errors);
    return;
  }

  if (typeof workspace !== "string") return;

  // Load failures and shape problems are always reported against the eval file.
  const reportError = (message) => {
    errors.push({
      severity: "error",
      filePath: evalFilePath,
      location,
      message
    });
  };

  const baseDir = path22.dirname(evalFilePath);
  const resolvedWorkspacePath = path22.resolve(baseDir, workspace);

  try {
    const rawYaml = await readFile22(resolvedWorkspacePath, "utf8");
    const externalWorkspace = interpolateEnv(parse2(rawYaml), process.env);
    if (isObject(externalWorkspace)) {
      // Kept inside the try so a failure here is reported like a load failure.
      validateWorkspaceRepoConfig(externalWorkspace, resolvedWorkspacePath, errors);
    } else {
      reportError(`External workspace file must contain a YAML object: ${workspace}`);
    }
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    reportError(`Failed to load external workspace file '${workspace}': ${reason}`);
  }
}
|
|
2370
2436
|
function validateWorkspaceRepoConfig(workspace, filePath, errors) {
|
|
2371
2437
|
const repos = workspace.repos;
|
|
2372
2438
|
const hooks = workspace.hooks;
|
|
@@ -2375,8 +2441,21 @@ function validateWorkspaceRepoConfig(workspace, filePath, errors) {
|
|
|
2375
2441
|
if (Array.isArray(repos)) {
|
|
2376
2442
|
for (const repo of repos) {
|
|
2377
2443
|
if (!isObject(repo)) continue;
|
|
2444
|
+
const source = repo.source;
|
|
2378
2445
|
const checkout = repo.checkout;
|
|
2379
2446
|
const clone = repo.clone;
|
|
2447
|
+
if (isObject(source) && isObject(checkout)) {
|
|
2448
|
+
const sourceType = source.type;
|
|
2449
|
+
const resolve = checkout.resolve;
|
|
2450
|
+
if (sourceType === "local" && typeof resolve === "string") {
|
|
2451
|
+
errors.push({
|
|
2452
|
+
severity: "warning",
|
|
2453
|
+
filePath,
|
|
2454
|
+
location: `workspace.repos[path=${repo.path}]`,
|
|
2455
|
+
message: "checkout.resolve has no effect for a local source. Use source.type to choose where the repo comes from; keep checkout.ref or checkout.ancestor only when pinning a local source."
|
|
2456
|
+
});
|
|
2457
|
+
}
|
|
2458
|
+
}
|
|
2380
2459
|
if (isObject(checkout) && isObject(clone)) {
|
|
2381
2460
|
const ancestor = checkout.ancestor;
|
|
2382
2461
|
const depth = clone.depth;
|
|
@@ -3614,6 +3693,34 @@ function normalizeOptionalNumber(value) {
|
|
|
3614
3693
|
function normalizeWorkspaceMode(value) {
|
|
3615
3694
|
return value === "pooled" || value === "temp" || value === "static" ? value : void 0;
|
|
3616
3695
|
}
|
|
3696
|
+
/**
 * Normalizes the raw `--output-messages` CLI value.
 *
 * @param {string | undefined} cliValue - Raw option value; `undefined` when the flag is absent.
 * @returns {number | "all"} `1` when the flag is absent or invalid, `"all"` when requested,
 *   otherwise the positive integer that was supplied.
 */
function normalizeOutputMessages(cliValue) {
  if (cliValue === void 0) {
    return 1;
  }
  if (cliValue === "all") {
    return "all";
  }
  // Require the whole value to be decimal digits before parsing. Number.parseInt on its
  // own stops at the first non-digit, so "3abc" would become 3 and "2.9" would become 2
  // without the warning below ever firing.
  const text = String(cliValue).trim();
  const parsed = /^\+?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
  if (!Number.isInteger(parsed) || parsed < 1) {
    console.warn(
      `Warning: Invalid --output-messages value '${cliValue}'. Must be a positive integer or 'all'. Defaulting to 1.`
    );
    return 1;
  }
  return parsed;
}
|
|
3712
|
+
/**
 * Reduces a result's output transcript to the requested number of messages and strips
 * every message down to its `role` and `content` fields.
 *
 * - `"all"`: every message is kept.
 * - `1`: only the last message whose role is `"assistant"` is kept (none if there is no such message).
 * - any other number N: the last N messages are kept, regardless of role.
 *
 * @param {Array<{role: string, content: unknown}> | null | undefined} output - Messages; nullish is treated as empty.
 * @param {number | "all"} outputMessages - How many messages to keep.
 * @returns {Array<{role: string, content: unknown}>} New array of `{ role, content }` objects.
 */
function trimOutputMessages(output, outputMessages) {
  const transcript = output ?? [];
  const toRoleAndContent = (message) => ({ role: message.role, content: message.content });

  let kept;
  if (outputMessages === "all") {
    kept = transcript;
  } else if (outputMessages === 1) {
    const assistantMessages = transcript.filter((message) => message.role === "assistant");
    const finalAssistant = assistantMessages[assistantMessages.length - 1];
    kept = finalAssistant ? [finalAssistant] : [];
  } else {
    kept = transcript.slice(-outputMessages);
  }

  return kept.map(toRoleAndContent);
}
|
|
3617
3724
|
function normalizeOptions(rawOptions, config, yamlExecution) {
|
|
3618
3725
|
const cliFormat = normalizeString(rawOptions.outputFormat);
|
|
3619
3726
|
const configFormat = config?.output?.format;
|
|
@@ -3693,7 +3800,8 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
|
|
|
3693
3800
|
benchmarkJson: normalizeString(rawOptions.benchmarkJson),
|
|
3694
3801
|
artifacts: normalizeString(rawOptions.artifacts),
|
|
3695
3802
|
graderTarget: normalizeString(rawOptions.graderTarget),
|
|
3696
|
-
model: normalizeString(rawOptions.model)
|
|
3803
|
+
model: normalizeString(rawOptions.model),
|
|
3804
|
+
outputMessages: normalizeOutputMessages(normalizeString(rawOptions.outputMessages))
|
|
3697
3805
|
};
|
|
3698
3806
|
}
|
|
3699
3807
|
async function ensureFileExists(filePath, description) {
|
|
@@ -3942,8 +4050,12 @@ async function runSingleEvalFile(params) {
|
|
|
3942
4050
|
streamCallbacks: streamingObserver?.getStreamCallbacks(),
|
|
3943
4051
|
onResult: async (result) => {
|
|
3944
4052
|
streamingObserver?.finalizeEvalCase(result.score, result.error);
|
|
3945
|
-
const
|
|
3946
|
-
|
|
4053
|
+
const trimmedOutput = trimOutputMessages(result.output, options.outputMessages);
|
|
4054
|
+
const trimmedResult = {
|
|
4055
|
+
...result,
|
|
4056
|
+
output: trimmedOutput
|
|
4057
|
+
};
|
|
4058
|
+
await outputWriter.append(trimmedResult);
|
|
3947
4059
|
if (otelExporter && !streamingObserver) {
|
|
3948
4060
|
try {
|
|
3949
4061
|
await otelExporter.exportResult(result);
|
|
@@ -4044,7 +4156,7 @@ async function runEvalCommand(input) {
|
|
|
4044
4156
|
const useFileExport = !!(options.otelFile || options.traceFile);
|
|
4045
4157
|
if (options.exportOtel || useFileExport) {
|
|
4046
4158
|
try {
|
|
4047
|
-
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-VP6AXX6B.js");
|
|
4159
|
+
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-4AQUJJAP.js");
|
|
4048
4160
|
let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
|
|
4049
4161
|
let headers = {};
|
|
4050
4162
|
if (options.otelBackend) {
|
|
@@ -4386,4 +4498,4 @@ export {
|
|
|
4386
4498
|
selectTarget,
|
|
4387
4499
|
runEvalCommand
|
|
4388
4500
|
};
|
|
4389
|
-
//# sourceMappingURL=chunk-7YS6YNJZ.js.map
|
|
4501
|
+
//# sourceMappingURL=chunk-ASYRKFAI.js.map
|