agentv 2.10.0 → 2.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -0
- package/dist/{chunk-G3OTPFYX.js → chunk-D6KWUG7C.js} +204 -22
- package/dist/chunk-D6KWUG7C.js.map +1 -0
- package/dist/{chunk-PC3FAOHT.js → chunk-IL7CRMY6.js} +126 -15
- package/dist/chunk-IL7CRMY6.js.map +1 -0
- package/dist/{chunk-RJWTL3VS.js → chunk-MQIQH5LB.js} +826 -178
- package/dist/chunk-MQIQH5LB.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-BGRU67HI.js → dist-OVEHXEXC.js} +18 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-7KFUCBIP.js → interactive-7NQRG7GK.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-G3OTPFYX.js.map +0 -1
- package/dist/chunk-PC3FAOHT.js.map +0 -1
- package/dist/chunk-RJWTL3VS.js.map +0 -1
- /package/dist/{dist-BGRU67HI.js.map → dist-OVEHXEXC.js.map} +0 -0
- /package/dist/{interactive-7KFUCBIP.js.map → interactive-7NQRG7GK.js.map} +0 -0
|
@@ -10,6 +10,7 @@ import {
|
|
|
10
10
|
findGitRoot,
|
|
11
11
|
isEvaluatorKind,
|
|
12
12
|
listTargetNames,
|
|
13
|
+
loadConfig,
|
|
13
14
|
loadTestSuite,
|
|
14
15
|
loadTsConfig,
|
|
15
16
|
normalizeLineEndings,
|
|
@@ -21,9 +22,10 @@ import {
|
|
|
21
22
|
shouldEnableCache,
|
|
22
23
|
shouldSkipCacheForTemperature,
|
|
23
24
|
subscribeToCodexLogEntries,
|
|
25
|
+
subscribeToCopilotCliLogEntries,
|
|
24
26
|
subscribeToCopilotSdkLogEntries,
|
|
25
27
|
subscribeToPiLogEntries
|
|
26
|
-
} from "./chunk-
|
|
28
|
+
} from "./chunk-MQIQH5LB.js";
|
|
27
29
|
|
|
28
30
|
// src/commands/eval/shared.ts
|
|
29
31
|
import { constants } from "node:fs";
|
|
@@ -1063,7 +1065,20 @@ function inferFileTypeFromPath(filePath) {
|
|
|
1063
1065
|
}
|
|
1064
1066
|
return "eval";
|
|
1065
1067
|
}
|
|
1066
|
-
var
|
|
1068
|
+
var ASSERTION_TYPES_WITH_STRING_VALUE = /* @__PURE__ */ new Set([
|
|
1069
|
+
"contains",
|
|
1070
|
+
"icontains",
|
|
1071
|
+
"starts_with",
|
|
1072
|
+
"ends_with",
|
|
1073
|
+
"equals",
|
|
1074
|
+
"regex"
|
|
1075
|
+
]);
|
|
1076
|
+
var ASSERTION_TYPES_WITH_ARRAY_VALUE = /* @__PURE__ */ new Set([
|
|
1077
|
+
"contains_any",
|
|
1078
|
+
"contains_all",
|
|
1079
|
+
"icontains_any",
|
|
1080
|
+
"icontains_all"
|
|
1081
|
+
]);
|
|
1067
1082
|
var VALID_TEST_FILE_EXTENSIONS = /* @__PURE__ */ new Set([".yaml", ".yml", ".jsonl"]);
|
|
1068
1083
|
var NAME_PATTERN = /^[a-z0-9-]+$/;
|
|
1069
1084
|
function isObject(value) {
|
|
@@ -1246,6 +1261,9 @@ async function validateEvalFile(filePath) {
|
|
|
1246
1261
|
validateAssertArray(assertField, location, absolutePath, errors);
|
|
1247
1262
|
}
|
|
1248
1263
|
}
|
|
1264
|
+
if (isObject(parsed.workspace)) {
|
|
1265
|
+
validateWorkspaceRepoConfig(parsed.workspace, absolutePath, errors);
|
|
1266
|
+
}
|
|
1249
1267
|
return {
|
|
1250
1268
|
valid: errors.filter((e) => e.severity === "error").length === 0,
|
|
1251
1269
|
filePath: absolutePath,
|
|
@@ -1253,6 +1271,48 @@ async function validateEvalFile(filePath) {
|
|
|
1253
1271
|
errors
|
|
1254
1272
|
};
|
|
1255
1273
|
}
|
|
1274
|
+
/**
 * Appends advisory warnings about questionable `workspace` settings of an eval file.
 *
 * Three checks are performed; every finding is pushed with severity "warning":
 *  1. A repo entry whose `clone.depth` is smaller than `checkout.ancestor + 1`.
 *  2. A `reset.strategy` other than "none" while `repos` is missing or empty.
 *  3. `reset.after_each === true` combined with `isolation === "per_test"`.
 *
 * @param {object} workspace - The `workspace` mapping taken from the parsed eval file.
 * @param {string} filePath - Path copied onto every pushed entry.
 * @param {Array<object>} errors - Collector array; mutated in place.
 * @returns {void}
 */
function validateWorkspaceRepoConfig(workspace, filePath, errors) {
  const { repos, reset, isolation } = workspace;
  const repoList = Array.isArray(repos) ? repos : [];

  repoList.forEach((repo, index) => {
    if (!isObject(repo)) return;
    const { checkout, clone } = repo;
    if (!isObject(checkout) || !isObject(clone)) return;

    const { ancestor } = checkout;
    const { depth } = clone;
    // Only compare when both values are numeric; anything else is left to other validators.
    if (typeof ancestor !== "number" || typeof depth !== "number") return;

    const recommendedDepth = ancestor + 1;
    if (depth < recommendedDepth) {
      // Identify the repo by its path when one is present; otherwise fall back to the
      // array index so the location never reads "path=undefined".
      const hasPath = typeof repo.path === "string" && repo.path.length > 0;
      const location = hasPath ? `workspace.repos[path=${repo.path}]` : `workspace.repos[${index}]`;
      errors.push({
        severity: "warning",
        filePath,
        location,
        message: `clone.depth (${depth}) may be insufficient for checkout.ancestor (${ancestor}). Recommend depth >= ${recommendedDepth}.`
      });
    }
  });

  // The remaining checks only apply when a reset mapping is configured.
  if (!isObject(reset)) return;

  if (reset.strategy && reset.strategy !== "none" && repoList.length === 0) {
    errors.push({
      severity: "warning",
      filePath,
      location: "workspace.reset",
      message: `reset.strategy '${reset.strategy}' has no effect without repos.`
    });
  }

  if (reset.after_each === true && isolation === "per_test") {
    errors.push({
      severity: "warning",
      filePath,
      location: "workspace.reset",
      message: "reset.after_each is redundant with isolation: per_test (each test gets a fresh workspace)."
    });
  }
}
|
|
1256
1316
|
function validateMessages(messages, location, filePath, errors) {
|
|
1257
1317
|
for (let i = 0; i < messages.length; i++) {
|
|
1258
1318
|
const message = messages[i];
|
|
@@ -1402,7 +1462,7 @@ function validateAssertArray(assertField, parentLocation, filePath, errors) {
|
|
|
1402
1462
|
});
|
|
1403
1463
|
continue;
|
|
1404
1464
|
}
|
|
1405
|
-
if (
|
|
1465
|
+
if (ASSERTION_TYPES_WITH_STRING_VALUE.has(typeValue)) {
|
|
1406
1466
|
const value = item.value;
|
|
1407
1467
|
if (value === void 0 || typeof value !== "string") {
|
|
1408
1468
|
errors.push({
|
|
@@ -1426,6 +1486,18 @@ function validateAssertArray(assertField, parentLocation, filePath, errors) {
|
|
|
1426
1486
|
}
|
|
1427
1487
|
}
|
|
1428
1488
|
}
|
|
1489
|
+
if (ASSERTION_TYPES_WITH_ARRAY_VALUE.has(typeValue)) {
|
|
1490
|
+
const value = item.value;
|
|
1491
|
+
if (!Array.isArray(value) || value.length === 0) {
|
|
1492
|
+
errors.push({
|
|
1493
|
+
severity: "warning",
|
|
1494
|
+
filePath,
|
|
1495
|
+
location: `${location}.value`,
|
|
1496
|
+
message: `Assertion type '${typeValue}' requires a 'value' field (non-empty string array).`
|
|
1497
|
+
});
|
|
1498
|
+
continue;
|
|
1499
|
+
}
|
|
1500
|
+
}
|
|
1429
1501
|
const required = item.required;
|
|
1430
1502
|
if (required !== void 0) {
|
|
1431
1503
|
validateRequiredField(required, location, filePath, errors);
|
|
@@ -1571,6 +1643,26 @@ var COPILOT_SDK_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
1571
1643
|
"workspace_template",
|
|
1572
1644
|
"workspaceTemplate"
|
|
1573
1645
|
]);
|
|
1646
|
+
var COPILOT_CLI_SETTINGS = /* @__PURE__ */ new Set([
|
|
1647
|
+
...COMMON_SETTINGS,
|
|
1648
|
+
"executable",
|
|
1649
|
+
"command",
|
|
1650
|
+
"binary",
|
|
1651
|
+
"args",
|
|
1652
|
+
"arguments",
|
|
1653
|
+
"model",
|
|
1654
|
+
"cwd",
|
|
1655
|
+
"timeout_seconds",
|
|
1656
|
+
"timeoutSeconds",
|
|
1657
|
+
"log_dir",
|
|
1658
|
+
"logDir",
|
|
1659
|
+
"log_format",
|
|
1660
|
+
"logFormat",
|
|
1661
|
+
"system_prompt",
|
|
1662
|
+
"systemPrompt",
|
|
1663
|
+
"workspace_template",
|
|
1664
|
+
"workspaceTemplate"
|
|
1665
|
+
]);
|
|
1574
1666
|
var VSCODE_SETTINGS = /* @__PURE__ */ new Set([
|
|
1575
1667
|
...COMMON_SETTINGS,
|
|
1576
1668
|
"executable",
|
|
@@ -1631,11 +1723,12 @@ function getKnownSettings(provider) {
|
|
|
1631
1723
|
case "codex":
|
|
1632
1724
|
case "codex-cli":
|
|
1633
1725
|
return CODEX_SETTINGS;
|
|
1634
|
-
case "copilot":
|
|
1635
1726
|
case "copilot-sdk":
|
|
1636
1727
|
case "copilot_sdk":
|
|
1637
|
-
case "copilot-cli":
|
|
1638
1728
|
return COPILOT_SDK_SETTINGS;
|
|
1729
|
+
case "copilot":
|
|
1730
|
+
case "copilot-cli":
|
|
1731
|
+
return COPILOT_CLI_SETTINGS;
|
|
1639
1732
|
case "claude":
|
|
1640
1733
|
case "claude-code":
|
|
1641
1734
|
case "claude-sdk":
|
|
@@ -2307,6 +2400,13 @@ function normalizeString(value) {
|
|
|
2307
2400
|
const trimmed = value.trim();
|
|
2308
2401
|
return trimmed.length > 0 ? trimmed : void 0;
|
|
2309
2402
|
}
|
|
2403
|
+
/**
 * Expands every `{timestamp}` token in `value` into an ISO-8601 UTC timestamp
 * whose ":" and "." characters are replaced with "-".
 *
 * All tokens in one call receive the same timestamp.
 *
 * @param {string} value - Text that may contain `{timestamp}` tokens. Non-string
 *   values are returned unchanged instead of throwing, because the visible caller
 *   forwards any truthy config value.
 * @param {Date} [now] - Clock value to format; defaults to the current time.
 * @returns {string} `value` with tokens expanded, or `value` itself when it holds no token.
 */
function resolveTimestampPlaceholder(value, now) {
  const token = "{timestamp}";
  if (typeof value !== "string" || !value.includes(token)) {
    return value;
  }
  // The Date is only created when a token is actually present.
  const timestamp = (now ?? new Date()).toISOString().replace(/[:.]/g, "-");
  return value.replaceAll(token, timestamp);
}
|
|
2310
2410
|
function normalizeNumber(value, fallback) {
|
|
2311
2411
|
if (typeof value === "number" && Number.isFinite(value)) {
|
|
2312
2412
|
return value;
|
|
@@ -2331,7 +2431,7 @@ function normalizeOptionalNumber(value) {
|
|
|
2331
2431
|
}
|
|
2332
2432
|
return void 0;
|
|
2333
2433
|
}
|
|
2334
|
-
function normalizeOptions(rawOptions, config) {
|
|
2434
|
+
function normalizeOptions(rawOptions, config, yamlExecution) {
|
|
2335
2435
|
const cliFormat = normalizeString(rawOptions.outputFormat);
|
|
2336
2436
|
const configFormat = config?.output?.format;
|
|
2337
2437
|
const formatStr = cliFormat ?? configFormat ?? "jsonl";
|
|
@@ -2382,12 +2482,14 @@ function normalizeOptions(rawOptions, config) {
|
|
|
2382
2482
|
maxRetries: cliMaxRetries ?? configMaxRetries ?? 2,
|
|
2383
2483
|
cache: resolvedCache,
|
|
2384
2484
|
noCache: resolvedNoCache,
|
|
2385
|
-
|
|
2386
|
-
|
|
2485
|
+
// Boolean OR: config `true` cannot be overridden to `false` from CLI.
|
|
2486
|
+
// Intentional — there are no --no-verbose / --no-keep-workspaces flags.
|
|
2487
|
+
verbose: normalizeBoolean(rawOptions.verbose) || yamlExecution?.verbose === true,
|
|
2488
|
+
keepWorkspaces: normalizeBoolean(rawOptions.keepWorkspaces) || yamlExecution?.keep_workspaces === true,
|
|
2387
2489
|
cleanupWorkspaces: normalizeBoolean(rawOptions.cleanupWorkspaces),
|
|
2388
2490
|
trace: normalizeBoolean(rawOptions.trace),
|
|
2389
|
-
otelFile: normalizeString(rawOptions.otelFile),
|
|
2390
|
-
traceFile: normalizeString(rawOptions.traceFile),
|
|
2491
|
+
otelFile: normalizeString(rawOptions.otelFile) ?? (yamlExecution?.otel_file ? resolveTimestampPlaceholder(yamlExecution.otel_file) : void 0),
|
|
2492
|
+
traceFile: normalizeString(rawOptions.traceFile) ?? (yamlExecution?.trace_file ? resolveTimestampPlaceholder(yamlExecution.trace_file) : void 0),
|
|
2391
2493
|
exportOtel: normalizeBoolean(rawOptions.exportOtel),
|
|
2392
2494
|
otelBackend: normalizeString(rawOptions.otelBackend),
|
|
2393
2495
|
otelCaptureContent: normalizeBoolean(rawOptions.otelCaptureContent),
|
|
@@ -2672,8 +2774,9 @@ async function runEvalCommand(input) {
|
|
|
2672
2774
|
`Warning: Failed to load agentv config: ${err instanceof Error ? err.message : String(err)}`
|
|
2673
2775
|
);
|
|
2674
2776
|
}
|
|
2675
|
-
const options = normalizeOptions(input.rawOptions, config);
|
|
2676
2777
|
const repoRoot = await findRepoRoot(cwd);
|
|
2778
|
+
const yamlConfig = await loadConfig(path10.join(cwd, "_"), repoRoot);
|
|
2779
|
+
const options = normalizeOptions(input.rawOptions, config, yamlConfig?.execution);
|
|
2677
2780
|
if (options.keepWorkspaces && options.cleanupWorkspaces) {
|
|
2678
2781
|
console.warn(
|
|
2679
2782
|
"Warning: Both --keep-workspaces and --cleanup-workspaces specified. --cleanup-workspaces takes precedence."
|
|
@@ -2686,7 +2789,7 @@ async function runEvalCommand(input) {
|
|
|
2686
2789
|
const useFileExport = !!(options.otelFile || options.traceFile);
|
|
2687
2790
|
if (options.exportOtel || useFileExport) {
|
|
2688
2791
|
try {
|
|
2689
|
-
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-
|
|
2792
|
+
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-OVEHXEXC.js");
|
|
2690
2793
|
let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
|
|
2691
2794
|
let headers = {};
|
|
2692
2795
|
if (options.otelBackend) {
|
|
@@ -2814,7 +2917,14 @@ async function runEvalCommand(input) {
|
|
|
2814
2917
|
progressReporter.addLogPaths([entry.filePath], "pi");
|
|
2815
2918
|
});
|
|
2816
2919
|
const seenCopilotLogPaths = /* @__PURE__ */ new Set();
|
|
2817
|
-
const
|
|
2920
|
+
const unsubscribeCopilotSdkLogs = subscribeToCopilotSdkLogEntries((entry) => {
|
|
2921
|
+
if (!entry.filePath || seenCopilotLogPaths.has(entry.filePath)) {
|
|
2922
|
+
return;
|
|
2923
|
+
}
|
|
2924
|
+
seenCopilotLogPaths.add(entry.filePath);
|
|
2925
|
+
progressReporter.addLogPaths([entry.filePath], "copilot");
|
|
2926
|
+
});
|
|
2927
|
+
const unsubscribeCopilotCliLogs = subscribeToCopilotCliLogEntries((entry) => {
|
|
2818
2928
|
if (!entry.filePath || seenCopilotLogPaths.has(entry.filePath)) {
|
|
2819
2929
|
return;
|
|
2820
2930
|
}
|
|
@@ -2908,7 +3018,8 @@ Results written to: ${outputPath}`);
|
|
|
2908
3018
|
} finally {
|
|
2909
3019
|
unsubscribeCodexLogs();
|
|
2910
3020
|
unsubscribePiLogs();
|
|
2911
|
-
|
|
3021
|
+
unsubscribeCopilotSdkLogs();
|
|
3022
|
+
unsubscribeCopilotCliLogs();
|
|
2912
3023
|
await outputWriter.close().catch(() => void 0);
|
|
2913
3024
|
if (otelExporter) {
|
|
2914
3025
|
try {
|
|
@@ -2949,4 +3060,4 @@ export {
|
|
|
2949
3060
|
selectTarget,
|
|
2950
3061
|
runEvalCommand
|
|
2951
3062
|
};
|
|
2952
|
-
//# sourceMappingURL=chunk-PC3FAOHT.js.map
|
|
3063
|
+
//# sourceMappingURL=chunk-IL7CRMY6.js.map
|