agentv 2.10.0 → 2.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,7 @@ import {
10
10
  findGitRoot,
11
11
  isEvaluatorKind,
12
12
  listTargetNames,
13
+ loadConfig,
13
14
  loadTestSuite,
14
15
  loadTsConfig,
15
16
  normalizeLineEndings,
@@ -21,9 +22,10 @@ import {
21
22
  shouldEnableCache,
22
23
  shouldSkipCacheForTemperature,
23
24
  subscribeToCodexLogEntries,
25
+ subscribeToCopilotCliLogEntries,
24
26
  subscribeToCopilotSdkLogEntries,
25
27
  subscribeToPiLogEntries
26
- } from "./chunk-RJWTL3VS.js";
28
+ } from "./chunk-MQIQH5LB.js";
27
29
 
28
30
  // src/commands/eval/shared.ts
29
31
  import { constants } from "node:fs";
@@ -1063,7 +1065,20 @@ function inferFileTypeFromPath(filePath) {
1063
1065
  }
1064
1066
  return "eval";
1065
1067
  }
1066
- var ASSERTION_TYPES_WITH_VALUE = /* @__PURE__ */ new Set(["contains", "equals", "regex"]);
1068
+ var ASSERTION_TYPES_WITH_STRING_VALUE = /* @__PURE__ */ new Set([
1069
+ "contains",
1070
+ "icontains",
1071
+ "starts_with",
1072
+ "ends_with",
1073
+ "equals",
1074
+ "regex"
1075
+ ]);
1076
+ var ASSERTION_TYPES_WITH_ARRAY_VALUE = /* @__PURE__ */ new Set([
1077
+ "contains_any",
1078
+ "contains_all",
1079
+ "icontains_any",
1080
+ "icontains_all"
1081
+ ]);
1067
1082
  var VALID_TEST_FILE_EXTENSIONS = /* @__PURE__ */ new Set([".yaml", ".yml", ".jsonl"]);
1068
1083
  var NAME_PATTERN = /^[a-z0-9-]+$/;
1069
1084
  function isObject(value) {
@@ -1246,6 +1261,9 @@ async function validateEvalFile(filePath) {
1246
1261
  validateAssertArray(assertField, location, absolutePath, errors);
1247
1262
  }
1248
1263
  }
1264
+ if (isObject(parsed.workspace)) {
1265
+ validateWorkspaceRepoConfig(parsed.workspace, absolutePath, errors);
1266
+ }
1249
1267
  return {
1250
1268
  valid: errors.filter((e) => e.severity === "error").length === 0,
1251
1269
  filePath: absolutePath,
@@ -1253,6 +1271,48 @@ async function validateEvalFile(filePath) {
1253
1271
  errors
1254
1272
  };
1255
1273
  }
1274
+ function validateWorkspaceRepoConfig(workspace, filePath, errors) {
1275
+ const repos = workspace.repos;
1276
+ const reset = workspace.reset;
1277
+ const isolation = workspace.isolation;
1278
+ if (Array.isArray(repos)) {
1279
+ for (const repo of repos) {
1280
+ if (!isObject(repo)) continue;
1281
+ const checkout = repo.checkout;
1282
+ const clone = repo.clone;
1283
+ if (isObject(checkout) && isObject(clone)) {
1284
+ const ancestor = checkout.ancestor;
1285
+ const depth = clone.depth;
1286
+ if (typeof ancestor === "number" && typeof depth === "number" && depth < ancestor + 1) {
1287
+ errors.push({
1288
+ severity: "warning",
1289
+ filePath,
1290
+ location: `workspace.repos[path=${repo.path}]`,
1291
+ message: `clone.depth (${depth}) may be insufficient for checkout.ancestor (${ancestor}). Recommend depth >= ${ancestor + 1}.`
1292
+ });
1293
+ }
1294
+ }
1295
+ }
1296
+ }
1297
+ if (isObject(reset) && reset.strategy && reset.strategy !== "none") {
1298
+ if (!Array.isArray(repos) || repos.length === 0) {
1299
+ errors.push({
1300
+ severity: "warning",
1301
+ filePath,
1302
+ location: "workspace.reset",
1303
+ message: `reset.strategy '${reset.strategy}' has no effect without repos.`
1304
+ });
1305
+ }
1306
+ }
1307
+ if (isObject(reset) && reset.after_each === true && isolation === "per_test") {
1308
+ errors.push({
1309
+ severity: "warning",
1310
+ filePath,
1311
+ location: "workspace.reset",
1312
+ message: "reset.after_each is redundant with isolation: per_test (each test gets a fresh workspace)."
1313
+ });
1314
+ }
1315
+ }
1256
1316
  function validateMessages(messages, location, filePath, errors) {
1257
1317
  for (let i = 0; i < messages.length; i++) {
1258
1318
  const message = messages[i];
@@ -1402,7 +1462,7 @@ function validateAssertArray(assertField, parentLocation, filePath, errors) {
1402
1462
  });
1403
1463
  continue;
1404
1464
  }
1405
- if (ASSERTION_TYPES_WITH_VALUE.has(typeValue)) {
1465
+ if (ASSERTION_TYPES_WITH_STRING_VALUE.has(typeValue)) {
1406
1466
  const value = item.value;
1407
1467
  if (value === void 0 || typeof value !== "string") {
1408
1468
  errors.push({
@@ -1426,6 +1486,18 @@ function validateAssertArray(assertField, parentLocation, filePath, errors) {
1426
1486
  }
1427
1487
  }
1428
1488
  }
1489
+ if (ASSERTION_TYPES_WITH_ARRAY_VALUE.has(typeValue)) {
1490
+ const value = item.value;
1491
+ if (!Array.isArray(value) || value.length === 0) {
1492
+ errors.push({
1493
+ severity: "warning",
1494
+ filePath,
1495
+ location: `${location}.value`,
1496
+ message: `Assertion type '${typeValue}' requires a 'value' field (non-empty string array).`
1497
+ });
1498
+ continue;
1499
+ }
1500
+ }
1429
1501
  const required = item.required;
1430
1502
  if (required !== void 0) {
1431
1503
  validateRequiredField(required, location, filePath, errors);
@@ -1571,6 +1643,26 @@ var COPILOT_SDK_SETTINGS = /* @__PURE__ */ new Set([
1571
1643
  "workspace_template",
1572
1644
  "workspaceTemplate"
1573
1645
  ]);
1646
+ var COPILOT_CLI_SETTINGS = /* @__PURE__ */ new Set([
1647
+ ...COMMON_SETTINGS,
1648
+ "executable",
1649
+ "command",
1650
+ "binary",
1651
+ "args",
1652
+ "arguments",
1653
+ "model",
1654
+ "cwd",
1655
+ "timeout_seconds",
1656
+ "timeoutSeconds",
1657
+ "log_dir",
1658
+ "logDir",
1659
+ "log_format",
1660
+ "logFormat",
1661
+ "system_prompt",
1662
+ "systemPrompt",
1663
+ "workspace_template",
1664
+ "workspaceTemplate"
1665
+ ]);
1574
1666
  var VSCODE_SETTINGS = /* @__PURE__ */ new Set([
1575
1667
  ...COMMON_SETTINGS,
1576
1668
  "executable",
@@ -1631,11 +1723,12 @@ function getKnownSettings(provider) {
1631
1723
  case "codex":
1632
1724
  case "codex-cli":
1633
1725
  return CODEX_SETTINGS;
1634
- case "copilot":
1635
1726
  case "copilot-sdk":
1636
1727
  case "copilot_sdk":
1637
- case "copilot-cli":
1638
1728
  return COPILOT_SDK_SETTINGS;
1729
+ case "copilot":
1730
+ case "copilot-cli":
1731
+ return COPILOT_CLI_SETTINGS;
1639
1732
  case "claude":
1640
1733
  case "claude-code":
1641
1734
  case "claude-sdk":
@@ -2307,6 +2400,13 @@ function normalizeString(value) {
2307
2400
  const trimmed = value.trim();
2308
2401
  return trimmed.length > 0 ? trimmed : void 0;
2309
2402
  }
2403
+ function resolveTimestampPlaceholder(value) {
2404
+ if (!value.includes("{timestamp}")) {
2405
+ return value;
2406
+ }
2407
+ const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
2408
+ return value.replaceAll("{timestamp}", timestamp);
2409
+ }
2310
2410
  function normalizeNumber(value, fallback) {
2311
2411
  if (typeof value === "number" && Number.isFinite(value)) {
2312
2412
  return value;
@@ -2331,7 +2431,7 @@ function normalizeOptionalNumber(value) {
2331
2431
  }
2332
2432
  return void 0;
2333
2433
  }
2334
- function normalizeOptions(rawOptions, config) {
2434
+ function normalizeOptions(rawOptions, config, yamlExecution) {
2335
2435
  const cliFormat = normalizeString(rawOptions.outputFormat);
2336
2436
  const configFormat = config?.output?.format;
2337
2437
  const formatStr = cliFormat ?? configFormat ?? "jsonl";
@@ -2382,12 +2482,14 @@ function normalizeOptions(rawOptions, config) {
2382
2482
  maxRetries: cliMaxRetries ?? configMaxRetries ?? 2,
2383
2483
  cache: resolvedCache,
2384
2484
  noCache: resolvedNoCache,
2385
- verbose: normalizeBoolean(rawOptions.verbose),
2386
- keepWorkspaces: normalizeBoolean(rawOptions.keepWorkspaces),
2485
+ // Boolean OR: config `true` cannot be overridden to `false` from CLI.
2486
+ // Intentional — there are no --no-verbose / --no-keep-workspaces flags.
2487
+ verbose: normalizeBoolean(rawOptions.verbose) || yamlExecution?.verbose === true,
2488
+ keepWorkspaces: normalizeBoolean(rawOptions.keepWorkspaces) || yamlExecution?.keep_workspaces === true,
2387
2489
  cleanupWorkspaces: normalizeBoolean(rawOptions.cleanupWorkspaces),
2388
2490
  trace: normalizeBoolean(rawOptions.trace),
2389
- otelFile: normalizeString(rawOptions.otelFile),
2390
- traceFile: normalizeString(rawOptions.traceFile),
2491
+ otelFile: normalizeString(rawOptions.otelFile) ?? (yamlExecution?.otel_file ? resolveTimestampPlaceholder(yamlExecution.otel_file) : void 0),
2492
+ traceFile: normalizeString(rawOptions.traceFile) ?? (yamlExecution?.trace_file ? resolveTimestampPlaceholder(yamlExecution.trace_file) : void 0),
2391
2493
  exportOtel: normalizeBoolean(rawOptions.exportOtel),
2392
2494
  otelBackend: normalizeString(rawOptions.otelBackend),
2393
2495
  otelCaptureContent: normalizeBoolean(rawOptions.otelCaptureContent),
@@ -2672,8 +2774,9 @@ async function runEvalCommand(input) {
2672
2774
  `Warning: Failed to load agentv config: ${err instanceof Error ? err.message : String(err)}`
2673
2775
  );
2674
2776
  }
2675
- const options = normalizeOptions(input.rawOptions, config);
2676
2777
  const repoRoot = await findRepoRoot(cwd);
2778
+ const yamlConfig = await loadConfig(path10.join(cwd, "_"), repoRoot);
2779
+ const options = normalizeOptions(input.rawOptions, config, yamlConfig?.execution);
2677
2780
  if (options.keepWorkspaces && options.cleanupWorkspaces) {
2678
2781
  console.warn(
2679
2782
  "Warning: Both --keep-workspaces and --cleanup-workspaces specified. --cleanup-workspaces takes precedence."
@@ -2686,7 +2789,7 @@ async function runEvalCommand(input) {
2686
2789
  const useFileExport = !!(options.otelFile || options.traceFile);
2687
2790
  if (options.exportOtel || useFileExport) {
2688
2791
  try {
2689
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-BGRU67HI.js");
2792
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-OVEHXEXC.js");
2690
2793
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
2691
2794
  let headers = {};
2692
2795
  if (options.otelBackend) {
@@ -2814,7 +2917,14 @@ async function runEvalCommand(input) {
2814
2917
  progressReporter.addLogPaths([entry.filePath], "pi");
2815
2918
  });
2816
2919
  const seenCopilotLogPaths = /* @__PURE__ */ new Set();
2817
- const unsubscribeCopilotLogs = subscribeToCopilotSdkLogEntries((entry) => {
2920
+ const unsubscribeCopilotSdkLogs = subscribeToCopilotSdkLogEntries((entry) => {
2921
+ if (!entry.filePath || seenCopilotLogPaths.has(entry.filePath)) {
2922
+ return;
2923
+ }
2924
+ seenCopilotLogPaths.add(entry.filePath);
2925
+ progressReporter.addLogPaths([entry.filePath], "copilot");
2926
+ });
2927
+ const unsubscribeCopilotCliLogs = subscribeToCopilotCliLogEntries((entry) => {
2818
2928
  if (!entry.filePath || seenCopilotLogPaths.has(entry.filePath)) {
2819
2929
  return;
2820
2930
  }
@@ -2908,7 +3018,8 @@ Results written to: ${outputPath}`);
2908
3018
  } finally {
2909
3019
  unsubscribeCodexLogs();
2910
3020
  unsubscribePiLogs();
2911
- unsubscribeCopilotLogs();
3021
+ unsubscribeCopilotSdkLogs();
3022
+ unsubscribeCopilotCliLogs();
2912
3023
  await outputWriter.close().catch(() => void 0);
2913
3024
  if (otelExporter) {
2914
3025
  try {
@@ -2949,4 +3060,4 @@ export {
2949
3060
  selectTarget,
2950
3061
  runEvalCommand
2951
3062
  };
2952
- //# sourceMappingURL=chunk-PC3FAOHT.js.map
3063
+ //# sourceMappingURL=chunk-IL7CRMY6.js.map