agentv 2.12.0 → 2.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,7 +25,59 @@ import {
25
25
  subscribeToCopilotCliLogEntries,
26
26
  subscribeToCopilotSdkLogEntries,
27
27
  subscribeToPiLogEntries
28
- } from "./chunk-LUHCYBMD.js";
28
+ } from "./chunk-FSBZM3HT.js";
29
+
30
+ // package.json
31
// Inlined copy of the package manifest (see the "// package.json" marker just above). checkVersion() reads `version` from this object, and the object is re-exported from this chunk as `package_default`.
+ var package_default = {
32
+ name: "agentv",
33
+ version: "2.13.0",
34
+ description: "CLI entry point for AgentV",
35
+ type: "module",
36
+ repository: {
37
+ type: "git",
38
+ url: "https://github.com/EntityProcess/agentv.git"
39
+ },
40
+ homepage: "https://github.com/EntityProcess/agentv#readme",
41
+ bugs: {
42
+ url: "https://github.com/EntityProcess/agentv/issues"
43
+ },
44
+ bin: {
45
+ agentv: "./dist/cli.js"
46
+ },
47
+ files: ["dist", "README.md"],
48
+ scripts: {
49
+ dev: "bun src/cli.ts",
50
+ build: "tsup && bun run copy-readme",
51
+ "copy-readme": `bun -e "import { cpSync } from 'fs'; cpSync('../../README.md', 'README.md')"`,
52
+ prepublishOnly: "bun run copy-readme",
53
+ typecheck: "tsc --noEmit",
54
+ lint: "biome check .",
55
+ format: "biome format --write .",
56
+ fix: "biome check --write .",
57
+ test: "bun test",
58
+ "test:watch": "bun test --watch"
59
+ },
60
+ dependencies: {
61
+ "@anthropic-ai/claude-agent-sdk": "^0.2.49",
62
+ "@github/copilot-sdk": "^0.1.25",
63
+ "@inquirer/prompts": "^8.2.1",
64
+ "@mariozechner/pi-agent-core": "^0.54.2",
65
+ "@mariozechner/pi-ai": "^0.54.2",
66
+ "@openai/codex-sdk": "^0.104.0",
67
+ "cmd-ts": "^0.14.3",
68
+ dotenv: "^16.4.5",
69
+ "fast-glob": "^3.3.3",
70
+ json5: "^2.2.3",
71
+ micromatch: "^4.0.8",
72
+ semver: "^7.7.4",
73
+ yaml: "^2.6.1"
74
+ },
75
+ devDependencies: {
76
+ "@agentv/core": "workspace:*",
77
+ "@types/semver": "^7.7.1",
78
+ execa: "^9.3.0"
79
+ }
80
+ };
29
81
 
30
82
  // src/commands/eval/shared.ts
31
83
  import { constants } from "node:fs";
@@ -152,6 +204,60 @@ import { access as access4 } from "node:fs/promises";
152
204
  import path10 from "node:path";
153
205
  import { pathToFileURL } from "node:url";
154
206
 
207
+ // src/version-check.ts
208
+ import { satisfies, validRange } from "semver";
209
// ANSI SGR escape sequences used by the version-check messages: yellow and red foreground, plus the reset that ends a coloured span.
+ var ANSI_YELLOW = "\x1B[33m";
210
+ var ANSI_RED = "\x1B[31m";
211
+ var ANSI_RESET = "\x1B[0m";
212
/**
 * Compare the bundled agentv version against a required semver range.
 *
 * @param {string} requiredVersion - Semver range taken from `required_version`
 *   in .agentv/config.yaml (e.g. ">=2.11.0").
 * @returns {{ satisfied: boolean, currentVersion: string, requiredRange: string }}
 *   Whether `package_default.version` satisfies the range, plus both inputs
 *   echoed back for message formatting.
 * @throws {Error} When the value is not a string, is blank, or is not a range
 *   that semver's `validRange` accepts.
 */
function checkVersion(requiredVersion) {
  // An unquoted YAML scalar such as `2.10` arrives as the number 2.1, so
  // coercing it would silently check the wrong range. Reject non-strings with
  // an actionable message instead of letting `.trim()` throw a TypeError.
  if (typeof requiredVersion !== "string") {
    throw new Error(
      `Invalid required_version "${String(requiredVersion)}" in .agentv/config.yaml. Must be a quoted semver range string (e.g., ">=2.11.0", "^2.11.0").`
    );
  }
  if (!requiredVersion.trim() || !validRange(requiredVersion)) {
    throw new Error(
      `Invalid required_version "${requiredVersion}" in .agentv/config.yaml. Must be a valid semver range (e.g., ">=2.11.0", "^2.11.0").`
    );
  }
  const currentVersion = package_default.version;
  return {
    satisfied: satisfies(currentVersion, requiredVersion),
    currentVersion,
    requiredRange: requiredVersion
  };
}
225
/**
 * Enforce the project's `required_version` range against the running build.
 *
 * - Invalid range: prints the error and exits with code 1.
 * - Range satisfied: returns silently.
 * - Range not satisfied with `options.strict`: prints the warning and an abort
 *   notice, then exits with code 1.
 * - Range not satisfied on an interactive terminal (stdin and stdout are TTYs):
 *   prints the warning and asks whether to continue; exits with code 1 on "no".
 * - Otherwise: writes the warning to stderr and continues.
 *
 * @param {string} requiredVersion - Semver range from the YAML config.
 * @param {{ strict?: boolean }} [options] - `strict` turns a mismatch into a hard failure.
 * @returns {Promise<void>}
 */
async function enforceRequiredVersion(requiredVersion, options) {
  // Every message below goes to stderr (console.error / console.warn /
  // process.stderr.write), so colour only when stderr is a terminal. This keeps
  // piped and CI logs free of raw escape codes.
  const useColors = process.stderr.isTTY ?? false;
  const paint = (color, text) => (useColors ? `${color}${text}${ANSI_RESET}` : text);

  let result;
  try {
    result = checkVersion(requiredVersion);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.error(paint(ANSI_RED, `Error: ${reason}`));
    process.exit(1);
    // Only reachable when process.exit is stubbed (e.g. in tests); never fall
    // through with `result` undefined.
    return;
  }
  if (result.satisfied) {
    return;
  }

  const headline = paint(
    ANSI_YELLOW,
    `Warning: This project requires agentv ${result.requiredRange} but you have ${result.currentVersion}.`
  );
  // NOTE(review): the diff view this was taken from strips leading whitespace;
  // restore any indent before "Run" if the original message had one.
  const warning = `${headline}\nRun \`agentv self update\` to upgrade.`;

  if (options?.strict) {
    console.error(warning);
    console.error(
      paint(
        ANSI_RED,
        "Aborting: --strict mode requires the installed version to satisfy the required range."
      )
    );
    process.exit(1);
    return;
  }

  if (process.stdin.isTTY && process.stdout.isTTY) {
    console.warn(warning);
    const shouldContinue = await promptContinue();
    if (!shouldContinue) {
      process.exit(1);
    }
    return;
  }

  // Non-interactive run: warn but do not block.
  process.stderr.write(`${warning}\n`);
}
256
/**
 * Ask the user whether to proceed despite the version mismatch.
 *
 * The prompt library is imported lazily, so it is only loaded when a prompt
 * is actually shown.
 *
 * @returns {Promise<boolean>} The user's answer (the prompt's default is
 *   `false`). Cancelling the prompt with Ctrl+C counts as "no".
 */
async function promptContinue() {
  const { confirm } = await import("@inquirer/prompts");
  try {
    return await confirm({ message: "Continue anyway?", default: false });
  } catch (err) {
    // Inquirer rejects with an ExitPromptError when the prompt is force-closed
    // (Ctrl+C). Treat that as declining rather than surfacing a stack trace.
    if (err instanceof Error && err.name === "ExitPromptError") {
      return false;
    }
    throw err;
  }
}
260
+
155
261
  // src/commands/eval/env.ts
156
262
  import { constants as constants3 } from "node:fs";
157
263
  import { access as access3 } from "node:fs/promises";
@@ -822,6 +928,49 @@ var ProgressDisplay = class {
822
928
  }
823
929
  };
824
930
 
931
+ // src/commands/eval/retry-errors.ts
932
+ import { createReadStream } from "node:fs";
933
+ import { createInterface } from "node:readline";
934
/**
 * Stream a JSONL file and yield each line that parses to a JSON object.
 *
 * Blank lines, lines that are not valid JSON, and non-object JSON values are
 * skipped (best-effort reading of a previous run's output). A read error on
 * the underlying file stream is rethrown to the consumer, and the stream is
 * always released, including when the consumer stops early.
 *
 * @param {string} jsonlPath - Path of the JSONL file to read.
 * @returns {AsyncGenerator<object>} Parsed records in file order.
 */
async function* readRetryJsonlRecords(jsonlPath) {
  const input = createReadStream(jsonlPath);
  const rl = createInterface({ input, crlfDelay: Number.POSITIVE_INFINITY });
  // Capture stream failures ourselves so they always surface as a rejection of
  // the calling function rather than depending on readline forwarding them.
  let streamError;
  input.once("error", (err) => {
    streamError = err;
    rl.close();
  });
  try {
    for await (const line of rl) {
      const trimmed = line.trim();
      if (!trimmed) continue;
      let parsed;
      try {
        parsed = JSON.parse(trimmed);
      } catch {
        // Deliberately tolerant: a malformed line must not abort the retry.
        continue;
      }
      if (parsed !== null && typeof parsed === "object") {
        yield parsed;
      }
    }
    if (streamError) throw streamError;
  } finally {
    rl.close();
    input.destroy();
  }
}

/**
 * Collect the IDs of tests whose previous run ended in an execution error.
 *
 * NOTE(review): records are matched on camelCase keys (`executionStatus`,
 * `testId`). This chunk also exports `toSnakeCaseDeep`; if the JSONL writer
 * emits snake_case keys these lookups will never match. Confirm against the
 * writer.
 *
 * @param {string} jsonlPath - Path of the previous run's JSONL output.
 * @returns {Promise<Array>} Unique `testId` values, in first-seen order, of
 *   records with `executionStatus === "execution_error"`.
 */
async function loadErrorTestIds(jsonlPath) {
  const ids = new Set();
  for await (const record of readRetryJsonlRecords(jsonlPath)) {
    if (record.executionStatus === "execution_error" && record.testId) {
      ids.add(record.testId);
    }
  }
  return [...ids];
}

/**
 * Load the results from a previous run that should be preserved on retry.
 *
 * A record is kept when it has a truthy `testId`, a defined `score`, and an
 * `executionStatus` other than "execution_error".
 *
 * NOTE(review): a test that has both an errored row and a non-errored row
 * (for example, one row per target) is re-run by ID and also preserved here.
 * Confirm that the resulting duplicates are acceptable to the caller.
 *
 * @param {string} jsonlPath - Path of the previous run's JSONL output.
 * @returns {Promise<object[]>} Preserved records in file order.
 */
async function loadNonErrorResults(jsonlPath) {
  const results = [];
  for await (const record of readRetryJsonlRecords(jsonlPath)) {
    if (!record.testId || record.score === undefined) continue;
    if (record.executionStatus !== "execution_error") {
      results.push(record);
    }
  }
  return results;
}
973
+
825
974
  // src/commands/eval/statistics.ts
826
975
  var HISTOGRAM_BREAKPOINTS = [0, 0.2, 0.4, 0.6, 0.8, 1];
827
976
  function computeMean(values) {
@@ -2230,9 +2379,9 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
2230
2379
  }
2231
2380
 
2232
2381
  // src/commands/eval/targets.ts
2233
- var ANSI_YELLOW = "\x1B[33m";
2234
- var ANSI_RED = "\x1B[31m";
2235
- var ANSI_RESET = "\x1B[0m";
2382
// Colour codes for the targets-file warning/error output. The values match ANSI_YELLOW / ANSI_RED / ANSI_RESET declared earlier in this chunk; the "2" suffix is presumably the bundler avoiding a name clash with them (confirm).
+ var ANSI_YELLOW2 = "\x1B[33m";
2383
+ var ANSI_RED2 = "\x1B[31m";
2384
+ var ANSI_RESET2 = "\x1B[0m";
2236
2385
  function isTTY() {
2237
2386
  return process.stdout.isTTY ?? false;
2238
2387
  }
@@ -2278,8 +2427,8 @@ async function selectTarget(options) {
2278
2427
  Warnings in ${targetsFilePath}:`);
2279
2428
  for (const warning of warnings) {
2280
2429
  const location = warning.location ? ` [${warning.location}]` : "";
2281
- const prefix = useColors ? `${ANSI_YELLOW} \u26A0${ANSI_RESET}` : " \u26A0";
2282
- const message = useColors ? `${ANSI_YELLOW}${warning.message}${ANSI_RESET}` : warning.message;
2430
+ const prefix = useColors ? `${ANSI_YELLOW2} \u26A0${ANSI_RESET2}` : " \u26A0";
2431
+ const message = useColors ? `${ANSI_YELLOW2}${warning.message}${ANSI_RESET2}` : warning.message;
2283
2432
  console.warn(`${prefix}${location} ${message}`);
2284
2433
  }
2285
2434
  console.warn("");
@@ -2290,8 +2439,8 @@ Warnings in ${targetsFilePath}:`);
2290
2439
  Errors in ${targetsFilePath}:`);
2291
2440
  for (const error of errors) {
2292
2441
  const location = error.location ? ` [${error.location}]` : "";
2293
- const prefix = useColors ? `${ANSI_RED} \u2717${ANSI_RESET}` : " \u2717";
2294
- const message = useColors ? `${ANSI_RED}${error.message}${ANSI_RESET}` : error.message;
2442
+ const prefix = useColors ? `${ANSI_RED2} \u2717${ANSI_RESET2}` : " \u2717";
2443
+ const message = useColors ? `${ANSI_RED2}${error.message}${ANSI_RESET2}` : error.message;
2295
2444
  console.error(`${prefix}${location} ${message}`);
2296
2445
  }
2297
2446
  throw new Error(`Targets file validation failed with ${errors.length} error(s)`);
@@ -2369,8 +2518,8 @@ async function selectMultipleTargets(options) {
2369
2518
  Warnings in ${targetsFilePath}:`);
2370
2519
  for (const warning of warnings) {
2371
2520
  const location = warning.location ? ` [${warning.location}]` : "";
2372
- const prefix = useColors ? `${ANSI_YELLOW} \u26A0${ANSI_RESET}` : " \u26A0";
2373
- const message = useColors ? `${ANSI_YELLOW}${warning.message}${ANSI_RESET}` : warning.message;
2521
+ const prefix = useColors ? `${ANSI_YELLOW2} \u26A0${ANSI_RESET2}` : " \u26A0";
2522
+ const message = useColors ? `${ANSI_YELLOW2}${warning.message}${ANSI_RESET2}` : warning.message;
2374
2523
  console.warn(`${prefix}${location} ${message}`);
2375
2524
  }
2376
2525
  console.warn("");
@@ -2381,8 +2530,8 @@ Warnings in ${targetsFilePath}:`);
2381
2530
  Errors in ${targetsFilePath}:`);
2382
2531
  for (const error of errors) {
2383
2532
  const location = error.location ? ` [${error.location}]` : "";
2384
- const prefix = useColors ? `${ANSI_RED} \u2717${ANSI_RESET}` : " \u2717";
2385
- const message = useColors ? `${ANSI_RED}${error.message}${ANSI_RESET}` : error.message;
2533
+ const prefix = useColors ? `${ANSI_RED2} \u2717${ANSI_RESET2}` : " \u2717";
2534
+ const message = useColors ? `${ANSI_RED2}${error.message}${ANSI_RESET2}` : error.message;
2386
2535
  console.error(`${prefix}${location} ${message}`);
2387
2536
  }
2388
2537
  throw new Error(`Targets file validation failed with ${errors.length} error(s)`);
@@ -2543,7 +2692,8 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
2543
2692
  exportOtel: normalizeBoolean(rawOptions.exportOtel),
2544
2693
  otelBackend: normalizeString(rawOptions.otelBackend),
2545
2694
  otelCaptureContent: normalizeBoolean(rawOptions.otelCaptureContent),
2546
- otelGroupTurns: normalizeBoolean(rawOptions.otelGroupTurns)
2695
+ otelGroupTurns: normalizeBoolean(rawOptions.otelGroupTurns),
2696
+ retryErrors: normalizeString(rawOptions.retryErrors)
2547
2697
  };
2548
2698
  }
2549
2699
  async function ensureFileExists(filePath, description) {
@@ -2677,7 +2827,8 @@ async function prepareFileMetadata(params) {
2677
2827
  suiteTargets,
2678
2828
  yamlCache: suite.cacheConfig?.enabled,
2679
2829
  yamlCachePath: suite.cacheConfig?.cachePath,
2680
- totalBudgetUsd: suite.totalBudgetUsd
2830
+ totalBudgetUsd: suite.totalBudgetUsd,
2831
+ failOnError: suite.failOnError
2681
2832
  };
2682
2833
  }
2683
2834
  async function runWithLimit(items, limit, task) {
@@ -2711,7 +2862,8 @@ async function runSingleEvalFile(params) {
2711
2862
  evalCases,
2712
2863
  trialsConfig,
2713
2864
  matrixMode,
2714
- totalBudgetUsd
2865
+ totalBudgetUsd,
2866
+ failOnError
2715
2867
  } = params;
2716
2868
  const targetName = selection.targetName;
2717
2869
  await ensureFileExists(testFilePath, "Test file");
@@ -2773,6 +2925,7 @@ async function runSingleEvalFile(params) {
2773
2925
  cleanupWorkspaces: options.cleanupWorkspaces,
2774
2926
  trials: trialsConfig,
2775
2927
  totalBudgetUsd,
2928
+ failOnError,
2776
2929
  streamCallbacks: streamingObserver?.getStreamCallbacks(),
2777
2930
  onResult: async (result) => {
2778
2931
  streamingObserver?.finalizeEvalCase(result.score, result.error);
@@ -2826,7 +2979,26 @@ async function runEvalCommand(input) {
2826
2979
  }
2827
2980
  const repoRoot = await findRepoRoot(cwd);
2828
2981
  const yamlConfig = await loadConfig(path10.join(cwd, "_"), repoRoot);
2829
- const options = normalizeOptions(input.rawOptions, config, yamlConfig?.execution);
2982
+ if (yamlConfig?.required_version) {
2983
+ await enforceRequiredVersion(yamlConfig.required_version, {
2984
+ strict: normalizeBoolean(input.rawOptions.strict)
2985
+ });
2986
+ }
2987
+ let options = normalizeOptions(input.rawOptions, config, yamlConfig?.execution);
2988
+ let retryNonErrorResults;
2989
+ if (options.retryErrors) {
2990
+ const retryPath = path10.resolve(options.retryErrors);
2991
+ await ensureFileExists(retryPath, "Retry-errors JSONL file");
2992
+ const errorIds = await loadErrorTestIds(retryPath);
2993
+ if (errorIds.length === 0) {
2994
+ console.log("No execution errors found in the previous output. Nothing to retry.");
2995
+ return;
2996
+ }
2997
+ console.log(`Retrying ${errorIds.length} execution-error test(s): ${errorIds.join(", ")}`);
2998
+ const filterPattern = errorIds.length === 1 ? errorIds[0] : `{${errorIds.join(",")}}`;
2999
+ options = { ...options, filter: filterPattern };
3000
+ retryNonErrorResults = await loadNonErrorResults(retryPath);
3001
+ }
2830
3002
  if (options.keepWorkspaces && options.cleanupWorkspaces) {
2831
3003
  console.warn(
2832
3004
  "Warning: Both --keep-workspaces and --cleanup-workspaces specified. --cleanup-workspaces takes precedence."
@@ -2839,7 +3011,7 @@ async function runEvalCommand(input) {
2839
3011
  const useFileExport = !!(options.otelFile || options.traceFile);
2840
3012
  if (options.exportOtel || useFileExport) {
2841
3013
  try {
2842
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-OPPA4P5R.js");
3014
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-CCUHG3SN.js");
2843
3015
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
2844
3016
  let headers = {};
2845
3017
  if (options.otelBackend) {
@@ -3034,12 +3206,22 @@ async function runEvalCommand(input) {
3034
3206
  evalCases: applicableEvalCases,
3035
3207
  trialsConfig: targetPrep.trialsConfig,
3036
3208
  matrixMode: targetPrep.selections.length > 1,
3037
- totalBudgetUsd: targetPrep.totalBudgetUsd
3209
+ totalBudgetUsd: targetPrep.totalBudgetUsd,
3210
+ failOnError: targetPrep.failOnError
3038
3211
  });
3039
3212
  allResults.push(...result.results);
3040
3213
  }
3041
3214
  });
3042
3215
  progressReporter.finish();
3216
+ if (retryNonErrorResults && retryNonErrorResults.length > 0) {
3217
+ for (const preserved of retryNonErrorResults) {
3218
+ await outputWriter.append(preserved);
3219
+ }
3220
+ allResults.push(...retryNonErrorResults);
3221
+ console.log(
3222
+ `Merged ${retryNonErrorResults.length} non-error result(s) from previous output.`
3223
+ );
3224
+ }
3043
3225
  const summary = calculateEvaluationSummary(allResults);
3044
3226
  console.log(formatEvaluationSummary(summary));
3045
3227
  if (isMatrixMode && allResults.length > 0) {
@@ -3097,6 +3279,7 @@ async function resolveEvaluationRunner() {
3097
3279
  }
3098
3280
 
3099
3281
  export {
3282
+ package_default,
3100
3283
  toSnakeCaseDeep,
3101
3284
  resolveEvalPaths,
3102
3285
  findRepoRoot,
@@ -3110,4 +3293,4 @@ export {
3110
3293
  selectTarget,
3111
3294
  runEvalCommand
3112
3295
  };
3113
- //# sourceMappingURL=chunk-YBJX5CP6.js.map
3296
+ //# sourceMappingURL=chunk-UWDI4UVN.js.map