agentv 2.15.0 → 2.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,14 +6,13 @@ import {
6
6
  resolveEvalPaths,
7
7
  runEvalCommand,
8
8
  selectTarget,
9
- toSnakeCaseDeep as toSnakeCaseDeep2,
9
+ toSnakeCaseDeep,
10
10
  validateConfigFile,
11
11
  validateEvalFile,
12
12
  validateFileReferences,
13
13
  validateTargetsFile
14
- } from "./chunk-ZDSLKUCM.js";
14
+ } from "./chunk-QLCVA3ZS.js";
15
15
  import {
16
- RepoManager,
17
16
  assembleLlmJudgePrompt,
18
17
  buildPromptInputs,
19
18
  createBuiltinRegistry,
@@ -21,15 +20,14 @@ import {
21
20
  executeScript,
22
21
  generateRubrics,
23
22
  getAgentvHome,
24
- getGitCacheRoot,
25
23
  getWorkspacePoolRoot,
26
24
  loadTestById,
27
25
  loadTests,
28
26
  normalizeLineEndings,
29
27
  toCamelCaseDeep,
30
- toSnakeCaseDeep,
28
+ toSnakeCaseDeep as toSnakeCaseDeep2,
31
29
  trimBaselineResult
32
- } from "./chunk-VBK7BJLE.js";
30
+ } from "./chunk-LZ5MPQFM.js";
33
31
  import {
34
32
  __commonJS,
35
33
  __esm,
@@ -2879,90 +2877,6 @@ function oneOf(literals) {
2879
2877
  };
2880
2878
  }
2881
2879
 
2882
- // src/commands/cache/add.ts
2883
- import { existsSync } from "node:fs";
2884
- import { join, resolve } from "node:path";
2885
- var addCommand = command({
2886
- name: "add",
2887
- description: "Seed cache from a local git repository",
2888
- args: {
2889
- url: option({
2890
- long: "url",
2891
- description: "Remote URL to associate with the cache entry",
2892
- type: string
2893
- }),
2894
- from: option({
2895
- long: "from",
2896
- description: "Path to local git repository to clone from",
2897
- type: string
2898
- }),
2899
- force: flag({
2900
- long: "force",
2901
- short: "f",
2902
- description: "Overwrite existing cache entry"
2903
- })
2904
- },
2905
- handler: async ({ url, from, force }) => {
2906
- const localPath = resolve(from);
2907
- if (!existsSync(localPath)) {
2908
- console.error(`Error: local path does not exist: ${localPath}`);
2909
- process.exit(1);
2910
- }
2911
- if (!existsSync(join(localPath, ".git")) && !existsSync(join(localPath, "HEAD"))) {
2912
- console.error(`Error: ${localPath} does not appear to be a git repository`);
2913
- process.exit(1);
2914
- }
2915
- const manager = new RepoManager();
2916
- try {
2917
- const cachePath = await manager.seedCache(localPath, url, { force });
2918
- console.log(`Cache seeded from ${localPath}`);
2919
- console.log(` Remote URL: ${url}`);
2920
- console.log(` Cache path: ${cachePath}`);
2921
- } catch (err2) {
2922
- console.error(`Error: ${err2 instanceof Error ? err2.message : err2}`);
2923
- process.exit(1);
2924
- }
2925
- }
2926
- });
2927
-
2928
- // src/commands/cache/index.ts
2929
- var cleanCommand = command({
2930
- name: "clean",
2931
- description: "Remove all cached git repositories",
2932
- args: {
2933
- force: flag({
2934
- long: "force",
2935
- short: "f",
2936
- description: "Skip confirmation prompt"
2937
- })
2938
- },
2939
- handler: async ({ force }) => {
2940
- if (!force) {
2941
- const readline2 = await import("node:readline");
2942
- const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
2943
- const answer = await new Promise((resolve2) => {
2944
- rl.question(`Remove all cached git repos from ${getGitCacheRoot()}? [y/N] `, resolve2);
2945
- });
2946
- rl.close();
2947
- if (answer.toLowerCase() !== "y") {
2948
- console.log("Cancelled.");
2949
- return;
2950
- }
2951
- }
2952
- const manager = new RepoManager();
2953
- await manager.cleanCache();
2954
- console.log("Cache cleaned.");
2955
- }
2956
- });
2957
- var cacheCommand = subcommands({
2958
- name: "cache",
2959
- description: "Manage AgentV cache",
2960
- cmds: {
2961
- add: addCommand,
2962
- clean: cleanCommand
2963
- }
2964
- });
2965
-
2966
2880
  // src/commands/compare/index.ts
2967
2881
  import { readFileSync } from "node:fs";
2968
2882
  var colors = {
@@ -3306,7 +3220,7 @@ var compareCommand = command({
3306
3220
  const results2 = loadJsonlResults(results[1]);
3307
3221
  const comparison = compareResults(results1, results2, effectiveThreshold);
3308
3222
  if (outputFormat === "json") {
3309
- console.log(JSON.stringify(toSnakeCaseDeep2(comparison), null, 2));
3223
+ console.log(JSON.stringify(toSnakeCaseDeep(comparison), null, 2));
3310
3224
  } else {
3311
3225
  console.log(formatTable(comparison, results[0], results[1]));
3312
3226
  }
@@ -3352,7 +3266,7 @@ var compareCommand = command({
3352
3266
  }
3353
3267
  const comparison = compareResults(baselineResults, candidateResults, effectiveThreshold);
3354
3268
  if (outputFormat === "json") {
3355
- console.log(JSON.stringify(toSnakeCaseDeep2(comparison), null, 2));
3269
+ console.log(JSON.stringify(toSnakeCaseDeep(comparison), null, 2));
3356
3270
  } else {
3357
3271
  console.log(formatTable(comparison, baseline, candidate));
3358
3272
  }
@@ -3361,7 +3275,7 @@ var compareCommand = command({
3361
3275
  } else {
3362
3276
  const matrixOutput = compareMatrix(groups, effectiveThreshold);
3363
3277
  if (outputFormat === "json") {
3364
- console.log(JSON.stringify(toSnakeCaseDeep2(matrixOutput), null, 2));
3278
+ console.log(JSON.stringify(toSnakeCaseDeep(matrixOutput), null, 2));
3365
3279
  } else {
3366
3280
  console.log(formatMatrix(matrixOutput, baseline));
3367
3281
  }
@@ -3804,7 +3718,7 @@ async function processEvaluator(config, evalCase, candidate, promptInputs) {
3804
3718
  config: codeConfig.config ?? null
3805
3719
  };
3806
3720
  try {
3807
- const inputPayload = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
3721
+ const inputPayload = JSON.stringify(toSnakeCaseDeep2(payload), null, 2);
3808
3722
  const stdout = await executeScript(script, inputPayload, 6e4, scriptCwd);
3809
3723
  const parsed = JSON.parse(stdout);
3810
3724
  return {
@@ -4053,13 +3967,42 @@ var evalRunCommand = command({
4053
3967
  }),
4054
3968
  poolWorkspaces: flag({
4055
3969
  long: "pool-workspaces",
4056
- description: "Reuse materialized workspaces across eval runs"
3970
+ description: "Enable workspace pooling (default for shared workspaces with repos)"
3971
+ }),
3972
+ noPool: flag({
3973
+ long: "no-pool",
3974
+ description: "Disable workspace pooling (clone fresh each run)"
4057
3975
  }),
4058
3976
  workspace: option({
4059
3977
  type: optional(string),
4060
3978
  long: "workspace",
4061
3979
  description: "Use an existing directory as the workspace directly (skips clone/copy/pool)"
4062
3980
  }),
3981
+ workspaceMode: option({
3982
+ type: optional(string),
3983
+ long: "workspace-mode",
3984
+ description: "Workspace mode: 'pooled', 'ephemeral', or 'static'"
3985
+ }),
3986
+ workspacePath: option({
3987
+ type: optional(string),
3988
+ long: "workspace-path",
3989
+ description: "Static workspace directory path (used when workspace mode is static)"
3990
+ }),
3991
+ workspaceClean: option({
3992
+ type: optional(string),
3993
+ long: "workspace-clean",
3994
+ description: "Pooled reset clean mode: 'standard' or 'full'"
3995
+ }),
3996
+ retainOnSuccess: option({
3997
+ type: optional(string),
3998
+ long: "retain-on-success",
3999
+ description: "Workspace retention on success: 'keep' or 'cleanup'"
4000
+ }),
4001
+ retainOnFailure: option({
4002
+ type: optional(string),
4003
+ long: "retain-on-failure",
4004
+ description: "Workspace retention on failure: 'keep' or 'cleanup'"
4005
+ }),
4063
4006
  otelFile: option({
4064
4007
  type: optional(string),
4065
4008
  long: "otel-file",
@@ -4099,7 +4042,7 @@ var evalRunCommand = command({
4099
4042
  },
4100
4043
  handler: async (args) => {
4101
4044
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4102
- const { launchInteractiveWizard } = await import("./interactive-SSGXAAKA.js");
4045
+ const { launchInteractiveWizard } = await import("./interactive-ZXYNPRCT.js");
4103
4046
  await launchInteractiveWizard();
4104
4047
  return;
4105
4048
  }
@@ -4124,7 +4067,13 @@ var evalRunCommand = command({
4124
4067
  keepWorkspaces: args.keepWorkspaces,
4125
4068
  cleanupWorkspaces: args.cleanupWorkspaces,
4126
4069
  poolWorkspaces: args.poolWorkspaces,
4070
+ noPool: args.noPool,
4127
4071
  workspace: args.workspace,
4072
+ workspaceMode: args.workspaceMode,
4073
+ workspacePath: args.workspacePath,
4074
+ workspaceClean: args.workspaceClean,
4075
+ retainOnSuccess: args.retainOnSuccess,
4076
+ retainOnFailure: args.retainOnFailure,
4128
4077
  trace: false,
4129
4078
  otelFile: args.otelFile,
4130
4079
  traceFile: args.traceFile,
@@ -4321,7 +4270,7 @@ var generateCommand = subcommands({
4321
4270
  });
4322
4271
 
4323
4272
  // src/commands/init/index.ts
4324
- import { existsSync as existsSync2, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
4273
+ import { existsSync, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
4325
4274
  import path5 from "node:path";
4326
4275
  import * as readline from "node:readline/promises";
4327
4276
 
@@ -4391,14 +4340,14 @@ async function initCommand(options = {}) {
4391
4340
  const existingFiles = [];
4392
4341
  if (envTemplate) {
4393
4342
  const envFilePath = path5.join(targetPath, ".env.example");
4394
- if (existsSync2(envFilePath)) {
4343
+ if (existsSync(envFilePath)) {
4395
4344
  existingFiles.push(".env.example");
4396
4345
  }
4397
4346
  }
4398
- if (existsSync2(agentvDir)) {
4347
+ if (existsSync(agentvDir)) {
4399
4348
  for (const template of otherAgentvTemplates) {
4400
4349
  const targetFilePath = path5.join(agentvDir, template.path);
4401
- if (existsSync2(targetFilePath)) {
4350
+ if (existsSync(targetFilePath)) {
4402
4351
  existingFiles.push(path5.relative(targetPath, targetFilePath));
4403
4352
  }
4404
4353
  }
@@ -4417,7 +4366,7 @@ async function initCommand(options = {}) {
4417
4366
  }
4418
4367
  console.log();
4419
4368
  }
4420
- if (!existsSync2(agentvDir)) {
4369
+ if (!existsSync(agentvDir)) {
4421
4370
  mkdirSync(agentvDir, { recursive: true });
4422
4371
  }
4423
4372
  if (envTemplate) {
@@ -4428,7 +4377,7 @@ async function initCommand(options = {}) {
4428
4377
  for (const template of otherAgentvTemplates) {
4429
4378
  const targetFilePath = path5.join(agentvDir, template.path);
4430
4379
  const targetDirPath = path5.dirname(targetFilePath);
4431
- if (!existsSync2(targetDirPath)) {
4380
+ if (!existsSync(targetDirPath)) {
4432
4381
  mkdirSync(targetDirPath, { recursive: true });
4433
4382
  }
4434
4383
  writeFileSync2(targetFilePath, template.content, "utf-8");
@@ -4482,7 +4431,7 @@ function detectPackageManager() {
4482
4431
  return detectPackageManagerFromPath(process.argv[1] ?? "");
4483
4432
  }
4484
4433
  function runCommand(cmd, args) {
4485
- return new Promise((resolve2, reject) => {
4434
+ return new Promise((resolve, reject) => {
4486
4435
  const child = spawn(cmd, args, { stdio: ["inherit", "pipe", "inherit"], shell: true });
4487
4436
  let stdout = "";
4488
4437
  child.stdout?.on("data", (data) => {
@@ -4490,7 +4439,7 @@ function runCommand(cmd, args) {
4490
4439
  stdout += data.toString();
4491
4440
  });
4492
4441
  child.on("error", reject);
4493
- child.on("close", (code) => resolve2({ exitCode: code ?? 1, stdout }));
4442
+ child.on("close", (code) => resolve({ exitCode: code ?? 1, stdout }));
4494
4443
  });
4495
4444
  }
4496
4445
  var updateCommand = command({
@@ -4719,7 +4668,7 @@ var traceListCommand = command({
4719
4668
  try {
4720
4669
  const metas = listResultFiles(cwd, limit);
4721
4670
  if (outputFormat === "json") {
4722
- console.log(JSON.stringify(toSnakeCaseDeep2(metas), null, 2));
4671
+ console.log(JSON.stringify(toSnakeCaseDeep(metas), null, 2));
4723
4672
  } else {
4724
4673
  console.log(formatListTable(metas));
4725
4674
  }
@@ -5386,7 +5335,7 @@ var traceStatsCommand = command({
5386
5335
  const groups = groupResults(results, groupBy2);
5387
5336
  if (outputFormat === "json") {
5388
5337
  const statsJson = computeStatsJson(groups, file);
5389
- console.log(JSON.stringify(toSnakeCaseDeep2(statsJson), null, 2));
5338
+ console.log(JSON.stringify(toSnakeCaseDeep(statsJson), null, 2));
5390
5339
  } else {
5391
5340
  console.log(formatStatsTable(groups, file));
5392
5341
  }
@@ -5435,7 +5384,7 @@ var trimCommand = command({
5435
5384
  const record = JSON.parse(line);
5436
5385
  const camel = toCamelCaseDeep(record);
5437
5386
  const trimmed = trimBaselineResult(camel);
5438
- const snake = toSnakeCaseDeep(trimmed);
5387
+ const snake = toSnakeCaseDeep2(trimmed);
5439
5388
  return JSON.stringify(snake);
5440
5389
  });
5441
5390
  const output = `${trimmedLines.join("\n")}
@@ -5658,19 +5607,19 @@ var validateCommand = command({
5658
5607
  });
5659
5608
 
5660
5609
  // src/commands/workspace/clean.ts
5661
- import { existsSync as existsSync3 } from "node:fs";
5610
+ import { existsSync as existsSync2 } from "node:fs";
5662
5611
  import { readFile as readFile3, readdir as readdir2, rm } from "node:fs/promises";
5663
5612
  import path8 from "node:path";
5664
5613
  async function confirm(message) {
5665
5614
  const readline2 = await import("node:readline");
5666
5615
  const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
5667
- const answer = await new Promise((resolve2) => {
5668
- rl.question(`${message} [y/N] `, resolve2);
5616
+ const answer = await new Promise((resolve) => {
5617
+ rl.question(`${message} [y/N] `, resolve);
5669
5618
  });
5670
5619
  rl.close();
5671
5620
  return answer.toLowerCase() === "y";
5672
5621
  }
5673
- var cleanCommand2 = command({
5622
+ var cleanCommand = command({
5674
5623
  name: "clean",
5675
5624
  description: "Remove workspace pool entries",
5676
5625
  args: {
@@ -5687,7 +5636,7 @@ var cleanCommand2 = command({
5687
5636
  },
5688
5637
  handler: async ({ repo, force }) => {
5689
5638
  const poolRoot = getWorkspacePoolRoot();
5690
- if (!existsSync3(poolRoot)) {
5639
+ if (!existsSync2(poolRoot)) {
5691
5640
  console.log("No workspace pool entries found.");
5692
5641
  return;
5693
5642
  }
@@ -5746,7 +5695,7 @@ var cleanCommand2 = command({
5746
5695
  });
5747
5696
 
5748
5697
  // src/commands/workspace/list.ts
5749
- import { existsSync as existsSync4 } from "node:fs";
5698
+ import { existsSync as existsSync3 } from "node:fs";
5750
5699
  import { readFile as readFile4, readdir as readdir3, stat as stat2 } from "node:fs/promises";
5751
5700
  import path9 from "node:path";
5752
5701
  async function getDirectorySize(dirPath) {
@@ -5778,7 +5727,7 @@ var listCommand = command({
5778
5727
  args: {},
5779
5728
  handler: async () => {
5780
5729
  const poolRoot = getWorkspacePoolRoot();
5781
- if (!existsSync4(poolRoot)) {
5730
+ if (!existsSync3(poolRoot)) {
5782
5731
  console.log("No workspace pool entries found.");
5783
5732
  return;
5784
5733
  }
@@ -5827,20 +5776,20 @@ var workspaceCommand = subcommands({
5827
5776
  description: "Manage workspace pool",
5828
5777
  cmds: {
5829
5778
  list: listCommand,
5830
- clean: cleanCommand2
5779
+ clean: cleanCommand
5831
5780
  }
5832
5781
  });
5833
5782
 
5834
5783
  // src/update-check.ts
5835
5784
  import { spawn as spawn2 } from "node:child_process";
5836
5785
  import { readFile as readFile5 } from "node:fs/promises";
5837
- import { join as join2 } from "node:path";
5786
+ import { join } from "node:path";
5838
5787
  var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
5839
5788
  var AGENTV_DIR = getAgentvHome();
5840
5789
  var CACHE_FILE = "version-check.json";
5841
5790
  var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
5842
5791
  async function getCachedUpdateInfo(path10) {
5843
- const filePath = path10 ?? join2(AGENTV_DIR, CACHE_FILE);
5792
+ const filePath = path10 ?? join(AGENTV_DIR, CACHE_FILE);
5844
5793
  try {
5845
5794
  const raw = await readFile5(filePath, "utf-8");
5846
5795
  const data = JSON.parse(raw);
@@ -5874,7 +5823,7 @@ function buildNotice(currentVersion, latestVersion) {
5874
5823
  }
5875
5824
  function backgroundUpdateCheck() {
5876
5825
  const dir = AGENTV_DIR;
5877
- const filePath = join2(dir, CACHE_FILE);
5826
+ const filePath = join(dir, CACHE_FILE);
5878
5827
  const script = `
5879
5828
  const https = require('https');
5880
5829
  const fs = require('fs');
@@ -5923,7 +5872,6 @@ var app = subcommands({
5923
5872
  description: "AgentV CLI",
5924
5873
  version: package_default.version,
5925
5874
  cmds: {
5926
- cache: cacheCommand,
5927
5875
  eval: evalRunCommand,
5928
5876
  prompt: evalPromptCommand,
5929
5877
  compare: compareCommand,
@@ -5981,4 +5929,4 @@ export {
5981
5929
  preprocessArgv,
5982
5930
  runCli
5983
5931
  };
5984
- //# sourceMappingURL=chunk-IKGJTJSU.js.map
5932
+ //# sourceMappingURL=chunk-JZ62HLUC.js.map