@agentv/core 2.15.0 → 2.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -17,7 +17,7 @@ import {
17
17
  readTextFile,
18
18
  resolveFileReference,
19
19
  resolveTargetDefinition
20
- } from "./chunk-N55K52OO.js";
20
+ } from "./chunk-CPPYERD2.js";
21
21
  import {
22
22
  OtlpJsonFileExporter
23
23
  } from "./chunk-HFSYZHGF.js";
@@ -3114,17 +3114,32 @@ function parseRepoConfig(raw) {
3114
3114
  ...clone !== void 0 && { clone }
3115
3115
  };
3116
3116
  }
3117
- function parseResetConfig(raw) {
3117
+ function parseWorkspaceHookConfig(raw, evalFileDir) {
3118
3118
  if (!isJsonObject(raw)) return void 0;
3119
+ const script = parseWorkspaceScriptConfig(raw, evalFileDir);
3119
3120
  const obj = raw;
3120
- const strategy = obj.strategy === "none" || obj.strategy === "hard" || obj.strategy === "recreate" ? obj.strategy : void 0;
3121
- const afterEach = typeof obj.after_each === "boolean" ? obj.after_each : void 0;
3122
- if (!strategy && afterEach === void 0) return void 0;
3121
+ const reset = obj.reset === "none" || obj.reset === "fast" || obj.reset === "strict" ? obj.reset : void 0;
3122
+ if (!script && !reset) return void 0;
3123
3123
  return {
3124
- ...strategy !== void 0 && { strategy },
3125
- ...afterEach !== void 0 && { after_each: afterEach }
3124
+ ...script ?? {},
3125
+ ...reset !== void 0 && { reset }
3126
3126
  };
3127
3127
  }
3128
+ function parseWorkspaceHooksConfig(raw, evalFileDir) {
3129
+ if (!isJsonObject(raw)) return void 0;
3130
+ const obj = raw;
3131
+ const beforeAll = parseWorkspaceHookConfig(obj.before_all, evalFileDir);
3132
+ const beforeEach = parseWorkspaceHookConfig(obj.before_each, evalFileDir);
3133
+ const afterEach = parseWorkspaceHookConfig(obj.after_each, evalFileDir);
3134
+ const afterAll = parseWorkspaceHookConfig(obj.after_all, evalFileDir);
3135
+ const hooks = {
3136
+ ...beforeAll !== void 0 && { before_all: beforeAll },
3137
+ ...beforeEach !== void 0 && { before_each: beforeEach },
3138
+ ...afterEach !== void 0 && { after_each: afterEach },
3139
+ ...afterAll !== void 0 && { after_all: afterAll }
3140
+ };
3141
+ return Object.keys(hooks).length > 0 ? hooks : void 0;
3142
+ }
3128
3143
  async function resolveWorkspaceConfig(raw, evalFileDir) {
3129
3144
  if (typeof raw === "string") {
3130
3145
  const workspaceFilePath = path8.resolve(evalFileDir, raw);
@@ -3154,37 +3169,48 @@ function parseWorkspaceConfig(raw, evalFileDir) {
3154
3169
  }
3155
3170
  const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
3156
3171
  const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
3157
- const reset = parseResetConfig(obj.reset);
3158
- const beforeAll = parseWorkspaceScriptConfig(obj.before_all, evalFileDir);
3159
- const afterAll = parseWorkspaceScriptConfig(obj.after_all, evalFileDir);
3160
- const beforeEach = parseWorkspaceScriptConfig(obj.before_each, evalFileDir);
3161
- const afterEach = parseWorkspaceScriptConfig(obj.after_each, evalFileDir);
3162
- if (!template && !isolation && !repos && !reset && !beforeAll && !afterAll && !beforeEach && !afterEach)
3172
+ const hooks = parseWorkspaceHooksConfig(obj.hooks, evalFileDir);
3173
+ const mode = obj.mode === "pooled" || obj.mode === "ephemeral" || obj.mode === "static" ? obj.mode : void 0;
3174
+ const staticPath = typeof obj.static_path === "string" ? obj.static_path : void 0;
3175
+ const pool = typeof obj.pool === "boolean" ? obj.pool : void 0;
3176
+ if (!template && !isolation && !repos && !hooks && !mode && !staticPath && pool === void 0)
3163
3177
  return void 0;
3164
3178
  return {
3165
3179
  ...template !== void 0 && { template },
3166
3180
  ...isolation !== void 0 && { isolation },
3167
3181
  ...repos !== void 0 && { repos },
3168
- ...reset !== void 0 && { reset },
3169
- ...beforeAll !== void 0 && { before_all: beforeAll },
3170
- ...afterAll !== void 0 && { after_all: afterAll },
3171
- ...beforeEach !== void 0 && { before_each: beforeEach },
3172
- ...afterEach !== void 0 && { after_each: afterEach }
3182
+ ...hooks !== void 0 && { hooks },
3183
+ ...mode !== void 0 && { mode },
3184
+ ...staticPath !== void 0 && { static_path: staticPath },
3185
+ ...pool !== void 0 && { pool }
3173
3186
  };
3174
3187
  }
3175
3188
  function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
3176
3189
  if (!suiteLevel && !caseLevel) return void 0;
3177
3190
  if (!suiteLevel) return caseLevel;
3178
3191
  if (!caseLevel) return suiteLevel;
3192
+ const mergeHook = (suiteHook, caseHook) => {
3193
+ if (!suiteHook && !caseHook) return void 0;
3194
+ return {
3195
+ ...suiteHook ?? {},
3196
+ ...caseHook ?? {}
3197
+ };
3198
+ };
3199
+ const mergedHooks = {
3200
+ before_all: mergeHook(suiteLevel.hooks?.before_all, caseLevel.hooks?.before_all),
3201
+ before_each: mergeHook(suiteLevel.hooks?.before_each, caseLevel.hooks?.before_each),
3202
+ after_each: mergeHook(suiteLevel.hooks?.after_each, caseLevel.hooks?.after_each),
3203
+ after_all: mergeHook(suiteLevel.hooks?.after_all, caseLevel.hooks?.after_all)
3204
+ };
3205
+ const hasHooks = Object.values(mergedHooks).some((hook) => hook !== void 0);
3179
3206
  return {
3180
3207
  template: caseLevel.template ?? suiteLevel.template,
3181
3208
  isolation: caseLevel.isolation ?? suiteLevel.isolation,
3182
3209
  repos: caseLevel.repos ?? suiteLevel.repos,
3183
- reset: caseLevel.reset ?? suiteLevel.reset,
3184
- before_all: caseLevel.before_all ?? suiteLevel.before_all,
3185
- after_all: caseLevel.after_all ?? suiteLevel.after_all,
3186
- before_each: caseLevel.before_each ?? suiteLevel.before_each,
3187
- after_each: caseLevel.after_each ?? suiteLevel.after_each
3210
+ ...hasHooks && { hooks: mergedHooks },
3211
+ mode: caseLevel.mode ?? suiteLevel.mode,
3212
+ static_path: caseLevel.static_path ?? suiteLevel.static_path,
3213
+ pool: caseLevel.pool ?? suiteLevel.pool
3188
3214
  };
3189
3215
  }
3190
3216
  function asString6(value) {
@@ -5304,6 +5330,7 @@ var CopilotCliProvider = class {
5304
5330
  const agentProcess = spawn(executable, args, {
5305
5331
  stdio: ["pipe", "pipe", "inherit"]
5306
5332
  });
5333
+ await waitForProcessSpawn(agentProcess, executable, this.targetName);
5307
5334
  const toolCallsInProgress = /* @__PURE__ */ new Map();
5308
5335
  const completedToolCalls = [];
5309
5336
  let finalContent = "";
@@ -5583,6 +5610,47 @@ var CopilotCliProvider = class {
5583
5610
  }
5584
5611
  }
5585
5612
  };
5613
+ async function waitForProcessSpawn(proc, executable, targetName) {
5614
+ if (proc.pid) {
5615
+ return;
5616
+ }
5617
+ await new Promise((resolve, reject) => {
5618
+ const onSpawn = () => {
5619
+ cleanup();
5620
+ resolve();
5621
+ };
5622
+ const onError = (error) => {
5623
+ cleanup();
5624
+ reject(new Error(formatCopilotSpawnError(error, executable, targetName)));
5625
+ };
5626
+ const cleanup = () => {
5627
+ proc.off("spawn", onSpawn);
5628
+ proc.off("error", onError);
5629
+ };
5630
+ proc.once("spawn", onSpawn);
5631
+ proc.once("error", onError);
5632
+ });
5633
+ }
5634
+ function formatCopilotSpawnError(error, executable, targetName) {
5635
+ const code = error.code;
5636
+ const base = `Failed to start Copilot CLI executable '${executable}' for target '${targetName}'. ${error.message}`;
5637
+ if (process.platform !== "win32") {
5638
+ return base;
5639
+ }
5640
+ if (code !== "ENOENT" && code !== "EINVAL") {
5641
+ return base;
5642
+ }
5643
+ return `${base}
5644
+
5645
+ On Windows, shell commands like 'copilot -h' can work via .ps1/.bat shims, but AgentV launches a subprocess that needs a directly spawnable executable path.
5646
+
5647
+ Fix options:
5648
+ 1) Install native Copilot binary package:
5649
+ npm install -g @github/copilot-win32-x64
5650
+ 2) Set explicit executable for Copilot targets:
5651
+ - In .env: COPILOT_EXE=C:\\Users\\<you>\\AppData\\Roaming\\npm\\node_modules\\@github\\copilot-win32-x64\\copilot.exe
5652
+ - In .agentv/targets.yaml: executable: \${{ COPILOT_EXE }}`;
5653
+ }
5586
5654
  function summarizeAcpEvent(eventType, data) {
5587
5655
  if (!data || typeof data !== "object") {
5588
5656
  return eventType;
@@ -7238,9 +7306,6 @@ function getAgentvHome() {
7238
7306
  function getWorkspacesRoot() {
7239
7307
  return path21.join(getAgentvHome(), "workspaces");
7240
7308
  }
7241
- function getGitCacheRoot() {
7242
- return path21.join(getAgentvHome(), "git-cache");
7243
- }
7244
7309
  function getSubagentsRoot() {
7245
7310
  return path21.join(getAgentvHome(), "subagents");
7246
7311
  }
@@ -8700,16 +8765,16 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
8700
8765
  });
8701
8766
  }
8702
8767
  async function execShellWithStdin(command, stdinPayload, options = {}) {
8703
- const { mkdir: mkdir15, readFile: readFile13, rm: rm7, writeFile: writeFile10 } = await import("node:fs/promises");
8768
+ const { mkdir: mkdir14, readFile: readFile13, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
8704
8769
  const { tmpdir: tmpdir3 } = await import("node:os");
8705
8770
  const path42 = await import("node:path");
8706
8771
  const { randomUUID: randomUUID8 } = await import("node:crypto");
8707
8772
  const dir = path42.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
8708
- await mkdir15(dir, { recursive: true });
8773
+ await mkdir14(dir, { recursive: true });
8709
8774
  const stdinPath = path42.join(dir, "stdin.txt");
8710
8775
  const stdoutPath = path42.join(dir, "stdout.txt");
8711
8776
  const stderrPath = path42.join(dir, "stderr.txt");
8712
- await writeFile10(stdinPath, stdinPayload, "utf8");
8777
+ await writeFile9(stdinPath, stdinPayload, "utf8");
8713
8778
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
8714
8779
  const { spawn: spawn4 } = await import("node:child_process");
8715
8780
  try {
@@ -8742,7 +8807,7 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
8742
8807
  const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
8743
8808
  return { stdout, stderr, exitCode };
8744
8809
  } finally {
8745
- await rm7(dir, { recursive: true, force: true });
8810
+ await rm6(dir, { recursive: true, force: true });
8746
8811
  }
8747
8812
  }
8748
8813
 
@@ -11930,8 +11995,8 @@ function runEqualsAssertion(output, value) {
11930
11995
  }
11931
11996
 
11932
11997
  // src/evaluation/orchestrator.ts
11933
- import { createHash as createHash3, randomUUID as randomUUID7 } from "node:crypto";
11934
- import { mkdir as mkdir13, stat as stat7 } from "node:fs/promises";
11998
+ import { createHash as createHash2, randomUUID as randomUUID7 } from "node:crypto";
11999
+ import { mkdir as mkdir12, stat as stat7 } from "node:fs/promises";
11935
12000
  import path39 from "node:path";
11936
12001
  import micromatch4 from "micromatch";
11937
12002
 
@@ -12894,7 +12959,7 @@ var WorkspacePoolManager = class {
12894
12959
  * 7. Return the slot (with path, index, isExisting)
12895
12960
  */
12896
12961
  async acquireWorkspace(options) {
12897
- const { templatePath, repos, maxSlots, repoManager } = options;
12962
+ const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
12898
12963
  const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
12899
12964
  const poolDir = path36.join(this.poolRoot, fingerprint);
12900
12965
  await mkdir11(poolDir, { recursive: true });
@@ -12914,7 +12979,7 @@ var WorkspacePoolManager = class {
12914
12979
  }
12915
12980
  const slotExists = existsSync2(slotPath);
12916
12981
  if (slotExists) {
12917
- await this.resetSlot(slotPath, templatePath, repos);
12982
+ await this.resetSlot(slotPath, templatePath, repos, poolReset);
12918
12983
  return {
12919
12984
  index: i,
12920
12985
  path: slotPath,
@@ -13046,15 +13111,19 @@ var WorkspacePoolManager = class {
13046
13111
  * 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
13047
13112
  * 2. Re-copy template files (skip repo directories)
13048
13113
  */
13049
- async resetSlot(slotPath, templatePath, repos) {
13114
+ async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
13050
13115
  for (const repo of repos) {
13051
13116
  const repoDir = path36.join(slotPath, repo.path);
13052
13117
  if (!existsSync2(repoDir)) {
13053
13118
  continue;
13054
13119
  }
13120
+ if (poolReset === "none") {
13121
+ continue;
13122
+ }
13055
13123
  const ref = repo.checkout?.ref ?? "HEAD";
13056
13124
  await git(["reset", "--hard", ref], { cwd: repoDir });
13057
- await git(["clean", "-fd"], { cwd: repoDir });
13125
+ const cleanFlag = poolReset === "strict" ? "-fdx" : "-fd";
13126
+ await git(["clean", cleanFlag], { cwd: repoDir });
13058
13127
  }
13059
13128
  if (templatePath) {
13060
13129
  const repoDirNames = new Set(
@@ -13070,14 +13139,10 @@ var WorkspacePoolManager = class {
13070
13139
 
13071
13140
  // src/evaluation/workspace/repo-manager.ts
13072
13141
  import { execFile as execFile2 } from "node:child_process";
13073
- import { createHash as createHash2 } from "node:crypto";
13074
- import { existsSync as existsSync3 } from "node:fs";
13075
- import { mkdir as mkdir12, rm as rm6, unlink as unlink2, writeFile as writeFile8 } from "node:fs/promises";
13076
13142
  import path37 from "node:path";
13077
13143
  import { promisify as promisify6 } from "node:util";
13078
13144
  var execFileAsync2 = promisify6(execFile2);
13079
13145
  var DEFAULT_TIMEOUT_MS2 = 3e5;
13080
- var LOCK_TIMEOUT_MS = 6e4;
13081
13146
  function gitEnv2() {
13082
13147
  const env = { ...process.env };
13083
13148
  for (const key of Object.keys(env)) {
@@ -13092,10 +13157,6 @@ function gitEnv2() {
13092
13157
  GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
13093
13158
  };
13094
13159
  }
13095
- function cacheKey(source) {
13096
- const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
13097
- return createHash2("sha256").update(raw).digest("hex");
13098
- }
13099
13160
  function getSourceUrl(source) {
13100
13161
  return source.type === "git" ? source.url : source.path;
13101
13162
  }
@@ -13109,33 +13170,9 @@ async function git2(args, opts) {
13109
13170
  });
13110
13171
  return stdout.trim();
13111
13172
  }
13112
- async function acquireLock(lockPath) {
13113
- const start = Date.now();
13114
- while (Date.now() - start < LOCK_TIMEOUT_MS) {
13115
- try {
13116
- await writeFile8(lockPath, String(process.pid), { flag: "wx" });
13117
- return;
13118
- } catch (err) {
13119
- if (err.code === "EEXIST") {
13120
- await new Promise((r) => setTimeout(r, 200));
13121
- continue;
13122
- }
13123
- throw err;
13124
- }
13125
- }
13126
- throw new Error(`Timed out waiting for lock: ${lockPath}`);
13127
- }
13128
- async function releaseLock(lockPath) {
13129
- try {
13130
- await unlink2(lockPath);
13131
- } catch {
13132
- }
13133
- }
13134
13173
  var RepoManager = class {
13135
- cacheDir;
13136
13174
  verbose;
13137
- constructor(cacheDir, verbose = false) {
13138
- this.cacheDir = cacheDir ?? getGitCacheRoot();
13175
+ constructor(verbose = false) {
13139
13176
  this.verbose = verbose;
13140
13177
  }
13141
13178
  async runGit(args, opts) {
@@ -13160,86 +13197,18 @@ var RepoManager = class {
13160
13197
  }
13161
13198
  }
13162
13199
  /**
13163
- * Ensure a bare mirror cache exists for the given source.
13164
- * Creates on first access, fetches updates on subsequent calls.
13165
- * Returns the absolute path to the cache directory.
13166
- */
13167
- async ensureCache(source, depth, resolve) {
13168
- const key = cacheKey(source);
13169
- const cachePath = path37.join(this.cacheDir, key);
13170
- const lockPath = `${cachePath}.lock`;
13171
- const cacheExists = existsSync3(path37.join(cachePath, "HEAD"));
13172
- if (this.verbose) {
13173
- console.log(
13174
- `[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve ?? "remote"} cache=${cachePath}`
13175
- );
13176
- }
13177
- if (resolve === "local") {
13178
- if (cacheExists) {
13179
- if (this.verbose) {
13180
- console.log(`[repo] using existing local cache ${cachePath}`);
13181
- }
13182
- return cachePath;
13183
- }
13184
- const url = getSourceUrl(source);
13185
- throw new Error(
13186
- `No cache found for \`${url}\`. Run \`agentv cache add --url ${url} --from <local-path>\` to seed it.`
13187
- );
13188
- }
13189
- await mkdir12(this.cacheDir, { recursive: true });
13190
- const lockStartedAt = Date.now();
13191
- await acquireLock(lockPath);
13192
- if (this.verbose) {
13193
- console.log(`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`);
13194
- }
13195
- try {
13196
- if (cacheExists) {
13197
- if (this.verbose) {
13198
- console.log(`[repo] refreshing existing cache ${cachePath}`);
13199
- }
13200
- const fetchArgs = ["fetch", "--prune"];
13201
- if (depth) {
13202
- fetchArgs.push("--depth", String(depth));
13203
- }
13204
- await this.runGit(fetchArgs, { cwd: cachePath });
13205
- } else {
13206
- if (this.verbose) {
13207
- console.log(`[repo] creating new cache ${cachePath}`);
13208
- }
13209
- const cloneArgs = ["clone", "--mirror", "--bare"];
13210
- if (depth) {
13211
- cloneArgs.push("--depth", String(depth));
13212
- }
13213
- const sourceUrl = getSourceUrl(source);
13214
- const cloneUrl = depth && source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
13215
- cloneArgs.push(cloneUrl, cachePath);
13216
- await this.runGit(cloneArgs);
13217
- }
13218
- } finally {
13219
- await releaseLock(lockPath);
13220
- if (this.verbose) {
13221
- console.log(`[repo] lock released path=${lockPath}`);
13222
- }
13223
- }
13224
- return cachePath;
13225
- }
13226
- /**
13227
- * Clone a repo from cache into the workspace at the configured path.
13200
+ * Clone a repo directly from source into the workspace at the configured path.
13228
13201
  * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
13229
13202
  */
13230
13203
  async materialize(repo, workspacePath) {
13231
13204
  const targetDir = path37.join(workspacePath, repo.path);
13205
+ const sourceUrl = getSourceUrl(repo.source);
13232
13206
  const startedAt = Date.now();
13233
13207
  if (this.verbose) {
13234
13208
  console.log(
13235
- `[repo] materialize start path=${repo.path} source=${getSourceUrl(repo.source)} workspace=${workspacePath}`
13209
+ `[repo] materialize start path=${repo.path} source=${sourceUrl} workspace=${workspacePath}`
13236
13210
  );
13237
13211
  }
13238
- const cachePath = await this.ensureCache(
13239
- repo.source,
13240
- repo.clone?.depth,
13241
- repo.checkout?.resolve
13242
- );
13243
13212
  const cloneArgs = ["clone"];
13244
13213
  if (repo.clone?.depth) {
13245
13214
  cloneArgs.push("--depth", String(repo.clone.depth));
@@ -13248,7 +13217,7 @@ var RepoManager = class {
13248
13217
  cloneArgs.push("--filter", repo.clone.filter);
13249
13218
  }
13250
13219
  cloneArgs.push("--no-checkout");
13251
- const cloneUrl = repo.clone?.depth || repo.clone?.filter ? `file://${cachePath}` : cachePath;
13220
+ const cloneUrl = (repo.clone?.depth || repo.clone?.filter) && repo.source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
13252
13221
  cloneArgs.push(cloneUrl, targetDir);
13253
13222
  await this.runGit(cloneArgs);
13254
13223
  if (repo.clone?.sparse?.length) {
@@ -13320,51 +13289,13 @@ var RepoManager = class {
13320
13289
  }
13321
13290
  }
13322
13291
  /** Reset repos in workspace to their checkout state. */
13323
- async reset(repos, workspacePath, strategy) {
13324
- if (strategy === "recreate") {
13325
- for (const repo of repos) {
13326
- const targetDir = path37.join(workspacePath, repo.path);
13327
- await rm6(targetDir, { recursive: true, force: true });
13328
- }
13329
- await this.materializeAll(repos, workspacePath);
13330
- return;
13331
- }
13292
+ async reset(repos, workspacePath, reset) {
13293
+ const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
13332
13294
  for (const repo of repos) {
13333
13295
  const targetDir = path37.join(workspacePath, repo.path);
13334
13296
  await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
13335
- await this.runGit(["clean", "-fd"], { cwd: targetDir });
13336
- }
13337
- }
13338
- /**
13339
- * Seed the cache from a local repository, setting the remote to a given URL.
13340
- * Useful for avoiding slow network clones when a local clone already exists.
13341
- */
13342
- async seedCache(localPath, remoteUrl, opts) {
13343
- const source = { type: "git", url: remoteUrl };
13344
- const key = cacheKey(source);
13345
- const cachePath = path37.join(this.cacheDir, key);
13346
- const lockPath = `${cachePath}.lock`;
13347
- await mkdir12(this.cacheDir, { recursive: true });
13348
- await acquireLock(lockPath);
13349
- try {
13350
- if (existsSync3(path37.join(cachePath, "HEAD"))) {
13351
- if (!opts?.force) {
13352
- throw new Error(
13353
- `Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
13354
- );
13355
- }
13356
- await rm6(cachePath, { recursive: true, force: true });
13357
- }
13358
- await git2(["clone", "--mirror", "--bare", localPath, cachePath]);
13359
- await git2(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
13360
- } finally {
13361
- await releaseLock(lockPath);
13297
+ await this.runGit(["clean", cleanFlag], { cwd: targetDir });
13362
13298
  }
13363
- return cachePath;
13364
- }
13365
- /** Remove the entire cache directory. */
13366
- async cleanCache() {
13367
- await rm6(this.cacheDir, { recursive: true, force: true });
13368
13299
  }
13369
13300
  };
13370
13301
 
@@ -13450,6 +13381,22 @@ function classifyQualityStatus(score) {
13450
13381
  function usesFileReferencePrompt(provider) {
13451
13382
  return isAgentProvider(provider) || provider.kind === "cli";
13452
13383
  }
13384
+ function toScriptConfig(hook, hookName, context) {
13385
+ const command = hook.command ?? hook.script;
13386
+ if (!command || command.length === 0) {
13387
+ throw new Error(`${hookName} hook in ${context} requires command or script`);
13388
+ }
13389
+ return {
13390
+ command,
13391
+ ...hook.timeout_ms !== void 0 && { timeout_ms: hook.timeout_ms },
13392
+ ...hook.timeoutMs !== void 0 && { timeoutMs: hook.timeoutMs },
13393
+ ...hook.cwd !== void 0 && { cwd: hook.cwd },
13394
+ ...hook.script !== void 0 && { script: hook.script }
13395
+ };
13396
+ }
13397
+ function hasHookCommand(hook) {
13398
+ return !!(hook?.command && hook.command.length > 0 || hook?.script && hook.script.length > 0);
13399
+ }
13453
13400
  function getWorkspaceTemplate(target) {
13454
13401
  const config = target.config;
13455
13402
  if ("workspaceTemplate" in config && typeof config.workspaceTemplate === "string") {
@@ -13483,7 +13430,12 @@ async function runEvaluation(options) {
13483
13430
  failOnError,
13484
13431
  poolWorkspaces,
13485
13432
  poolMaxSlots: configPoolMaxSlots,
13486
- workspace: userWorkspacePath
13433
+ workspace: legacyWorkspacePath,
13434
+ workspaceMode,
13435
+ workspacePath,
13436
+ workspaceClean,
13437
+ retainOnSuccess,
13438
+ retainOnFailure
13487
13439
  } = options;
13488
13440
  let useCache = options.useCache;
13489
13441
  if (trials && trials.count > 1 && useCache) {
@@ -13619,13 +13571,22 @@ async function runEvaluation(options) {
13619
13571
  }
13620
13572
  };
13621
13573
  const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
13622
- if (userWorkspacePath && isPerTestIsolation) {
13574
+ const configuredMode = suiteWorkspace?.mode ?? workspaceMode;
13575
+ const configuredStaticPath = suiteWorkspace?.static_path ?? workspacePath ?? legacyWorkspacePath;
13576
+ const useStaticWorkspace = configuredMode === "static" || !!configuredStaticPath && !configuredMode;
13577
+ if (useStaticWorkspace && isPerTestIsolation) {
13623
13578
  throw new Error(
13624
- "--workspace is incompatible with isolation: per_test. Use isolation: shared (default)."
13579
+ "static workspace mode is incompatible with isolation: per_test. Use isolation: shared (default)."
13625
13580
  );
13626
13581
  }
13627
- const hasSharedWorkspace = !!(userWorkspacePath || workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
13628
- const usePool = poolWorkspaces === true && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !userWorkspacePath;
13582
+ if (configuredMode === "static" && !configuredStaticPath) {
13583
+ throw new Error("workspace.mode=static requires workspace.static_path or --workspace-path");
13584
+ }
13585
+ const hasSharedWorkspace = !!(useStaticWorkspace || workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation);
13586
+ const poolEnabled = configuredMode === "pooled" ? true : configuredMode === "ephemeral" || useStaticWorkspace ? false : suiteWorkspace?.pool ?? poolWorkspaces ?? true;
13587
+ const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
13588
+ const resolvedRetainOnSuccess = retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
13589
+ const resolvedRetainOnFailure = retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
13629
13590
  const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
13630
13591
  const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
13631
13592
  setupLog(
@@ -13646,20 +13607,21 @@ async function runEvaluation(options) {
13646
13607
  const availablePoolSlots = [];
13647
13608
  const poolSlotBaselines = /* @__PURE__ */ new Map();
13648
13609
  const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
13649
- if (userWorkspacePath) {
13650
- sharedWorkspacePath = userWorkspacePath;
13651
- setupLog(`using user-provided workspace: ${userWorkspacePath}`);
13610
+ if (useStaticWorkspace && configuredStaticPath) {
13611
+ sharedWorkspacePath = configuredStaticPath;
13612
+ setupLog(`using static workspace: ${configuredStaticPath}`);
13652
13613
  } else if (usePool && suiteWorkspace?.repos) {
13653
13614
  const slotsNeeded = workers;
13654
13615
  setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
13655
13616
  poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
13656
- const poolRepoManager = new RepoManager(void 0, verbose);
13617
+ const poolRepoManager = new RepoManager(verbose);
13657
13618
  for (let i = 0; i < slotsNeeded; i++) {
13658
13619
  const slot = await poolManager.acquireWorkspace({
13659
13620
  templatePath: workspaceTemplate,
13660
13621
  repos: suiteWorkspace.repos,
13661
13622
  maxSlots: poolMaxSlots,
13662
- repoManager: poolRepoManager
13623
+ repoManager: poolRepoManager,
13624
+ poolReset: (workspaceClean === "full" ? "strict" : workspaceClean === "standard" ? "fast" : null) ?? "fast"
13663
13625
  });
13664
13626
  poolSlots.push(slot);
13665
13627
  setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
@@ -13679,9 +13641,9 @@ async function runEvaluation(options) {
13679
13641
  const message = error instanceof Error ? error.message : String(error);
13680
13642
  throw new Error(`Failed to create shared workspace: ${message}`);
13681
13643
  }
13682
- } else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
13644
+ } else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
13683
13645
  sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
13684
- await mkdir13(sharedWorkspacePath, { recursive: true });
13646
+ await mkdir12(sharedWorkspacePath, { recursive: true });
13685
13647
  setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
13686
13648
  }
13687
13649
  try {
@@ -13693,7 +13655,7 @@ async function runEvaluation(options) {
13693
13655
  } catch {
13694
13656
  }
13695
13657
  }
13696
- const repoManager = suiteWorkspace?.repos?.length && !usePool && !userWorkspacePath ? new RepoManager(void 0, verbose) : void 0;
13658
+ const repoManager = suiteWorkspace?.repos?.length && !usePool && !useStaticWorkspace ? new RepoManager(verbose) : void 0;
13697
13659
  if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
13698
13660
  setupLog(
13699
13661
  `materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
@@ -13703,17 +13665,19 @@ async function runEvaluation(options) {
13703
13665
  setupLog("shared repo materialization complete");
13704
13666
  } catch (error) {
13705
13667
  const message = error instanceof Error ? error.message : String(error);
13706
- if (sharedWorkspacePath && !userWorkspacePath) {
13668
+ if (sharedWorkspacePath && !useStaticWorkspace) {
13707
13669
  await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13708
13670
  });
13709
13671
  }
13710
13672
  throw new Error(`Failed to materialize repos: ${message}`);
13711
13673
  }
13712
13674
  }
13713
- if (sharedWorkspacePath && suiteWorkspace?.before_all) {
13714
- const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
13675
+ const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all;
13676
+ if (sharedWorkspacePath && hasHookCommand(suiteBeforeAllHook)) {
13677
+ const beforeAllHook = suiteBeforeAllHook;
13678
+ const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
13715
13679
  setupLog(
13716
- `running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
13680
+ `running shared before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
13717
13681
  );
13718
13682
  const scriptContext = {
13719
13683
  workspacePath: sharedWorkspacePath,
@@ -13722,18 +13686,22 @@ async function runEvaluation(options) {
13722
13686
  evalDir
13723
13687
  };
13724
13688
  try {
13725
- beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
13689
+ beforeAllOutput = await executeWorkspaceScript(
13690
+ toScriptConfig(beforeAllHook, "before_all", "suite workspace"),
13691
+ scriptContext
13692
+ );
13726
13693
  setupLog("shared before_all completed");
13727
13694
  } catch (error) {
13728
13695
  const message = error instanceof Error ? error.message : String(error);
13729
- if (sharedWorkspacePath && !userWorkspacePath) {
13696
+ if (sharedWorkspacePath && !useStaticWorkspace) {
13730
13697
  await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13731
13698
  });
13732
13699
  }
13733
13700
  throw new Error(`before_all script failed: ${message}`);
13734
13701
  }
13735
13702
  }
13736
- if (availablePoolSlots.length > 0 && suiteWorkspace?.before_all) {
13703
+ if (availablePoolSlots.length > 0 && hasHookCommand(suiteBeforeAllHook)) {
13704
+ const beforeAllHook = suiteBeforeAllHook;
13737
13705
  for (const slot of availablePoolSlots) {
13738
13706
  setupLog(`running before_all on pool slot ${slot.index}`);
13739
13707
  const scriptContext = {
@@ -13743,7 +13711,10 @@ async function runEvaluation(options) {
13743
13711
  evalDir
13744
13712
  };
13745
13713
  try {
13746
- const output = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
13714
+ const output = await executeWorkspaceScript(
13715
+ toScriptConfig(beforeAllHook, "before_all", "suite workspace"),
13716
+ scriptContext
13717
+ );
13747
13718
  if (!beforeAllOutput) beforeAllOutput = output;
13748
13719
  setupLog(`before_all completed on pool slot ${slot.index}`);
13749
13720
  } catch (error) {
@@ -13875,6 +13846,8 @@ async function runEvaluation(options) {
13875
13846
  evalRunId,
13876
13847
  keepWorkspaces,
13877
13848
  cleanupWorkspaces,
13849
+ retainOnSuccess: resolvedRetainOnSuccess,
13850
+ retainOnFailure: resolvedRetainOnFailure,
13878
13851
  sharedWorkspacePath: testWorkspacePath,
13879
13852
  sharedBaselineCommit: testBaselineCommit,
13880
13853
  suiteWorkspaceFile,
@@ -13968,7 +13941,9 @@ async function runEvaluation(options) {
13968
13941
  }
13969
13942
  }
13970
13943
  const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
13971
- if (afterAllWorkspaces.length > 0 && suiteWorkspace?.after_all) {
13944
+ const suiteAfterAllHook = suiteWorkspace?.hooks?.after_all;
13945
+ if (afterAllWorkspaces.length > 0 && hasHookCommand(suiteAfterAllHook)) {
13946
+ const afterAllHook = suiteAfterAllHook;
13972
13947
  for (const wsPath of afterAllWorkspaces) {
13973
13948
  const scriptContext = {
13974
13949
  workspacePath: wsPath,
@@ -13978,7 +13953,7 @@ async function runEvaluation(options) {
13978
13953
  };
13979
13954
  try {
13980
13955
  const afterAllOutput = await executeWorkspaceScript(
13981
- suiteWorkspace.after_all,
13956
+ toScriptConfig(afterAllHook, "after_all", "suite workspace"),
13982
13957
  scriptContext,
13983
13958
  "warn"
13984
13959
  );
@@ -13989,12 +13964,14 @@ async function runEvaluation(options) {
13989
13964
  }
13990
13965
  }
13991
13966
  }
13992
- if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !userWorkspacePath) {
13967
+ if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !useStaticWorkspace) {
13993
13968
  const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
13994
- if (cleanupWorkspaces) {
13995
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13996
- });
13997
- } else if (!hasFailure && !keepWorkspaces) {
13969
+ if (hasFailure) {
13970
+ if (resolvedRetainOnFailure === "cleanup") {
13971
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13972
+ });
13973
+ }
13974
+ } else if (resolvedRetainOnSuccess === "cleanup") {
13998
13975
  await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13999
13976
  });
14000
13977
  }
@@ -14188,6 +14165,8 @@ async function runEvalCase(options) {
14188
14165
  evalRunId,
14189
14166
  keepWorkspaces,
14190
14167
  cleanupWorkspaces: forceCleanup,
14168
+ retainOnSuccess,
14169
+ retainOnFailure,
14191
14170
  sharedWorkspacePath,
14192
14171
  sharedBaselineCommit,
14193
14172
  suiteWorkspaceFile,
@@ -14199,10 +14178,10 @@ async function runEvalCase(options) {
14199
14178
  const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
14200
14179
  const promptInputs = await buildPromptInputs(evalCase, formattingMode);
14201
14180
  const typeRegistry = providedTypeRegistry ?? createBuiltinRegistry();
14202
- const cacheKey2 = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
14181
+ const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
14203
14182
  let cachedResponse;
14204
- if (cacheKey2 && cache) {
14205
- cachedResponse = await cache.get(cacheKey2);
14183
+ if (cacheKey && cache) {
14184
+ cachedResponse = await cache.get(cacheKey);
14206
14185
  }
14207
14186
  const nowFn = now ?? (() => /* @__PURE__ */ new Date());
14208
14187
  let workspacePath = sharedWorkspacePath;
@@ -14241,12 +14220,12 @@ async function runEvalCase(options) {
14241
14220
  }
14242
14221
  }
14243
14222
  }
14244
- if (!workspacePath && (evalCase.workspace?.before_all || evalCase.workspace?.repos?.length) && evalRunId) {
14223
+ if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
14245
14224
  workspacePath = getWorkspacePath(evalRunId, evalCase.id);
14246
- await mkdir13(workspacePath, { recursive: true });
14225
+ await mkdir12(workspacePath, { recursive: true });
14247
14226
  }
14248
14227
  if (evalCase.workspace?.repos?.length && workspacePath) {
14249
- const perCaseRepoManager = new RepoManager(void 0, setupDebug);
14228
+ const perCaseRepoManager = new RepoManager(setupDebug);
14250
14229
  try {
14251
14230
  if (setupDebug) {
14252
14231
  console.log(
@@ -14271,11 +14250,13 @@ async function runEvalCase(options) {
14271
14250
  );
14272
14251
  }
14273
14252
  }
14274
- if (workspacePath && evalCase.workspace?.before_all) {
14275
- const beforeAllCommand = (evalCase.workspace.before_all.command ?? evalCase.workspace.before_all.script ?? []).join(" ");
14253
+ const caseBeforeAllHook = evalCase.workspace?.hooks?.before_all;
14254
+ if (workspacePath && hasHookCommand(caseBeforeAllHook)) {
14255
+ const beforeAllHook = caseBeforeAllHook;
14256
+ const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
14276
14257
  if (setupDebug) {
14277
14258
  console.log(
14278
- `[setup] test=${evalCase.id} running before_all in cwd=${evalCase.workspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
14259
+ `[setup] test=${evalCase.id} running before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
14279
14260
  );
14280
14261
  }
14281
14262
  const scriptContext = {
@@ -14288,7 +14269,7 @@ async function runEvalCase(options) {
14288
14269
  };
14289
14270
  try {
14290
14271
  beforeAllOutput = await executeWorkspaceScript(
14291
- evalCase.workspace.before_all,
14272
+ toScriptConfig(beforeAllHook, "before_all", `test '${evalCase.id}'`),
14292
14273
  scriptContext
14293
14274
  );
14294
14275
  if (setupDebug) {
@@ -14313,7 +14294,9 @@ async function runEvalCase(options) {
14313
14294
  }
14314
14295
  }
14315
14296
  }
14316
- if (workspacePath && evalCase.workspace?.before_each) {
14297
+ const caseBeforeEachHook = evalCase.workspace?.hooks?.before_each;
14298
+ if (workspacePath && hasHookCommand(caseBeforeEachHook)) {
14299
+ const beforeEachHook = caseBeforeEachHook;
14317
14300
  const scriptContext = {
14318
14301
  workspacePath,
14319
14302
  testId: evalCase.id,
@@ -14324,7 +14307,7 @@ async function runEvalCase(options) {
14324
14307
  };
14325
14308
  try {
14326
14309
  beforeEachOutput = await executeWorkspaceScript(
14327
- evalCase.workspace.before_each,
14310
+ toScriptConfig(beforeEachHook, "before_each", `test '${evalCase.id}'`),
14328
14311
  scriptContext
14329
14312
  );
14330
14313
  } catch (error) {
@@ -14412,8 +14395,8 @@ async function runEvalCase(options) {
14412
14395
  }
14413
14396
  return errorResult;
14414
14397
  }
14415
- if (cacheKey2 && cache && !cachedResponse) {
14416
- await cache.set(cacheKey2, providerResponse);
14398
+ if (cacheKey && cache && !cachedResponse) {
14399
+ await cache.set(cacheKey, providerResponse);
14417
14400
  }
14418
14401
  const output = providerResponse.output;
14419
14402
  const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
@@ -14441,17 +14424,19 @@ async function runEvalCase(options) {
14441
14424
  }
14442
14425
  }
14443
14426
  const providerError = extractProviderError(providerResponse);
14444
- if (repoManager && workspacePath && evalCase.workspace?.reset?.after_each && evalCase.workspace.reset.strategy && evalCase.workspace.reset.strategy !== "none" && evalCase.workspace.repos) {
14427
+ if (repoManager && workspacePath && evalCase.workspace?.hooks?.after_each?.reset && evalCase.workspace.hooks.after_each.reset !== "none" && evalCase.workspace.repos) {
14445
14428
  try {
14446
14429
  await repoManager.reset(
14447
14430
  evalCase.workspace.repos,
14448
14431
  workspacePath,
14449
- evalCase.workspace.reset.strategy
14432
+ evalCase.workspace.hooks.after_each.reset
14450
14433
  );
14451
14434
  } catch {
14452
14435
  }
14453
14436
  }
14454
- if (workspacePath && evalCase.workspace?.after_each) {
14437
+ const caseAfterEachHook = evalCase.workspace?.hooks?.after_each;
14438
+ if (workspacePath && hasHookCommand(caseAfterEachHook)) {
14439
+ const afterEachHook = caseAfterEachHook;
14455
14440
  const scriptContext = {
14456
14441
  workspacePath,
14457
14442
  testId: evalCase.id,
@@ -14462,7 +14447,7 @@ async function runEvalCase(options) {
14462
14447
  };
14463
14448
  try {
14464
14449
  afterEachOutput = await executeWorkspaceScript(
14465
- evalCase.workspace.after_each,
14450
+ toScriptConfig(afterEachHook, "after_each", `test '${evalCase.id}'`),
14466
14451
  scriptContext,
14467
14452
  "warn"
14468
14453
  );
@@ -14512,8 +14497,13 @@ async function runEvalCase(options) {
14512
14497
  await cleanupWorkspace(workspacePath).catch(() => {
14513
14498
  });
14514
14499
  } else if (isFailure) {
14515
- return { ...finalResult, workspacePath };
14516
- } else if (!keepWorkspaces) {
14500
+ if ((retainOnFailure ?? "keep") === "cleanup") {
14501
+ await cleanupWorkspace(workspacePath).catch(() => {
14502
+ });
14503
+ } else {
14504
+ return { ...finalResult, workspacePath };
14505
+ }
14506
+ } else if ((retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup")) !== "keep") {
14517
14507
  await cleanupWorkspace(workspacePath).catch(() => {
14518
14508
  });
14519
14509
  }
@@ -14531,11 +14521,12 @@ async function runEvalCase(options) {
14531
14521
  "evaluator_error"
14532
14522
  );
14533
14523
  if (workspacePath && !isSharedWorkspace) {
14534
- if (forceCleanup) {
14524
+ if (forceCleanup || (retainOnFailure ?? "keep") === "cleanup") {
14535
14525
  await cleanupWorkspace(workspacePath).catch(() => {
14536
14526
  });
14527
+ } else {
14528
+ return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
14537
14529
  }
14538
- return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
14539
14530
  }
14540
14531
  return { ...errorResult, beforeEachOutput, afterEachOutput };
14541
14532
  }
@@ -14554,7 +14545,9 @@ async function runEvalCaseWithTrials(options, trialsConfig) {
14554
14545
  useCache: false,
14555
14546
  // Force cleanup for intermediate trials
14556
14547
  cleanupWorkspaces: isLastDeclaredTrial ? options.cleanupWorkspaces : true,
14557
- keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false
14548
+ keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false,
14549
+ retainOnSuccess: isLastDeclaredTrial ? options.retainOnSuccess : "cleanup",
14550
+ retainOnFailure: isLastDeclaredTrial ? options.retainOnFailure : "cleanup"
14558
14551
  };
14559
14552
  const result = await runEvalCase(trialOptions);
14560
14553
  allResults.push(result);
@@ -15077,7 +15070,7 @@ function extractProviderError(response) {
15077
15070
  return trimmed.length > 0 ? trimmed : void 0;
15078
15071
  }
15079
15072
  function createCacheKey(provider, target, evalCase, promptInputs) {
15080
- const hash = createHash3("sha256");
15073
+ const hash = createHash2("sha256");
15081
15074
  hash.update(provider.id);
15082
15075
  hash.update(target.name);
15083
15076
  hash.update(evalCase.id);
@@ -15145,7 +15138,7 @@ function computeWeightedMean(entries) {
15145
15138
  }
15146
15139
 
15147
15140
  // src/evaluation/evaluate.ts
15148
- import { existsSync as existsSync4 } from "node:fs";
15141
+ import { existsSync as existsSync3 } from "node:fs";
15149
15142
  import path40 from "node:path";
15150
15143
  async function evaluate(config) {
15151
15144
  const startTime = Date.now();
@@ -15264,7 +15257,7 @@ async function discoverDefaultTarget(repoRoot) {
15264
15257
  for (const dir of chain) {
15265
15258
  for (const candidate of TARGET_FILE_CANDIDATES) {
15266
15259
  const targetsPath = path40.join(dir, candidate);
15267
- if (!existsSync4(targetsPath)) continue;
15260
+ if (!existsSync3(targetsPath)) continue;
15268
15261
  try {
15269
15262
  const definitions = await readTargetDefinitions(targetsPath);
15270
15263
  const defaultTarget = definitions.find((d) => d.name === "default");
@@ -15282,7 +15275,7 @@ async function loadEnvHierarchy(repoRoot) {
15282
15275
  const envFiles = [];
15283
15276
  for (const dir of chain) {
15284
15277
  const envPath = path40.join(dir, ".env");
15285
- if (existsSync4(envPath)) envFiles.push(envPath);
15278
+ if (existsSync3(envPath)) envFiles.push(envPath);
15286
15279
  }
15287
15280
  for (let i = envFiles.length - 1; i >= 0; i--) {
15288
15281
  try {
@@ -15360,12 +15353,12 @@ var CONFIG_FILE_NAMES = [
15360
15353
  ".agentv/config.js"
15361
15354
  ];
15362
15355
  async function loadTsConfig(projectRoot) {
15363
- const { existsSync: existsSync5 } = await import("node:fs");
15356
+ const { existsSync: existsSync4 } = await import("node:fs");
15364
15357
  const { pathToFileURL } = await import("node:url");
15365
15358
  const { join: join2 } = await import("node:path");
15366
15359
  for (const fileName of CONFIG_FILE_NAMES) {
15367
15360
  const filePath = join2(projectRoot, fileName);
15368
- if (!existsSync5(filePath)) {
15361
+ if (!existsSync4(filePath)) {
15369
15362
  continue;
15370
15363
  }
15371
15364
  try {
@@ -15462,7 +15455,7 @@ function buildPrompt(criteria, question, referenceAnswer) {
15462
15455
  }
15463
15456
 
15464
15457
  // src/evaluation/cache/response-cache.ts
15465
- import { mkdir as mkdir14, readFile as readFile12, writeFile as writeFile9 } from "node:fs/promises";
15458
+ import { mkdir as mkdir13, readFile as readFile12, writeFile as writeFile8 } from "node:fs/promises";
15466
15459
  import path41 from "node:path";
15467
15460
  var DEFAULT_CACHE_PATH = ".agentv/cache";
15468
15461
  var ResponseCache = class {
@@ -15482,8 +15475,8 @@ var ResponseCache = class {
15482
15475
  async set(key, value) {
15483
15476
  const filePath = this.keyToPath(key);
15484
15477
  const dir = path41.dirname(filePath);
15485
- await mkdir14(dir, { recursive: true });
15486
- await writeFile9(filePath, JSON.stringify(value, null, 2), "utf8");
15478
+ await mkdir13(dir, { recursive: true });
15479
+ await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
15487
15480
  }
15488
15481
  keyToPath(key) {
15489
15482
  const prefix = key.slice(0, 2);
@@ -16017,7 +16010,6 @@ export {
16017
16010
  freeformEvaluationSchema,
16018
16011
  generateRubrics,
16019
16012
  getAgentvHome,
16020
- getGitCacheRoot,
16021
16013
  getHitCount,
16022
16014
  getSubagentsRoot,
16023
16015
  getTraceStateRoot,