@agentv/core 2.15.0 → 2.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1244,11 +1244,11 @@ function serializeAttributeValue(value) {
1244
1244
  if (Array.isArray(value)) return { arrayValue: { values: value.map(serializeAttributeValue) } };
1245
1245
  return { stringValue: String(value) };
1246
1246
  }
1247
- var import_promises32, import_node_path45, OtlpJsonFileExporter;
1247
+ var import_promises31, import_node_path45, OtlpJsonFileExporter;
1248
1248
  var init_otlp_json_file_exporter = __esm({
1249
1249
  "src/observability/otlp-json-file-exporter.ts"() {
1250
1250
  "use strict";
1251
- import_promises32 = require("fs/promises");
1251
+ import_promises31 = require("fs/promises");
1252
1252
  import_node_path45 = require("path");
1253
1253
  OtlpJsonFileExporter = class {
1254
1254
  // biome-ignore lint/suspicious/noExplicitAny: serialized span data
@@ -1288,7 +1288,7 @@ var init_otlp_json_file_exporter = __esm({
1288
1288
  }
1289
1289
  async flush() {
1290
1290
  if (this.spans.length === 0) return;
1291
- await (0, import_promises32.mkdir)((0, import_node_path45.dirname)(this.filePath), { recursive: true });
1291
+ await (0, import_promises31.mkdir)((0, import_node_path45.dirname)(this.filePath), { recursive: true });
1292
1292
  const otlpJson = {
1293
1293
  resourceSpans: [
1294
1294
  {
@@ -1302,8 +1302,8 @@ var init_otlp_json_file_exporter = __esm({
1302
1302
  }
1303
1303
  ]
1304
1304
  };
1305
- const { writeFile: writeFile10 } = await import("fs/promises");
1306
- await writeFile10(this.filePath, JSON.stringify(otlpJson, null, 2));
1305
+ const { writeFile: writeFile9 } = await import("fs/promises");
1306
+ await writeFile9(this.filePath, JSON.stringify(otlpJson, null, 2));
1307
1307
  }
1308
1308
  };
1309
1309
  }
@@ -1319,12 +1319,12 @@ function hrTimeDiffMs(start, end) {
1319
1319
  const diffNano = end[1] - start[1];
1320
1320
  return Math.round(diffSec * 1e3 + diffNano / 1e6);
1321
1321
  }
1322
- var import_node_fs14, import_promises33, import_node_path46, SimpleTraceFileExporter;
1322
+ var import_node_fs13, import_promises32, import_node_path46, SimpleTraceFileExporter;
1323
1323
  var init_simple_trace_file_exporter = __esm({
1324
1324
  "src/observability/simple-trace-file-exporter.ts"() {
1325
1325
  "use strict";
1326
- import_node_fs14 = require("fs");
1327
- import_promises33 = require("fs/promises");
1326
+ import_node_fs13 = require("fs");
1327
+ import_promises32 = require("fs/promises");
1328
1328
  import_node_path46 = require("path");
1329
1329
  SimpleTraceFileExporter = class {
1330
1330
  stream = null;
@@ -1338,8 +1338,8 @@ var init_simple_trace_file_exporter = __esm({
1338
1338
  async ensureStream() {
1339
1339
  if (!this.streamReady) {
1340
1340
  this.streamReady = (async () => {
1341
- await (0, import_promises33.mkdir)((0, import_node_path46.dirname)(this.filePath), { recursive: true });
1342
- this.stream = (0, import_node_fs14.createWriteStream)(this.filePath, { flags: "w" });
1341
+ await (0, import_promises32.mkdir)((0, import_node_path46.dirname)(this.filePath), { recursive: true });
1342
+ this.stream = (0, import_node_fs13.createWriteStream)(this.filePath, { flags: "w" });
1343
1343
  return this.stream;
1344
1344
  })();
1345
1345
  }
@@ -1505,7 +1505,6 @@ __export(index_exports, {
1505
1505
  freeformEvaluationSchema: () => freeformEvaluationSchema,
1506
1506
  generateRubrics: () => generateRubrics,
1507
1507
  getAgentvHome: () => getAgentvHome,
1508
- getGitCacheRoot: () => getGitCacheRoot,
1509
1508
  getHitCount: () => getHitCount,
1510
1509
  getSubagentsRoot: () => getSubagentsRoot,
1511
1510
  getTraceStateRoot: () => getTraceStateRoot,
@@ -4741,16 +4740,31 @@ function parseRepoConfig(raw) {
4741
4740
  ...clone !== void 0 && { clone }
4742
4741
  };
4743
4742
  }
4744
- function parseResetConfig(raw) {
4743
+ function parseWorkspaceHookConfig(raw, evalFileDir) {
4745
4744
  if (!isJsonObject(raw)) return void 0;
4745
+ const script = parseWorkspaceScriptConfig(raw, evalFileDir);
4746
4746
  const obj = raw;
4747
- const strategy = obj.strategy === "none" || obj.strategy === "hard" || obj.strategy === "recreate" ? obj.strategy : void 0;
4748
- const afterEach = typeof obj.after_each === "boolean" ? obj.after_each : void 0;
4749
- if (!strategy && afterEach === void 0) return void 0;
4747
+ const reset = obj.reset === "none" || obj.reset === "fast" || obj.reset === "strict" ? obj.reset : void 0;
4748
+ if (!script && !reset) return void 0;
4750
4749
  return {
4751
- ...strategy !== void 0 && { strategy },
4752
- ...afterEach !== void 0 && { after_each: afterEach }
4750
+ ...script ?? {},
4751
+ ...reset !== void 0 && { reset }
4752
+ };
4753
+ }
4754
+ function parseWorkspaceHooksConfig(raw, evalFileDir) {
4755
+ if (!isJsonObject(raw)) return void 0;
4756
+ const obj = raw;
4757
+ const beforeAll = parseWorkspaceHookConfig(obj.before_all, evalFileDir);
4758
+ const beforeEach = parseWorkspaceHookConfig(obj.before_each, evalFileDir);
4759
+ const afterEach = parseWorkspaceHookConfig(obj.after_each, evalFileDir);
4760
+ const afterAll = parseWorkspaceHookConfig(obj.after_all, evalFileDir);
4761
+ const hooks = {
4762
+ ...beforeAll !== void 0 && { before_all: beforeAll },
4763
+ ...beforeEach !== void 0 && { before_each: beforeEach },
4764
+ ...afterEach !== void 0 && { after_each: afterEach },
4765
+ ...afterAll !== void 0 && { after_all: afterAll }
4753
4766
  };
4767
+ return Object.keys(hooks).length > 0 ? hooks : void 0;
4754
4768
  }
4755
4769
  async function resolveWorkspaceConfig(raw, evalFileDir) {
4756
4770
  if (typeof raw === "string") {
@@ -4781,37 +4795,48 @@ function parseWorkspaceConfig(raw, evalFileDir) {
4781
4795
  }
4782
4796
  const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
4783
4797
  const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
4784
- const reset = parseResetConfig(obj.reset);
4785
- const beforeAll = parseWorkspaceScriptConfig(obj.before_all, evalFileDir);
4786
- const afterAll = parseWorkspaceScriptConfig(obj.after_all, evalFileDir);
4787
- const beforeEach = parseWorkspaceScriptConfig(obj.before_each, evalFileDir);
4788
- const afterEach = parseWorkspaceScriptConfig(obj.after_each, evalFileDir);
4789
- if (!template && !isolation && !repos && !reset && !beforeAll && !afterAll && !beforeEach && !afterEach)
4798
+ const hooks = parseWorkspaceHooksConfig(obj.hooks, evalFileDir);
4799
+ const mode = obj.mode === "pooled" || obj.mode === "ephemeral" || obj.mode === "static" ? obj.mode : void 0;
4800
+ const staticPath = typeof obj.static_path === "string" ? obj.static_path : void 0;
4801
+ const pool = typeof obj.pool === "boolean" ? obj.pool : void 0;
4802
+ if (!template && !isolation && !repos && !hooks && !mode && !staticPath && pool === void 0)
4790
4803
  return void 0;
4791
4804
  return {
4792
4805
  ...template !== void 0 && { template },
4793
4806
  ...isolation !== void 0 && { isolation },
4794
4807
  ...repos !== void 0 && { repos },
4795
- ...reset !== void 0 && { reset },
4796
- ...beforeAll !== void 0 && { before_all: beforeAll },
4797
- ...afterAll !== void 0 && { after_all: afterAll },
4798
- ...beforeEach !== void 0 && { before_each: beforeEach },
4799
- ...afterEach !== void 0 && { after_each: afterEach }
4808
+ ...hooks !== void 0 && { hooks },
4809
+ ...mode !== void 0 && { mode },
4810
+ ...staticPath !== void 0 && { static_path: staticPath },
4811
+ ...pool !== void 0 && { pool }
4800
4812
  };
4801
4813
  }
4802
4814
  function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
4803
4815
  if (!suiteLevel && !caseLevel) return void 0;
4804
4816
  if (!suiteLevel) return caseLevel;
4805
4817
  if (!caseLevel) return suiteLevel;
4818
+ const mergeHook = (suiteHook, caseHook) => {
4819
+ if (!suiteHook && !caseHook) return void 0;
4820
+ return {
4821
+ ...suiteHook ?? {},
4822
+ ...caseHook ?? {}
4823
+ };
4824
+ };
4825
+ const mergedHooks = {
4826
+ before_all: mergeHook(suiteLevel.hooks?.before_all, caseLevel.hooks?.before_all),
4827
+ before_each: mergeHook(suiteLevel.hooks?.before_each, caseLevel.hooks?.before_each),
4828
+ after_each: mergeHook(suiteLevel.hooks?.after_each, caseLevel.hooks?.after_each),
4829
+ after_all: mergeHook(suiteLevel.hooks?.after_all, caseLevel.hooks?.after_all)
4830
+ };
4831
+ const hasHooks = Object.values(mergedHooks).some((hook) => hook !== void 0);
4806
4832
  return {
4807
4833
  template: caseLevel.template ?? suiteLevel.template,
4808
4834
  isolation: caseLevel.isolation ?? suiteLevel.isolation,
4809
4835
  repos: caseLevel.repos ?? suiteLevel.repos,
4810
- reset: caseLevel.reset ?? suiteLevel.reset,
4811
- before_all: caseLevel.before_all ?? suiteLevel.before_all,
4812
- after_all: caseLevel.after_all ?? suiteLevel.after_all,
4813
- before_each: caseLevel.before_each ?? suiteLevel.before_each,
4814
- after_each: caseLevel.after_each ?? suiteLevel.after_each
4836
+ ...hasHooks && { hooks: mergedHooks },
4837
+ mode: caseLevel.mode ?? suiteLevel.mode,
4838
+ static_path: caseLevel.static_path ?? suiteLevel.static_path,
4839
+ pool: caseLevel.pool ?? suiteLevel.pool
4815
4840
  };
4816
4841
  }
4817
4842
  function asString6(value) {
@@ -7047,6 +7072,7 @@ var CopilotCliProvider = class {
7047
7072
  const agentProcess = (0, import_node_child_process2.spawn)(executable, args, {
7048
7073
  stdio: ["pipe", "pipe", "inherit"]
7049
7074
  });
7075
+ await waitForProcessSpawn(agentProcess, executable, this.targetName);
7050
7076
  const toolCallsInProgress = /* @__PURE__ */ new Map();
7051
7077
  const completedToolCalls = [];
7052
7078
  let finalContent = "";
@@ -7326,6 +7352,47 @@ var CopilotCliProvider = class {
7326
7352
  }
7327
7353
  }
7328
7354
  };
7355
+ async function waitForProcessSpawn(proc, executable, targetName) {
7356
+ if (proc.pid) {
7357
+ return;
7358
+ }
7359
+ await new Promise((resolve, reject) => {
7360
+ const onSpawn = () => {
7361
+ cleanup();
7362
+ resolve();
7363
+ };
7364
+ const onError = (error) => {
7365
+ cleanup();
7366
+ reject(new Error(formatCopilotSpawnError(error, executable, targetName)));
7367
+ };
7368
+ const cleanup = () => {
7369
+ proc.off("spawn", onSpawn);
7370
+ proc.off("error", onError);
7371
+ };
7372
+ proc.once("spawn", onSpawn);
7373
+ proc.once("error", onError);
7374
+ });
7375
+ }
7376
+ function formatCopilotSpawnError(error, executable, targetName) {
7377
+ const code = error.code;
7378
+ const base = `Failed to start Copilot CLI executable '${executable}' for target '${targetName}'. ${error.message}`;
7379
+ if (process.platform !== "win32") {
7380
+ return base;
7381
+ }
7382
+ if (code !== "ENOENT" && code !== "EINVAL") {
7383
+ return base;
7384
+ }
7385
+ return `${base}
7386
+
7387
+ On Windows, shell commands like 'copilot -h' can work via .ps1/.bat shims, but AgentV launches a subprocess that needs a directly spawnable executable path.
7388
+
7389
+ Fix options:
7390
+ 1) Install native Copilot binary package:
7391
+ npm install -g @github/copilot-win32-x64
7392
+ 2) Set explicit executable for Copilot targets:
7393
+ - In .env: COPILOT_EXE=C:\\Users\\<you>\\AppData\\Roaming\\npm\\node_modules\\@github\\copilot-win32-x64\\copilot.exe
7394
+ - In .agentv/targets.yaml: executable: \${{ COPILOT_EXE }}`;
7395
+ }
7329
7396
  function summarizeAcpEvent(eventType, data) {
7330
7397
  if (!data || typeof data !== "object") {
7331
7398
  return eventType;
@@ -10077,9 +10144,6 @@ function getAgentvHome() {
10077
10144
  function getWorkspacesRoot() {
10078
10145
  return import_node_path23.default.join(getAgentvHome(), "workspaces");
10079
10146
  }
10080
- function getGitCacheRoot() {
10081
- return import_node_path23.default.join(getAgentvHome(), "git-cache");
10082
- }
10083
10147
  function getSubagentsRoot() {
10084
10148
  return import_node_path23.default.join(getAgentvHome(), "subagents");
10085
10149
  }
@@ -11539,16 +11603,16 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
11539
11603
  });
11540
11604
  }
11541
11605
  async function execShellWithStdin(command, stdinPayload, options = {}) {
11542
- const { mkdir: mkdir17, readFile: readFile14, rm: rm7, writeFile: writeFile10 } = await import("fs/promises");
11606
+ const { mkdir: mkdir16, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
11543
11607
  const { tmpdir: tmpdir3 } = await import("os");
11544
11608
  const path44 = await import("path");
11545
11609
  const { randomUUID: randomUUID8 } = await import("crypto");
11546
11610
  const dir = path44.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
11547
- await mkdir17(dir, { recursive: true });
11611
+ await mkdir16(dir, { recursive: true });
11548
11612
  const stdinPath = path44.join(dir, "stdin.txt");
11549
11613
  const stdoutPath = path44.join(dir, "stdout.txt");
11550
11614
  const stderrPath = path44.join(dir, "stderr.txt");
11551
- await writeFile10(stdinPath, stdinPayload, "utf8");
11615
+ await writeFile9(stdinPath, stdinPayload, "utf8");
11552
11616
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
11553
11617
  const { spawn: spawn4 } = await import("child_process");
11554
11618
  try {
@@ -11581,7 +11645,7 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
11581
11645
  const stderr = (await readFile14(stderrPath, "utf8")).replace(/\r\n/g, "\n");
11582
11646
  return { stdout, stderr, exitCode };
11583
11647
  } finally {
11584
- await rm7(dir, { recursive: true, force: true });
11648
+ await rm6(dir, { recursive: true, force: true });
11585
11649
  }
11586
11650
  }
11587
11651
 
@@ -14798,8 +14862,8 @@ function runEqualsAssertion(output, value) {
14798
14862
  }
14799
14863
 
14800
14864
  // src/evaluation/orchestrator.ts
14801
- var import_node_crypto10 = require("crypto");
14802
- var import_promises30 = require("fs/promises");
14865
+ var import_node_crypto9 = require("crypto");
14866
+ var import_promises29 = require("fs/promises");
14803
14867
  var import_node_path42 = __toESM(require("path"), 1);
14804
14868
  var import_micromatch4 = __toESM(require("micromatch"), 1);
14805
14869
 
@@ -15762,7 +15826,7 @@ var WorkspacePoolManager = class {
15762
15826
  * 7. Return the slot (with path, index, isExisting)
15763
15827
  */
15764
15828
  async acquireWorkspace(options) {
15765
- const { templatePath, repos, maxSlots, repoManager } = options;
15829
+ const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
15766
15830
  const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
15767
15831
  const poolDir = import_node_path39.default.join(this.poolRoot, fingerprint);
15768
15832
  await (0, import_promises27.mkdir)(poolDir, { recursive: true });
@@ -15782,7 +15846,7 @@ var WorkspacePoolManager = class {
15782
15846
  }
15783
15847
  const slotExists = (0, import_node_fs11.existsSync)(slotPath);
15784
15848
  if (slotExists) {
15785
- await this.resetSlot(slotPath, templatePath, repos);
15849
+ await this.resetSlot(slotPath, templatePath, repos, poolReset);
15786
15850
  return {
15787
15851
  index: i,
15788
15852
  path: slotPath,
@@ -15914,15 +15978,19 @@ var WorkspacePoolManager = class {
15914
15978
  * 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
15915
15979
  * 2. Re-copy template files (skip repo directories)
15916
15980
  */
15917
- async resetSlot(slotPath, templatePath, repos) {
15981
+ async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
15918
15982
  for (const repo of repos) {
15919
15983
  const repoDir = import_node_path39.default.join(slotPath, repo.path);
15920
15984
  if (!(0, import_node_fs11.existsSync)(repoDir)) {
15921
15985
  continue;
15922
15986
  }
15987
+ if (poolReset === "none") {
15988
+ continue;
15989
+ }
15923
15990
  const ref = repo.checkout?.ref ?? "HEAD";
15924
15991
  await git(["reset", "--hard", ref], { cwd: repoDir });
15925
- await git(["clean", "-fd"], { cwd: repoDir });
15992
+ const cleanFlag = poolReset === "strict" ? "-fdx" : "-fd";
15993
+ await git(["clean", cleanFlag], { cwd: repoDir });
15926
15994
  }
15927
15995
  if (templatePath) {
15928
15996
  const repoDirNames = new Set(
@@ -15938,14 +16006,10 @@ var WorkspacePoolManager = class {
15938
16006
 
15939
16007
  // src/evaluation/workspace/repo-manager.ts
15940
16008
  var import_node_child_process8 = require("child_process");
15941
- var import_node_crypto9 = require("crypto");
15942
- var import_node_fs12 = require("fs");
15943
- var import_promises28 = require("fs/promises");
15944
16009
  var import_node_path40 = __toESM(require("path"), 1);
15945
16010
  var import_node_util6 = require("util");
15946
16011
  var execFileAsync2 = (0, import_node_util6.promisify)(import_node_child_process8.execFile);
15947
16012
  var DEFAULT_TIMEOUT_MS2 = 3e5;
15948
- var LOCK_TIMEOUT_MS = 6e4;
15949
16013
  function gitEnv2() {
15950
16014
  const env = { ...process.env };
15951
16015
  for (const key of Object.keys(env)) {
@@ -15960,10 +16024,6 @@ function gitEnv2() {
15960
16024
  GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
15961
16025
  };
15962
16026
  }
15963
- function cacheKey(source) {
15964
- const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
15965
- return (0, import_node_crypto9.createHash)("sha256").update(raw).digest("hex");
15966
- }
15967
16027
  function getSourceUrl(source) {
15968
16028
  return source.type === "git" ? source.url : source.path;
15969
16029
  }
@@ -15977,33 +16037,9 @@ async function git2(args, opts) {
15977
16037
  });
15978
16038
  return stdout.trim();
15979
16039
  }
15980
- async function acquireLock(lockPath) {
15981
- const start = Date.now();
15982
- while (Date.now() - start < LOCK_TIMEOUT_MS) {
15983
- try {
15984
- await (0, import_promises28.writeFile)(lockPath, String(process.pid), { flag: "wx" });
15985
- return;
15986
- } catch (err) {
15987
- if (err.code === "EEXIST") {
15988
- await new Promise((r) => setTimeout(r, 200));
15989
- continue;
15990
- }
15991
- throw err;
15992
- }
15993
- }
15994
- throw new Error(`Timed out waiting for lock: ${lockPath}`);
15995
- }
15996
- async function releaseLock(lockPath) {
15997
- try {
15998
- await (0, import_promises28.unlink)(lockPath);
15999
- } catch {
16000
- }
16001
- }
16002
16040
  var RepoManager = class {
16003
- cacheDir;
16004
16041
  verbose;
16005
- constructor(cacheDir, verbose = false) {
16006
- this.cacheDir = cacheDir ?? getGitCacheRoot();
16042
+ constructor(verbose = false) {
16007
16043
  this.verbose = verbose;
16008
16044
  }
16009
16045
  async runGit(args, opts) {
@@ -16028,86 +16064,18 @@ var RepoManager = class {
16028
16064
  }
16029
16065
  }
16030
16066
  /**
16031
- * Ensure a bare mirror cache exists for the given source.
16032
- * Creates on first access, fetches updates on subsequent calls.
16033
- * Returns the absolute path to the cache directory.
16034
- */
16035
- async ensureCache(source, depth, resolve) {
16036
- const key = cacheKey(source);
16037
- const cachePath = import_node_path40.default.join(this.cacheDir, key);
16038
- const lockPath = `${cachePath}.lock`;
16039
- const cacheExists = (0, import_node_fs12.existsSync)(import_node_path40.default.join(cachePath, "HEAD"));
16040
- if (this.verbose) {
16041
- console.log(
16042
- `[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve ?? "remote"} cache=${cachePath}`
16043
- );
16044
- }
16045
- if (resolve === "local") {
16046
- if (cacheExists) {
16047
- if (this.verbose) {
16048
- console.log(`[repo] using existing local cache ${cachePath}`);
16049
- }
16050
- return cachePath;
16051
- }
16052
- const url = getSourceUrl(source);
16053
- throw new Error(
16054
- `No cache found for \`${url}\`. Run \`agentv cache add --url ${url} --from <local-path>\` to seed it.`
16055
- );
16056
- }
16057
- await (0, import_promises28.mkdir)(this.cacheDir, { recursive: true });
16058
- const lockStartedAt = Date.now();
16059
- await acquireLock(lockPath);
16060
- if (this.verbose) {
16061
- console.log(`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`);
16062
- }
16063
- try {
16064
- if (cacheExists) {
16065
- if (this.verbose) {
16066
- console.log(`[repo] refreshing existing cache ${cachePath}`);
16067
- }
16068
- const fetchArgs = ["fetch", "--prune"];
16069
- if (depth) {
16070
- fetchArgs.push("--depth", String(depth));
16071
- }
16072
- await this.runGit(fetchArgs, { cwd: cachePath });
16073
- } else {
16074
- if (this.verbose) {
16075
- console.log(`[repo] creating new cache ${cachePath}`);
16076
- }
16077
- const cloneArgs = ["clone", "--mirror", "--bare"];
16078
- if (depth) {
16079
- cloneArgs.push("--depth", String(depth));
16080
- }
16081
- const sourceUrl = getSourceUrl(source);
16082
- const cloneUrl = depth && source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
16083
- cloneArgs.push(cloneUrl, cachePath);
16084
- await this.runGit(cloneArgs);
16085
- }
16086
- } finally {
16087
- await releaseLock(lockPath);
16088
- if (this.verbose) {
16089
- console.log(`[repo] lock released path=${lockPath}`);
16090
- }
16091
- }
16092
- return cachePath;
16093
- }
16094
- /**
16095
- * Clone a repo from cache into the workspace at the configured path.
16067
+ * Clone a repo directly from source into the workspace at the configured path.
16096
16068
  * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
16097
16069
  */
16098
16070
  async materialize(repo, workspacePath) {
16099
16071
  const targetDir = import_node_path40.default.join(workspacePath, repo.path);
16072
+ const sourceUrl = getSourceUrl(repo.source);
16100
16073
  const startedAt = Date.now();
16101
16074
  if (this.verbose) {
16102
16075
  console.log(
16103
- `[repo] materialize start path=${repo.path} source=${getSourceUrl(repo.source)} workspace=${workspacePath}`
16076
+ `[repo] materialize start path=${repo.path} source=${sourceUrl} workspace=${workspacePath}`
16104
16077
  );
16105
16078
  }
16106
- const cachePath = await this.ensureCache(
16107
- repo.source,
16108
- repo.clone?.depth,
16109
- repo.checkout?.resolve
16110
- );
16111
16079
  const cloneArgs = ["clone"];
16112
16080
  if (repo.clone?.depth) {
16113
16081
  cloneArgs.push("--depth", String(repo.clone.depth));
@@ -16116,7 +16084,7 @@ var RepoManager = class {
16116
16084
  cloneArgs.push("--filter", repo.clone.filter);
16117
16085
  }
16118
16086
  cloneArgs.push("--no-checkout");
16119
- const cloneUrl = repo.clone?.depth || repo.clone?.filter ? `file://${cachePath}` : cachePath;
16087
+ const cloneUrl = (repo.clone?.depth || repo.clone?.filter) && repo.source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
16120
16088
  cloneArgs.push(cloneUrl, targetDir);
16121
16089
  await this.runGit(cloneArgs);
16122
16090
  if (repo.clone?.sparse?.length) {
@@ -16188,63 +16156,25 @@ var RepoManager = class {
16188
16156
  }
16189
16157
  }
16190
16158
  /** Reset repos in workspace to their checkout state. */
16191
- async reset(repos, workspacePath, strategy) {
16192
- if (strategy === "recreate") {
16193
- for (const repo of repos) {
16194
- const targetDir = import_node_path40.default.join(workspacePath, repo.path);
16195
- await (0, import_promises28.rm)(targetDir, { recursive: true, force: true });
16196
- }
16197
- await this.materializeAll(repos, workspacePath);
16198
- return;
16199
- }
16159
+ async reset(repos, workspacePath, reset) {
16160
+ const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
16200
16161
  for (const repo of repos) {
16201
16162
  const targetDir = import_node_path40.default.join(workspacePath, repo.path);
16202
16163
  await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
16203
- await this.runGit(["clean", "-fd"], { cwd: targetDir });
16204
- }
16205
- }
16206
- /**
16207
- * Seed the cache from a local repository, setting the remote to a given URL.
16208
- * Useful for avoiding slow network clones when a local clone already exists.
16209
- */
16210
- async seedCache(localPath, remoteUrl, opts) {
16211
- const source = { type: "git", url: remoteUrl };
16212
- const key = cacheKey(source);
16213
- const cachePath = import_node_path40.default.join(this.cacheDir, key);
16214
- const lockPath = `${cachePath}.lock`;
16215
- await (0, import_promises28.mkdir)(this.cacheDir, { recursive: true });
16216
- await acquireLock(lockPath);
16217
- try {
16218
- if ((0, import_node_fs12.existsSync)(import_node_path40.default.join(cachePath, "HEAD"))) {
16219
- if (!opts?.force) {
16220
- throw new Error(
16221
- `Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
16222
- );
16223
- }
16224
- await (0, import_promises28.rm)(cachePath, { recursive: true, force: true });
16225
- }
16226
- await git2(["clone", "--mirror", "--bare", localPath, cachePath]);
16227
- await git2(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
16228
- } finally {
16229
- await releaseLock(lockPath);
16164
+ await this.runGit(["clean", cleanFlag], { cwd: targetDir });
16230
16165
  }
16231
- return cachePath;
16232
- }
16233
- /** Remove the entire cache directory. */
16234
- async cleanCache() {
16235
- await (0, import_promises28.rm)(this.cacheDir, { recursive: true, force: true });
16236
16166
  }
16237
16167
  };
16238
16168
 
16239
16169
  // src/evaluation/workspace/resolve.ts
16240
- var import_promises29 = require("fs/promises");
16170
+ var import_promises28 = require("fs/promises");
16241
16171
  var import_node_path41 = __toESM(require("path"), 1);
16242
16172
  async function resolveWorkspaceTemplate(templatePath) {
16243
16173
  if (!templatePath) {
16244
16174
  return void 0;
16245
16175
  }
16246
16176
  const resolved = import_node_path41.default.resolve(templatePath);
16247
- const stats = await (0, import_promises29.stat)(resolved);
16177
+ const stats = await (0, import_promises28.stat)(resolved);
16248
16178
  if (stats.isFile()) {
16249
16179
  return {
16250
16180
  dir: import_node_path41.default.dirname(resolved),
@@ -16254,7 +16184,7 @@ async function resolveWorkspaceTemplate(templatePath) {
16254
16184
  if (!stats.isDirectory()) {
16255
16185
  throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
16256
16186
  }
16257
- const entries = await (0, import_promises29.readdir)(resolved);
16187
+ const entries = await (0, import_promises28.readdir)(resolved);
16258
16188
  const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
16259
16189
  if (workspaceFiles.length === 1) {
16260
16190
  return {
@@ -16318,6 +16248,22 @@ function classifyQualityStatus(score) {
16318
16248
  function usesFileReferencePrompt(provider) {
16319
16249
  return isAgentProvider(provider) || provider.kind === "cli";
16320
16250
  }
16251
+ function toScriptConfig(hook, hookName, context2) {
16252
+ const command = hook.command ?? hook.script;
16253
+ if (!command || command.length === 0) {
16254
+ throw new Error(`${hookName} hook in ${context2} requires command or script`);
16255
+ }
16256
+ return {
16257
+ command,
16258
+ ...hook.timeout_ms !== void 0 && { timeout_ms: hook.timeout_ms },
16259
+ ...hook.timeoutMs !== void 0 && { timeoutMs: hook.timeoutMs },
16260
+ ...hook.cwd !== void 0 && { cwd: hook.cwd },
16261
+ ...hook.script !== void 0 && { script: hook.script }
16262
+ };
16263
+ }
16264
+ function hasHookCommand(hook) {
16265
+ return !!(hook?.command && hook.command.length > 0 || hook?.script && hook.script.length > 0);
16266
+ }
16321
16267
  function getWorkspaceTemplate(target) {
16322
16268
  const config = target.config;
16323
16269
  if ("workspaceTemplate" in config && typeof config.workspaceTemplate === "string") {
@@ -16351,7 +16297,12 @@ async function runEvaluation(options) {
16351
16297
  failOnError,
16352
16298
  poolWorkspaces,
16353
16299
  poolMaxSlots: configPoolMaxSlots,
16354
- workspace: userWorkspacePath
16300
+ workspace: legacyWorkspacePath,
16301
+ workspaceMode,
16302
+ workspacePath,
16303
+ workspaceClean,
16304
+ retainOnSuccess,
16305
+ retainOnFailure
16355
16306
  } = options;
16356
16307
  let useCache = options.useCache;
16357
16308
  if (trials && trials.count > 1 && useCache) {
@@ -16360,7 +16311,7 @@ async function runEvaluation(options) {
16360
16311
  );
16361
16312
  useCache = false;
16362
16313
  }
16363
- const evalRunId = (0, import_node_crypto10.randomUUID)();
16314
+ const evalRunId = (0, import_node_crypto9.randomUUID)();
16364
16315
  const evalCases = preloadedEvalCases ?? await loadTests(evalFilePath, repoRoot, { verbose, filter });
16365
16316
  const filteredEvalCases = filterEvalCases(evalCases, filter);
16366
16317
  if (filteredEvalCases.length === 0) {
@@ -16487,13 +16438,22 @@ async function runEvaluation(options) {
16487
16438
  }
16488
16439
  };
16489
16440
  const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
16490
- if (userWorkspacePath && isPerTestIsolation) {
16441
+ const configuredMode = suiteWorkspace?.mode ?? workspaceMode;
16442
+ const configuredStaticPath = suiteWorkspace?.static_path ?? workspacePath ?? legacyWorkspacePath;
16443
+ const useStaticWorkspace = configuredMode === "static" || !!configuredStaticPath && !configuredMode;
16444
+ if (useStaticWorkspace && isPerTestIsolation) {
16491
16445
  throw new Error(
16492
- "--workspace is incompatible with isolation: per_test. Use isolation: shared (default)."
16446
+ "static workspace mode is incompatible with isolation: per_test. Use isolation: shared (default)."
16493
16447
  );
16494
16448
  }
16495
- const hasSharedWorkspace = !!(userWorkspacePath || workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
16496
- const usePool = poolWorkspaces === true && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !userWorkspacePath;
16449
+ if (configuredMode === "static" && !configuredStaticPath) {
16450
+ throw new Error("workspace.mode=static requires workspace.static_path or --workspace-path");
16451
+ }
16452
+ const hasSharedWorkspace = !!(useStaticWorkspace || workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation);
16453
+ const poolEnabled = configuredMode === "pooled" ? true : configuredMode === "ephemeral" || useStaticWorkspace ? false : suiteWorkspace?.pool ?? poolWorkspaces ?? true;
16454
+ const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
16455
+ const resolvedRetainOnSuccess = retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
16456
+ const resolvedRetainOnFailure = retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
16497
16457
  const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
16498
16458
  const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
16499
16459
  setupLog(
@@ -16514,20 +16474,21 @@ async function runEvaluation(options) {
16514
16474
  const availablePoolSlots = [];
16515
16475
  const poolSlotBaselines = /* @__PURE__ */ new Map();
16516
16476
  const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
16517
- if (userWorkspacePath) {
16518
- sharedWorkspacePath = userWorkspacePath;
16519
- setupLog(`using user-provided workspace: ${userWorkspacePath}`);
16477
+ if (useStaticWorkspace && configuredStaticPath) {
16478
+ sharedWorkspacePath = configuredStaticPath;
16479
+ setupLog(`using static workspace: ${configuredStaticPath}`);
16520
16480
  } else if (usePool && suiteWorkspace?.repos) {
16521
16481
  const slotsNeeded = workers;
16522
16482
  setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
16523
16483
  poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
16524
- const poolRepoManager = new RepoManager(void 0, verbose);
16484
+ const poolRepoManager = new RepoManager(verbose);
16525
16485
  for (let i = 0; i < slotsNeeded; i++) {
16526
16486
  const slot = await poolManager.acquireWorkspace({
16527
16487
  templatePath: workspaceTemplate,
16528
16488
  repos: suiteWorkspace.repos,
16529
16489
  maxSlots: poolMaxSlots,
16530
- repoManager: poolRepoManager
16490
+ repoManager: poolRepoManager,
16491
+ poolReset: (workspaceClean === "full" ? "strict" : workspaceClean === "standard" ? "fast" : null) ?? "fast"
16531
16492
  });
16532
16493
  poolSlots.push(slot);
16533
16494
  setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
@@ -16547,21 +16508,21 @@ async function runEvaluation(options) {
16547
16508
  const message = error instanceof Error ? error.message : String(error);
16548
16509
  throw new Error(`Failed to create shared workspace: ${message}`);
16549
16510
  }
16550
- } else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
16511
+ } else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
16551
16512
  sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
16552
- await (0, import_promises30.mkdir)(sharedWorkspacePath, { recursive: true });
16513
+ await (0, import_promises29.mkdir)(sharedWorkspacePath, { recursive: true });
16553
16514
  setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
16554
16515
  }
16555
16516
  try {
16556
16517
  if (suiteWorkspaceFile && sharedWorkspacePath) {
16557
16518
  const copiedWorkspaceFile = import_node_path42.default.join(sharedWorkspacePath, import_node_path42.default.basename(suiteWorkspaceFile));
16558
16519
  try {
16559
- await (0, import_promises30.stat)(copiedWorkspaceFile);
16520
+ await (0, import_promises29.stat)(copiedWorkspaceFile);
16560
16521
  suiteWorkspaceFile = copiedWorkspaceFile;
16561
16522
  } catch {
16562
16523
  }
16563
16524
  }
16564
- const repoManager = suiteWorkspace?.repos?.length && !usePool && !userWorkspacePath ? new RepoManager(void 0, verbose) : void 0;
16525
+ const repoManager = suiteWorkspace?.repos?.length && !usePool && !useStaticWorkspace ? new RepoManager(verbose) : void 0;
16565
16526
  if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
16566
16527
  setupLog(
16567
16528
  `materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
@@ -16571,17 +16532,19 @@ async function runEvaluation(options) {
16571
16532
  setupLog("shared repo materialization complete");
16572
16533
  } catch (error) {
16573
16534
  const message = error instanceof Error ? error.message : String(error);
16574
- if (sharedWorkspacePath && !userWorkspacePath) {
16535
+ if (sharedWorkspacePath && !useStaticWorkspace) {
16575
16536
  await cleanupWorkspace(sharedWorkspacePath).catch(() => {
16576
16537
  });
16577
16538
  }
16578
16539
  throw new Error(`Failed to materialize repos: ${message}`);
16579
16540
  }
16580
16541
  }
16581
- if (sharedWorkspacePath && suiteWorkspace?.before_all) {
16582
- const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
16542
+ const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all;
16543
+ if (sharedWorkspacePath && hasHookCommand(suiteBeforeAllHook)) {
16544
+ const beforeAllHook = suiteBeforeAllHook;
16545
+ const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
16583
16546
  setupLog(
16584
- `running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
16547
+ `running shared before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
16585
16548
  );
16586
16549
  const scriptContext = {
16587
16550
  workspacePath: sharedWorkspacePath,
@@ -16590,18 +16553,22 @@ async function runEvaluation(options) {
16590
16553
  evalDir
16591
16554
  };
16592
16555
  try {
16593
- beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
16556
+ beforeAllOutput = await executeWorkspaceScript(
16557
+ toScriptConfig(beforeAllHook, "before_all", "suite workspace"),
16558
+ scriptContext
16559
+ );
16594
16560
  setupLog("shared before_all completed");
16595
16561
  } catch (error) {
16596
16562
  const message = error instanceof Error ? error.message : String(error);
16597
- if (sharedWorkspacePath && !userWorkspacePath) {
16563
+ if (sharedWorkspacePath && !useStaticWorkspace) {
16598
16564
  await cleanupWorkspace(sharedWorkspacePath).catch(() => {
16599
16565
  });
16600
16566
  }
16601
16567
  throw new Error(`before_all script failed: ${message}`);
16602
16568
  }
16603
16569
  }
16604
- if (availablePoolSlots.length > 0 && suiteWorkspace?.before_all) {
16570
+ if (availablePoolSlots.length > 0 && hasHookCommand(suiteBeforeAllHook)) {
16571
+ const beforeAllHook = suiteBeforeAllHook;
16605
16572
  for (const slot of availablePoolSlots) {
16606
16573
  setupLog(`running before_all on pool slot ${slot.index}`);
16607
16574
  const scriptContext = {
@@ -16611,7 +16578,10 @@ async function runEvaluation(options) {
16611
16578
  evalDir
16612
16579
  };
16613
16580
  try {
16614
- const output = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
16581
+ const output = await executeWorkspaceScript(
16582
+ toScriptConfig(beforeAllHook, "before_all", "suite workspace"),
16583
+ scriptContext
16584
+ );
16615
16585
  if (!beforeAllOutput) beforeAllOutput = output;
16616
16586
  setupLog(`before_all completed on pool slot ${slot.index}`);
16617
16587
  } catch (error) {
@@ -16743,6 +16713,8 @@ async function runEvaluation(options) {
16743
16713
  evalRunId,
16744
16714
  keepWorkspaces,
16745
16715
  cleanupWorkspaces,
16716
+ retainOnSuccess: resolvedRetainOnSuccess,
16717
+ retainOnFailure: resolvedRetainOnFailure,
16746
16718
  sharedWorkspacePath: testWorkspacePath,
16747
16719
  sharedBaselineCommit: testBaselineCommit,
16748
16720
  suiteWorkspaceFile,
@@ -16836,7 +16808,9 @@ async function runEvaluation(options) {
16836
16808
  }
16837
16809
  }
16838
16810
  const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
16839
- if (afterAllWorkspaces.length > 0 && suiteWorkspace?.after_all) {
16811
+ const suiteAfterAllHook = suiteWorkspace?.hooks?.after_all;
16812
+ if (afterAllWorkspaces.length > 0 && hasHookCommand(suiteAfterAllHook)) {
16813
+ const afterAllHook = suiteAfterAllHook;
16840
16814
  for (const wsPath of afterAllWorkspaces) {
16841
16815
  const scriptContext = {
16842
16816
  workspacePath: wsPath,
@@ -16846,7 +16820,7 @@ async function runEvaluation(options) {
16846
16820
  };
16847
16821
  try {
16848
16822
  const afterAllOutput = await executeWorkspaceScript(
16849
- suiteWorkspace.after_all,
16823
+ toScriptConfig(afterAllHook, "after_all", "suite workspace"),
16850
16824
  scriptContext,
16851
16825
  "warn"
16852
16826
  );
@@ -16857,12 +16831,14 @@ async function runEvaluation(options) {
16857
16831
  }
16858
16832
  }
16859
16833
  }
16860
- if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !userWorkspacePath) {
16834
+ if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !useStaticWorkspace) {
16861
16835
  const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
16862
- if (cleanupWorkspaces) {
16863
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
16864
- });
16865
- } else if (!hasFailure && !keepWorkspaces) {
16836
+ if (hasFailure) {
16837
+ if (resolvedRetainOnFailure === "cleanup") {
16838
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
16839
+ });
16840
+ }
16841
+ } else if (resolvedRetainOnSuccess === "cleanup") {
16866
16842
  await cleanupWorkspace(sharedWorkspacePath).catch(() => {
16867
16843
  });
16868
16844
  }
@@ -17056,6 +17032,8 @@ async function runEvalCase(options) {
17056
17032
  evalRunId,
17057
17033
  keepWorkspaces,
17058
17034
  cleanupWorkspaces: forceCleanup,
17035
+ retainOnSuccess,
17036
+ retainOnFailure,
17059
17037
  sharedWorkspacePath,
17060
17038
  sharedBaselineCommit,
17061
17039
  suiteWorkspaceFile,
@@ -17067,10 +17045,10 @@ async function runEvalCase(options) {
17067
17045
  const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
17068
17046
  const promptInputs = await buildPromptInputs(evalCase, formattingMode);
17069
17047
  const typeRegistry = providedTypeRegistry ?? createBuiltinRegistry();
17070
- const cacheKey2 = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
17048
+ const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
17071
17049
  let cachedResponse;
17072
- if (cacheKey2 && cache) {
17073
- cachedResponse = await cache.get(cacheKey2);
17050
+ if (cacheKey && cache) {
17051
+ cachedResponse = await cache.get(cacheKey);
17074
17052
  }
17075
17053
  const nowFn = now ?? (() => /* @__PURE__ */ new Date());
17076
17054
  let workspacePath = sharedWorkspacePath;
@@ -17103,18 +17081,18 @@ async function runEvalCase(options) {
17103
17081
  if (caseWorkspaceFile && workspacePath) {
17104
17082
  const copiedFile = import_node_path42.default.join(workspacePath, import_node_path42.default.basename(caseWorkspaceFile));
17105
17083
  try {
17106
- await (0, import_promises30.stat)(copiedFile);
17084
+ await (0, import_promises29.stat)(copiedFile);
17107
17085
  caseWorkspaceFile = copiedFile;
17108
17086
  } catch {
17109
17087
  }
17110
17088
  }
17111
17089
  }
17112
- if (!workspacePath && (evalCase.workspace?.before_all || evalCase.workspace?.repos?.length) && evalRunId) {
17090
+ if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
17113
17091
  workspacePath = getWorkspacePath(evalRunId, evalCase.id);
17114
- await (0, import_promises30.mkdir)(workspacePath, { recursive: true });
17092
+ await (0, import_promises29.mkdir)(workspacePath, { recursive: true });
17115
17093
  }
17116
17094
  if (evalCase.workspace?.repos?.length && workspacePath) {
17117
- const perCaseRepoManager = new RepoManager(void 0, setupDebug);
17095
+ const perCaseRepoManager = new RepoManager(setupDebug);
17118
17096
  try {
17119
17097
  if (setupDebug) {
17120
17098
  console.log(
@@ -17139,11 +17117,13 @@ async function runEvalCase(options) {
17139
17117
  );
17140
17118
  }
17141
17119
  }
17142
- if (workspacePath && evalCase.workspace?.before_all) {
17143
- const beforeAllCommand = (evalCase.workspace.before_all.command ?? evalCase.workspace.before_all.script ?? []).join(" ");
17120
+ const caseBeforeAllHook = evalCase.workspace?.hooks?.before_all;
17121
+ if (workspacePath && hasHookCommand(caseBeforeAllHook)) {
17122
+ const beforeAllHook = caseBeforeAllHook;
17123
+ const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
17144
17124
  if (setupDebug) {
17145
17125
  console.log(
17146
- `[setup] test=${evalCase.id} running before_all in cwd=${evalCase.workspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
17126
+ `[setup] test=${evalCase.id} running before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
17147
17127
  );
17148
17128
  }
17149
17129
  const scriptContext = {
@@ -17156,7 +17136,7 @@ async function runEvalCase(options) {
17156
17136
  };
17157
17137
  try {
17158
17138
  beforeAllOutput = await executeWorkspaceScript(
17159
- evalCase.workspace.before_all,
17139
+ toScriptConfig(beforeAllHook, "before_all", `test '${evalCase.id}'`),
17160
17140
  scriptContext
17161
17141
  );
17162
17142
  if (setupDebug) {
@@ -17181,7 +17161,9 @@ async function runEvalCase(options) {
17181
17161
  }
17182
17162
  }
17183
17163
  }
17184
- if (workspacePath && evalCase.workspace?.before_each) {
17164
+ const caseBeforeEachHook = evalCase.workspace?.hooks?.before_each;
17165
+ if (workspacePath && hasHookCommand(caseBeforeEachHook)) {
17166
+ const beforeEachHook = caseBeforeEachHook;
17185
17167
  const scriptContext = {
17186
17168
  workspacePath,
17187
17169
  testId: evalCase.id,
@@ -17192,7 +17174,7 @@ async function runEvalCase(options) {
17192
17174
  };
17193
17175
  try {
17194
17176
  beforeEachOutput = await executeWorkspaceScript(
17195
- evalCase.workspace.before_each,
17177
+ toScriptConfig(beforeEachHook, "before_each", `test '${evalCase.id}'`),
17196
17178
  scriptContext
17197
17179
  );
17198
17180
  } catch (error) {
@@ -17280,8 +17262,8 @@ async function runEvalCase(options) {
17280
17262
  }
17281
17263
  return errorResult;
17282
17264
  }
17283
- if (cacheKey2 && cache && !cachedResponse) {
17284
- await cache.set(cacheKey2, providerResponse);
17265
+ if (cacheKey && cache && !cachedResponse) {
17266
+ await cache.set(cacheKey, providerResponse);
17285
17267
  }
17286
17268
  const output = providerResponse.output;
17287
17269
  const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
@@ -17309,17 +17291,19 @@ async function runEvalCase(options) {
17309
17291
  }
17310
17292
  }
17311
17293
  const providerError = extractProviderError(providerResponse);
17312
- if (repoManager && workspacePath && evalCase.workspace?.reset?.after_each && evalCase.workspace.reset.strategy && evalCase.workspace.reset.strategy !== "none" && evalCase.workspace.repos) {
17294
+ if (repoManager && workspacePath && evalCase.workspace?.hooks?.after_each?.reset && evalCase.workspace.hooks.after_each.reset !== "none" && evalCase.workspace.repos) {
17313
17295
  try {
17314
17296
  await repoManager.reset(
17315
17297
  evalCase.workspace.repos,
17316
17298
  workspacePath,
17317
- evalCase.workspace.reset.strategy
17299
+ evalCase.workspace.hooks.after_each.reset
17318
17300
  );
17319
17301
  } catch {
17320
17302
  }
17321
17303
  }
17322
- if (workspacePath && evalCase.workspace?.after_each) {
17304
+ const caseAfterEachHook = evalCase.workspace?.hooks?.after_each;
17305
+ if (workspacePath && hasHookCommand(caseAfterEachHook)) {
17306
+ const afterEachHook = caseAfterEachHook;
17323
17307
  const scriptContext = {
17324
17308
  workspacePath,
17325
17309
  testId: evalCase.id,
@@ -17330,7 +17314,7 @@ async function runEvalCase(options) {
17330
17314
  };
17331
17315
  try {
17332
17316
  afterEachOutput = await executeWorkspaceScript(
17333
- evalCase.workspace.after_each,
17317
+ toScriptConfig(afterEachHook, "after_each", `test '${evalCase.id}'`),
17334
17318
  scriptContext,
17335
17319
  "warn"
17336
17320
  );
@@ -17380,8 +17364,13 @@ async function runEvalCase(options) {
17380
17364
  await cleanupWorkspace(workspacePath).catch(() => {
17381
17365
  });
17382
17366
  } else if (isFailure) {
17383
- return { ...finalResult, workspacePath };
17384
- } else if (!keepWorkspaces) {
17367
+ if ((retainOnFailure ?? "keep") === "cleanup") {
17368
+ await cleanupWorkspace(workspacePath).catch(() => {
17369
+ });
17370
+ } else {
17371
+ return { ...finalResult, workspacePath };
17372
+ }
17373
+ } else if ((retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup")) !== "keep") {
17385
17374
  await cleanupWorkspace(workspacePath).catch(() => {
17386
17375
  });
17387
17376
  }
@@ -17399,11 +17388,12 @@ async function runEvalCase(options) {
17399
17388
  "evaluator_error"
17400
17389
  );
17401
17390
  if (workspacePath && !isSharedWorkspace) {
17402
- if (forceCleanup) {
17391
+ if (forceCleanup || (retainOnFailure ?? "keep") === "cleanup") {
17403
17392
  await cleanupWorkspace(workspacePath).catch(() => {
17404
17393
  });
17394
+ } else {
17395
+ return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
17405
17396
  }
17406
- return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
17407
17397
  }
17408
17398
  return { ...errorResult, beforeEachOutput, afterEachOutput };
17409
17399
  }
@@ -17422,7 +17412,9 @@ async function runEvalCaseWithTrials(options, trialsConfig) {
17422
17412
  useCache: false,
17423
17413
  // Force cleanup for intermediate trials
17424
17414
  cleanupWorkspaces: isLastDeclaredTrial ? options.cleanupWorkspaces : true,
17425
- keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false
17415
+ keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false,
17416
+ retainOnSuccess: isLastDeclaredTrial ? options.retainOnSuccess : "cleanup",
17417
+ retainOnFailure: isLastDeclaredTrial ? options.retainOnFailure : "cleanup"
17426
17418
  };
17427
17419
  const result = await runEvalCase(trialOptions);
17428
17420
  allResults.push(result);
@@ -17945,7 +17937,7 @@ function extractProviderError(response) {
17945
17937
  return trimmed.length > 0 ? trimmed : void 0;
17946
17938
  }
17947
17939
  function createCacheKey(provider, target, evalCase, promptInputs) {
17948
- const hash = (0, import_node_crypto10.createHash)("sha256");
17940
+ const hash = (0, import_node_crypto9.createHash)("sha256");
17949
17941
  hash.update(provider.id);
17950
17942
  hash.update(target.name);
17951
17943
  hash.update(evalCase.id);
@@ -18013,7 +18005,7 @@ function computeWeightedMean(entries) {
18013
18005
  }
18014
18006
 
18015
18007
  // src/evaluation/evaluate.ts
18016
- var import_node_fs13 = require("fs");
18008
+ var import_node_fs12 = require("fs");
18017
18009
  var import_node_path43 = __toESM(require("path"), 1);
18018
18010
  async function evaluate(config) {
18019
18011
  const startTime = Date.now();
@@ -18132,7 +18124,7 @@ async function discoverDefaultTarget(repoRoot) {
18132
18124
  for (const dir of chain) {
18133
18125
  for (const candidate of TARGET_FILE_CANDIDATES) {
18134
18126
  const targetsPath = import_node_path43.default.join(dir, candidate);
18135
- if (!(0, import_node_fs13.existsSync)(targetsPath)) continue;
18127
+ if (!(0, import_node_fs12.existsSync)(targetsPath)) continue;
18136
18128
  try {
18137
18129
  const definitions = await readTargetDefinitions(targetsPath);
18138
18130
  const defaultTarget = definitions.find((d) => d.name === "default");
@@ -18150,7 +18142,7 @@ async function loadEnvHierarchy(repoRoot) {
18150
18142
  const envFiles = [];
18151
18143
  for (const dir of chain) {
18152
18144
  const envPath = import_node_path43.default.join(dir, ".env");
18153
- if ((0, import_node_fs13.existsSync)(envPath)) envFiles.push(envPath);
18145
+ if ((0, import_node_fs12.existsSync)(envPath)) envFiles.push(envPath);
18154
18146
  }
18155
18147
  for (let i = envFiles.length - 1; i >= 0; i--) {
18156
18148
  try {
@@ -18228,12 +18220,12 @@ var CONFIG_FILE_NAMES = [
18228
18220
  ".agentv/config.js"
18229
18221
  ];
18230
18222
  async function loadTsConfig(projectRoot) {
18231
- const { existsSync: existsSync5 } = await import("fs");
18223
+ const { existsSync: existsSync4 } = await import("fs");
18232
18224
  const { pathToFileURL } = await import("url");
18233
18225
  const { join: join2 } = await import("path");
18234
18226
  for (const fileName of CONFIG_FILE_NAMES) {
18235
18227
  const filePath = join2(projectRoot, fileName);
18236
- if (!existsSync5(filePath)) {
18228
+ if (!existsSync4(filePath)) {
18237
18229
  continue;
18238
18230
  }
18239
18231
  try {
@@ -18330,7 +18322,7 @@ function buildPrompt(criteria, question, referenceAnswer) {
18330
18322
  }
18331
18323
 
18332
18324
  // src/evaluation/cache/response-cache.ts
18333
- var import_promises31 = require("fs/promises");
18325
+ var import_promises30 = require("fs/promises");
18334
18326
  var import_node_path44 = __toESM(require("path"), 1);
18335
18327
  var DEFAULT_CACHE_PATH = ".agentv/cache";
18336
18328
  var ResponseCache = class {
@@ -18341,7 +18333,7 @@ var ResponseCache = class {
18341
18333
  async get(key) {
18342
18334
  const filePath = this.keyToPath(key);
18343
18335
  try {
18344
- const data = await (0, import_promises31.readFile)(filePath, "utf8");
18336
+ const data = await (0, import_promises30.readFile)(filePath, "utf8");
18345
18337
  return JSON.parse(data);
18346
18338
  } catch {
18347
18339
  return void 0;
@@ -18350,8 +18342,8 @@ var ResponseCache = class {
18350
18342
  async set(key, value) {
18351
18343
  const filePath = this.keyToPath(key);
18352
18344
  const dir = import_node_path44.default.dirname(filePath);
18353
- await (0, import_promises31.mkdir)(dir, { recursive: true });
18354
- await (0, import_promises31.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
18345
+ await (0, import_promises30.mkdir)(dir, { recursive: true });
18346
+ await (0, import_promises30.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
18355
18347
  }
18356
18348
  keyToPath(key) {
18357
18349
  const prefix = key.slice(0, 2);
@@ -18890,7 +18882,6 @@ function createAgentKernel() {
18890
18882
  freeformEvaluationSchema,
18891
18883
  generateRubrics,
18892
18884
  getAgentvHome,
18893
- getGitCacheRoot,
18894
18885
  getHitCount,
18895
18886
  getSubagentsRoot,
18896
18887
  getTraceStateRoot,