@agentv/core 2.15.0 → 2.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1244,11 +1244,11 @@ function serializeAttributeValue(value) {
1244
1244
  if (Array.isArray(value)) return { arrayValue: { values: value.map(serializeAttributeValue) } };
1245
1245
  return { stringValue: String(value) };
1246
1246
  }
1247
- var import_promises32, import_node_path45, OtlpJsonFileExporter;
1247
+ var import_promises31, import_node_path45, OtlpJsonFileExporter;
1248
1248
  var init_otlp_json_file_exporter = __esm({
1249
1249
  "src/observability/otlp-json-file-exporter.ts"() {
1250
1250
  "use strict";
1251
- import_promises32 = require("fs/promises");
1251
+ import_promises31 = require("fs/promises");
1252
1252
  import_node_path45 = require("path");
1253
1253
  OtlpJsonFileExporter = class {
1254
1254
  // biome-ignore lint/suspicious/noExplicitAny: serialized span data
@@ -1288,7 +1288,7 @@ var init_otlp_json_file_exporter = __esm({
1288
1288
  }
1289
1289
  async flush() {
1290
1290
  if (this.spans.length === 0) return;
1291
- await (0, import_promises32.mkdir)((0, import_node_path45.dirname)(this.filePath), { recursive: true });
1291
+ await (0, import_promises31.mkdir)((0, import_node_path45.dirname)(this.filePath), { recursive: true });
1292
1292
  const otlpJson = {
1293
1293
  resourceSpans: [
1294
1294
  {
@@ -1302,8 +1302,8 @@ var init_otlp_json_file_exporter = __esm({
1302
1302
  }
1303
1303
  ]
1304
1304
  };
1305
- const { writeFile: writeFile10 } = await import("fs/promises");
1306
- await writeFile10(this.filePath, JSON.stringify(otlpJson, null, 2));
1305
+ const { writeFile: writeFile9 } = await import("fs/promises");
1306
+ await writeFile9(this.filePath, JSON.stringify(otlpJson, null, 2));
1307
1307
  }
1308
1308
  };
1309
1309
  }
@@ -1319,12 +1319,12 @@ function hrTimeDiffMs(start, end) {
1319
1319
  const diffNano = end[1] - start[1];
1320
1320
  return Math.round(diffSec * 1e3 + diffNano / 1e6);
1321
1321
  }
1322
- var import_node_fs14, import_promises33, import_node_path46, SimpleTraceFileExporter;
1322
+ var import_node_fs13, import_promises32, import_node_path46, SimpleTraceFileExporter;
1323
1323
  var init_simple_trace_file_exporter = __esm({
1324
1324
  "src/observability/simple-trace-file-exporter.ts"() {
1325
1325
  "use strict";
1326
- import_node_fs14 = require("fs");
1327
- import_promises33 = require("fs/promises");
1326
+ import_node_fs13 = require("fs");
1327
+ import_promises32 = require("fs/promises");
1328
1328
  import_node_path46 = require("path");
1329
1329
  SimpleTraceFileExporter = class {
1330
1330
  stream = null;
@@ -1338,8 +1338,8 @@ var init_simple_trace_file_exporter = __esm({
1338
1338
  async ensureStream() {
1339
1339
  if (!this.streamReady) {
1340
1340
  this.streamReady = (async () => {
1341
- await (0, import_promises33.mkdir)((0, import_node_path46.dirname)(this.filePath), { recursive: true });
1342
- this.stream = (0, import_node_fs14.createWriteStream)(this.filePath, { flags: "w" });
1341
+ await (0, import_promises32.mkdir)((0, import_node_path46.dirname)(this.filePath), { recursive: true });
1342
+ this.stream = (0, import_node_fs13.createWriteStream)(this.filePath, { flags: "w" });
1343
1343
  return this.stream;
1344
1344
  })();
1345
1345
  }
@@ -1505,7 +1505,6 @@ __export(index_exports, {
1505
1505
  freeformEvaluationSchema: () => freeformEvaluationSchema,
1506
1506
  generateRubrics: () => generateRubrics,
1507
1507
  getAgentvHome: () => getAgentvHome,
1508
- getGitCacheRoot: () => getGitCacheRoot,
1509
1508
  getHitCount: () => getHitCount,
1510
1509
  getSubagentsRoot: () => getSubagentsRoot,
1511
1510
  getTraceStateRoot: () => getTraceStateRoot,
@@ -4741,16 +4740,37 @@ function parseRepoConfig(raw) {
4741
4740
  ...clone !== void 0 && { clone }
4742
4741
  };
4743
4742
  }
4744
- function parseResetConfig(raw) {
4743
+ function parseWorkspaceHookConfig(raw, evalFileDir) {
4745
4744
  if (!isJsonObject(raw)) return void 0;
4745
+ const script = parseWorkspaceScriptConfig(raw, evalFileDir);
4746
4746
  const obj = raw;
4747
- const strategy = obj.strategy === "none" || obj.strategy === "hard" || obj.strategy === "recreate" ? obj.strategy : void 0;
4748
- const afterEach = typeof obj.after_each === "boolean" ? obj.after_each : void 0;
4749
- if (!strategy && afterEach === void 0) return void 0;
4747
+ const reset = obj.reset === "none" || obj.reset === "fast" || obj.reset === "strict" ? obj.reset : void 0;
4748
+ const clean = obj.clean === "always" || obj.clean === "on_success" || obj.clean === "on_failure" || obj.clean === "never" ? obj.clean : void 0;
4749
+ if (!script && !reset && !clean) return void 0;
4750
4750
  return {
4751
- ...strategy !== void 0 && { strategy },
4752
- ...afterEach !== void 0 && { after_each: afterEach }
4751
+ ...script ?? {},
4752
+ ...reset !== void 0 && { reset },
4753
+ ...clean !== void 0 && { clean }
4754
+ };
4755
+ }
4756
+ function parseWorkspaceHooksConfig(raw, evalFileDir) {
4757
+ if (!isJsonObject(raw)) return void 0;
4758
+ const obj = raw;
4759
+ const beforeAllTests = parseWorkspaceHookConfig(obj.before_all_tests, evalFileDir);
4760
+ const beforeEachTest = parseWorkspaceHookConfig(obj.before_each_test, evalFileDir);
4761
+ const afterEachTest = parseWorkspaceHookConfig(obj.after_each_test, evalFileDir);
4762
+ const afterAllTests = parseWorkspaceHookConfig(obj.after_all_tests, evalFileDir);
4763
+ const onReuse = parseWorkspaceHookConfig(obj.on_reuse, evalFileDir);
4764
+ const onFinish = parseWorkspaceHookConfig(obj.on_finish, evalFileDir);
4765
+ const hooks = {
4766
+ ...beforeAllTests !== void 0 && { before_all_tests: beforeAllTests },
4767
+ ...beforeEachTest !== void 0 && { before_each_test: beforeEachTest },
4768
+ ...afterEachTest !== void 0 && { after_each_test: afterEachTest },
4769
+ ...afterAllTests !== void 0 && { after_all_tests: afterAllTests },
4770
+ ...onReuse !== void 0 && { on_reuse: onReuse },
4771
+ ...onFinish !== void 0 && { on_finish: onFinish }
4753
4772
  };
4773
+ return Object.keys(hooks).length > 0 ? hooks : void 0;
4754
4774
  }
4755
4775
  async function resolveWorkspaceConfig(raw, evalFileDir) {
4756
4776
  if (typeof raw === "string") {
@@ -4781,37 +4801,56 @@ function parseWorkspaceConfig(raw, evalFileDir) {
4781
4801
  }
4782
4802
  const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
4783
4803
  const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
4784
- const reset = parseResetConfig(obj.reset);
4785
- const beforeAll = parseWorkspaceScriptConfig(obj.before_all, evalFileDir);
4786
- const afterAll = parseWorkspaceScriptConfig(obj.after_all, evalFileDir);
4787
- const beforeEach = parseWorkspaceScriptConfig(obj.before_each, evalFileDir);
4788
- const afterEach = parseWorkspaceScriptConfig(obj.after_each, evalFileDir);
4789
- if (!template && !isolation && !repos && !reset && !beforeAll && !afterAll && !beforeEach && !afterEach)
4804
+ const hooks = parseWorkspaceHooksConfig(obj.hooks, evalFileDir);
4805
+ const mode = obj.mode === "pooled" || obj.mode === "ephemeral" || obj.mode === "static" ? obj.mode : void 0;
4806
+ const staticPath = typeof obj.static_path === "string" ? obj.static_path : void 0;
4807
+ const pool = typeof obj.pool === "boolean" ? obj.pool : void 0;
4808
+ if (!template && !isolation && !repos && !hooks && !mode && !staticPath && pool === void 0)
4790
4809
  return void 0;
4791
4810
  return {
4792
4811
  ...template !== void 0 && { template },
4793
4812
  ...isolation !== void 0 && { isolation },
4794
4813
  ...repos !== void 0 && { repos },
4795
- ...reset !== void 0 && { reset },
4796
- ...beforeAll !== void 0 && { before_all: beforeAll },
4797
- ...afterAll !== void 0 && { after_all: afterAll },
4798
- ...beforeEach !== void 0 && { before_each: beforeEach },
4799
- ...afterEach !== void 0 && { after_each: afterEach }
4814
+ ...hooks !== void 0 && { hooks },
4815
+ ...mode !== void 0 && { mode },
4816
+ ...staticPath !== void 0 && { static_path: staticPath },
4817
+ ...pool !== void 0 && { pool }
4800
4818
  };
4801
4819
  }
4802
4820
  function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
4803
4821
  if (!suiteLevel && !caseLevel) return void 0;
4804
4822
  if (!suiteLevel) return caseLevel;
4805
4823
  if (!caseLevel) return suiteLevel;
4824
+ const mergeHook = (suiteHook, caseHook) => {
4825
+ if (!suiteHook && !caseHook) return void 0;
4826
+ return {
4827
+ ...suiteHook ?? {},
4828
+ ...caseHook ?? {}
4829
+ };
4830
+ };
4831
+ const mergedHooks = {
4832
+ before_all_tests: mergeHook(
4833
+ suiteLevel.hooks?.before_all_tests,
4834
+ caseLevel.hooks?.before_all_tests
4835
+ ),
4836
+ before_each_test: mergeHook(
4837
+ suiteLevel.hooks?.before_each_test,
4838
+ caseLevel.hooks?.before_each_test
4839
+ ),
4840
+ after_each_test: mergeHook(suiteLevel.hooks?.after_each_test, caseLevel.hooks?.after_each_test),
4841
+ after_all_tests: mergeHook(suiteLevel.hooks?.after_all_tests, caseLevel.hooks?.after_all_tests),
4842
+ on_reuse: mergeHook(suiteLevel.hooks?.on_reuse, caseLevel.hooks?.on_reuse),
4843
+ on_finish: mergeHook(suiteLevel.hooks?.on_finish, caseLevel.hooks?.on_finish)
4844
+ };
4845
+ const hasHooks = Object.values(mergedHooks).some((hook) => hook !== void 0);
4806
4846
  return {
4807
4847
  template: caseLevel.template ?? suiteLevel.template,
4808
4848
  isolation: caseLevel.isolation ?? suiteLevel.isolation,
4809
4849
  repos: caseLevel.repos ?? suiteLevel.repos,
4810
- reset: caseLevel.reset ?? suiteLevel.reset,
4811
- before_all: caseLevel.before_all ?? suiteLevel.before_all,
4812
- after_all: caseLevel.after_all ?? suiteLevel.after_all,
4813
- before_each: caseLevel.before_each ?? suiteLevel.before_each,
4814
- after_each: caseLevel.after_each ?? suiteLevel.after_each
4850
+ ...hasHooks && { hooks: mergedHooks },
4851
+ mode: caseLevel.mode ?? suiteLevel.mode,
4852
+ static_path: caseLevel.static_path ?? suiteLevel.static_path,
4853
+ pool: caseLevel.pool ?? suiteLevel.pool
4815
4854
  };
4816
4855
  }
4817
4856
  function asString6(value) {
@@ -10077,9 +10116,6 @@ function getAgentvHome() {
10077
10116
  function getWorkspacesRoot() {
10078
10117
  return import_node_path23.default.join(getAgentvHome(), "workspaces");
10079
10118
  }
10080
- function getGitCacheRoot() {
10081
- return import_node_path23.default.join(getAgentvHome(), "git-cache");
10082
- }
10083
10119
  function getSubagentsRoot() {
10084
10120
  return import_node_path23.default.join(getAgentvHome(), "subagents");
10085
10121
  }
@@ -11539,16 +11575,16 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
11539
11575
  });
11540
11576
  }
11541
11577
  async function execShellWithStdin(command, stdinPayload, options = {}) {
11542
- const { mkdir: mkdir17, readFile: readFile14, rm: rm7, writeFile: writeFile10 } = await import("fs/promises");
11578
+ const { mkdir: mkdir16, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
11543
11579
  const { tmpdir: tmpdir3 } = await import("os");
11544
11580
  const path44 = await import("path");
11545
11581
  const { randomUUID: randomUUID8 } = await import("crypto");
11546
11582
  const dir = path44.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
11547
- await mkdir17(dir, { recursive: true });
11583
+ await mkdir16(dir, { recursive: true });
11548
11584
  const stdinPath = path44.join(dir, "stdin.txt");
11549
11585
  const stdoutPath = path44.join(dir, "stdout.txt");
11550
11586
  const stderrPath = path44.join(dir, "stderr.txt");
11551
- await writeFile10(stdinPath, stdinPayload, "utf8");
11587
+ await writeFile9(stdinPath, stdinPayload, "utf8");
11552
11588
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
11553
11589
  const { spawn: spawn4 } = await import("child_process");
11554
11590
  try {
@@ -11581,7 +11617,7 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
11581
11617
  const stderr = (await readFile14(stderrPath, "utf8")).replace(/\r\n/g, "\n");
11582
11618
  return { stdout, stderr, exitCode };
11583
11619
  } finally {
11584
- await rm7(dir, { recursive: true, force: true });
11620
+ await rm6(dir, { recursive: true, force: true });
11585
11621
  }
11586
11622
  }
11587
11623
 
@@ -14798,8 +14834,8 @@ function runEqualsAssertion(output, value) {
14798
14834
  }
14799
14835
 
14800
14836
  // src/evaluation/orchestrator.ts
14801
- var import_node_crypto10 = require("crypto");
14802
- var import_promises30 = require("fs/promises");
14837
+ var import_node_crypto9 = require("crypto");
14838
+ var import_promises29 = require("fs/promises");
14803
14839
  var import_node_path42 = __toESM(require("path"), 1);
14804
14840
  var import_micromatch4 = __toESM(require("micromatch"), 1);
14805
14841
 
@@ -15762,7 +15798,7 @@ var WorkspacePoolManager = class {
15762
15798
  * 7. Return the slot (with path, index, isExisting)
15763
15799
  */
15764
15800
  async acquireWorkspace(options) {
15765
- const { templatePath, repos, maxSlots, repoManager } = options;
15801
+ const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
15766
15802
  const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
15767
15803
  const poolDir = import_node_path39.default.join(this.poolRoot, fingerprint);
15768
15804
  await (0, import_promises27.mkdir)(poolDir, { recursive: true });
@@ -15782,7 +15818,7 @@ var WorkspacePoolManager = class {
15782
15818
  }
15783
15819
  const slotExists = (0, import_node_fs11.existsSync)(slotPath);
15784
15820
  if (slotExists) {
15785
- await this.resetSlot(slotPath, templatePath, repos);
15821
+ await this.resetSlot(slotPath, templatePath, repos, poolReset);
15786
15822
  return {
15787
15823
  index: i,
15788
15824
  path: slotPath,
@@ -15914,15 +15950,19 @@ var WorkspacePoolManager = class {
15914
15950
  * 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
15915
15951
  * 2. Re-copy template files (skip repo directories)
15916
15952
  */
15917
- async resetSlot(slotPath, templatePath, repos) {
15953
+ async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
15918
15954
  for (const repo of repos) {
15919
15955
  const repoDir = import_node_path39.default.join(slotPath, repo.path);
15920
15956
  if (!(0, import_node_fs11.existsSync)(repoDir)) {
15921
15957
  continue;
15922
15958
  }
15959
+ if (poolReset === "none") {
15960
+ continue;
15961
+ }
15923
15962
  const ref = repo.checkout?.ref ?? "HEAD";
15924
15963
  await git(["reset", "--hard", ref], { cwd: repoDir });
15925
- await git(["clean", "-fd"], { cwd: repoDir });
15964
+ const cleanFlag = poolReset === "strict" ? "-fdx" : "-fd";
15965
+ await git(["clean", cleanFlag], { cwd: repoDir });
15926
15966
  }
15927
15967
  if (templatePath) {
15928
15968
  const repoDirNames = new Set(
@@ -15938,14 +15978,10 @@ var WorkspacePoolManager = class {
15938
15978
 
15939
15979
  // src/evaluation/workspace/repo-manager.ts
15940
15980
  var import_node_child_process8 = require("child_process");
15941
- var import_node_crypto9 = require("crypto");
15942
- var import_node_fs12 = require("fs");
15943
- var import_promises28 = require("fs/promises");
15944
15981
  var import_node_path40 = __toESM(require("path"), 1);
15945
15982
  var import_node_util6 = require("util");
15946
15983
  var execFileAsync2 = (0, import_node_util6.promisify)(import_node_child_process8.execFile);
15947
15984
  var DEFAULT_TIMEOUT_MS2 = 3e5;
15948
- var LOCK_TIMEOUT_MS = 6e4;
15949
15985
  function gitEnv2() {
15950
15986
  const env = { ...process.env };
15951
15987
  for (const key of Object.keys(env)) {
@@ -15960,10 +15996,6 @@ function gitEnv2() {
15960
15996
  GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
15961
15997
  };
15962
15998
  }
15963
- function cacheKey(source) {
15964
- const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
15965
- return (0, import_node_crypto9.createHash)("sha256").update(raw).digest("hex");
15966
- }
15967
15999
  function getSourceUrl(source) {
15968
16000
  return source.type === "git" ? source.url : source.path;
15969
16001
  }
@@ -15977,33 +16009,9 @@ async function git2(args, opts) {
15977
16009
  });
15978
16010
  return stdout.trim();
15979
16011
  }
15980
- async function acquireLock(lockPath) {
15981
- const start = Date.now();
15982
- while (Date.now() - start < LOCK_TIMEOUT_MS) {
15983
- try {
15984
- await (0, import_promises28.writeFile)(lockPath, String(process.pid), { flag: "wx" });
15985
- return;
15986
- } catch (err) {
15987
- if (err.code === "EEXIST") {
15988
- await new Promise((r) => setTimeout(r, 200));
15989
- continue;
15990
- }
15991
- throw err;
15992
- }
15993
- }
15994
- throw new Error(`Timed out waiting for lock: ${lockPath}`);
15995
- }
15996
- async function releaseLock(lockPath) {
15997
- try {
15998
- await (0, import_promises28.unlink)(lockPath);
15999
- } catch {
16000
- }
16001
- }
16002
16012
  var RepoManager = class {
16003
- cacheDir;
16004
16013
  verbose;
16005
- constructor(cacheDir, verbose = false) {
16006
- this.cacheDir = cacheDir ?? getGitCacheRoot();
16014
+ constructor(verbose = false) {
16007
16015
  this.verbose = verbose;
16008
16016
  }
16009
16017
  async runGit(args, opts) {
@@ -16028,86 +16036,18 @@ var RepoManager = class {
16028
16036
  }
16029
16037
  }
16030
16038
  /**
16031
- * Ensure a bare mirror cache exists for the given source.
16032
- * Creates on first access, fetches updates on subsequent calls.
16033
- * Returns the absolute path to the cache directory.
16034
- */
16035
- async ensureCache(source, depth, resolve) {
16036
- const key = cacheKey(source);
16037
- const cachePath = import_node_path40.default.join(this.cacheDir, key);
16038
- const lockPath = `${cachePath}.lock`;
16039
- const cacheExists = (0, import_node_fs12.existsSync)(import_node_path40.default.join(cachePath, "HEAD"));
16040
- if (this.verbose) {
16041
- console.log(
16042
- `[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve ?? "remote"} cache=${cachePath}`
16043
- );
16044
- }
16045
- if (resolve === "local") {
16046
- if (cacheExists) {
16047
- if (this.verbose) {
16048
- console.log(`[repo] using existing local cache ${cachePath}`);
16049
- }
16050
- return cachePath;
16051
- }
16052
- const url = getSourceUrl(source);
16053
- throw new Error(
16054
- `No cache found for \`${url}\`. Run \`agentv cache add --url ${url} --from <local-path>\` to seed it.`
16055
- );
16056
- }
16057
- await (0, import_promises28.mkdir)(this.cacheDir, { recursive: true });
16058
- const lockStartedAt = Date.now();
16059
- await acquireLock(lockPath);
16060
- if (this.verbose) {
16061
- console.log(`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`);
16062
- }
16063
- try {
16064
- if (cacheExists) {
16065
- if (this.verbose) {
16066
- console.log(`[repo] refreshing existing cache ${cachePath}`);
16067
- }
16068
- const fetchArgs = ["fetch", "--prune"];
16069
- if (depth) {
16070
- fetchArgs.push("--depth", String(depth));
16071
- }
16072
- await this.runGit(fetchArgs, { cwd: cachePath });
16073
- } else {
16074
- if (this.verbose) {
16075
- console.log(`[repo] creating new cache ${cachePath}`);
16076
- }
16077
- const cloneArgs = ["clone", "--mirror", "--bare"];
16078
- if (depth) {
16079
- cloneArgs.push("--depth", String(depth));
16080
- }
16081
- const sourceUrl = getSourceUrl(source);
16082
- const cloneUrl = depth && source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
16083
- cloneArgs.push(cloneUrl, cachePath);
16084
- await this.runGit(cloneArgs);
16085
- }
16086
- } finally {
16087
- await releaseLock(lockPath);
16088
- if (this.verbose) {
16089
- console.log(`[repo] lock released path=${lockPath}`);
16090
- }
16091
- }
16092
- return cachePath;
16093
- }
16094
- /**
16095
- * Clone a repo from cache into the workspace at the configured path.
16039
+ * Clone a repo directly from source into the workspace at the configured path.
16096
16040
  * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
16097
16041
  */
16098
16042
  async materialize(repo, workspacePath) {
16099
16043
  const targetDir = import_node_path40.default.join(workspacePath, repo.path);
16044
+ const sourceUrl = getSourceUrl(repo.source);
16100
16045
  const startedAt = Date.now();
16101
16046
  if (this.verbose) {
16102
16047
  console.log(
16103
- `[repo] materialize start path=${repo.path} source=${getSourceUrl(repo.source)} workspace=${workspacePath}`
16048
+ `[repo] materialize start path=${repo.path} source=${sourceUrl} workspace=${workspacePath}`
16104
16049
  );
16105
16050
  }
16106
- const cachePath = await this.ensureCache(
16107
- repo.source,
16108
- repo.clone?.depth,
16109
- repo.checkout?.resolve
16110
- );
16111
16051
  const cloneArgs = ["clone"];
16112
16052
  if (repo.clone?.depth) {
16113
16053
  cloneArgs.push("--depth", String(repo.clone.depth));
@@ -16116,7 +16056,7 @@ var RepoManager = class {
16116
16056
  cloneArgs.push("--filter", repo.clone.filter);
16117
16057
  }
16118
16058
  cloneArgs.push("--no-checkout");
16119
- const cloneUrl = repo.clone?.depth || repo.clone?.filter ? `file://${cachePath}` : cachePath;
16059
+ const cloneUrl = (repo.clone?.depth || repo.clone?.filter) && repo.source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
16120
16060
  cloneArgs.push(cloneUrl, targetDir);
16121
16061
  await this.runGit(cloneArgs);
16122
16062
  if (repo.clone?.sparse?.length) {
@@ -16188,63 +16128,25 @@ var RepoManager = class {
16188
16128
  }
16189
16129
  }
16190
16130
  /** Reset repos in workspace to their checkout state. */
16191
- async reset(repos, workspacePath, strategy) {
16192
- if (strategy === "recreate") {
16193
- for (const repo of repos) {
16194
- const targetDir = import_node_path40.default.join(workspacePath, repo.path);
16195
- await (0, import_promises28.rm)(targetDir, { recursive: true, force: true });
16196
- }
16197
- await this.materializeAll(repos, workspacePath);
16198
- return;
16199
- }
16131
+ async reset(repos, workspacePath, reset) {
16132
+ const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
16200
16133
  for (const repo of repos) {
16201
16134
  const targetDir = import_node_path40.default.join(workspacePath, repo.path);
16202
16135
  await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
16203
- await this.runGit(["clean", "-fd"], { cwd: targetDir });
16136
+ await this.runGit(["clean", cleanFlag], { cwd: targetDir });
16204
16137
  }
16205
16138
  }
16206
- /**
16207
- * Seed the cache from a local repository, setting the remote to a given URL.
16208
- * Useful for avoiding slow network clones when a local clone already exists.
16209
- */
16210
- async seedCache(localPath, remoteUrl, opts) {
16211
- const source = { type: "git", url: remoteUrl };
16212
- const key = cacheKey(source);
16213
- const cachePath = import_node_path40.default.join(this.cacheDir, key);
16214
- const lockPath = `${cachePath}.lock`;
16215
- await (0, import_promises28.mkdir)(this.cacheDir, { recursive: true });
16216
- await acquireLock(lockPath);
16217
- try {
16218
- if ((0, import_node_fs12.existsSync)(import_node_path40.default.join(cachePath, "HEAD"))) {
16219
- if (!opts?.force) {
16220
- throw new Error(
16221
- `Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
16222
- );
16223
- }
16224
- await (0, import_promises28.rm)(cachePath, { recursive: true, force: true });
16225
- }
16226
- await git2(["clone", "--mirror", "--bare", localPath, cachePath]);
16227
- await git2(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
16228
- } finally {
16229
- await releaseLock(lockPath);
16230
- }
16231
- return cachePath;
16232
- }
16233
- /** Remove the entire cache directory. */
16234
- async cleanCache() {
16235
- await (0, import_promises28.rm)(this.cacheDir, { recursive: true, force: true });
16236
- }
16237
16139
  };
16238
16140
 
16239
16141
  // src/evaluation/workspace/resolve.ts
16240
- var import_promises29 = require("fs/promises");
16142
+ var import_promises28 = require("fs/promises");
16241
16143
  var import_node_path41 = __toESM(require("path"), 1);
16242
16144
  async function resolveWorkspaceTemplate(templatePath) {
16243
16145
  if (!templatePath) {
16244
16146
  return void 0;
16245
16147
  }
16246
16148
  const resolved = import_node_path41.default.resolve(templatePath);
16247
- const stats = await (0, import_promises29.stat)(resolved);
16149
+ const stats = await (0, import_promises28.stat)(resolved);
16248
16150
  if (stats.isFile()) {
16249
16151
  return {
16250
16152
  dir: import_node_path41.default.dirname(resolved),
@@ -16254,7 +16156,7 @@ async function resolveWorkspaceTemplate(templatePath) {
16254
16156
  if (!stats.isDirectory()) {
16255
16157
  throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
16256
16158
  }
16257
- const entries = await (0, import_promises29.readdir)(resolved);
16159
+ const entries = await (0, import_promises28.readdir)(resolved);
16258
16160
  const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
16259
16161
  if (workspaceFiles.length === 1) {
16260
16162
  return {
@@ -16318,6 +16220,22 @@ function classifyQualityStatus(score) {
16318
16220
  function usesFileReferencePrompt(provider) {
16319
16221
  return isAgentProvider(provider) || provider.kind === "cli";
16320
16222
  }
16223
+ function toScriptConfig(hook, hookName, context2) {
16224
+ const command = hook.command ?? hook.script;
16225
+ if (!command || command.length === 0) {
16226
+ throw new Error(`${hookName} hook in ${context2} requires command or script`);
16227
+ }
16228
+ return {
16229
+ command,
16230
+ ...hook.timeout_ms !== void 0 && { timeout_ms: hook.timeout_ms },
16231
+ ...hook.timeoutMs !== void 0 && { timeoutMs: hook.timeoutMs },
16232
+ ...hook.cwd !== void 0 && { cwd: hook.cwd },
16233
+ ...hook.script !== void 0 && { script: hook.script }
16234
+ };
16235
+ }
16236
+ function hasHookCommand(hook) {
16237
+ return !!(hook?.command && hook.command.length > 0 || hook?.script && hook.script.length > 0);
16238
+ }
16321
16239
  function getWorkspaceTemplate(target) {
16322
16240
  const config = target.config;
16323
16241
  if ("workspaceTemplate" in config && typeof config.workspaceTemplate === "string") {
@@ -16351,7 +16269,12 @@ async function runEvaluation(options) {
16351
16269
  failOnError,
16352
16270
  poolWorkspaces,
16353
16271
  poolMaxSlots: configPoolMaxSlots,
16354
- workspace: userWorkspacePath
16272
+ workspace: legacyWorkspacePath,
16273
+ workspaceMode,
16274
+ workspacePath,
16275
+ workspaceClean,
16276
+ retainOnSuccess,
16277
+ retainOnFailure
16355
16278
  } = options;
16356
16279
  let useCache = options.useCache;
16357
16280
  if (trials && trials.count > 1 && useCache) {
@@ -16360,7 +16283,7 @@ async function runEvaluation(options) {
16360
16283
  );
16361
16284
  useCache = false;
16362
16285
  }
16363
- const evalRunId = (0, import_node_crypto10.randomUUID)();
16286
+ const evalRunId = (0, import_node_crypto9.randomUUID)();
16364
16287
  const evalCases = preloadedEvalCases ?? await loadTests(evalFilePath, repoRoot, { verbose, filter });
16365
16288
  const filteredEvalCases = filterEvalCases(evalCases, filter);
16366
16289
  if (filteredEvalCases.length === 0) {
@@ -16487,13 +16410,23 @@ async function runEvaluation(options) {
16487
16410
  }
16488
16411
  };
16489
16412
  const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
16490
- if (userWorkspacePath && isPerTestIsolation) {
16413
+ const configuredMode = suiteWorkspace?.mode ?? workspaceMode;
16414
+ const configuredStaticPath = suiteWorkspace?.static_path ?? workspacePath ?? legacyWorkspacePath;
16415
+ const useStaticWorkspace = configuredMode === "static" || !!configuredStaticPath && !configuredMode;
16416
+ if (useStaticWorkspace && isPerTestIsolation) {
16491
16417
  throw new Error(
16492
- "--workspace is incompatible with isolation: per_test. Use isolation: shared (default)."
16418
+ "static workspace mode is incompatible with isolation: per_test. Use isolation: shared (default)."
16493
16419
  );
16494
16420
  }
16495
- const hasSharedWorkspace = !!(userWorkspacePath || workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
16496
- const usePool = poolWorkspaces === true && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !userWorkspacePath;
16421
+ if (configuredMode === "static" && !configuredStaticPath) {
16422
+ throw new Error("workspace.mode=static requires workspace.static_path or --workspace-path");
16423
+ }
16424
+ const hasSharedWorkspace = !!(useStaticWorkspace || workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation);
16425
+ const poolEnabled = configuredMode === "pooled" ? true : configuredMode === "ephemeral" || useStaticWorkspace ? false : suiteWorkspace?.pool ?? poolWorkspaces ?? true;
16426
+ const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
16427
+ const finishCleanPolicy = suiteWorkspace?.hooks?.on_finish?.clean;
16428
+ const resolvedRetainOnSuccess = (finishCleanPolicy === "always" || finishCleanPolicy === "on_success" ? "cleanup" : finishCleanPolicy === "on_failure" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
16429
+ const resolvedRetainOnFailure = (finishCleanPolicy === "always" || finishCleanPolicy === "on_failure" ? "cleanup" : finishCleanPolicy === "on_success" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
16497
16430
  const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
16498
16431
  const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
16499
16432
  setupLog(
@@ -16514,20 +16447,21 @@ async function runEvaluation(options) {
16514
16447
  const availablePoolSlots = [];
16515
16448
  const poolSlotBaselines = /* @__PURE__ */ new Map();
16516
16449
  const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
16517
- if (userWorkspacePath) {
16518
- sharedWorkspacePath = userWorkspacePath;
16519
- setupLog(`using user-provided workspace: ${userWorkspacePath}`);
16450
+ if (useStaticWorkspace && configuredStaticPath) {
16451
+ sharedWorkspacePath = configuredStaticPath;
16452
+ setupLog(`using static workspace: ${configuredStaticPath}`);
16520
16453
  } else if (usePool && suiteWorkspace?.repos) {
16521
16454
  const slotsNeeded = workers;
16522
16455
  setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
16523
16456
  poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
16524
- const poolRepoManager = new RepoManager(void 0, verbose);
16457
+ const poolRepoManager = new RepoManager(verbose);
16525
16458
  for (let i = 0; i < slotsNeeded; i++) {
16526
16459
  const slot = await poolManager.acquireWorkspace({
16527
16460
  templatePath: workspaceTemplate,
16528
16461
  repos: suiteWorkspace.repos,
16529
16462
  maxSlots: poolMaxSlots,
16530
- repoManager: poolRepoManager
16463
+ repoManager: poolRepoManager,
16464
+ poolReset: (workspaceClean === "full" ? "strict" : workspaceClean === "standard" ? "fast" : null) ?? suiteWorkspace.hooks?.on_reuse?.reset ?? "fast"
16531
16465
  });
16532
16466
  poolSlots.push(slot);
16533
16467
  setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
@@ -16547,21 +16481,21 @@ async function runEvaluation(options) {
16547
16481
  const message = error instanceof Error ? error.message : String(error);
16548
16482
  throw new Error(`Failed to create shared workspace: ${message}`);
16549
16483
  }
16550
- } else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
16484
+ } else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
16551
16485
  sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
16552
- await (0, import_promises30.mkdir)(sharedWorkspacePath, { recursive: true });
16486
+ await (0, import_promises29.mkdir)(sharedWorkspacePath, { recursive: true });
16553
16487
  setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
16554
16488
  }
16555
16489
  try {
16556
16490
  if (suiteWorkspaceFile && sharedWorkspacePath) {
16557
16491
  const copiedWorkspaceFile = import_node_path42.default.join(sharedWorkspacePath, import_node_path42.default.basename(suiteWorkspaceFile));
16558
16492
  try {
16559
- await (0, import_promises30.stat)(copiedWorkspaceFile);
16493
+ await (0, import_promises29.stat)(copiedWorkspaceFile);
16560
16494
  suiteWorkspaceFile = copiedWorkspaceFile;
16561
16495
  } catch {
16562
16496
  }
16563
16497
  }
16564
- const repoManager = suiteWorkspace?.repos?.length && !usePool && !userWorkspacePath ? new RepoManager(void 0, verbose) : void 0;
16498
+ const repoManager = suiteWorkspace?.repos?.length && !usePool && !useStaticWorkspace ? new RepoManager(verbose) : void 0;
16565
16499
  if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
16566
16500
  setupLog(
16567
16501
  `materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
@@ -16571,17 +16505,19 @@ async function runEvaluation(options) {
16571
16505
  setupLog("shared repo materialization complete");
16572
16506
  } catch (error) {
16573
16507
  const message = error instanceof Error ? error.message : String(error);
16574
- if (sharedWorkspacePath && !userWorkspacePath) {
16508
+ if (sharedWorkspacePath && !useStaticWorkspace) {
16575
16509
  await cleanupWorkspace(sharedWorkspacePath).catch(() => {
16576
16510
  });
16577
16511
  }
16578
16512
  throw new Error(`Failed to materialize repos: ${message}`);
16579
16513
  }
16580
16514
  }
16581
- if (sharedWorkspacePath && suiteWorkspace?.before_all) {
16582
- const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
16515
+ const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all_tests;
16516
+ if (sharedWorkspacePath && hasHookCommand(suiteBeforeAllHook)) {
16517
+ const beforeAllHook = suiteBeforeAllHook;
16518
+ const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
16583
16519
  setupLog(
16584
- `running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
16520
+ `running shared before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
16585
16521
  );
16586
16522
  const scriptContext = {
16587
16523
  workspacePath: sharedWorkspacePath,
@@ -16590,18 +16526,22 @@ async function runEvaluation(options) {
16590
16526
  evalDir
16591
16527
  };
16592
16528
  try {
16593
- beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
16529
+ beforeAllOutput = await executeWorkspaceScript(
16530
+ toScriptConfig(beforeAllHook, "before_all_tests", "suite workspace"),
16531
+ scriptContext
16532
+ );
16594
16533
  setupLog("shared before_all completed");
16595
16534
  } catch (error) {
16596
16535
  const message = error instanceof Error ? error.message : String(error);
16597
- if (sharedWorkspacePath && !userWorkspacePath) {
16536
+ if (sharedWorkspacePath && !useStaticWorkspace) {
16598
16537
  await cleanupWorkspace(sharedWorkspacePath).catch(() => {
16599
16538
  });
16600
16539
  }
16601
16540
  throw new Error(`before_all script failed: ${message}`);
16602
16541
  }
16603
16542
  }
16604
- if (availablePoolSlots.length > 0 && suiteWorkspace?.before_all) {
16543
+ if (availablePoolSlots.length > 0 && hasHookCommand(suiteBeforeAllHook)) {
16544
+ const beforeAllHook = suiteBeforeAllHook;
16605
16545
  for (const slot of availablePoolSlots) {
16606
16546
  setupLog(`running before_all on pool slot ${slot.index}`);
16607
16547
  const scriptContext = {
@@ -16611,7 +16551,10 @@ async function runEvaluation(options) {
16611
16551
  evalDir
16612
16552
  };
16613
16553
  try {
16614
- const output = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
16554
+ const output = await executeWorkspaceScript(
16555
+ toScriptConfig(beforeAllHook, "before_all_tests", "suite workspace"),
16556
+ scriptContext
16557
+ );
16615
16558
  if (!beforeAllOutput) beforeAllOutput = output;
16616
16559
  setupLog(`before_all completed on pool slot ${slot.index}`);
16617
16560
  } catch (error) {
@@ -16743,6 +16686,8 @@ async function runEvaluation(options) {
16743
16686
  evalRunId,
16744
16687
  keepWorkspaces,
16745
16688
  cleanupWorkspaces,
16689
+ retainOnSuccess: resolvedRetainOnSuccess,
16690
+ retainOnFailure: resolvedRetainOnFailure,
16746
16691
  sharedWorkspacePath: testWorkspacePath,
16747
16692
  sharedBaselineCommit: testBaselineCommit,
16748
16693
  suiteWorkspaceFile,
@@ -16836,7 +16781,9 @@ async function runEvaluation(options) {
16836
16781
  }
16837
16782
  }
16838
16783
  const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
16839
- if (afterAllWorkspaces.length > 0 && suiteWorkspace?.after_all) {
16784
+ const suiteAfterAllHook = suiteWorkspace?.hooks?.after_all_tests;
16785
+ if (afterAllWorkspaces.length > 0 && hasHookCommand(suiteAfterAllHook)) {
16786
+ const afterAllHook = suiteAfterAllHook;
16840
16787
  for (const wsPath of afterAllWorkspaces) {
16841
16788
  const scriptContext = {
16842
16789
  workspacePath: wsPath,
@@ -16846,7 +16793,7 @@ async function runEvaluation(options) {
16846
16793
  };
16847
16794
  try {
16848
16795
  const afterAllOutput = await executeWorkspaceScript(
16849
- suiteWorkspace.after_all,
16796
+ toScriptConfig(afterAllHook, "after_all_tests", "suite workspace"),
16850
16797
  scriptContext,
16851
16798
  "warn"
16852
16799
  );
@@ -16857,12 +16804,14 @@ async function runEvaluation(options) {
16857
16804
  }
16858
16805
  }
16859
16806
  }
16860
- if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !userWorkspacePath) {
16807
+ if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !useStaticWorkspace) {
16861
16808
  const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
16862
- if (cleanupWorkspaces) {
16863
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
16864
- });
16865
- } else if (!hasFailure && !keepWorkspaces) {
16809
+ if (hasFailure) {
16810
+ if (resolvedRetainOnFailure === "cleanup") {
16811
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
16812
+ });
16813
+ }
16814
+ } else if (resolvedRetainOnSuccess === "cleanup") {
16866
16815
  await cleanupWorkspace(sharedWorkspacePath).catch(() => {
16867
16816
  });
16868
16817
  }
@@ -17056,6 +17005,8 @@ async function runEvalCase(options) {
17056
17005
  evalRunId,
17057
17006
  keepWorkspaces,
17058
17007
  cleanupWorkspaces: forceCleanup,
17008
+ retainOnSuccess,
17009
+ retainOnFailure,
17059
17010
  sharedWorkspacePath,
17060
17011
  sharedBaselineCommit,
17061
17012
  suiteWorkspaceFile,
@@ -17067,10 +17018,10 @@ async function runEvalCase(options) {
17067
17018
  const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
17068
17019
  const promptInputs = await buildPromptInputs(evalCase, formattingMode);
17069
17020
  const typeRegistry = providedTypeRegistry ?? createBuiltinRegistry();
17070
- const cacheKey2 = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
17021
+ const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
17071
17022
  let cachedResponse;
17072
- if (cacheKey2 && cache) {
17073
- cachedResponse = await cache.get(cacheKey2);
17023
+ if (cacheKey && cache) {
17024
+ cachedResponse = await cache.get(cacheKey);
17074
17025
  }
17075
17026
  const nowFn = now ?? (() => /* @__PURE__ */ new Date());
17076
17027
  let workspacePath = sharedWorkspacePath;
@@ -17103,18 +17054,18 @@ async function runEvalCase(options) {
17103
17054
  if (caseWorkspaceFile && workspacePath) {
17104
17055
  const copiedFile = import_node_path42.default.join(workspacePath, import_node_path42.default.basename(caseWorkspaceFile));
17105
17056
  try {
17106
- await (0, import_promises30.stat)(copiedFile);
17057
+ await (0, import_promises29.stat)(copiedFile);
17107
17058
  caseWorkspaceFile = copiedFile;
17108
17059
  } catch {
17109
17060
  }
17110
17061
  }
17111
17062
  }
17112
- if (!workspacePath && (evalCase.workspace?.before_all || evalCase.workspace?.repos?.length) && evalRunId) {
17063
+ if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
17113
17064
  workspacePath = getWorkspacePath(evalRunId, evalCase.id);
17114
- await (0, import_promises30.mkdir)(workspacePath, { recursive: true });
17065
+ await (0, import_promises29.mkdir)(workspacePath, { recursive: true });
17115
17066
  }
17116
17067
  if (evalCase.workspace?.repos?.length && workspacePath) {
17117
- const perCaseRepoManager = new RepoManager(void 0, setupDebug);
17068
+ const perCaseRepoManager = new RepoManager(setupDebug);
17118
17069
  try {
17119
17070
  if (setupDebug) {
17120
17071
  console.log(
@@ -17139,11 +17090,13 @@ async function runEvalCase(options) {
17139
17090
  );
17140
17091
  }
17141
17092
  }
17142
- if (workspacePath && evalCase.workspace?.before_all) {
17143
- const beforeAllCommand = (evalCase.workspace.before_all.command ?? evalCase.workspace.before_all.script ?? []).join(" ");
17093
+ const caseBeforeAllHook = evalCase.workspace?.hooks?.before_all_tests;
17094
+ if (workspacePath && hasHookCommand(caseBeforeAllHook)) {
17095
+ const beforeAllHook = caseBeforeAllHook;
17096
+ const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
17144
17097
  if (setupDebug) {
17145
17098
  console.log(
17146
- `[setup] test=${evalCase.id} running before_all in cwd=${evalCase.workspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
17099
+ `[setup] test=${evalCase.id} running before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
17147
17100
  );
17148
17101
  }
17149
17102
  const scriptContext = {
@@ -17156,7 +17109,7 @@ async function runEvalCase(options) {
17156
17109
  };
17157
17110
  try {
17158
17111
  beforeAllOutput = await executeWorkspaceScript(
17159
- evalCase.workspace.before_all,
17112
+ toScriptConfig(beforeAllHook, "before_all_tests", `test '${evalCase.id}'`),
17160
17113
  scriptContext
17161
17114
  );
17162
17115
  if (setupDebug) {
@@ -17181,7 +17134,9 @@ async function runEvalCase(options) {
17181
17134
  }
17182
17135
  }
17183
17136
  }
17184
- if (workspacePath && evalCase.workspace?.before_each) {
17137
+ const caseBeforeEachHook = evalCase.workspace?.hooks?.before_each_test;
17138
+ if (workspacePath && hasHookCommand(caseBeforeEachHook)) {
17139
+ const beforeEachHook = caseBeforeEachHook;
17185
17140
  const scriptContext = {
17186
17141
  workspacePath,
17187
17142
  testId: evalCase.id,
@@ -17192,7 +17147,7 @@ async function runEvalCase(options) {
17192
17147
  };
17193
17148
  try {
17194
17149
  beforeEachOutput = await executeWorkspaceScript(
17195
- evalCase.workspace.before_each,
17150
+ toScriptConfig(beforeEachHook, "before_each_test", `test '${evalCase.id}'`),
17196
17151
  scriptContext
17197
17152
  );
17198
17153
  } catch (error) {
@@ -17280,8 +17235,8 @@ async function runEvalCase(options) {
17280
17235
  }
17281
17236
  return errorResult;
17282
17237
  }
17283
- if (cacheKey2 && cache && !cachedResponse) {
17284
- await cache.set(cacheKey2, providerResponse);
17238
+ if (cacheKey && cache && !cachedResponse) {
17239
+ await cache.set(cacheKey, providerResponse);
17285
17240
  }
17286
17241
  const output = providerResponse.output;
17287
17242
  const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
@@ -17309,17 +17264,19 @@ async function runEvalCase(options) {
17309
17264
  }
17310
17265
  }
17311
17266
  const providerError = extractProviderError(providerResponse);
17312
- if (repoManager && workspacePath && evalCase.workspace?.reset?.after_each && evalCase.workspace.reset.strategy && evalCase.workspace.reset.strategy !== "none" && evalCase.workspace.repos) {
17267
+ if (repoManager && workspacePath && evalCase.workspace?.hooks?.after_each_test?.reset && evalCase.workspace.hooks.after_each_test.reset !== "none" && evalCase.workspace.repos) {
17313
17268
  try {
17314
17269
  await repoManager.reset(
17315
17270
  evalCase.workspace.repos,
17316
17271
  workspacePath,
17317
- evalCase.workspace.reset.strategy
17272
+ evalCase.workspace.hooks.after_each_test.reset
17318
17273
  );
17319
17274
  } catch {
17320
17275
  }
17321
17276
  }
17322
- if (workspacePath && evalCase.workspace?.after_each) {
17277
+ const caseAfterEachHook = evalCase.workspace?.hooks?.after_each_test;
17278
+ if (workspacePath && hasHookCommand(caseAfterEachHook)) {
17279
+ const afterEachHook = caseAfterEachHook;
17323
17280
  const scriptContext = {
17324
17281
  workspacePath,
17325
17282
  testId: evalCase.id,
@@ -17330,7 +17287,7 @@ async function runEvalCase(options) {
17330
17287
  };
17331
17288
  try {
17332
17289
  afterEachOutput = await executeWorkspaceScript(
17333
- evalCase.workspace.after_each,
17290
+ toScriptConfig(afterEachHook, "after_each_test", `test '${evalCase.id}'`),
17334
17291
  scriptContext,
17335
17292
  "warn"
17336
17293
  );
@@ -17380,8 +17337,13 @@ async function runEvalCase(options) {
17380
17337
  await cleanupWorkspace(workspacePath).catch(() => {
17381
17338
  });
17382
17339
  } else if (isFailure) {
17383
- return { ...finalResult, workspacePath };
17384
- } else if (!keepWorkspaces) {
17340
+ if ((retainOnFailure ?? "keep") === "cleanup") {
17341
+ await cleanupWorkspace(workspacePath).catch(() => {
17342
+ });
17343
+ } else {
17344
+ return { ...finalResult, workspacePath };
17345
+ }
17346
+ } else if ((retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup")) !== "keep") {
17385
17347
  await cleanupWorkspace(workspacePath).catch(() => {
17386
17348
  });
17387
17349
  }
@@ -17399,11 +17361,12 @@ async function runEvalCase(options) {
17399
17361
  "evaluator_error"
17400
17362
  );
17401
17363
  if (workspacePath && !isSharedWorkspace) {
17402
- if (forceCleanup) {
17364
+ if (forceCleanup || (retainOnFailure ?? "keep") === "cleanup") {
17403
17365
  await cleanupWorkspace(workspacePath).catch(() => {
17404
17366
  });
17367
+ } else {
17368
+ return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
17405
17369
  }
17406
- return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
17407
17370
  }
17408
17371
  return { ...errorResult, beforeEachOutput, afterEachOutput };
17409
17372
  }
@@ -17422,7 +17385,9 @@ async function runEvalCaseWithTrials(options, trialsConfig) {
17422
17385
  useCache: false,
17423
17386
  // Force cleanup for intermediate trials
17424
17387
  cleanupWorkspaces: isLastDeclaredTrial ? options.cleanupWorkspaces : true,
17425
- keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false
17388
+ keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false,
17389
+ retainOnSuccess: isLastDeclaredTrial ? options.retainOnSuccess : "cleanup",
17390
+ retainOnFailure: isLastDeclaredTrial ? options.retainOnFailure : "cleanup"
17426
17391
  };
17427
17392
  const result = await runEvalCase(trialOptions);
17428
17393
  allResults.push(result);
@@ -17945,7 +17910,7 @@ function extractProviderError(response) {
17945
17910
  return trimmed.length > 0 ? trimmed : void 0;
17946
17911
  }
17947
17912
  function createCacheKey(provider, target, evalCase, promptInputs) {
17948
- const hash = (0, import_node_crypto10.createHash)("sha256");
17913
+ const hash = (0, import_node_crypto9.createHash)("sha256");
17949
17914
  hash.update(provider.id);
17950
17915
  hash.update(target.name);
17951
17916
  hash.update(evalCase.id);
@@ -18013,7 +17978,7 @@ function computeWeightedMean(entries) {
18013
17978
  }
18014
17979
 
18015
17980
  // src/evaluation/evaluate.ts
18016
- var import_node_fs13 = require("fs");
17981
+ var import_node_fs12 = require("fs");
18017
17982
  var import_node_path43 = __toESM(require("path"), 1);
18018
17983
  async function evaluate(config) {
18019
17984
  const startTime = Date.now();
@@ -18132,7 +18097,7 @@ async function discoverDefaultTarget(repoRoot) {
18132
18097
  for (const dir of chain) {
18133
18098
  for (const candidate of TARGET_FILE_CANDIDATES) {
18134
18099
  const targetsPath = import_node_path43.default.join(dir, candidate);
18135
- if (!(0, import_node_fs13.existsSync)(targetsPath)) continue;
18100
+ if (!(0, import_node_fs12.existsSync)(targetsPath)) continue;
18136
18101
  try {
18137
18102
  const definitions = await readTargetDefinitions(targetsPath);
18138
18103
  const defaultTarget = definitions.find((d) => d.name === "default");
@@ -18150,7 +18115,7 @@ async function loadEnvHierarchy(repoRoot) {
18150
18115
  const envFiles = [];
18151
18116
  for (const dir of chain) {
18152
18117
  const envPath = import_node_path43.default.join(dir, ".env");
18153
- if ((0, import_node_fs13.existsSync)(envPath)) envFiles.push(envPath);
18118
+ if ((0, import_node_fs12.existsSync)(envPath)) envFiles.push(envPath);
18154
18119
  }
18155
18120
  for (let i = envFiles.length - 1; i >= 0; i--) {
18156
18121
  try {
@@ -18228,12 +18193,12 @@ var CONFIG_FILE_NAMES = [
18228
18193
  ".agentv/config.js"
18229
18194
  ];
18230
18195
  async function loadTsConfig(projectRoot) {
18231
- const { existsSync: existsSync5 } = await import("fs");
18196
+ const { existsSync: existsSync4 } = await import("fs");
18232
18197
  const { pathToFileURL } = await import("url");
18233
18198
  const { join: join2 } = await import("path");
18234
18199
  for (const fileName of CONFIG_FILE_NAMES) {
18235
18200
  const filePath = join2(projectRoot, fileName);
18236
- if (!existsSync5(filePath)) {
18201
+ if (!existsSync4(filePath)) {
18237
18202
  continue;
18238
18203
  }
18239
18204
  try {
@@ -18330,7 +18295,7 @@ function buildPrompt(criteria, question, referenceAnswer) {
18330
18295
  }
18331
18296
 
18332
18297
  // src/evaluation/cache/response-cache.ts
18333
- var import_promises31 = require("fs/promises");
18298
+ var import_promises30 = require("fs/promises");
18334
18299
  var import_node_path44 = __toESM(require("path"), 1);
18335
18300
  var DEFAULT_CACHE_PATH = ".agentv/cache";
18336
18301
  var ResponseCache = class {
@@ -18341,7 +18306,7 @@ var ResponseCache = class {
18341
18306
  async get(key) {
18342
18307
  const filePath = this.keyToPath(key);
18343
18308
  try {
18344
- const data = await (0, import_promises31.readFile)(filePath, "utf8");
18309
+ const data = await (0, import_promises30.readFile)(filePath, "utf8");
18345
18310
  return JSON.parse(data);
18346
18311
  } catch {
18347
18312
  return void 0;
@@ -18350,8 +18315,8 @@ var ResponseCache = class {
18350
18315
  async set(key, value) {
18351
18316
  const filePath = this.keyToPath(key);
18352
18317
  const dir = import_node_path44.default.dirname(filePath);
18353
- await (0, import_promises31.mkdir)(dir, { recursive: true });
18354
- await (0, import_promises31.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
18318
+ await (0, import_promises30.mkdir)(dir, { recursive: true });
18319
+ await (0, import_promises30.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
18355
18320
  }
18356
18321
  keyToPath(key) {
18357
18322
  const prefix = key.slice(0, 2);
@@ -18890,7 +18855,6 @@ function createAgentKernel() {
18890
18855
  freeformEvaluationSchema,
18891
18856
  generateRubrics,
18892
18857
  getAgentvHome,
18893
- getGitCacheRoot,
18894
18858
  getHitCount,
18895
18859
  getSubagentsRoot,
18896
18860
  getTraceStateRoot,