npm - agentv - Versions diffs - 2.14.3 → 2.16.0 - Mend

agentv 2.14.3 → 2.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/dist/{chunk-D3WKMO6D.js → chunk-JZ62HLUC.js} +254 -119
package/dist/chunk-JZ62HLUC.js.map +1 -0
package/dist/{chunk-TK4PB62M.js → chunk-LZ5MPQFM.js} +1085 -620
package/dist/chunk-LZ5MPQFM.js.map +1 -0
package/dist/{chunk-MDAFSKDI.js → chunk-QLCVA3ZS.js} +56 -12
package/dist/chunk-QLCVA3ZS.js.map +1 -0
package/dist/cli.js +3 -3
package/dist/{dist-F62E44UQ.js → dist-BMNEH377.js} +8 -4
package/dist/index.js +3 -3
package/dist/{interactive-SQSP4PL3.js → interactive-ZXYNPRCT.js} +3 -3
package/package.json +1 -1
package/dist/chunk-D3WKMO6D.js.map +0 -1
package/dist/chunk-MDAFSKDI.js.map +0 -1
package/dist/chunk-TK4PB62M.js.map +0 -1
/package/dist/{dist-F62E44UQ.js.map → dist-BMNEH377.js.map} +0 -0
/package/dist/{interactive-SQSP4PL3.js.map → interactive-ZXYNPRCT.js.map} +0 -0

package/dist/{chunk-TK4PB62M.js → chunk-LZ5MPQFM.js} RENAMED Viewed

@@ -148,7 +148,7 @@ var require_dist = __commonJS({
   }
 });
-// ../../packages/core/dist/chunk-N55K52OO.js
+// ../../packages/core/dist/chunk-E6AJPAXM.js
 import { constants } from "node:fs";
 import { access, readFile } from "node:fs/promises";
 import path from "node:path";
@@ -632,8 +632,8 @@ function getErrorMap() {
 // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
 var makeIssue = (params) => {
-  const { data, path: path41, errorMaps, issueData } = params;
-  const fullPath = [...path41, ...issueData.path || []];
+  const { data, path: path42, errorMaps, issueData } = params;
+  const fullPath = [...path42, ...issueData.path || []];
   const fullIssue = {
     ...issueData,
     path: fullPath
@@ -749,11 +749,11 @@ var errorUtil;
 // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
 var ParseInputLazyPath = class {
-  constructor(parent, value, path41, key) {
+  constructor(parent, value, path42, key) {
     this._cachedPath = [];
     this.parent = parent;
     this.data = value;
-    this._path = path41;
+    this._path = path42;
     this._key = key;
   }
   get path() {
@@ -4195,7 +4195,7 @@ var coerce = {
 };
 var NEVER = INVALID;
-// ../../packages/core/dist/chunk-N55K52OO.js
+// ../../packages/core/dist/chunk-E6AJPAXM.js
 var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
 var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
 var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
@@ -6661,10 +6661,10 @@ function assignProp(target, prop, value) {
     configurable: true
   });
 }
-function getElementAtPath(obj, path41) {
-  if (!path41)
+function getElementAtPath(obj, path42) {
+  if (!path42)
     return obj;
-  return path41.reduce((acc, key) => acc?.[key], obj);
+  return path42.reduce((acc, key) => acc?.[key], obj);
 }
 function promiseAllObject(promisesObj) {
   const keys = Object.keys(promisesObj);
@@ -6984,11 +6984,11 @@ function aborted(x, startIndex = 0) {
   }
   return false;
 }
-function prefixIssues(path41, issues) {
+function prefixIssues(path42, issues) {
   return issues.map((iss) => {
     var _a17;
     (_a17 = iss).path ?? (_a17.path = []);
-    iss.path.unshift(path41);
+    iss.path.unshift(path42);
     return iss;
   });
 }
@@ -7125,7 +7125,7 @@ function treeifyError(error40, _mapper) {
     return issue2.message;
   };
   const result = { errors: [] };
-  const processError = (error41, path41 = []) => {
+  const processError = (error41, path42 = []) => {
     var _a17, _b8;
     for (const issue2 of error41.issues) {
       if (issue2.code === "invalid_union" && issue2.errors.length) {
@@ -7135,7 +7135,7 @@ function treeifyError(error40, _mapper) {
       } else if (issue2.code === "invalid_element") {
         processError({ issues: issue2.issues }, issue2.path);
       } else {
-        const fullpath = [...path41, ...issue2.path];
+        const fullpath = [...path42, ...issue2.path];
         if (fullpath.length === 0) {
           result.errors.push(mapper(issue2));
           continue;
@@ -7165,9 +7165,9 @@ function treeifyError(error40, _mapper) {
   processError(error40);
   return result;
 }
-function toDotPath(path41) {
+function toDotPath(path42) {
   const segs = [];
-  for (const seg of path41) {
+  for (const seg of path42) {
     if (typeof seg === "number")
       segs.push(`[${seg}]`);
     else if (typeof seg === "symbol")
@@ -26720,14 +26720,14 @@ function createAzure(options = {}) {
     description: "Azure OpenAI resource name"
   });
   const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
-  const url2 = ({ path: path41, modelId }) => {
+  const url2 = ({ path: path42, modelId }) => {
     var _a24;
     const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
     let fullUrl;
     if (options.useDeploymentBasedUrls) {
-      fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path41}`);
+      fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path42}`);
     } else {
-      fullUrl = new URL(`${baseUrlPrefix}/v1${path41}`);
+      fullUrl = new URL(`${baseUrlPrefix}/v1${path42}`);
     }
     fullUrl.searchParams.set("api-version", apiVersion);
     return fullUrl.toString();
@@ -33962,7 +33962,7 @@ import fs2 from "node:fs/promises";
 import path31 from "node:path";
 import { createHash as createHash2, randomUUID as randomUUID7 } from "node:crypto";
 import { mkdir as mkdir12, stat as stat7 } from "node:fs/promises";
-import path38 from "node:path";
+import path39 from "node:path";
 import micromatch4 from "micromatch";
 import { readFileSync } from "node:fs";
 import path322 from "node:path";
@@ -33977,15 +33977,18 @@ import path35 from "node:path";
 import { execFile } from "node:child_process";
 import { createHash } from "node:crypto";
 import { existsSync as existsSync2 } from "node:fs";
-import { mkdir as mkdir11, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
+import { cp as cp2, mkdir as mkdir11, readFile as readFile11, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
 import path36 from "node:path";
 import { promisify as promisify5 } from "node:util";
-import { readdir as readdir4, stat as stat6 } from "node:fs/promises";
+import { execFile as execFile2 } from "node:child_process";
 import path37 from "node:path";
+import { promisify as promisify6 } from "node:util";
+import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
+import path38 from "node:path";
 import { existsSync as existsSync3 } from "node:fs";
-import path39 from "node:path";
-import { mkdir as mkdir13, readFile as readFile11, writeFile as writeFile8 } from "node:fs/promises";
 import path40 from "node:path";
+import { mkdir as mkdir13, readFile as readFile12, writeFile as writeFile8 } from "node:fs/promises";
+import path41 from "node:path";
 function computeTraceSummary(messages) {
   const toolCallCounts = {};
   const toolDurations = {};
@@ -34546,6 +34549,17 @@ function parseExecutionDefaults(raw, configPath) {
   } else if (otelFile !== void 0) {
     logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
   }
+  if (typeof obj.pool_workspaces === "boolean") {
+    result.pool_workspaces = obj.pool_workspaces;
+  } else if (obj.pool_workspaces !== void 0) {
+    logWarning(`Invalid execution.pool_workspaces in ${configPath}, expected boolean`);
+  }
+  const poolSlots = obj.pool_slots;
+  if (typeof poolSlots === "number" && Number.isInteger(poolSlots) && poolSlots >= 1 && poolSlots <= 50) {
+    result.pool_slots = poolSlots;
+  } else if (poolSlots !== void 0) {
+    logWarning(`Invalid execution.pool_slots in ${configPath}, expected integer 1-50`);
+  }
   return Object.keys(result).length > 0 ? result : void 0;
 }
 function logWarning(message) {
@@ -35961,6 +35975,7 @@ async function processMessages(options) {
     repoRootPath,
     guidelinePatterns,
     guidelinePaths,
+    treatFileSegmentsAsGuidelines,
     textParts,
     messageType,
     verbose
@@ -36008,16 +36023,20 @@ async function processMessages(options) {
         }
         try {
           const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
-          if (messageType === "input" && guidelinePatterns && guidelinePaths) {
-            const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
-            if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
-              guidelinePaths.push(path5.resolve(resolvedPath));
-              if (verbose) {
-                console.log(`  [Guideline] Found: ${displayPath}`);
-                console.log(`    Resolved to: ${resolvedPath}`);
-              }
-              continue;
+          const classifyAsGuideline = shouldTreatAsGuideline({
+            messageType,
+            resolvedPath,
+            repoRootPath,
+            guidelinePatterns,
+            treatFileSegmentsAsGuidelines
+          });
+          if (classifyAsGuideline && guidelinePaths) {
+            guidelinePaths.push(path5.resolve(resolvedPath));
+            if (verbose) {
+              console.log(`  [Guideline] Found: ${displayPath}`);
+              console.log(`    Resolved to: ${resolvedPath}`);
             }
+            continue;
           }
           segments.push({
             type: "file",
@@ -36046,6 +36065,26 @@ async function processMessages(options) {
   }
   return segments;
 }
+function shouldTreatAsGuideline(options) {
+  const {
+    messageType,
+    resolvedPath,
+    repoRootPath,
+    guidelinePatterns,
+    treatFileSegmentsAsGuidelines
+  } = options;
+  if (messageType !== "input") {
+    return false;
+  }
+  if (treatFileSegmentsAsGuidelines) {
+    return true;
+  }
+  if (!guidelinePatterns || guidelinePatterns.length === 0) {
+    return false;
+  }
+  const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
+  return isGuidelineFile(relativeToRepo, guidelinePatterns);
+}
 function asString3(value) {
   return typeof value === "string" ? value : void 0;
 }
@@ -36380,6 +36419,8 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
         for (const guidelinePath of testCase.guideline_paths) {
           console.log(`    - ${guidelinePath}`);
         }
+      } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
+        console.log("  No guidelines found (guideline_patterns not configured)");
       } else {
         console.log("  No guidelines found");
       }
@@ -36740,7 +36781,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
   } else {
     throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
   }
-  const suiteWorkspace = parseWorkspaceConfig(suite.workspace, evalFileDir);
+  const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
   const suiteInputMessages = expandInputShorthand(suite.input);
   const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
   const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
@@ -36776,12 +36817,24 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
     }
     const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
     const skipDefaults = caseExecution?.skip_defaults === true;
-    const inputMessages = suiteInputMessages && !skipDefaults ? [...suiteInputMessages, ...testInputMessages] : testInputMessages;
+    const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
+    const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
     const hasExpectedMessages = expectedMessages.length > 0;
     const guidelinePaths = [];
     const inputTextParts = [];
-    const inputSegments = await processMessages({
-      messages: inputMessages,
+    const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
+      messages: effectiveSuiteInputMessages,
+      searchRoots,
+      repoRootPath,
+      guidelinePatterns,
+      guidelinePaths,
+      treatFileSegmentsAsGuidelines: true,
+      textParts: inputTextParts,
+      messageType: "input",
+      verbose
+    }) : [];
+    const testInputSegments = await processMessages({
+      messages: testInputMessages,
       searchRoots,
       repoRootPath,
       guidelinePatterns,
@@ -36790,6 +36843,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       messageType: "input",
       verbose
     });
+    const inputSegments = [...suiteInputSegments, ...testInputSegments];
     const outputSegments = hasExpectedMessages ? await processExpectedMessages({
       messages: expectedMessages,
       searchRoots,
@@ -36837,7 +36891,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       ...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
       ...userFilePaths
     ];
-    const caseWorkspace = parseWorkspaceConfig(evalcase.workspace, evalFileDir);
+    const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
     const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
     const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
     const caseTargets = extractTargetsFromTestCase(evalcase);
@@ -36868,6 +36922,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
         for (const guidelinePath of testCase.guideline_paths) {
           console.log(`    - ${guidelinePath}`);
         }
+      } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
+        console.log("  No guidelines found (guideline_patterns not configured)");
       } else {
         console.log("  No guidelines found");
       }
@@ -36956,17 +37012,58 @@ function parseRepoConfig(raw) {
     ...clone2 !== void 0 && { clone: clone2 }
   };
 }
-function parseResetConfig(raw) {
+function parseWorkspaceHookConfig(raw, evalFileDir) {
   if (!isJsonObject(raw)) return void 0;
+  const script = parseWorkspaceScriptConfig(raw, evalFileDir);
   const obj = raw;
-  const strategy = obj.strategy === "none" || obj.strategy === "hard" || obj.strategy === "recreate" ? obj.strategy : void 0;
-  const afterEach = typeof obj.after_each === "boolean" ? obj.after_each : void 0;
-  if (!strategy && afterEach === void 0) return void 0;
+  const reset = obj.reset === "none" || obj.reset === "fast" || obj.reset === "strict" ? obj.reset : void 0;
+  const clean = obj.clean === "always" || obj.clean === "on_success" || obj.clean === "on_failure" || obj.clean === "never" ? obj.clean : void 0;
+  if (!script && !reset && !clean) return void 0;
   return {
-    ...strategy !== void 0 && { strategy },
-    ...afterEach !== void 0 && { after_each: afterEach }
+    ...script ?? {},
+    ...reset !== void 0 && { reset },
+    ...clean !== void 0 && { clean }
   };
 }
+function parseWorkspaceHooksConfig(raw, evalFileDir) {
+  if (!isJsonObject(raw)) return void 0;
+  const obj = raw;
+  const beforeAllTests = parseWorkspaceHookConfig(obj.before_all_tests, evalFileDir);
+  const beforeEachTest = parseWorkspaceHookConfig(obj.before_each_test, evalFileDir);
+  const afterEachTest = parseWorkspaceHookConfig(obj.after_each_test, evalFileDir);
+  const afterAllTests = parseWorkspaceHookConfig(obj.after_all_tests, evalFileDir);
+  const onReuse = parseWorkspaceHookConfig(obj.on_reuse, evalFileDir);
+  const onFinish = parseWorkspaceHookConfig(obj.on_finish, evalFileDir);
+  const hooks = {
+    ...beforeAllTests !== void 0 && { before_all_tests: beforeAllTests },
+    ...beforeEachTest !== void 0 && { before_each_test: beforeEachTest },
+    ...afterEachTest !== void 0 && { after_each_test: afterEachTest },
+    ...afterAllTests !== void 0 && { after_all_tests: afterAllTests },
+    ...onReuse !== void 0 && { on_reuse: onReuse },
+    ...onFinish !== void 0 && { on_finish: onFinish }
+  };
+  return Object.keys(hooks).length > 0 ? hooks : void 0;
+}
+async function resolveWorkspaceConfig(raw, evalFileDir) {
+  if (typeof raw === "string") {
+    const workspaceFilePath = path8.resolve(evalFileDir, raw);
+    let content;
+    try {
+      content = await readFile7(workspaceFilePath, "utf8");
+    } catch {
+      throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
+    }
+    const parsed = parse22(content);
+    if (!isJsonObject(parsed)) {
+      throw new Error(
+        `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
+      );
+    }
+    const workspaceFileDir = path8.dirname(workspaceFilePath);
+    return parseWorkspaceConfig(parsed, workspaceFileDir);
+  }
+  return parseWorkspaceConfig(raw, evalFileDir);
+}
 function parseWorkspaceConfig(raw, evalFileDir) {
   if (!isJsonObject(raw)) return void 0;
   const obj = raw;
@@ -36976,37 +37073,56 @@ function parseWorkspaceConfig(raw, evalFileDir) {
   }
   const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
   const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
-  const reset = parseResetConfig(obj.reset);
-  const beforeAll = parseWorkspaceScriptConfig(obj.before_all, evalFileDir);
-  const afterAll = parseWorkspaceScriptConfig(obj.after_all, evalFileDir);
-  const beforeEach = parseWorkspaceScriptConfig(obj.before_each, evalFileDir);
-  const afterEach = parseWorkspaceScriptConfig(obj.after_each, evalFileDir);
-  if (!template && !isolation && !repos && !reset && !beforeAll && !afterAll && !beforeEach && !afterEach)
+  const hooks = parseWorkspaceHooksConfig(obj.hooks, evalFileDir);
+  const mode = obj.mode === "pooled" || obj.mode === "ephemeral" || obj.mode === "static" ? obj.mode : void 0;
+  const staticPath = typeof obj.static_path === "string" ? obj.static_path : void 0;
+  const pool = typeof obj.pool === "boolean" ? obj.pool : void 0;
+  if (!template && !isolation && !repos && !hooks && !mode && !staticPath && pool === void 0)
     return void 0;
   return {
     ...template !== void 0 && { template },
     ...isolation !== void 0 && { isolation },
     ...repos !== void 0 && { repos },
-    ...reset !== void 0 && { reset },
-    ...beforeAll !== void 0 && { before_all: beforeAll },
-    ...afterAll !== void 0 && { after_all: afterAll },
-    ...beforeEach !== void 0 && { before_each: beforeEach },
-    ...afterEach !== void 0 && { after_each: afterEach }
+    ...hooks !== void 0 && { hooks },
+    ...mode !== void 0 && { mode },
+    ...staticPath !== void 0 && { static_path: staticPath },
+    ...pool !== void 0 && { pool }
   };
 }
 function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
   if (!suiteLevel && !caseLevel) return void 0;
   if (!suiteLevel) return caseLevel;
   if (!caseLevel) return suiteLevel;
+  const mergeHook = (suiteHook, caseHook) => {
+    if (!suiteHook && !caseHook) return void 0;
+    return {
+      ...suiteHook ?? {},
+      ...caseHook ?? {}
+    };
+  };
+  const mergedHooks = {
+    before_all_tests: mergeHook(
+      suiteLevel.hooks?.before_all_tests,
+      caseLevel.hooks?.before_all_tests
+    ),
+    before_each_test: mergeHook(
+      suiteLevel.hooks?.before_each_test,
+      caseLevel.hooks?.before_each_test
+    ),
+    after_each_test: mergeHook(suiteLevel.hooks?.after_each_test, caseLevel.hooks?.after_each_test),
+    after_all_tests: mergeHook(suiteLevel.hooks?.after_all_tests, caseLevel.hooks?.after_all_tests),
+    on_reuse: mergeHook(suiteLevel.hooks?.on_reuse, caseLevel.hooks?.on_reuse),
+    on_finish: mergeHook(suiteLevel.hooks?.on_finish, caseLevel.hooks?.on_finish)
+  };
+  const hasHooks = Object.values(mergedHooks).some((hook) => hook !== void 0);
   return {
     template: caseLevel.template ?? suiteLevel.template,
     isolation: caseLevel.isolation ?? suiteLevel.isolation,
     repos: caseLevel.repos ?? suiteLevel.repos,
-    reset: caseLevel.reset ?? suiteLevel.reset,
-    before_all: caseLevel.before_all ?? suiteLevel.before_all,
-    after_all: caseLevel.after_all ?? suiteLevel.after_all,
-    before_each: caseLevel.before_each ?? suiteLevel.before_each,
-    after_each: caseLevel.after_each ?? suiteLevel.after_each
+    ...hasHooks && { hooks: mergedHooks },
+    mode: caseLevel.mode ?? suiteLevel.mode,
+    static_path: caseLevel.static_path ?? suiteLevel.static_path,
+    pool: caseLevel.pool ?? suiteLevel.pool
   };
 }
 function asString6(value) {
@@ -40929,15 +41045,15 @@ function getAgentvHome() {
 function getWorkspacesRoot() {
   return path21.join(getAgentvHome(), "workspaces");
 }
-function getGitCacheRoot() {
-  return path21.join(getAgentvHome(), "git-cache");
-}
 function getSubagentsRoot() {
   return path21.join(getAgentvHome(), "subagents");
 }
 function getTraceStateRoot() {
   return path21.join(getAgentvHome(), "trace-state");
 }
+function getWorkspacePoolRoot() {
+  return path21.join(getAgentvHome(), "workspace-pool");
+}
 var DEFAULT_LOCK_NAME = "subagent.lock";
 var DEFAULT_ALIVE_FILENAME = ".alive";
 function getDefaultSubagentRoot(vscodeCmd = "code") {
@@ -41738,8 +41854,6 @@ var AGENTV_REQUEST_TEMPLATE = `[[ ## task ## ]]
 **IMPORTANT**: Follow these exact steps:
 1. Create and write your complete response to: {{responseFileTmp}}
-    - All intended file outputs/changes MUST be written in your response file.
-    - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
 2. When completely finished, run these PowerShell commands to signal completion:
 \`\`\`
 Move-Item -LiteralPath '{{responseFileTmp}}' -Destination '{{responseFileFinal}}'
@@ -41756,8 +41870,6 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
 **IMPORTANT**: Follow these exact steps:
 1. Create and write your complete response to: {{responseFileTmp}}
-    - All intended file outputs/changes MUST be written in your response file.
-    - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
 2. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
 3. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
 `;
@@ -42347,15 +42459,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
   });
 }
 async function execShellWithStdin(command, stdinPayload, options = {}) {
-  const { mkdir: mkdir14, readFile: readFile12, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
+  const { mkdir: mkdir14, readFile: readFile13, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
   const { tmpdir: tmpdir3 } = await import("node:os");
-  const path41 = await import("node:path");
+  const path42 = await import("node:path");
   const { randomUUID: randomUUID8 } = await import("node:crypto");
-  const dir = path41.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
+  const dir = path42.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
   await mkdir14(dir, { recursive: true });
-  const stdinPath = path41.join(dir, "stdin.txt");
-  const stdoutPath = path41.join(dir, "stdout.txt");
-  const stderrPath = path41.join(dir, "stderr.txt");
+  const stdinPath = path42.join(dir, "stdin.txt");
+  const stdoutPath = path42.join(dir, "stdout.txt");
+  const stderrPath = path42.join(dir, "stderr.txt");
   await writeFile9(stdinPath, stdinPayload, "utf8");
   const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
   const { spawn: spawn4 } = await import("node:child_process");
@@ -42385,8 +42497,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
         resolve2(code ?? 0);
       });
     });
-    const stdout = (await readFile12(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
-    const stderr = (await readFile12(stderrPath, "utf8")).replace(/\r\n/g, "\n");
+    const stdout = (await readFile13(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
+    const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
     return { stdout, stderr, exitCode };
   } finally {
     await rm6(dir, { recursive: true, force: true });
@@ -42699,7 +42811,7 @@ var CodeEvaluator = class {
       outputPath,
       guidelineFiles: context.evalCase.guideline_paths,
       inputFiles: context.evalCase.file_paths.filter(
-        (path41) => !context.evalCase.guideline_paths.includes(path41)
+        (path42) => !context.evalCase.guideline_paths.includes(path42)
       ),
       input: context.evalCase.input,
       trace: context.trace ?? null,
@@ -42942,6 +43054,8 @@ ${context.fileChanges}`;
       };
     } catch (e) {
       const message = e instanceof Error ? e.message : String(e);
+      const evalName = context.evaluator?.name ?? "llm-judge";
+      console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
       return {
         score: 0,
         verdict: "skip",
@@ -42970,24 +43084,39 @@ ${context.fileChanges}`;
       systemPrompt,
       target: judgeProvider.targetName
     };
-    const { data, tokenUsage } = await this.runWithRetry({
-      context,
-      judgeProvider,
-      systemPrompt,
-      userPrompt: prompt,
-      schema: rubricEvaluationSchema
-    });
-    const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
-    return {
-      score,
-      verdict,
-      hits,
-      misses,
-      expectedAspectCount: rubrics.length,
-      reasoning: data.overall_reasoning,
-      evaluatorRawRequest,
-      tokenUsage
-    };
+    try {
+      const { data, tokenUsage } = await this.runWithRetry({
+        context,
+        judgeProvider,
+        systemPrompt,
+        userPrompt: prompt,
+        schema: rubricEvaluationSchema
+      });
+      const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
+      return {
+        score,
+        verdict,
+        hits,
+        misses,
+        expectedAspectCount: rubrics.length,
+        reasoning: data.overall_reasoning,
+        evaluatorRawRequest,
+        tokenUsage
+      };
+    } catch (e) {
+      const message = e instanceof Error ? e.message : String(e);
+      const evalName = context.evaluator?.name ?? "llm-judge";
+      console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
+      return {
+        score: 0,
+        verdict: "skip",
+        hits: [],
+        misses: [`Judge parse failure after 3 attempts: ${message}`],
+        expectedAspectCount: rubrics.length,
+        reasoning: `Judge parse failure after 3 attempts: ${message}`,
+        evaluatorRawRequest
+      };
+    }
   }
   /**
    * Evaluate using score-range rubrics (analytic rubric scoring).
@@ -43001,25 +43130,40 @@ ${context.fileChanges}`;
       systemPrompt,
       target: judgeProvider.targetName
     };
-    const { data, tokenUsage } = await this.runWithRetry({
-      context,
-      judgeProvider,
-      systemPrompt,
-      userPrompt: prompt,
-      schema: scoreRangeEvaluationSchema
-    });
-    const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
-    return {
-      score,
-      verdict,
-      hits,
-      misses,
-      expectedAspectCount: rubrics.length,
-      reasoning: data.overall_reasoning,
-      evaluatorRawRequest,
-      details,
-      tokenUsage
-    };
+    try {
+      const { data, tokenUsage } = await this.runWithRetry({
+        context,
+        judgeProvider,
+        systemPrompt,
+        userPrompt: prompt,
+        schema: scoreRangeEvaluationSchema
+      });
+      const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
+      return {
+        score,
+        verdict,
+        hits,
+        misses,
+        expectedAspectCount: rubrics.length,
+        reasoning: data.overall_reasoning,
+        evaluatorRawRequest,
+        details,
+        tokenUsage
+      };
+    } catch (e) {
+      const message = e instanceof Error ? e.message : String(e);
+      const evalName = context.evaluator?.name ?? "llm-judge";
+      console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
+      return {
+        score: 0,
+        verdict: "skip",
+        hits: [],
+        misses: [`Judge parse failure after 3 attempts: ${message}`],
+        expectedAspectCount: rubrics.length,
+        reasoning: `Judge parse failure after 3 attempts: ${message}`,
+        evaluatorRawRequest
+      };
+    }
   }
   /**
    * Build prompt for score-range rubric evaluation.
@@ -43303,19 +43447,13 @@ var CompositeEvaluator = class {
   runWeightedAverage(results, weights) {
     let totalWeight = 0;
     let weightedSum = 0;
+    let evaluatedCount = 0;
     const allHits = [];
     const allMisses = [];
     const reasoningParts = [];
     const scores = [];
     for (const member of results) {
       const weight = weights?.[member.id] ?? 1;
-      totalWeight += weight;
-      weightedSum += member.result.score * weight;
-      allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
-      allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
-      if (member.result.reasoning) {
-        reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
-      }
       scores.push({
         name: member.id,
         type: member.type,
@@ -43330,6 +43468,32 @@ var CompositeEvaluator = class {
         details: member.result.details,
         tokenUsage: member.result.tokenUsage
       });
+      if (member.result.verdict === "skip") {
+        continue;
+      }
+      evaluatedCount++;
+      totalWeight += weight;
+      weightedSum += member.result.score * weight;
+      allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
+      allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
+      if (member.result.reasoning) {
+        reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
+      }
+    }
+    if (evaluatedCount === 0 && results.length > 0) {
+      return {
+        score: 0,
+        verdict: "skip",
+        hits: [],
+        misses: [],
+        expectedAspectCount: 1,
+        reasoning: "All evaluators skipped (infrastructure failure)",
+        evaluatorRawRequest: {
+          aggregator: "weighted_average",
+          ...weights ? { weights } : {}
+        },
+        scores
+      };
     }
     const finalScore = totalWeight > 0 ? weightedSum / totalWeight : 0;
     return {
@@ -43353,19 +43517,8 @@ var CompositeEvaluator = class {
     const reasoningParts = [];
     let passingCount = 0;
     let borderlineCount = 0;
+    let evaluatedCount = 0;
     for (const member of results) {
-      const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
-      if (isPassing) {
-        passingCount++;
-        if (member.result.verdict === "borderline") {
-          borderlineCount++;
-        }
-      }
-      allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
-      allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
-      if (member.result.reasoning) {
-        reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
-      }
       scores.push({
         name: member.id,
         type: member.type,
@@ -43379,8 +43532,39 @@ var CompositeEvaluator = class {
         details: member.result.details,
         tokenUsage: member.result.tokenUsage
       });
+      if (member.result.verdict === "skip") {
+        continue;
+      }
+      evaluatedCount++;
+      const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
+      if (isPassing) {
+        passingCount++;
+        if (member.result.verdict === "borderline") {
+          borderlineCount++;
+        }
+      }
+      allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
+      allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
+      if (member.result.reasoning) {
+        reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
+      }
     }
-    const totalCount = results.length;
+    if (evaluatedCount === 0 && results.length > 0) {
+      return {
+        score: 0,
+        verdict: "skip",
+        hits: [],
+        misses: [],
+        expectedAspectCount: 1,
+        reasoning: "All evaluators skipped (infrastructure failure)",
+        evaluatorRawRequest: {
+          aggregator: "threshold",
+          threshold
+        },
+        scores
+      };
+    }
+    const totalCount = evaluatedCount;
     const score = totalCount > 0 ? passingCount / totalCount : 0;
     const pass = score >= threshold;
     if (pass && borderlineCount > 0) {
@@ -43882,115 +44066,115 @@ var FieldAccuracyEvaluator = class {
    * Evaluate a single field against the expected value.
    */
   evaluateField(fieldConfig, candidateData, expectedData) {
-    const { path: path41, match, required: required2 = true, weight = 1 } = fieldConfig;
-    const candidateValue = resolvePath(candidateData, path41);
-    const expectedValue = resolvePath(expectedData, path41);
+    const { path: path42, match, required: required2 = true, weight = 1 } = fieldConfig;
+    const candidateValue = resolvePath(candidateData, path42);
+    const expectedValue = resolvePath(expectedData, path42);
     if (expectedValue === void 0) {
       return {
-        path: path41,
+        path: path42,
         score: 1,
         // No expected value means no comparison needed
         weight,
         hit: true,
-        message: `${path41}: no expected value`
+        message: `${path42}: no expected value`
       };
     }
     if (candidateValue === void 0) {
       if (required2) {
         return {
-          path: path41,
+          path: path42,
           score: 0,
           weight,
           hit: false,
-          message: `${path41} (required, missing)`
+          message: `${path42} (required, missing)`
         };
       }
       return {
-        path: path41,
+        path: path42,
         score: 1,
         // Don't penalize missing optional fields
         weight: 0,
         // Zero weight means it won't affect the score
         hit: true,
-        message: `${path41}: optional field missing`
+        message: `${path42}: optional field missing`
       };
     }
     switch (match) {
       case "exact":
-        return this.compareExact(path41, candidateValue, expectedValue, weight);
+        return this.compareExact(path42, candidateValue, expectedValue, weight);
       case "numeric_tolerance":
         return this.compareNumericTolerance(
-          path41,
+          path42,
           candidateValue,
           expectedValue,
           fieldConfig,
           weight
         );
       case "date":
-        return this.compareDate(path41, candidateValue, expectedValue, fieldConfig, weight);
+        return this.compareDate(path42, candidateValue, expectedValue, fieldConfig, weight);
       default:
         return {
-          path: path41,
+          path: path42,
           score: 0,
           weight,
           hit: false,
-          message: `${path41}: unknown match type "${match}"`
+          message: `${path42}: unknown match type "${match}"`
         };
     }
   }
   /**
    * Exact equality comparison.
    */
-  compareExact(path41, candidateValue, expectedValue, weight) {
+  compareExact(path42, candidateValue, expectedValue, weight) {
     if (deepEqual(candidateValue, expectedValue)) {
       return {
-        path: path41,
+        path: path42,
         score: 1,
         weight,
         hit: true,
-        message: path41
+        message: path42
       };
     }
     if (typeof candidateValue !== typeof expectedValue) {
       return {
-        path: path41,
+        path: path42,
         score: 0,
         weight,
         hit: false,
-        message: `${path41} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
+        message: `${path42} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
       };
     }
     return {
-      path: path41,
+      path: path42,
       score: 0,
       weight,
       hit: false,
-      message: `${path41} (value mismatch)`
+      message: `${path42} (value mismatch)`
     };
   }
   /**
    * Numeric comparison with absolute or relative tolerance.
    */
-  compareNumericTolerance(path41, candidateValue, expectedValue, fieldConfig, weight) {
+  compareNumericTolerance(path42, candidateValue, expectedValue, fieldConfig, weight) {
     const { tolerance = 0, relative = false } = fieldConfig;
     const candidateNum = toNumber2(candidateValue);
     const expectedNum = toNumber2(expectedValue);
     if (candidateNum === null || expectedNum === null) {
       return {
-        path: path41,
+        path: path42,
         score: 0,
         weight,
         hit: false,
-        message: `${path41} (non-numeric value)`
+        message: `${path42} (non-numeric value)`
       };
     }
     if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
       return {
-        path: path41,
+        path: path42,
         score: 0,
         weight,
         hit: false,
-        message: `${path41} (invalid numeric value)`
+        message: `${path42} (invalid numeric value)`
       };
     }
     const diff = Math.abs(candidateNum - expectedNum);
@@ -44003,61 +44187,61 @@ var FieldAccuracyEvaluator = class {
     }
     if (withinTolerance) {
       return {
-        path: path41,
+        path: path42,
         score: 1,
         weight,
         hit: true,
-        message: `${path41} (within tolerance: diff=${diff.toFixed(2)})`
+        message: `${path42} (within tolerance: diff=${diff.toFixed(2)})`
       };
     }
     return {
-      path: path41,
+      path: path42,
       score: 0,
       weight,
       hit: false,
-      message: `${path41} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
+      message: `${path42} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
     };
   }
   /**
    * Date comparison with format normalization.
    */
-  compareDate(path41, candidateValue, expectedValue, fieldConfig, weight) {
+  compareDate(path42, candidateValue, expectedValue, fieldConfig, weight) {
     const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
     const candidateDate = parseDate(String(candidateValue), formats);
     const expectedDate = parseDate(String(expectedValue), formats);
     if (candidateDate === null) {
       return {
-        path: path41,
+        path: path42,
         score: 0,
         weight,
         hit: false,
-        message: `${path41} (unparseable candidate date)`
+        message: `${path42} (unparseable candidate date)`
       };
     }
     if (expectedDate === null) {
       return {
-        path: path41,
+        path: path42,
         score: 0,
         weight,
         hit: false,
-        message: `${path41} (unparseable expected date)`
+        message: `${path42} (unparseable expected date)`
       };
     }
     if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
       return {
-        path: path41,
+        path: path42,
         score: 1,
         weight,
         hit: true,
-        message: path41
+        message: path42
       };
     }
     return {
-      path: path41,
+      path: path42,
       score: 0,
       weight,
       hit: false,
-      message: `${path41} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
+      message: `${path42} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
     };
   }
   /**
@@ -44098,11 +44282,11 @@ var FieldAccuracyEvaluator = class {
     };
   }
 };
-function resolvePath(obj, path41) {
-  if (!path41 || !obj) {
+function resolvePath(obj, path42) {
+  if (!path42 || !obj) {
     return void 0;
   }
-  const parts = path41.split(/\.|\[|\]/).filter((p) => p.length > 0);
+  const parts = path42.split(/\.|\[|\]/).filter((p) => p.length > 0);
   let current = obj;
   for (const part of parts) {
     if (current === null || current === void 0) {
@@ -44906,8 +45090,8 @@ var TokenUsageEvaluator = class {
     };
   }
 };
-function getNestedValue(obj, path41) {
-  const parts = path41.split(".");
+function getNestedValue(obj, path42) {
+  const parts = path42.split(".");
   let current = obj;
   for (const part of parts) {
     if (current === null || current === void 0 || typeof current !== "object") {
@@ -46302,8 +46486,6 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
   }
 }
 var execFileAsync = promisify5(execFile);
-var DEFAULT_TIMEOUT_MS2 = 3e5;
-var LOCK_TIMEOUT_MS = 6e4;
 function gitEnv() {
   const env = { ...process.env };
   for (const key of Object.keys(env)) {
@@ -46318,65 +46500,296 @@ function gitEnv() {
     GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
   };
 }
-function cacheKey(source) {
-  const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
-  return createHash("sha256").update(raw).digest("hex");
-}
-function getSourceUrl(source) {
-  return source.type === "git" ? source.url : source.path;
-}
 async function git(args, opts) {
   const { stdout } = await execFileAsync("git", args, {
     cwd: opts?.cwd,
-    timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
+    timeout: opts?.timeout ?? 3e5,
     env: gitEnv(),
     maxBuffer: 50 * 1024 * 1024
-    // 50MB
   });
   return stdout.trim();
 }
-async function acquireLock(lockPath) {
-  const start = Date.now();
-  while (Date.now() - start < LOCK_TIMEOUT_MS) {
-    try {
-      await writeFile7(lockPath, String(process.pid), { flag: "wx" });
-      return;
-    } catch (err) {
-      if (err.code === "EEXIST") {
-        await new Promise((r) => setTimeout(r, 200));
+function normalizeRepoForFingerprint(repo) {
+  const source = repo.source.type === "git" ? { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") } : { type: "local", path: repo.source.path };
+  const result = {
+    path: repo.path,
+    source,
+    ref: repo.checkout?.ref ?? "HEAD"
+  };
+  if (repo.clone?.depth !== void 0) {
+    result.depth = repo.clone.depth;
+  }
+  if (repo.clone?.filter !== void 0) {
+    result.filter = repo.clone.filter;
+  }
+  if (repo.clone?.sparse?.length) {
+    result.sparse = [...repo.clone.sparse].sort();
+  }
+  return result;
+}
+function computeWorkspaceFingerprint(templatePath, repos) {
+  const canonical = {
+    templatePath: templatePath ?? null,
+    repos: [...repos].sort((a, b) => a.path.localeCompare(b.path)).map(normalizeRepoForFingerprint)
+  };
+  return createHash("sha256").update(JSON.stringify(canonical)).digest("hex");
+}
+async function copyDirectoryRecursive2(src, dest, skipDirs) {
+  await mkdir11(dest, { recursive: true });
+  const entries = await readdir4(src, { withFileTypes: true });
+  for (const entry of entries) {
+    const srcPath = path36.join(src, entry.name);
+    const destPath = path36.join(dest, entry.name);
+    if (entry.name === ".git") {
+      continue;
+    }
+    if (entry.isDirectory()) {
+      if (skipDirs?.has(entry.name)) {
         continue;
       }
-      throw err;
+      await copyDirectoryRecursive2(srcPath, destPath, skipDirs);
+    } else {
+      await cp2(srcPath, destPath, { preserveTimestamps: true, force: true });
     }
   }
-  throw new Error(`Timed out waiting for lock: ${lockPath}`);
 }
-async function releaseLock(lockPath) {
-  try {
-    await unlink(lockPath);
-  } catch {
+var WorkspacePoolManager = class {
+  poolRoot;
+  constructor(poolRoot) {
+    this.poolRoot = poolRoot ?? getWorkspacePoolRoot();
+  }
+  /**
+   * Acquire a workspace slot from the pool.
+   *
+   * 1. Compute fingerprint from template + repos
+   * 2. Check drift (compare stored metadata.json fingerprint vs computed)
+   * 3. If drift: warn, remove all slots, rematerialize
+   * 4. Acquire a slot (try-lock slot-0, slot-1, ..., up to maxSlots)
+   * 5. If slot exists: reset repos, re-copy template files (skip repo directories)
+   * 6. If new slot: copy template, materialize all repos, write metadata.json
+   * 7. Return the slot (with path, index, isExisting)
+   */
+  async acquireWorkspace(options) {
+    const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
+    const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
+    const poolDir = path36.join(this.poolRoot, fingerprint);
+    await mkdir11(poolDir, { recursive: true });
+    const drifted = await this.checkDrift(poolDir, fingerprint);
+    if (drifted) {
+      console.warn(
+        `[workspace-pool] Drift detected for fingerprint ${fingerprint.slice(0, 12)}... Removing stale slots.`
+      );
+      await this.removeAllSlots(poolDir);
+    }
+    for (let i = 0; i < maxSlots; i++) {
+      const slotPath = path36.join(poolDir, `slot-${i}`);
+      const lockPath = `${slotPath}.lock`;
+      const locked = await this.tryLock(lockPath);
+      if (!locked) {
+        continue;
+      }
+      const slotExists = existsSync2(slotPath);
+      if (slotExists) {
+        await this.resetSlot(slotPath, templatePath, repos, poolReset);
+        return {
+          index: i,
+          path: slotPath,
+          isExisting: true,
+          lockPath,
+          fingerprint,
+          poolDir
+        };
+      }
+      await mkdir11(slotPath, { recursive: true });
+      if (templatePath) {
+        await copyDirectoryRecursive2(templatePath, slotPath);
+      }
+      if (repos.length > 0) {
+        await repoManager.materializeAll(repos, slotPath);
+      }
+      await this.writeMetadata(poolDir, fingerprint, templatePath ?? null, repos);
+      return {
+        index: i,
+        path: slotPath,
+        isExisting: false,
+        lockPath,
+        fingerprint,
+        poolDir
+      };
+    }
+    throw new Error(
+      `All ${maxSlots} pool slots are locked for fingerprint ${fingerprint.slice(0, 12)}...`
+    );
+  }
+  /** Remove lock file to release a slot. */
+  async releaseSlot(slot) {
+    try {
+      await unlink(slot.lockPath);
+    } catch {
+    }
+  }
+  /**
+   * Try to acquire a PID-based lock file.
+   * On EEXIST, read PID and check if process is alive. If dead, stale lock — remove and retry.
+   * Returns true if lock acquired, false if slot is actively locked.
+   * Uses a bounded loop (max 3 attempts) to avoid unbounded recursion.
+   */
+  async tryLock(lockPath) {
+    for (let attempt = 0; attempt < 3; attempt++) {
+      try {
+        await writeFile7(lockPath, String(process.pid), { flag: "wx" });
+        return true;
+      } catch (err) {
+        if (err.code !== "EEXIST") {
+          throw err;
+        }
+        try {
+          const pidStr = await readFile11(lockPath, "utf-8");
+          const pid = Number.parseInt(pidStr.trim(), 10);
+          if (!Number.isNaN(pid)) {
+            try {
+              process.kill(pid, 0);
+              return false;
+            } catch {
+              await unlink(lockPath).catch(() => {
+              });
+              continue;
+            }
+          }
+        } catch {
+        }
+        return false;
+      }
+    }
+    return false;
+  }
+  /**
+   * Check if the stored fingerprint in metadata.json differs from the computed one.
+   * Returns true if drifted, false otherwise.
+   * Returns false (no drift) if metadata.json doesn't exist (first use).
+   */
+  async checkDrift(poolDir, fingerprint) {
+    const metadataPath = path36.join(poolDir, "metadata.json");
+    try {
+      const raw = await readFile11(metadataPath, "utf-8");
+      const metadata = JSON.parse(raw);
+      return metadata.fingerprint !== fingerprint;
+    } catch {
+      return false;
+    }
+  }
+  /** Write metadata.json with fingerprint, inputs, and timestamp. */
+  async writeMetadata(poolDir, fingerprint, templatePath, repos) {
+    const metadata = {
+      fingerprint,
+      templatePath,
+      repos,
+      createdAt: (/* @__PURE__ */ new Date()).toISOString()
+    };
+    await writeFile7(path36.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
+  }
+  /** Remove all slot directories and their lock files from a pool directory. */
+  async removeAllSlots(poolDir) {
+    const entries = await readdir4(poolDir);
+    for (const entry of entries) {
+      if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
+        const lockPath = path36.join(poolDir, `${entry}.lock`);
+        if (existsSync2(lockPath)) {
+          try {
+            const pidStr = await readFile11(lockPath, "utf-8");
+            const pid = Number.parseInt(pidStr.trim(), 10);
+            if (!Number.isNaN(pid)) {
+              try {
+                process.kill(pid, 0);
+                console.warn(`[workspace-pool] Skipping slot ${entry}: locked by PID ${pid}`);
+                continue;
+              } catch {
+              }
+            }
+          } catch {
+          }
+        }
+        await rm5(path36.join(poolDir, entry), { recursive: true, force: true });
+        await rm5(lockPath, { force: true }).catch(() => {
+        });
+      }
+    }
+    await rm5(path36.join(poolDir, "metadata.json"), { force: true }).catch(() => {
+    });
+  }
+  /**
+   * Reset an existing slot for reuse:
+   * 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
+   * 2. Re-copy template files (skip repo directories)
+   */
+  async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
+    for (const repo of repos) {
+      const repoDir = path36.join(slotPath, repo.path);
+      if (!existsSync2(repoDir)) {
+        continue;
+      }
+      if (poolReset === "none") {
+        continue;
+      }
+      const ref = repo.checkout?.ref ?? "HEAD";
+      await git(["reset", "--hard", ref], { cwd: repoDir });
+      const cleanFlag = poolReset === "strict" ? "-fdx" : "-fd";
+      await git(["clean", cleanFlag], { cwd: repoDir });
+    }
+    if (templatePath) {
+      const repoDirNames = new Set(
+        repos.map((r) => {
+          const normalized = r.path.replace(/^\.\//, "");
+          return normalized.split("/")[0];
+        })
+      );
+      await copyDirectoryRecursive2(templatePath, slotPath, repoDirNames);
+    }
   }
+};
+var execFileAsync2 = promisify6(execFile2);
+var DEFAULT_TIMEOUT_MS2 = 3e5;
+function gitEnv2() {
+  const env = { ...process.env };
+  for (const key of Object.keys(env)) {
+    if (key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND") {
+      delete env[key];
+    }
+  }
+  return {
+    ...env,
+    GIT_TERMINAL_PROMPT: "0",
+    GIT_ASKPASS: "",
+    GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
+  };
+}
+function getSourceUrl(source) {
+  return source.type === "git" ? source.url : source.path;
+}
+async function git2(args, opts) {
+  const { stdout } = await execFileAsync2("git", args, {
+    cwd: opts?.cwd,
+    timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
+    env: gitEnv2(),
+    maxBuffer: 50 * 1024 * 1024
+    // 50MB
+  });
+  return stdout.trim();
 }
 var RepoManager = class {
-  cacheDir;
   verbose;
-  constructor(cacheDir, verbose = false) {
-    this.cacheDir = cacheDir ?? getGitCacheRoot();
+  constructor(verbose = false) {
     this.verbose = verbose;
   }
   async runGit(args, opts) {
     const startedAt = Date.now();
     if (this.verbose) {
-      console.log(
-        `[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`
-      );
+      console.log(`[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`);
     }
     try {
-      const output = await git(args, opts);
+      const output = await git2(args, opts);
       if (this.verbose) {
-        console.log(
-          `[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`
-        );
+        console.log(`[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`);
       }
       return output;
     } catch (error40) {
@@ -46390,88 +46803,18 @@ var RepoManager = class {
     }
   }
   /**
-   * Ensure a bare mirror cache exists for the given source.
-   * Creates on first access, fetches updates on subsequent calls.
-   * Returns the absolute path to the cache directory.
-   */
-  async ensureCache(source, depth, resolve2) {
-    const key = cacheKey(source);
-    const cachePath = path36.join(this.cacheDir, key);
-    const lockPath = `${cachePath}.lock`;
-    const cacheExists = existsSync2(path36.join(cachePath, "HEAD"));
-    if (this.verbose) {
-      console.log(
-        `[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve2 ?? "remote"} cache=${cachePath}`
-      );
-    }
-    if (resolve2 === "local") {
-      if (cacheExists) {
-        if (this.verbose) {
-          console.log(`[repo] using existing local cache ${cachePath}`);
-        }
-        return cachePath;
-      }
-      const url2 = getSourceUrl(source);
-      throw new Error(
-        `No cache found for \`${url2}\`. Run \`agentv cache add --url ${url2} --from <local-path>\` to seed it.`
-      );
-    }
-    await mkdir11(this.cacheDir, { recursive: true });
-    const lockStartedAt = Date.now();
-    await acquireLock(lockPath);
-    if (this.verbose) {
-      console.log(
-        `[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`
-      );
-    }
-    try {
-      if (cacheExists) {
-        if (this.verbose) {
-          console.log(`[repo] refreshing existing cache ${cachePath}`);
-        }
-        const fetchArgs = ["fetch", "--prune"];
-        if (depth) {
-          fetchArgs.push("--depth", String(depth));
-        }
-        await this.runGit(fetchArgs, { cwd: cachePath });
-      } else {
-        if (this.verbose) {
-          console.log(`[repo] creating new cache ${cachePath}`);
-        }
-        const cloneArgs = ["clone", "--mirror", "--bare"];
-        if (depth) {
-          cloneArgs.push("--depth", String(depth));
-        }
-        const sourceUrl = getSourceUrl(source);
-        const cloneUrl = depth && source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
-        cloneArgs.push(cloneUrl, cachePath);
-        await this.runGit(cloneArgs);
-      }
-    } finally {
-      await releaseLock(lockPath);
-      if (this.verbose) {
-        console.log(`[repo] lock released path=${lockPath}`);
-      }
-    }
-    return cachePath;
-  }
-  /**
-   * Clone a repo from cache into the workspace at the configured path.
+   * Clone a repo directly from source into the workspace at the configured path.
    * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
    */
   async materialize(repo, workspacePath) {
-    const targetDir = path36.join(workspacePath, repo.path);
+    const targetDir = path37.join(workspacePath, repo.path);
+    const sourceUrl = getSourceUrl(repo.source);
     const startedAt = Date.now();
     if (this.verbose) {
       console.log(
-        `[repo] materialize start path=${repo.path} source=${getSourceUrl(repo.source)} workspace=${workspacePath}`
+        `[repo] materialize start path=${repo.path} source=${sourceUrl} workspace=${workspacePath}`
       );
     }
-    const cachePath = await this.ensureCache(
-      repo.source,
-      repo.clone?.depth,
-      repo.checkout?.resolve
-    );
     const cloneArgs = ["clone"];
     if (repo.clone?.depth) {
       cloneArgs.push("--depth", String(repo.clone.depth));
@@ -46480,7 +46823,7 @@ var RepoManager = class {
       cloneArgs.push("--filter", repo.clone.filter);
     }
     cloneArgs.push("--no-checkout");
-    const cloneUrl = repo.clone?.depth || repo.clone?.filter ? `file://${cachePath}` : cachePath;
+    const cloneUrl = (repo.clone?.depth || repo.clone?.filter) && repo.source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
     cloneArgs.push(cloneUrl, targetDir);
     await this.runGit(cloneArgs);
     if (repo.clone?.sparse?.length) {
@@ -46552,81 +46895,43 @@ var RepoManager = class {
     }
   }
   /** Reset repos in workspace to their checkout state. */
-  async reset(repos, workspacePath, strategy) {
-    if (strategy === "recreate") {
-      for (const repo of repos) {
-        const targetDir = path36.join(workspacePath, repo.path);
-        await rm5(targetDir, { recursive: true, force: true });
-      }
-      await this.materializeAll(repos, workspacePath);
-      return;
-    }
+  async reset(repos, workspacePath, reset) {
+    const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
     for (const repo of repos) {
-      const targetDir = path36.join(workspacePath, repo.path);
+      const targetDir = path37.join(workspacePath, repo.path);
       await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
-      await this.runGit(["clean", "-fd"], { cwd: targetDir });
+      await this.runGit(["clean", cleanFlag], { cwd: targetDir });
     }
   }
-  /**
-   * Seed the cache from a local repository, setting the remote to a given URL.
-   * Useful for avoiding slow network clones when a local clone already exists.
-   */
-  async seedCache(localPath, remoteUrl, opts) {
-    const source = { type: "git", url: remoteUrl };
-    const key = cacheKey(source);
-    const cachePath = path36.join(this.cacheDir, key);
-    const lockPath = `${cachePath}.lock`;
-    await mkdir11(this.cacheDir, { recursive: true });
-    await acquireLock(lockPath);
-    try {
-      if (existsSync2(path36.join(cachePath, "HEAD"))) {
-        if (!opts?.force) {
-          throw new Error(
-            `Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
-          );
-        }
-        await rm5(cachePath, { recursive: true, force: true });
-      }
-      await git(["clone", "--mirror", "--bare", localPath, cachePath]);
-      await git(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
-    } finally {
-      await releaseLock(lockPath);
-    }
-    return cachePath;
-  }
-  /** Remove the entire cache directory. */
-  async cleanCache() {
-    await rm5(this.cacheDir, { recursive: true, force: true });
-  }
 };
 async function resolveWorkspaceTemplate(templatePath) {
   if (!templatePath) {
     return void 0;
   }
-  const resolved = path37.resolve(templatePath);
+  const resolved = path38.resolve(templatePath);
   const stats = await stat6(resolved);
   if (stats.isFile()) {
     return {
-      dir: path37.dirname(resolved),
+      dir: path38.dirname(resolved),
       workspaceFile: resolved
     };
   }
   if (!stats.isDirectory()) {
     throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
   }
-  const entries = await readdir4(resolved);
+  const entries = await readdir5(resolved);
   const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
   if (workspaceFiles.length === 1) {
     return {
       dir: resolved,
-      workspaceFile: path37.join(resolved, workspaceFiles[0])
+      workspaceFile: path38.join(resolved, workspaceFiles[0])
     };
   }
   if (workspaceFiles.length > 1) {
     const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
     return {
       dir: resolved,
-      workspaceFile: conventionFile ? path37.join(resolved, conventionFile) : void 0
+      workspaceFile: conventionFile ? path38.join(resolved, conventionFile) : void 0
     };
   }
   return { dir: resolved };
@@ -46674,6 +46979,22 @@ function classifyQualityStatus(score) {
 function usesFileReferencePrompt(provider) {
   return isAgentProvider(provider) || provider.kind === "cli";
 }
+function toScriptConfig(hook, hookName, context) {
+  const command = hook.command ?? hook.script;
+  if (!command || command.length === 0) {
+    throw new Error(`${hookName} hook in ${context} requires command or script`);
+  }
+  return {
+    command,
+    ...hook.timeout_ms !== void 0 && { timeout_ms: hook.timeout_ms },
+    ...hook.timeoutMs !== void 0 && { timeoutMs: hook.timeoutMs },
+    ...hook.cwd !== void 0 && { cwd: hook.cwd },
+    ...hook.script !== void 0 && { script: hook.script }
+  };
+}
+function hasHookCommand(hook) {
+  return !!(hook?.command && hook.command.length > 0 || hook?.script && hook.script.length > 0);
+}
 function getWorkspaceTemplate(target) {
   const config2 = target.config;
   if ("workspaceTemplate" in config2 && typeof config2.workspaceTemplate === "string") {
@@ -46704,7 +47025,15 @@ async function runEvaluation(options) {
     trials,
     streamCallbacks,
     totalBudgetUsd,
-    failOnError
+    failOnError,
+    poolWorkspaces,
+    poolMaxSlots: configPoolMaxSlots,
+    workspace: legacyWorkspacePath,
+    workspaceMode,
+    workspacePath,
+    workspaceClean,
+    retainOnSuccess,
+    retainOnFailure
   } = options;
   let useCache = options.useCache;
   if (trials && trials.count > 1 && useCache) {
@@ -46778,7 +47107,7 @@ async function runEvaluation(options) {
   ];
   const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveJudgeProvider);
   const typeRegistry = createBuiltinRegistry();
-  const discoveryBaseDir = evalFilePath ? path38.dirname(path38.resolve(evalFilePath)) : process.cwd();
+  const discoveryBaseDir = evalFilePath ? path39.dirname(path39.resolve(evalFilePath)) : process.cwd();
   const evalDir = discoveryBaseDir;
   await discoverAssertions(typeRegistry, discoveryBaseDir);
   const providerRegistry = createBuiltinProviderRegistry();
@@ -46840,13 +47169,29 @@ async function runEvaluation(options) {
     }
   };
   const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
-  const hasSharedWorkspace = !!(workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
+  const configuredMode = suiteWorkspace?.mode ?? workspaceMode;
+  const configuredStaticPath = suiteWorkspace?.static_path ?? workspacePath ?? legacyWorkspacePath;
+  const useStaticWorkspace = configuredMode === "static" || !!configuredStaticPath && !configuredMode;
+  if (useStaticWorkspace && isPerTestIsolation) {
+    throw new Error(
+      "static workspace mode is incompatible with isolation: per_test. Use isolation: shared (default)."
+    );
+  }
+  if (configuredMode === "static" && !configuredStaticPath) {
+    throw new Error("workspace.mode=static requires workspace.static_path or --workspace-path");
+  }
+  const hasSharedWorkspace = !!(useStaticWorkspace || workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation);
+  const poolEnabled = configuredMode === "pooled" ? true : configuredMode === "ephemeral" || useStaticWorkspace ? false : suiteWorkspace?.pool ?? poolWorkspaces ?? true;
+  const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
+  const finishCleanPolicy = suiteWorkspace?.hooks?.on_finish?.clean;
+  const resolvedRetainOnSuccess = (finishCleanPolicy === "always" || finishCleanPolicy === "on_success" ? "cleanup" : finishCleanPolicy === "on_failure" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
+  const resolvedRetainOnFailure = (finishCleanPolicy === "always" || finishCleanPolicy === "on_failure" ? "cleanup" : finishCleanPolicy === "on_success" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
   const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
-  const workers = hasSharedWorkspace ? 1 : requestedWorkers;
+  const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
   setupLog(
-    `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
+    `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
   );
-  if (hasSharedWorkspace && requestedWorkers > 1) {
+  if (hasSharedWorkspace && !usePool && requestedWorkers > 1) {
     console.warn(
       `Warning: Shared workspace requires sequential execution. Overriding workers from ${requestedWorkers} to 1.`
     );
@@ -46855,7 +47200,38 @@ async function runEvaluation(options) {
   let sharedWorkspacePath;
   let sharedBaselineCommit;
   let beforeAllOutput;
-  if (workspaceTemplate) {
+  let poolManager;
+  let poolSlot;
+  const poolSlots = [];
+  const availablePoolSlots = [];
+  const poolSlotBaselines = /* @__PURE__ */ new Map();
+  const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
+  if (useStaticWorkspace && configuredStaticPath) {
+    sharedWorkspacePath = configuredStaticPath;
+    setupLog(`using static workspace: ${configuredStaticPath}`);
+  } else if (usePool && suiteWorkspace?.repos) {
+    const slotsNeeded = workers;
+    setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
+    poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
+    const poolRepoManager = new RepoManager(verbose);
+    for (let i = 0; i < slotsNeeded; i++) {
+      const slot = await poolManager.acquireWorkspace({
+        templatePath: workspaceTemplate,
+        repos: suiteWorkspace.repos,
+        maxSlots: poolMaxSlots,
+        repoManager: poolRepoManager,
+        poolReset: (workspaceClean === "full" ? "strict" : workspaceClean === "standard" ? "fast" : null) ?? suiteWorkspace.hooks?.on_reuse?.reset ?? "fast"
+      });
+      poolSlots.push(slot);
+      setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
+    }
+    if (slotsNeeded === 1) {
+      poolSlot = poolSlots[0];
+      sharedWorkspacePath = poolSlot.path;
+    } else {
+      availablePoolSlots.push(...poolSlots);
+    }
+  } else if (workspaceTemplate) {
     setupLog(`creating shared workspace from template: ${workspaceTemplate}`);
     try {
       sharedWorkspacePath = await createTempWorkspace(workspaceTemplate, evalRunId, "shared");
@@ -46864,288 +47240,359 @@ async function runEvaluation(options) {
       const message = error40 instanceof Error ? error40.message : String(error40);
       throw new Error(`Failed to create shared workspace: ${message}`);
     }
+  } else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
+    sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
+    await mkdir12(sharedWorkspacePath, { recursive: true });
+    setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
+  }
+  try {
     if (suiteWorkspaceFile && sharedWorkspacePath) {
-      const copiedWorkspaceFile = path38.join(sharedWorkspacePath, path38.basename(suiteWorkspaceFile));
+      const copiedWorkspaceFile = path39.join(sharedWorkspacePath, path39.basename(suiteWorkspaceFile));
       try {
         await stat7(copiedWorkspaceFile);
         suiteWorkspaceFile = copiedWorkspaceFile;
       } catch {
       }
     }
-  } else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
-    sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
-    await mkdir12(sharedWorkspacePath, { recursive: true });
-    setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
-  }
-  const repoManager = suiteWorkspace?.repos?.length ? new RepoManager(void 0, verbose) : void 0;
-  if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
-    setupLog(`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`);
-    try {
-      await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
-      setupLog("shared repo materialization complete");
-    } catch (error40) {
-      const message = error40 instanceof Error ? error40.message : String(error40);
-      if (sharedWorkspacePath) {
-        await cleanupWorkspace(sharedWorkspacePath).catch(() => {
-        });
-      }
-      throw new Error(`Failed to materialize repos: ${message}`);
-    }
-  }
-  if (sharedWorkspacePath && suiteWorkspace?.before_all) {
-    const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
-    setupLog(
-      `running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
-    );
-    const scriptContext = {
-      workspacePath: sharedWorkspacePath,
-      testId: "__before_all__",
-      evalRunId,
-      evalDir
-    };
-    try {
-      beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
-      setupLog("shared before_all completed");
-    } catch (error40) {
-      const message = error40 instanceof Error ? error40.message : String(error40);
-      if (sharedWorkspacePath) {
-        await cleanupWorkspace(sharedWorkspacePath).catch(() => {
-        });
-      }
-      throw new Error(`before_all script failed: ${message}`);
-    }
-  }
-  if (sharedWorkspacePath) {
-    try {
-      sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
-      setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
-    } catch {
-      setupLog("shared baseline initialization skipped (non-fatal)");
-    }
-  }
-  let nextWorkerId = 1;
-  const workerIdByEvalId = /* @__PURE__ */ new Map();
-  let beforeAllOutputAttached = false;
-  let cumulativeBudgetCost = 0;
-  let budgetExhausted = false;
-  let failOnErrorTriggered = false;
-  const promises = filteredEvalCases.map(
-    (evalCase) => limit(async () => {
-      const workerId = nextWorkerId++;
-      workerIdByEvalId.set(evalCase.id, workerId);
-      if (totalBudgetUsd !== void 0 && budgetExhausted) {
-        const budgetResult = {
-          timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
-          testId: evalCase.id,
-          dataset: evalCase.dataset,
-          score: 0,
-          hits: [],
-          misses: [],
-          answer: "",
-          target: target.name,
-          error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
-          budgetExceeded: true,
-          executionStatus: "execution_error",
-          failureStage: "setup",
-          failureReasonCode: "budget_exceeded",
-          executionError: {
-            message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
-            stage: "setup"
-          }
-        };
-        if (onProgress) {
-          await onProgress({
-            workerId,
-            testId: evalCase.id,
-            status: "failed",
-            completedAt: Date.now(),
-            error: budgetResult.error
+    const repoManager = suiteWorkspace?.repos?.length && !usePool && !useStaticWorkspace ? new RepoManager(verbose) : void 0;
+    if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
+      setupLog(
+        `materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
+      );
+      try {
+        await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
+        setupLog("shared repo materialization complete");
+      } catch (error40) {
+        const message = error40 instanceof Error ? error40.message : String(error40);
+        if (sharedWorkspacePath && !useStaticWorkspace) {
+          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
           });
         }
-        if (onResult) {
-          await onResult(budgetResult);
-        }
-        return budgetResult;
+        throw new Error(`Failed to materialize repos: ${message}`);
       }
-      if (failOnError === true && failOnErrorTriggered) {
-        const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
-        const haltResult = {
-          timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
-          testId: evalCase.id,
-          dataset: evalCase.dataset,
-          score: 0,
-          hits: [],
-          misses: [],
-          answer: "",
-          target: target.name,
-          error: errorMsg,
-          executionStatus: "execution_error",
-          failureStage: "setup",
-          failureReasonCode: "error_threshold_exceeded",
-          executionError: { message: errorMsg, stage: "setup" }
-        };
-        if (onProgress) {
-          await onProgress({
-            workerId,
-            testId: evalCase.id,
-            status: "failed",
-            completedAt: Date.now(),
-            error: haltResult.error
+    }
+    const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all_tests;
+    if (sharedWorkspacePath && hasHookCommand(suiteBeforeAllHook)) {
+      const beforeAllHook = suiteBeforeAllHook;
+      const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
+      setupLog(
+        `running shared before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
+      );
+      const scriptContext = {
+        workspacePath: sharedWorkspacePath,
+        testId: "__before_all__",
+        evalRunId,
+        evalDir
+      };
+      try {
+        beforeAllOutput = await executeWorkspaceScript(
+          toScriptConfig(beforeAllHook, "before_all_tests", "suite workspace"),
+          scriptContext
+        );
+        setupLog("shared before_all completed");
+      } catch (error40) {
+        const message = error40 instanceof Error ? error40.message : String(error40);
+        if (sharedWorkspacePath && !useStaticWorkspace) {
+          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
           });
         }
-        if (onResult) {
-          await onResult(haltResult);
-        }
-        return haltResult;
+        throw new Error(`before_all script failed: ${message}`);
       }
-      if (onProgress) {
-        await onProgress({
-          workerId,
-          testId: evalCase.id,
-          status: "running",
-          startedAt: Date.now()
-        });
-      }
-      try {
-        const judgeProvider = await resolveJudgeProvider(target);
-        const runCaseOptions = {
-          evalCase,
-          provider: primaryProvider,
-          target,
-          evaluators: evaluatorRegistry,
-          maxRetries,
-          agentTimeoutMs,
-          cache,
-          useCache,
-          now,
-          judgeProvider,
-          targetResolver,
-          availableTargets,
+    }
+    if (availablePoolSlots.length > 0 && hasHookCommand(suiteBeforeAllHook)) {
+      const beforeAllHook = suiteBeforeAllHook;
+      for (const slot of availablePoolSlots) {
+        setupLog(`running before_all on pool slot ${slot.index}`);
+        const scriptContext = {
+          workspacePath: slot.path,
+          testId: "__before_all__",
           evalRunId,
-          keepWorkspaces,
-          cleanupWorkspaces,
-          sharedWorkspacePath,
-          sharedBaselineCommit,
-          suiteWorkspaceFile,
-          streamCallbacks,
-          typeRegistry,
-          repoManager,
           evalDir
         };
-        let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
-        if (totalBudgetUsd !== void 0) {
-          let caseCost;
-          if (result.trials && result.trials.length > 0) {
-            const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
-            if (trialCostSum > 0) {
-              caseCost = trialCostSum;
+        try {
+          const output = await executeWorkspaceScript(
+            toScriptConfig(beforeAllHook, "before_all_tests", "suite workspace"),
+            scriptContext
+          );
+          if (!beforeAllOutput) beforeAllOutput = output;
+          setupLog(`before_all completed on pool slot ${slot.index}`);
+        } catch (error40) {
+          const message = error40 instanceof Error ? error40.message : String(error40);
+          throw new Error(`before_all script failed on pool slot ${slot.index}: ${message}`);
+        }
+      }
+    }
+    if (sharedWorkspacePath) {
+      try {
+        sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
+        setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
+      } catch {
+        setupLog("shared baseline initialization skipped (non-fatal)");
+      }
+    }
+    if (availablePoolSlots.length > 0) {
+      for (const slot of availablePoolSlots) {
+        try {
+          const baseline = await initializeBaseline(slot.path);
+          poolSlotBaselines.set(slot.path, baseline);
+          setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
+        } catch {
+          setupLog(`pool slot ${slot.index} baseline initialization skipped (non-fatal)`);
+        }
+      }
+    }
+    let nextWorkerId = 1;
+    const workerIdByEvalId = /* @__PURE__ */ new Map();
+    let beforeAllOutputAttached = false;
+    let cumulativeBudgetCost = 0;
+    let budgetExhausted = false;
+    let failOnErrorTriggered = false;
+    const promises = filteredEvalCases.map(
+      (evalCase) => limit(async () => {
+        const workerId = nextWorkerId++;
+        workerIdByEvalId.set(evalCase.id, workerId);
+        if (totalBudgetUsd !== void 0 && budgetExhausted) {
+          const budgetResult = {
+            timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
+            testId: evalCase.id,
+            dataset: evalCase.dataset,
+            score: 0,
+            hits: [],
+            misses: [],
+            answer: "",
+            target: target.name,
+            error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
+            budgetExceeded: true,
+            executionStatus: "execution_error",
+            failureStage: "setup",
+            failureReasonCode: "budget_exceeded",
+            executionError: {
+              message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
+              stage: "setup"
             }
-          } else {
-            caseCost = result.costUsd;
+          };
+          if (onProgress) {
+            await onProgress({
+              workerId,
+              testId: evalCase.id,
+              status: "failed",
+              completedAt: Date.now(),
+              error: budgetResult.error
+            });
           }
-          if (caseCost !== void 0) {
-            cumulativeBudgetCost += caseCost;
-            if (cumulativeBudgetCost >= totalBudgetUsd) {
-              budgetExhausted = true;
-            }
+          if (onResult) {
+            await onResult(budgetResult);
           }
+          return budgetResult;
         }
-        if (failOnError === true && result.executionStatus === "execution_error") {
-          failOnErrorTriggered = true;
-        }
-        if (beforeAllOutput && !beforeAllOutputAttached) {
-          result = { ...result, beforeAllOutput };
-          beforeAllOutputAttached = true;
+        if (failOnError === true && failOnErrorTriggered) {
+          const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
+          const haltResult = {
+            timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
+            testId: evalCase.id,
+            dataset: evalCase.dataset,
+            score: 0,
+            hits: [],
+            misses: [],
+            answer: "",
+            target: target.name,
+            error: errorMsg,
+            executionStatus: "execution_error",
+            failureStage: "setup",
+            failureReasonCode: "error_threshold_exceeded",
+            executionError: { message: errorMsg, stage: "setup" }
+          };
+          if (onProgress) {
+            await onProgress({
+              workerId,
+              testId: evalCase.id,
+              status: "failed",
+              completedAt: Date.now(),
+              error: haltResult.error
+            });
+          }
+          if (onResult) {
+            await onResult(haltResult);
+          }
+          return haltResult;
         }
         if (onProgress) {
           await onProgress({
             workerId,
             testId: evalCase.id,
-            status: result.error ? "failed" : "completed",
-            startedAt: 0,
-            // Not used for completed status
-            completedAt: Date.now(),
-            error: result.error
+            status: "running",
+            startedAt: Date.now()
           });
         }
-        if (onResult) {
-          await onResult(result);
+        const testPoolSlot = availablePoolSlots.length > 0 ? availablePoolSlots.pop() : void 0;
+        const testWorkspacePath = testPoolSlot?.path ?? sharedWorkspacePath;
+        const testBaselineCommit = testPoolSlot ? poolSlotBaselines.get(testPoolSlot.path) : sharedBaselineCommit;
+        try {
+          const judgeProvider = await resolveJudgeProvider(target);
+          const runCaseOptions = {
+            evalCase,
+            provider: primaryProvider,
+            target,
+            evaluators: evaluatorRegistry,
+            maxRetries,
+            agentTimeoutMs,
+            cache,
+            useCache,
+            now,
+            judgeProvider,
+            targetResolver,
+            availableTargets,
+            evalRunId,
+            keepWorkspaces,
+            cleanupWorkspaces,
+            retainOnSuccess: resolvedRetainOnSuccess,
+            retainOnFailure: resolvedRetainOnFailure,
+            sharedWorkspacePath: testWorkspacePath,
+            sharedBaselineCommit: testBaselineCommit,
+            suiteWorkspaceFile,
+            streamCallbacks,
+            typeRegistry,
+            repoManager,
+            evalDir
+          };
+          let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
+          if (totalBudgetUsd !== void 0) {
+            let caseCost;
+            if (result.trials && result.trials.length > 0) {
+              const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
+              if (trialCostSum > 0) {
+                caseCost = trialCostSum;
+              }
+            } else {
+              caseCost = result.costUsd;
+            }
+            if (caseCost !== void 0) {
+              cumulativeBudgetCost += caseCost;
+              if (cumulativeBudgetCost >= totalBudgetUsd) {
+                budgetExhausted = true;
+              }
+            }
+          }
+          if (failOnError === true && result.executionStatus === "execution_error") {
+            failOnErrorTriggered = true;
+          }
+          if (beforeAllOutput && !beforeAllOutputAttached) {
+            result = { ...result, beforeAllOutput };
+            beforeAllOutputAttached = true;
+          }
+          if (onProgress) {
+            await onProgress({
+              workerId,
+              testId: evalCase.id,
+              status: result.error ? "failed" : "completed",
+              startedAt: 0,
+              // Not used for completed status
+              completedAt: Date.now(),
+              error: result.error
+            });
+          }
+          if (onResult) {
+            await onResult(result);
+          }
+          return result;
+        } catch (error40) {
+          if (onProgress) {
+            await onProgress({
+              workerId,
+              testId: evalCase.id,
+              status: "failed",
+              completedAt: Date.now(),
+              error: error40 instanceof Error ? error40.message : String(error40)
+            });
+          }
+          throw error40;
+        } finally {
+          if (testPoolSlot) {
+            availablePoolSlots.push(testPoolSlot);
+          }
         }
-        return result;
-      } catch (error40) {
-        if (onProgress) {
-          await onProgress({
-            workerId,
-            testId: evalCase.id,
-            status: "failed",
-            completedAt: Date.now(),
-            error: error40 instanceof Error ? error40.message : String(error40)
-          });
+      })
+    );
+    const settled = await Promise.allSettled(promises);
+    const results = [];
+    for (let i = 0; i < settled.length; i++) {
+      const outcome = settled[i];
+      if (outcome.status === "fulfilled") {
+        results.push(outcome.value);
+      } else {
+        const evalCase = filteredEvalCases[i];
+        const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
+        const promptInputs = await buildPromptInputs(evalCase, formattingMode);
+        const errorResult = buildErrorResult(
+          evalCase,
+          target.name,
+          (now ?? (() => /* @__PURE__ */ new Date()))(),
+          outcome.reason,
+          promptInputs,
+          primaryProvider,
+          "agent",
+          "provider_error"
+        );
+        results.push(errorResult);
+        if (onResult) {
+          await onResult(errorResult);
         }
-        throw error40;
       }
-    })
-  );
-  const settled = await Promise.allSettled(promises);
-  const results = [];
-  for (let i = 0; i < settled.length; i++) {
-    const outcome = settled[i];
-    if (outcome.status === "fulfilled") {
-      results.push(outcome.value);
-    } else {
-      const evalCase = filteredEvalCases[i];
-      const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
-      const promptInputs = await buildPromptInputs(evalCase, formattingMode);
-      const errorResult = buildErrorResult(
-        evalCase,
-        target.name,
-        (now ?? (() => /* @__PURE__ */ new Date()))(),
-        outcome.reason,
-        promptInputs,
-        primaryProvider,
-        "agent",
-        "provider_error"
-      );
-      results.push(errorResult);
-      if (onResult) {
-        await onResult(errorResult);
+    }
+    const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
+    const suiteAfterAllHook = suiteWorkspace?.hooks?.after_all_tests;
+    if (afterAllWorkspaces.length > 0 && hasHookCommand(suiteAfterAllHook)) {
+      const afterAllHook = suiteAfterAllHook;
+      for (const wsPath of afterAllWorkspaces) {
+        const scriptContext = {
+          workspacePath: wsPath,
+          testId: "__after_all__",
+          evalRunId,
+          evalDir
+        };
+        try {
+          const afterAllOutput = await executeWorkspaceScript(
+            toScriptConfig(afterAllHook, "after_all_tests", "suite workspace"),
+            scriptContext,
+            "warn"
+          );
+          if (afterAllOutput && results.length > 0 && wsPath === afterAllWorkspaces[0]) {
+            results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
+          }
+        } catch {
+        }
       }
     }
-  }
-  if (sharedWorkspacePath && suiteWorkspace?.after_all) {
-    const scriptContext = {
-      workspacePath: sharedWorkspacePath,
-      testId: "__after_all__",
-      evalRunId,
-      evalDir
-    };
-    try {
-      const afterAllOutput = await executeWorkspaceScript(
-        suiteWorkspace.after_all,
-        scriptContext,
-        "warn"
-      );
-      if (afterAllOutput && results.length > 0) {
-        results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
+    if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !useStaticWorkspace) {
+      const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
+      if (hasFailure) {
+        if (resolvedRetainOnFailure === "cleanup") {
+          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
+          });
+        }
+      } else if (resolvedRetainOnSuccess === "cleanup") {
+        await cleanupWorkspace(sharedWorkspacePath).catch(() => {
+        });
       }
-    } catch {
     }
-  }
-  if (sharedWorkspacePath) {
-    const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
     if (cleanupWorkspaces) {
-      await cleanupWorkspace(sharedWorkspacePath).catch(() => {
-      });
-    } else if (!hasFailure && !keepWorkspaces) {
-      await cleanupWorkspace(sharedWorkspacePath).catch(() => {
+      await cleanupEvalWorkspaces(evalRunId).catch(() => {
       });
     }
+    return results;
+  } finally {
+    if (poolManager) {
+      if (poolSlot) {
+        await poolManager.releaseSlot(poolSlot);
+      }
+      for (const slot of poolSlots) {
+        if (slot !== poolSlot) {
+          await poolManager.releaseSlot(slot).catch(() => {
+          });
+        }
+      }
+    }
   }
-  if (cleanupWorkspaces) {
-    await cleanupEvalWorkspaces(evalRunId).catch(() => {
-    });
-  }
-  return results;
 }
 async function runBatchEvaluation(options) {
   const {
@@ -47317,6 +47764,8 @@ async function runEvalCase(options) {
     evalRunId,
     keepWorkspaces,
     cleanupWorkspaces: forceCleanup,
+    retainOnSuccess,
+    retainOnFailure,
     sharedWorkspacePath,
     sharedBaselineCommit,
     suiteWorkspaceFile,
@@ -47328,10 +47777,10 @@ async function runEvalCase(options) {
   const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
   const promptInputs = await buildPromptInputs(evalCase, formattingMode);
   const typeRegistry = providedTypeRegistry ?? createBuiltinRegistry();
-  const cacheKey2 = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
+  const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
   let cachedResponse;
-  if (cacheKey2 && cache) {
-    cachedResponse = await cache.get(cacheKey2);
+  if (cacheKey && cache) {
+    cachedResponse = await cache.get(cacheKey);
   }
   const nowFn = now ?? (() => /* @__PURE__ */ new Date());
   let workspacePath = sharedWorkspacePath;
@@ -47362,7 +47811,7 @@ async function runEvalCase(options) {
         );
       }
       if (caseWorkspaceFile && workspacePath) {
-        const copiedFile = path38.join(workspacePath, path38.basename(caseWorkspaceFile));
+        const copiedFile = path39.join(workspacePath, path39.basename(caseWorkspaceFile));
         try {
           await stat7(copiedFile);
           caseWorkspaceFile = copiedFile;
@@ -47370,12 +47819,12 @@ async function runEvalCase(options) {
         }
       }
     }
-    if (!workspacePath && (evalCase.workspace?.before_all || evalCase.workspace?.repos?.length) && evalRunId) {
+    if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
       workspacePath = getWorkspacePath(evalRunId, evalCase.id);
       await mkdir12(workspacePath, { recursive: true });
     }
     if (evalCase.workspace?.repos?.length && workspacePath) {
-      const perCaseRepoManager = new RepoManager(void 0, setupDebug);
+      const perCaseRepoManager = new RepoManager(setupDebug);
       try {
         if (setupDebug) {
           console.log(
@@ -47400,11 +47849,13 @@ async function runEvalCase(options) {
         );
       }
     }
-    if (workspacePath && evalCase.workspace?.before_all) {
-      const beforeAllCommand = (evalCase.workspace.before_all.command ?? evalCase.workspace.before_all.script ?? []).join(" ");
+    const caseBeforeAllHook = evalCase.workspace?.hooks?.before_all_tests;
+    if (workspacePath && hasHookCommand(caseBeforeAllHook)) {
+      const beforeAllHook = caseBeforeAllHook;
+      const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
       if (setupDebug) {
         console.log(
-          `[setup] test=${evalCase.id} running before_all in cwd=${evalCase.workspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
+          `[setup] test=${evalCase.id} running before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
         );
       }
       const scriptContext = {
@@ -47417,7 +47868,7 @@ async function runEvalCase(options) {
       };
       try {
         beforeAllOutput = await executeWorkspaceScript(
-          evalCase.workspace.before_all,
+          toScriptConfig(beforeAllHook, "before_all_tests", `test '${evalCase.id}'`),
           scriptContext
         );
         if (setupDebug) {
@@ -47442,7 +47893,9 @@ async function runEvalCase(options) {
       }
     }
   }
-  if (workspacePath && evalCase.workspace?.before_each) {
+  const caseBeforeEachHook = evalCase.workspace?.hooks?.before_each_test;
+  if (workspacePath && hasHookCommand(caseBeforeEachHook)) {
+    const beforeEachHook = caseBeforeEachHook;
     const scriptContext = {
       workspacePath,
       testId: evalCase.id,
@@ -47453,7 +47906,7 @@ async function runEvalCase(options) {
     };
     try {
       beforeEachOutput = await executeWorkspaceScript(
-        evalCase.workspace.before_each,
+        toScriptConfig(beforeEachHook, "before_each_test", `test '${evalCase.id}'`),
         scriptContext
       );
     } catch (error40) {
@@ -47541,8 +47994,8 @@ async function runEvalCase(options) {
     }
     return errorResult;
   }
-  if (cacheKey2 && cache && !cachedResponse) {
-    await cache.set(cacheKey2, providerResponse);
+  if (cacheKey && cache && !cachedResponse) {
+    await cache.set(cacheKey, providerResponse);
   }
   const output = providerResponse.output;
   const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
@@ -47570,17 +48023,19 @@ async function runEvalCase(options) {
     }
   }
   const providerError = extractProviderError(providerResponse);
-  if (repoManager && workspacePath && evalCase.workspace?.reset?.after_each && evalCase.workspace.reset.strategy && evalCase.workspace.reset.strategy !== "none" && evalCase.workspace.repos) {
+  if (repoManager && workspacePath && evalCase.workspace?.hooks?.after_each_test?.reset && evalCase.workspace.hooks.after_each_test.reset !== "none" && evalCase.workspace.repos) {
     try {
       await repoManager.reset(
         evalCase.workspace.repos,
         workspacePath,
-        evalCase.workspace.reset.strategy
+        evalCase.workspace.hooks.after_each_test.reset
       );
     } catch {
     }
   }
-  if (workspacePath && evalCase.workspace?.after_each) {
+  const caseAfterEachHook = evalCase.workspace?.hooks?.after_each_test;
+  if (workspacePath && hasHookCommand(caseAfterEachHook)) {
+    const afterEachHook = caseAfterEachHook;
     const scriptContext = {
       workspacePath,
       testId: evalCase.id,
@@ -47591,7 +48046,7 @@ async function runEvalCase(options) {
     };
     try {
       afterEachOutput = await executeWorkspaceScript(
-        evalCase.workspace.after_each,
+        toScriptConfig(afterEachHook, "after_each_test", `test '${evalCase.id}'`),
         scriptContext,
         "warn"
       );
@@ -47641,8 +48096,13 @@ async function runEvalCase(options) {
         await cleanupWorkspace(workspacePath).catch(() => {
         });
       } else if (isFailure) {
-        return { ...finalResult, workspacePath };
-      } else if (!keepWorkspaces) {
+        if ((retainOnFailure ?? "keep") === "cleanup") {
+          await cleanupWorkspace(workspacePath).catch(() => {
+          });
+        } else {
+          return { ...finalResult, workspacePath };
+        }
+      } else if ((retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup")) !== "keep") {
         await cleanupWorkspace(workspacePath).catch(() => {
         });
       }
@@ -47660,11 +48120,12 @@ async function runEvalCase(options) {
       "evaluator_error"
     );
     if (workspacePath && !isSharedWorkspace) {
-      if (forceCleanup) {
+      if (forceCleanup || (retainOnFailure ?? "keep") === "cleanup") {
         await cleanupWorkspace(workspacePath).catch(() => {
         });
+      } else {
+        return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
       }
-      return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
     }
     return { ...errorResult, beforeEachOutput, afterEachOutput };
   }
@@ -47683,7 +48144,9 @@ async function runEvalCaseWithTrials(options, trialsConfig) {
       useCache: false,
       // Force cleanup for intermediate trials
       cleanupWorkspaces: isLastDeclaredTrial ? options.cleanupWorkspaces : true,
-      keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false
+      keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false,
+      retainOnSuccess: isLastDeclaredTrial ? options.retainOnSuccess : "cleanup",
+      retainOnFailure: isLastDeclaredTrial ? options.retainOnFailure : "cleanup"
     };
     const result = await runEvalCase(trialOptions);
     allResults.push(result);
@@ -47972,7 +48435,7 @@ async function runEvaluatorList(options) {
     fileChanges,
     workspacePath
   };
-  const evalFileDir = evalCase.guideline_paths[0] ? path38.dirname(evalCase.guideline_paths[0]) : process.cwd();
+  const evalFileDir = evalCase.guideline_paths[0] ? path39.dirname(evalCase.guideline_paths[0]) : process.cwd();
   const dispatchContext = {
     judgeProvider,
     targetResolver,
@@ -48293,13 +48756,13 @@ async function evaluate(config2) {
   let evalCases;
   let testFilePath;
   if (config2.specFile) {
-    testFilePath = path39.resolve(config2.specFile);
+    testFilePath = path40.resolve(config2.specFile);
     evalCases = await loadTests(testFilePath, repoRoot, {
       verbose: config2.verbose,
       filter: config2.filter
     });
   } else {
-    testFilePath = path39.join(process.cwd(), "__programmatic__.yaml");
+    testFilePath = path40.join(process.cwd(), "__programmatic__.yaml");
     evalCases = (config2.tests ?? []).map((test) => {
       const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
       const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
@@ -48385,10 +48848,10 @@ function computeSummary(results, durationMs) {
 var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
 async function discoverDefaultTarget(repoRoot) {
   const cwd = process.cwd();
-  const chain = buildDirectoryChain(path39.join(cwd, "_placeholder"), repoRoot);
+  const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
   for (const dir of chain) {
     for (const candidate of TARGET_FILE_CANDIDATES) {
-      const targetsPath = path39.join(dir, candidate);
+      const targetsPath = path40.join(dir, candidate);
       if (!existsSync3(targetsPath)) continue;
       try {
         const definitions = await readTargetDefinitions(targetsPath);
@@ -48403,10 +48866,10 @@ async function discoverDefaultTarget(repoRoot) {
 async function loadEnvHierarchy(repoRoot) {
   const { readFileSync: readFileSync2 } = await import("node:fs");
   const cwd = process.cwd();
-  const chain = buildDirectoryChain(path39.join(cwd, "_placeholder"), repoRoot);
+  const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
   const envFiles = [];
   for (const dir of chain) {
-    const envPath = path39.join(dir, ".env");
+    const envPath = path40.join(dir, ".env");
     if (existsSync3(envPath)) envFiles.push(envPath);
   }
   for (let i = envFiles.length - 1; i >= 0; i--) {
@@ -48587,7 +49050,7 @@ var ResponseCache = class {
   async get(key) {
     const filePath = this.keyToPath(key);
     try {
-      const data = await readFile11(filePath, "utf8");
+      const data = await readFile12(filePath, "utf8");
       return JSON.parse(data);
     } catch {
       return void 0;
@@ -48595,13 +49058,13 @@ var ResponseCache = class {
   }
   async set(key, value) {
     const filePath = this.keyToPath(key);
-    const dir = path40.dirname(filePath);
+    const dir = path41.dirname(filePath);
     await mkdir13(dir, { recursive: true });
     await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
   }
   keyToPath(key) {
     const prefix = key.slice(0, 2);
-    return path40.join(this.cachePath, prefix, `${key}.json`);
+    return path41.join(this.cachePath, prefix, `${key}.json`);
   }
 };
 function shouldEnableCache(params) {
@@ -49107,9 +49570,9 @@ export {
   ProviderRegistry,
   getAgentvHome,
   getWorkspacesRoot,
-  getGitCacheRoot,
   getSubagentsRoot,
   getTraceStateRoot,
+  getWorkspacePoolRoot,
   ensureVSCodeSubagents,
   readTargetDefinitions,
   listTargetNames,
@@ -49171,6 +49634,8 @@ export {
   createTempWorkspace,
   cleanupWorkspace,
   cleanupEvalWorkspaces,
+  computeWorkspaceFingerprint,
+  WorkspacePoolManager,
   RepoManager,
   resolveWorkspaceTemplate,
   executeWorkspaceScript,
@@ -49189,4 +49654,4 @@ export {
   OtelStreamingObserver,
   createAgentKernel
 };
-//# sourceMappingURL=chunk-TK4PB62M.js.map
+//# sourceMappingURL=chunk-LZ5MPQFM.js.map