npm - @agentv/core - Versions diffs - 2.14.3 → 2.16.0 - Mend

@agentv/core 2.14.3 → 2.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/dist/{chunk-N55K52OO.js → chunk-E6AJPAXM.js} +1 -1
package/dist/chunk-E6AJPAXM.js.map +1 -0
package/dist/evaluation/validation/index.cjs +8 -7
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +9 -8
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +1079 -610
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +139 -34
package/dist/index.d.ts +139 -34
package/dist/index.js +1074 -607
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/dist/chunk-N55K52OO.js.map +0 -1

package/dist/index.cjs CHANGED Viewed

@@ -1244,12 +1244,12 @@ function serializeAttributeValue(value) {
   if (Array.isArray(value)) return { arrayValue: { values: value.map(serializeAttributeValue) } };
   return { stringValue: String(value) };
 }
-var import_promises31, import_node_path44, OtlpJsonFileExporter;
+var import_promises31, import_node_path45, OtlpJsonFileExporter;
 var init_otlp_json_file_exporter = __esm({
   "src/observability/otlp-json-file-exporter.ts"() {
     "use strict";
     import_promises31 = require("fs/promises");
-    import_node_path44 = require("path");
+    import_node_path45 = require("path");
     OtlpJsonFileExporter = class {
       // biome-ignore lint/suspicious/noExplicitAny: serialized span data
       spans = [];
@@ -1288,7 +1288,7 @@ var init_otlp_json_file_exporter = __esm({
       }
       async flush() {
         if (this.spans.length === 0) return;
-        await (0, import_promises31.mkdir)((0, import_node_path44.dirname)(this.filePath), { recursive: true });
+        await (0, import_promises31.mkdir)((0, import_node_path45.dirname)(this.filePath), { recursive: true });
         const otlpJson = {
           resourceSpans: [
             {
@@ -1319,13 +1319,13 @@ function hrTimeDiffMs(start, end) {
   const diffNano = end[1] - start[1];
   return Math.round(diffSec * 1e3 + diffNano / 1e6);
 }
-var import_node_fs13, import_promises32, import_node_path45, SimpleTraceFileExporter;
+var import_node_fs13, import_promises32, import_node_path46, SimpleTraceFileExporter;
 var init_simple_trace_file_exporter = __esm({
   "src/observability/simple-trace-file-exporter.ts"() {
     "use strict";
     import_node_fs13 = require("fs");
     import_promises32 = require("fs/promises");
-    import_node_path45 = require("path");
+    import_node_path46 = require("path");
     SimpleTraceFileExporter = class {
       stream = null;
       filePath;
@@ -1338,7 +1338,7 @@ var init_simple_trace_file_exporter = __esm({
       async ensureStream() {
         if (!this.streamReady) {
           this.streamReady = (async () => {
-            await (0, import_promises32.mkdir)((0, import_node_path45.dirname)(this.filePath), { recursive: true });
+            await (0, import_promises32.mkdir)((0, import_node_path46.dirname)(this.filePath), { recursive: true });
             this.stream = (0, import_node_fs13.createWriteStream)(this.filePath, { flags: "w" });
             return this.stream;
           })();
@@ -1457,6 +1457,7 @@ __export(index_exports, {
   TokenUsageEvaluator: () => TokenUsageEvaluator,
   ToolTrajectoryEvaluator: () => ToolTrajectoryEvaluator,
   WorkspaceCreationError: () => WorkspaceCreationError,
+  WorkspacePoolManager: () => WorkspacePoolManager,
   assembleLlmJudgePrompt: () => assembleLlmJudgePrompt,
   avgToolDurationMs: () => avgToolDurationMs,
   buildDirectoryChain: () => buildDirectoryChain2,
@@ -1471,6 +1472,7 @@ __export(index_exports, {
   cleanupEvalWorkspaces: () => cleanupEvalWorkspaces,
   cleanupWorkspace: () => cleanupWorkspace,
   computeTraceSummary: () => computeTraceSummary,
+  computeWorkspaceFingerprint: () => computeWorkspaceFingerprint,
   consumeClaudeLogEntries: () => consumeClaudeLogEntries,
   consumeCodexLogEntries: () => consumeCodexLogEntries,
   consumeCopilotCliLogEntries: () => consumeCopilotCliLogEntries,
@@ -1503,11 +1505,11 @@ __export(index_exports, {
   freeformEvaluationSchema: () => freeformEvaluationSchema,
   generateRubrics: () => generateRubrics,
   getAgentvHome: () => getAgentvHome,
-  getGitCacheRoot: () => getGitCacheRoot,
   getHitCount: () => getHitCount,
   getSubagentsRoot: () => getSubagentsRoot,
   getTraceStateRoot: () => getTraceStateRoot,
   getWorkspacePath: () => getWorkspacePath,
+  getWorkspacePoolRoot: () => getWorkspacePoolRoot,
   getWorkspacesRoot: () => getWorkspacesRoot,
   initializeBaseline: () => initializeBaseline,
   isEvaluatorKind: () => isEvaluatorKind,
@@ -2236,6 +2238,17 @@ function parseExecutionDefaults(raw, configPath) {
   } else if (otelFile !== void 0) {
     logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
   }
+  if (typeof obj.pool_workspaces === "boolean") {
+    result.pool_workspaces = obj.pool_workspaces;
+  } else if (obj.pool_workspaces !== void 0) {
+    logWarning(`Invalid execution.pool_workspaces in ${configPath}, expected boolean`);
+  }
+  const poolSlots = obj.pool_slots;
+  if (typeof poolSlots === "number" && Number.isInteger(poolSlots) && poolSlots >= 1 && poolSlots <= 50) {
+    result.pool_slots = poolSlots;
+  } else if (poolSlots !== void 0) {
+    logWarning(`Invalid execution.pool_slots in ${configPath}, expected integer 1-50`);
+  }
   return Object.keys(result).length > 0 ? result : void 0;
 }
 function logWarning(message) {
@@ -3677,6 +3690,7 @@ async function processMessages(options) {
     repoRootPath,
     guidelinePatterns,
     guidelinePaths,
+    treatFileSegmentsAsGuidelines,
     textParts,
     messageType,
     verbose
@@ -3724,16 +3738,20 @@ async function processMessages(options) {
         }
         try {
           const fileContent = (await (0, import_promises5.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
-          if (messageType === "input" && guidelinePatterns && guidelinePaths) {
-            const relativeToRepo = import_node_path5.default.relative(repoRootPath, resolvedPath);
-            if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
-              guidelinePaths.push(import_node_path5.default.resolve(resolvedPath));
-              if (verbose) {
-                console.log(`  [Guideline] Found: ${displayPath}`);
-                console.log(`    Resolved to: ${resolvedPath}`);
-              }
-              continue;
+          const classifyAsGuideline = shouldTreatAsGuideline({
+            messageType,
+            resolvedPath,
+            repoRootPath,
+            guidelinePatterns,
+            treatFileSegmentsAsGuidelines
+          });
+          if (classifyAsGuideline && guidelinePaths) {
+            guidelinePaths.push(import_node_path5.default.resolve(resolvedPath));
+            if (verbose) {
+              console.log(`  [Guideline] Found: ${displayPath}`);
+              console.log(`    Resolved to: ${resolvedPath}`);
             }
+            continue;
           }
           segments.push({
             type: "file",
@@ -3762,6 +3780,26 @@ async function processMessages(options) {
   }
   return segments;
 }
+function shouldTreatAsGuideline(options) {
+  const {
+    messageType,
+    resolvedPath,
+    repoRootPath,
+    guidelinePatterns,
+    treatFileSegmentsAsGuidelines
+  } = options;
+  if (messageType !== "input") {
+    return false;
+  }
+  if (treatFileSegmentsAsGuidelines) {
+    return true;
+  }
+  if (!guidelinePatterns || guidelinePatterns.length === 0) {
+    return false;
+  }
+  const relativeToRepo = import_node_path5.default.relative(repoRootPath, resolvedPath);
+  return isGuidelineFile(relativeToRepo, guidelinePatterns);
+}
 function asString3(value) {
   return typeof value === "string" ? value : void 0;
 }
@@ -4100,6 +4138,8 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
         for (const guidelinePath of testCase.guideline_paths) {
           console.log(`    - ${guidelinePath}`);
         }
+      } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
+        console.log("  No guidelines found (guideline_patterns not configured)");
       } else {
         console.log("  No guidelines found");
       }
@@ -4469,7 +4509,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
   } else {
     throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
   }
-  const suiteWorkspace = parseWorkspaceConfig(suite.workspace, evalFileDir);
+  const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
   const suiteInputMessages = expandInputShorthand(suite.input);
   const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
   const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
@@ -4505,12 +4545,24 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
     }
     const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
     const skipDefaults = caseExecution?.skip_defaults === true;
-    const inputMessages = suiteInputMessages && !skipDefaults ? [...suiteInputMessages, ...testInputMessages] : testInputMessages;
+    const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
+    const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
     const hasExpectedMessages = expectedMessages.length > 0;
     const guidelinePaths = [];
     const inputTextParts = [];
-    const inputSegments = await processMessages({
-      messages: inputMessages,
+    const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
+      messages: effectiveSuiteInputMessages,
+      searchRoots,
+      repoRootPath,
+      guidelinePatterns,
+      guidelinePaths,
+      treatFileSegmentsAsGuidelines: true,
+      textParts: inputTextParts,
+      messageType: "input",
+      verbose
+    }) : [];
+    const testInputSegments = await processMessages({
+      messages: testInputMessages,
       searchRoots,
       repoRootPath,
       guidelinePatterns,
@@ -4519,6 +4571,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       messageType: "input",
       verbose
     });
+    const inputSegments = [...suiteInputSegments, ...testInputSegments];
     const outputSegments = hasExpectedMessages ? await processExpectedMessages({
       messages: expectedMessages,
       searchRoots,
@@ -4566,7 +4619,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       ...guidelinePaths.map((guidelinePath) => import_node_path8.default.resolve(guidelinePath)),
       ...userFilePaths
     ];
-    const caseWorkspace = parseWorkspaceConfig(evalcase.workspace, evalFileDir);
+    const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
     const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
     const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
     const caseTargets = extractTargetsFromTestCase(evalcase);
@@ -4597,6 +4650,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
         for (const guidelinePath of testCase.guideline_paths) {
           console.log(`    - ${guidelinePath}`);
         }
+      } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
+        console.log("  No guidelines found (guideline_patterns not configured)");
       } else {
         console.log("  No guidelines found");
       }
@@ -4685,16 +4740,57 @@ function parseRepoConfig(raw) {
     ...clone !== void 0 && { clone }
   };
 }
-function parseResetConfig(raw) {
+function parseWorkspaceHookConfig(raw, evalFileDir) {
   if (!isJsonObject(raw)) return void 0;
+  const script = parseWorkspaceScriptConfig(raw, evalFileDir);
   const obj = raw;
-  const strategy = obj.strategy === "none" || obj.strategy === "hard" || obj.strategy === "recreate" ? obj.strategy : void 0;
-  const afterEach = typeof obj.after_each === "boolean" ? obj.after_each : void 0;
-  if (!strategy && afterEach === void 0) return void 0;
+  const reset = obj.reset === "none" || obj.reset === "fast" || obj.reset === "strict" ? obj.reset : void 0;
+  const clean = obj.clean === "always" || obj.clean === "on_success" || obj.clean === "on_failure" || obj.clean === "never" ? obj.clean : void 0;
+  if (!script && !reset && !clean) return void 0;
   return {
-    ...strategy !== void 0 && { strategy },
-    ...afterEach !== void 0 && { after_each: afterEach }
+    ...script ?? {},
+    ...reset !== void 0 && { reset },
+    ...clean !== void 0 && { clean }
+  };
+}
+function parseWorkspaceHooksConfig(raw, evalFileDir) {
+  if (!isJsonObject(raw)) return void 0;
+  const obj = raw;
+  const beforeAllTests = parseWorkspaceHookConfig(obj.before_all_tests, evalFileDir);
+  const beforeEachTest = parseWorkspaceHookConfig(obj.before_each_test, evalFileDir);
+  const afterEachTest = parseWorkspaceHookConfig(obj.after_each_test, evalFileDir);
+  const afterAllTests = parseWorkspaceHookConfig(obj.after_all_tests, evalFileDir);
+  const onReuse = parseWorkspaceHookConfig(obj.on_reuse, evalFileDir);
+  const onFinish = parseWorkspaceHookConfig(obj.on_finish, evalFileDir);
+  const hooks = {
+    ...beforeAllTests !== void 0 && { before_all_tests: beforeAllTests },
+    ...beforeEachTest !== void 0 && { before_each_test: beforeEachTest },
+    ...afterEachTest !== void 0 && { after_each_test: afterEachTest },
+    ...afterAllTests !== void 0 && { after_all_tests: afterAllTests },
+    ...onReuse !== void 0 && { on_reuse: onReuse },
+    ...onFinish !== void 0 && { on_finish: onFinish }
   };
+  return Object.keys(hooks).length > 0 ? hooks : void 0;
+}
+async function resolveWorkspaceConfig(raw, evalFileDir) {
+  if (typeof raw === "string") {
+    const workspaceFilePath = import_node_path8.default.resolve(evalFileDir, raw);
+    let content;
+    try {
+      content = await (0, import_promises8.readFile)(workspaceFilePath, "utf8");
+    } catch {
+      throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
+    }
+    const parsed = (0, import_yaml4.parse)(content);
+    if (!isJsonObject(parsed)) {
+      throw new Error(
+        `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
+      );
+    }
+    const workspaceFileDir = import_node_path8.default.dirname(workspaceFilePath);
+    return parseWorkspaceConfig(parsed, workspaceFileDir);
+  }
+  return parseWorkspaceConfig(raw, evalFileDir);
 }
 function parseWorkspaceConfig(raw, evalFileDir) {
   if (!isJsonObject(raw)) return void 0;
@@ -4705,37 +4801,56 @@ function parseWorkspaceConfig(raw, evalFileDir) {
   }
   const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
   const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
-  const reset = parseResetConfig(obj.reset);
-  const beforeAll = parseWorkspaceScriptConfig(obj.before_all, evalFileDir);
-  const afterAll = parseWorkspaceScriptConfig(obj.after_all, evalFileDir);
-  const beforeEach = parseWorkspaceScriptConfig(obj.before_each, evalFileDir);
-  const afterEach = parseWorkspaceScriptConfig(obj.after_each, evalFileDir);
-  if (!template && !isolation && !repos && !reset && !beforeAll && !afterAll && !beforeEach && !afterEach)
+  const hooks = parseWorkspaceHooksConfig(obj.hooks, evalFileDir);
+  const mode = obj.mode === "pooled" || obj.mode === "ephemeral" || obj.mode === "static" ? obj.mode : void 0;
+  const staticPath = typeof obj.static_path === "string" ? obj.static_path : void 0;
+  const pool = typeof obj.pool === "boolean" ? obj.pool : void 0;
+  if (!template && !isolation && !repos && !hooks && !mode && !staticPath && pool === void 0)
     return void 0;
   return {
     ...template !== void 0 && { template },
     ...isolation !== void 0 && { isolation },
     ...repos !== void 0 && { repos },
-    ...reset !== void 0 && { reset },
-    ...beforeAll !== void 0 && { before_all: beforeAll },
-    ...afterAll !== void 0 && { after_all: afterAll },
-    ...beforeEach !== void 0 && { before_each: beforeEach },
-    ...afterEach !== void 0 && { after_each: afterEach }
+    ...hooks !== void 0 && { hooks },
+    ...mode !== void 0 && { mode },
+    ...staticPath !== void 0 && { static_path: staticPath },
+    ...pool !== void 0 && { pool }
   };
 }
 function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
   if (!suiteLevel && !caseLevel) return void 0;
   if (!suiteLevel) return caseLevel;
   if (!caseLevel) return suiteLevel;
+  const mergeHook = (suiteHook, caseHook) => {
+    if (!suiteHook && !caseHook) return void 0;
+    return {
+      ...suiteHook ?? {},
+      ...caseHook ?? {}
+    };
+  };
+  const mergedHooks = {
+    before_all_tests: mergeHook(
+      suiteLevel.hooks?.before_all_tests,
+      caseLevel.hooks?.before_all_tests
+    ),
+    before_each_test: mergeHook(
+      suiteLevel.hooks?.before_each_test,
+      caseLevel.hooks?.before_each_test
+    ),
+    after_each_test: mergeHook(suiteLevel.hooks?.after_each_test, caseLevel.hooks?.after_each_test),
+    after_all_tests: mergeHook(suiteLevel.hooks?.after_all_tests, caseLevel.hooks?.after_all_tests),
+    on_reuse: mergeHook(suiteLevel.hooks?.on_reuse, caseLevel.hooks?.on_reuse),
+    on_finish: mergeHook(suiteLevel.hooks?.on_finish, caseLevel.hooks?.on_finish)
+  };
+  const hasHooks = Object.values(mergedHooks).some((hook) => hook !== void 0);
   return {
     template: caseLevel.template ?? suiteLevel.template,
     isolation: caseLevel.isolation ?? suiteLevel.isolation,
     repos: caseLevel.repos ?? suiteLevel.repos,
-    reset: caseLevel.reset ?? suiteLevel.reset,
-    before_all: caseLevel.before_all ?? suiteLevel.before_all,
-    after_all: caseLevel.after_all ?? suiteLevel.after_all,
-    before_each: caseLevel.before_each ?? suiteLevel.before_each,
-    after_each: caseLevel.after_each ?? suiteLevel.after_each
+    ...hasHooks && { hooks: mergedHooks },
+    mode: caseLevel.mode ?? suiteLevel.mode,
+    static_path: caseLevel.static_path ?? suiteLevel.static_path,
+    pool: caseLevel.pool ?? suiteLevel.pool
   };
 }
 function asString6(value) {
@@ -9493,8 +9608,8 @@ function resolveCliConfig(target, env, evalFilePath) {
   const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
   if (!parseResult.success) {
     const firstError = parseResult.error.errors[0];
-    const path43 = firstError?.path.join(".") || "";
-    const prefix = path43 ? `${target.name} ${path43}: ` : `${target.name}: `;
+    const path44 = firstError?.path.join(".") || "";
+    const prefix = path44 ? `${target.name} ${path44}: ` : `${target.name}: `;
     throw new Error(`${prefix}${firstError?.message}`);
   }
   const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
@@ -10001,15 +10116,15 @@ function getAgentvHome() {
 function getWorkspacesRoot() {
   return import_node_path23.default.join(getAgentvHome(), "workspaces");
 }
-function getGitCacheRoot() {
-  return import_node_path23.default.join(getAgentvHome(), "git-cache");
-}
 function getSubagentsRoot() {
   return import_node_path23.default.join(getAgentvHome(), "subagents");
 }
 function getTraceStateRoot() {
   return import_node_path23.default.join(getAgentvHome(), "trace-state");
 }
+function getWorkspacePoolRoot() {
+  return import_node_path23.default.join(getAgentvHome(), "workspace-pool");
+}
 // src/evaluation/providers/vscode/dispatch/constants.ts
 var DEFAULT_LOCK_NAME = "subagent.lock";
@@ -10832,8 +10947,6 @@ var AGENTV_REQUEST_TEMPLATE = `[[ ## task ## ]]
 **IMPORTANT**: Follow these exact steps:
 1. Create and write your complete response to: {{responseFileTmp}}
-    - All intended file outputs/changes MUST be written in your response file.
-    - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
 2. When completely finished, run these PowerShell commands to signal completion:
 \`\`\`
 Move-Item -LiteralPath '{{responseFileTmp}}' -Destination '{{responseFileFinal}}'
@@ -10850,8 +10963,6 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
 **IMPORTANT**: Follow these exact steps:
 1. Create and write your complete response to: {{responseFileTmp}}
-    - All intended file outputs/changes MUST be written in your response file.
-    - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
 2. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
 3. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
 `;
@@ -11464,15 +11575,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
   });
 }
 async function execShellWithStdin(command, stdinPayload, options = {}) {
-  const { mkdir: mkdir16, readFile: readFile13, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
+  const { mkdir: mkdir16, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
   const { tmpdir: tmpdir3 } = await import("os");
-  const path43 = await import("path");
+  const path44 = await import("path");
   const { randomUUID: randomUUID8 } = await import("crypto");
-  const dir = path43.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
+  const dir = path44.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
   await mkdir16(dir, { recursive: true });
-  const stdinPath = path43.join(dir, "stdin.txt");
-  const stdoutPath = path43.join(dir, "stdout.txt");
-  const stderrPath = path43.join(dir, "stderr.txt");
+  const stdinPath = path44.join(dir, "stdin.txt");
+  const stdoutPath = path44.join(dir, "stdout.txt");
+  const stderrPath = path44.join(dir, "stderr.txt");
   await writeFile9(stdinPath, stdinPayload, "utf8");
   const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
   const { spawn: spawn4 } = await import("child_process");
@@ -11502,8 +11613,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
         resolve(code ?? 0);
       });
     });
-    const stdout = (await readFile13(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
-    const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
+    const stdout = (await readFile14(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
+    const stderr = (await readFile14(stderrPath, "utf8")).replace(/\r\n/g, "\n");
     return { stdout, stderr, exitCode };
   } finally {
     await rm6(dir, { recursive: true, force: true });
@@ -11824,7 +11935,7 @@ var CodeEvaluator = class {
       outputPath,
       guidelineFiles: context2.evalCase.guideline_paths,
       inputFiles: context2.evalCase.file_paths.filter(
-        (path43) => !context2.evalCase.guideline_paths.includes(path43)
+        (path44) => !context2.evalCase.guideline_paths.includes(path44)
       ),
       input: context2.evalCase.input,
       trace: context2.trace ?? null,
@@ -12103,6 +12214,8 @@ ${context2.fileChanges}`;
       };
     } catch (e) {
       const message = e instanceof Error ? e.message : String(e);
+      const evalName = context2.evaluator?.name ?? "llm-judge";
+      console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
       return {
         score: 0,
         verdict: "skip",
@@ -12131,24 +12244,39 @@ ${context2.fileChanges}`;
       systemPrompt,
       target: judgeProvider.targetName
     };
-    const { data, tokenUsage } = await this.runWithRetry({
-      context: context2,
-      judgeProvider,
-      systemPrompt,
-      userPrompt: prompt,
-      schema: rubricEvaluationSchema
-    });
-    const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
-    return {
-      score,
-      verdict,
-      hits,
-      misses,
-      expectedAspectCount: rubrics.length,
-      reasoning: data.overall_reasoning,
-      evaluatorRawRequest,
-      tokenUsage
-    };
+    try {
+      const { data, tokenUsage } = await this.runWithRetry({
+        context: context2,
+        judgeProvider,
+        systemPrompt,
+        userPrompt: prompt,
+        schema: rubricEvaluationSchema
+      });
+      const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
+      return {
+        score,
+        verdict,
+        hits,
+        misses,
+        expectedAspectCount: rubrics.length,
+        reasoning: data.overall_reasoning,
+        evaluatorRawRequest,
+        tokenUsage
+      };
+    } catch (e) {
+      const message = e instanceof Error ? e.message : String(e);
+      const evalName = context2.evaluator?.name ?? "llm-judge";
+      console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
+      return {
+        score: 0,
+        verdict: "skip",
+        hits: [],
+        misses: [`Judge parse failure after 3 attempts: ${message}`],
+        expectedAspectCount: rubrics.length,
+        reasoning: `Judge parse failure after 3 attempts: ${message}`,
+        evaluatorRawRequest
+      };
+    }
   }
   /**
    * Evaluate using score-range rubrics (analytic rubric scoring).
@@ -12162,25 +12290,40 @@ ${context2.fileChanges}`;
       systemPrompt,
       target: judgeProvider.targetName
     };
-    const { data, tokenUsage } = await this.runWithRetry({
-      context: context2,
-      judgeProvider,
-      systemPrompt,
-      userPrompt: prompt,
-      schema: scoreRangeEvaluationSchema
-    });
-    const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
-    return {
-      score,
-      verdict,
-      hits,
-      misses,
-      expectedAspectCount: rubrics.length,
-      reasoning: data.overall_reasoning,
-      evaluatorRawRequest,
-      details,
-      tokenUsage
-    };
+    try {
+      const { data, tokenUsage } = await this.runWithRetry({
+        context: context2,
+        judgeProvider,
+        systemPrompt,
+        userPrompt: prompt,
+        schema: scoreRangeEvaluationSchema
+      });
+      const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
+      return {
+        score,
+        verdict,
+        hits,
+        misses,
+        expectedAspectCount: rubrics.length,
+        reasoning: data.overall_reasoning,
+        evaluatorRawRequest,
+        details,
+        tokenUsage
+      };
+    } catch (e) {
+      const message = e instanceof Error ? e.message : String(e);
+      const evalName = context2.evaluator?.name ?? "llm-judge";
+      console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
+      return {
+        score: 0,
+        verdict: "skip",
+        hits: [],
+        misses: [`Judge parse failure after 3 attempts: ${message}`],
+        expectedAspectCount: rubrics.length,
+        reasoning: `Judge parse failure after 3 attempts: ${message}`,
+        evaluatorRawRequest
+      };
+    }
   }
   /**
    * Build prompt for score-range rubric evaluation.
@@ -12466,19 +12609,13 @@ var CompositeEvaluator = class {
   runWeightedAverage(results, weights) {
     let totalWeight = 0;
     let weightedSum = 0;
+    let evaluatedCount = 0;
     const allHits = [];
     const allMisses = [];
     const reasoningParts = [];
     const scores = [];
     for (const member of results) {
       const weight = weights?.[member.id] ?? 1;
-      totalWeight += weight;
-      weightedSum += member.result.score * weight;
-      allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
-      allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
-      if (member.result.reasoning) {
-        reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
-      }
       scores.push({
         name: member.id,
         type: member.type,
@@ -12493,6 +12630,32 @@ var CompositeEvaluator = class {
         details: member.result.details,
         tokenUsage: member.result.tokenUsage
       });
+      if (member.result.verdict === "skip") {
+        continue;
+      }
+      evaluatedCount++;
+      totalWeight += weight;
+      weightedSum += member.result.score * weight;
+      allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
+      allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
+      if (member.result.reasoning) {
+        reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
+      }
+    }
+    if (evaluatedCount === 0 && results.length > 0) {
+      return {
+        score: 0,
+        verdict: "skip",
+        hits: [],
+        misses: [],
+        expectedAspectCount: 1,
+        reasoning: "All evaluators skipped (infrastructure failure)",
+        evaluatorRawRequest: {
+          aggregator: "weighted_average",
+          ...weights ? { weights } : {}
+        },
+        scores
+      };
     }
     const finalScore = totalWeight > 0 ? weightedSum / totalWeight : 0;
     return {
@@ -12516,19 +12679,8 @@ var CompositeEvaluator = class {
     const reasoningParts = [];
     let passingCount = 0;
     let borderlineCount = 0;
+    let evaluatedCount = 0;
     for (const member of results) {
-      const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
-      if (isPassing) {
-        passingCount++;
-        if (member.result.verdict === "borderline") {
-          borderlineCount++;
-        }
-      }
-      allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
-      allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
-      if (member.result.reasoning) {
-        reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
-      }
       scores.push({
         name: member.id,
         type: member.type,
@@ -12542,8 +12694,39 @@ var CompositeEvaluator = class {
         details: member.result.details,
         tokenUsage: member.result.tokenUsage
       });
+      if (member.result.verdict === "skip") {
+        continue;
+      }
+      evaluatedCount++;
+      const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
+      if (isPassing) {
+        passingCount++;
+        if (member.result.verdict === "borderline") {
+          borderlineCount++;
+        }
+      }
+      allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
+      allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
+      if (member.result.reasoning) {
+        reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
+      }
+    }
+    if (evaluatedCount === 0 && results.length > 0) {
+      return {
+        score: 0,
+        verdict: "skip",
+        hits: [],
+        misses: [],
+        expectedAspectCount: 1,
+        reasoning: "All evaluators skipped (infrastructure failure)",
+        evaluatorRawRequest: {
+          aggregator: "threshold",
+          threshold
+        },
+        scores
+      };
     }
-    const totalCount = results.length;
+    const totalCount = evaluatedCount;
     const score = totalCount > 0 ? passingCount / totalCount : 0;
     const pass = score >= threshold;
     if (pass && borderlineCount > 0) {
@@ -13051,115 +13234,115 @@ var FieldAccuracyEvaluator = class {
    * Evaluate a single field against the expected value.
    */
   evaluateField(fieldConfig, candidateData, expectedData) {
-    const { path: path43, match, required = true, weight = 1 } = fieldConfig;
-    const candidateValue = resolvePath(candidateData, path43);
-    const expectedValue = resolvePath(expectedData, path43);
+    const { path: path44, match, required = true, weight = 1 } = fieldConfig;
+    const candidateValue = resolvePath(candidateData, path44);
+    const expectedValue = resolvePath(expectedData, path44);
     if (expectedValue === void 0) {
       return {
-        path: path43,
+        path: path44,
         score: 1,
         // No expected value means no comparison needed
         weight,
         hit: true,
-        message: `${path43}: no expected value`
+        message: `${path44}: no expected value`
       };
     }
     if (candidateValue === void 0) {
       if (required) {
         return {
-          path: path43,
+          path: path44,
           score: 0,
           weight,
           hit: false,
-          message: `${path43} (required, missing)`
+          message: `${path44} (required, missing)`
         };
       }
       return {
-        path: path43,
+        path: path44,
         score: 1,
         // Don't penalize missing optional fields
         weight: 0,
         // Zero weight means it won't affect the score
         hit: true,
-        message: `${path43}: optional field missing`
+        message: `${path44}: optional field missing`
       };
     }
     switch (match) {
       case "exact":
-        return this.compareExact(path43, candidateValue, expectedValue, weight);
+        return this.compareExact(path44, candidateValue, expectedValue, weight);
       case "numeric_tolerance":
         return this.compareNumericTolerance(
-          path43,
+          path44,
           candidateValue,
           expectedValue,
           fieldConfig,
           weight
         );
       case "date":
-        return this.compareDate(path43, candidateValue, expectedValue, fieldConfig, weight);
+        return this.compareDate(path44, candidateValue, expectedValue, fieldConfig, weight);
       default:
         return {
-          path: path43,
+          path: path44,
           score: 0,
           weight,
           hit: false,
-          message: `${path43}: unknown match type "${match}"`
+          message: `${path44}: unknown match type "${match}"`
         };
     }
   }
   /**
    * Exact equality comparison.
    */
-  compareExact(path43, candidateValue, expectedValue, weight) {
+  compareExact(path44, candidateValue, expectedValue, weight) {
     if (deepEqual(candidateValue, expectedValue)) {
       return {
-        path: path43,
+        path: path44,
         score: 1,
         weight,
         hit: true,
-        message: path43
+        message: path44
       };
     }
     if (typeof candidateValue !== typeof expectedValue) {
       return {
-        path: path43,
+        path: path44,
         score: 0,
         weight,
         hit: false,
-        message: `${path43} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
+        message: `${path44} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
       };
     }
     return {
-      path: path43,
+      path: path44,
       score: 0,
       weight,
       hit: false,
-      message: `${path43} (value mismatch)`
+      message: `${path44} (value mismatch)`
     };
   }
   /**
    * Numeric comparison with absolute or relative tolerance.
    */
-  compareNumericTolerance(path43, candidateValue, expectedValue, fieldConfig, weight) {
+  compareNumericTolerance(path44, candidateValue, expectedValue, fieldConfig, weight) {
     const { tolerance = 0, relative = false } = fieldConfig;
     const candidateNum = toNumber2(candidateValue);
     const expectedNum = toNumber2(expectedValue);
     if (candidateNum === null || expectedNum === null) {
       return {
-        path: path43,
+        path: path44,
         score: 0,
         weight,
         hit: false,
-        message: `${path43} (non-numeric value)`
+        message: `${path44} (non-numeric value)`
       };
     }
     if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
       return {
-        path: path43,
+        path: path44,
         score: 0,
         weight,
         hit: false,
-        message: `${path43} (invalid numeric value)`
+        message: `${path44} (invalid numeric value)`
       };
     }
     const diff = Math.abs(candidateNum - expectedNum);
@@ -13172,61 +13355,61 @@ var FieldAccuracyEvaluator = class {
     }
     if (withinTolerance) {
       return {
-        path: path43,
+        path: path44,
         score: 1,
         weight,
         hit: true,
-        message: `${path43} (within tolerance: diff=${diff.toFixed(2)})`
+        message: `${path44} (within tolerance: diff=${diff.toFixed(2)})`
       };
     }
     return {
-      path: path43,
+      path: path44,
       score: 0,
       weight,
       hit: false,
-      message: `${path43} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
+      message: `${path44} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
     };
   }
   /**
    * Date comparison with format normalization.
    */
-  compareDate(path43, candidateValue, expectedValue, fieldConfig, weight) {
+  compareDate(path44, candidateValue, expectedValue, fieldConfig, weight) {
     const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
     const candidateDate = parseDate(String(candidateValue), formats);
     const expectedDate = parseDate(String(expectedValue), formats);
     if (candidateDate === null) {
       return {
-        path: path43,
+        path: path44,
         score: 0,
         weight,
         hit: false,
-        message: `${path43} (unparseable candidate date)`
+        message: `${path44} (unparseable candidate date)`
       };
     }
     if (expectedDate === null) {
       return {
-        path: path43,
+        path: path44,
         score: 0,
         weight,
         hit: false,
-        message: `${path43} (unparseable expected date)`
+        message: `${path44} (unparseable expected date)`
       };
     }
     if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
       return {
-        path: path43,
+        path: path44,
         score: 1,
         weight,
         hit: true,
-        message: path43
+        message: path44
       };
     }
     return {
-      path: path43,
+      path: path44,
       score: 0,
       weight,
       hit: false,
-      message: `${path43} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
+      message: `${path44} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
     };
   }
   /**
@@ -13267,11 +13450,11 @@ var FieldAccuracyEvaluator = class {
     };
   }
 };
-function resolvePath(obj, path43) {
-  if (!path43 || !obj) {
+function resolvePath(obj, path44) {
+  if (!path44 || !obj) {
     return void 0;
   }
-  const parts = path43.split(/\.|\[|\]/).filter((p) => p.length > 0);
+  const parts = path44.split(/\.|\[|\]/).filter((p) => p.length > 0);
   let current = obj;
   for (const part of parts) {
     if (current === null || current === void 0) {
@@ -14089,8 +14272,8 @@ var TokenUsageEvaluator = class {
 };
 // src/evaluation/evaluators/tool-trajectory.ts
-function getNestedValue(obj, path43) {
-  const parts = path43.split(".");
+function getNestedValue(obj, path44) {
+  const parts = path44.split(".");
   let current = obj;
   for (const part of parts) {
     if (current === null || current === void 0 || typeof current !== "object") {
@@ -14653,7 +14836,7 @@ function runEqualsAssertion(output, value) {
 // src/evaluation/orchestrator.ts
 var import_node_crypto9 = require("crypto");
 var import_promises29 = require("fs/promises");
-var import_node_path41 = __toESM(require("path"), 1);
+var import_node_path42 = __toESM(require("path"), 1);
 var import_micromatch4 = __toESM(require("micromatch"), 1);
 // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
@@ -15523,7 +15706,7 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
   }
 }
-// src/evaluation/workspace/repo-manager.ts
+// src/evaluation/workspace/pool-manager.ts
 var import_node_child_process7 = require("child_process");
 var import_node_crypto8 = require("crypto");
 var import_node_fs11 = require("fs");
@@ -15531,8 +15714,6 @@ var import_promises27 = require("fs/promises");
 var import_node_path39 = __toESM(require("path"), 1);
 var import_node_util5 = require("util");
 var execFileAsync = (0, import_node_util5.promisify)(import_node_child_process7.execFile);
-var DEFAULT_TIMEOUT_MS2 = 3e5;
-var LOCK_TIMEOUT_MS = 6e4;
 function gitEnv() {
   const env = { ...process.env };
   for (const key of Object.keys(env)) {
@@ -15547,160 +15728,326 @@ function gitEnv() {
     GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
   };
 }
-function cacheKey(source) {
-  const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
-  return (0, import_node_crypto8.createHash)("sha256").update(raw).digest("hex");
-}
-function getSourceUrl(source) {
-  return source.type === "git" ? source.url : source.path;
-}
 async function git(args, opts) {
   const { stdout } = await execFileAsync("git", args, {
     cwd: opts?.cwd,
-    timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
+    timeout: opts?.timeout ?? 3e5,
     env: gitEnv(),
     maxBuffer: 50 * 1024 * 1024
-    // 50MB
   });
   return stdout.trim();
 }
-async function acquireLock(lockPath) {
-  const start = Date.now();
-  while (Date.now() - start < LOCK_TIMEOUT_MS) {
-    try {
-      await (0, import_promises27.writeFile)(lockPath, String(process.pid), { flag: "wx" });
-      return;
-    } catch (err) {
-      if (err.code === "EEXIST") {
-        await new Promise((r) => setTimeout(r, 200));
+function normalizeRepoForFingerprint(repo) {
+  const source = repo.source.type === "git" ? { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") } : { type: "local", path: repo.source.path };
+  const result = {
+    path: repo.path,
+    source,
+    ref: repo.checkout?.ref ?? "HEAD"
+  };
+  if (repo.clone?.depth !== void 0) {
+    result.depth = repo.clone.depth;
+  }
+  if (repo.clone?.filter !== void 0) {
+    result.filter = repo.clone.filter;
+  }
+  if (repo.clone?.sparse?.length) {
+    result.sparse = [...repo.clone.sparse].sort();
+  }
+  return result;
+}
+function computeWorkspaceFingerprint(templatePath, repos) {
+  const canonical = {
+    templatePath: templatePath ?? null,
+    repos: [...repos].sort((a, b) => a.path.localeCompare(b.path)).map(normalizeRepoForFingerprint)
+  };
+  return (0, import_node_crypto8.createHash)("sha256").update(JSON.stringify(canonical)).digest("hex");
+}
+async function copyDirectoryRecursive2(src, dest, skipDirs) {
+  await (0, import_promises27.mkdir)(dest, { recursive: true });
+  const entries = await (0, import_promises27.readdir)(src, { withFileTypes: true });
+  for (const entry of entries) {
+    const srcPath = import_node_path39.default.join(src, entry.name);
+    const destPath = import_node_path39.default.join(dest, entry.name);
+    if (entry.name === ".git") {
+      continue;
+    }
+    if (entry.isDirectory()) {
+      if (skipDirs?.has(entry.name)) {
         continue;
       }
-      throw err;
+      await copyDirectoryRecursive2(srcPath, destPath, skipDirs);
+    } else {
+      await (0, import_promises27.cp)(srcPath, destPath, { preserveTimestamps: true, force: true });
     }
   }
-  throw new Error(`Timed out waiting for lock: ${lockPath}`);
 }
-async function releaseLock(lockPath) {
-  try {
-    await (0, import_promises27.unlink)(lockPath);
-  } catch {
-  }
-}
-var RepoManager = class {
-  cacheDir;
-  verbose;
-  constructor(cacheDir, verbose = false) {
-    this.cacheDir = cacheDir ?? getGitCacheRoot();
-    this.verbose = verbose;
+var WorkspacePoolManager = class {
+  poolRoot;
+  constructor(poolRoot) {
+    this.poolRoot = poolRoot ?? getWorkspacePoolRoot();
   }
-  async runGit(args, opts) {
-    const startedAt = Date.now();
-    if (this.verbose) {
-      console.log(
-        `[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`
+  /**
+   * Acquire a workspace slot from the pool.
+   *
+   * 1. Compute fingerprint from template + repos
+   * 2. Check drift (compare stored metadata.json fingerprint vs computed)
+   * 3. If drift: warn, remove all slots, rematerialize
+   * 4. Acquire a slot (try-lock slot-0, slot-1, ..., up to maxSlots)
+   * 5. If slot exists: reset repos, re-copy template files (skip repo directories)
+   * 6. If new slot: copy template, materialize all repos, write metadata.json
+   * 7. Return the slot (with path, index, isExisting)
+   */
+  async acquireWorkspace(options) {
+    const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
+    const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
+    const poolDir = import_node_path39.default.join(this.poolRoot, fingerprint);
+    await (0, import_promises27.mkdir)(poolDir, { recursive: true });
+    const drifted = await this.checkDrift(poolDir, fingerprint);
+    if (drifted) {
+      console.warn(
+        `[workspace-pool] Drift detected for fingerprint ${fingerprint.slice(0, 12)}... Removing stale slots.`
       );
+      await this.removeAllSlots(poolDir);
     }
-    try {
-      const output = await git(args, opts);
-      if (this.verbose) {
-        console.log(
-          `[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`
-        );
+    for (let i = 0; i < maxSlots; i++) {
+      const slotPath = import_node_path39.default.join(poolDir, `slot-${i}`);
+      const lockPath = `${slotPath}.lock`;
+      const locked = await this.tryLock(lockPath);
+      if (!locked) {
+        continue;
       }
-      return output;
-    } catch (error) {
-      if (this.verbose) {
-        const message = error instanceof Error ? error.message : String(error);
-        console.log(
-          `[repo] git fail durationMs=${Date.now() - startedAt} args=${args.join(" ")} error=${message}`
-        );
+      const slotExists = (0, import_node_fs11.existsSync)(slotPath);
+      if (slotExists) {
+        await this.resetSlot(slotPath, templatePath, repos, poolReset);
+        return {
+          index: i,
+          path: slotPath,
+          isExisting: true,
+          lockPath,
+          fingerprint,
+          poolDir
+        };
       }
-      throw error;
+      await (0, import_promises27.mkdir)(slotPath, { recursive: true });
+      if (templatePath) {
+        await copyDirectoryRecursive2(templatePath, slotPath);
+      }
+      if (repos.length > 0) {
+        await repoManager.materializeAll(repos, slotPath);
+      }
+      await this.writeMetadata(poolDir, fingerprint, templatePath ?? null, repos);
+      return {
+        index: i,
+        path: slotPath,
+        isExisting: false,
+        lockPath,
+        fingerprint,
+        poolDir
+      };
+    }
+    throw new Error(
+      `All ${maxSlots} pool slots are locked for fingerprint ${fingerprint.slice(0, 12)}...`
+    );
+  }
+  /** Remove lock file to release a slot. */
+  async releaseSlot(slot) {
+    try {
+      await (0, import_promises27.unlink)(slot.lockPath);
+    } catch {
     }
   }
   /**
-   * Ensure a bare mirror cache exists for the given source.
-   * Creates on first access, fetches updates on subsequent calls.
-   * Returns the absolute path to the cache directory.
+   * Try to acquire a PID-based lock file.
+   * On EEXIST, read PID and check if process is alive. If dead, stale lock — remove and retry.
+   * Returns true if lock acquired, false if slot is actively locked.
+   * Uses a bounded loop (max 3 attempts) to avoid unbounded recursion.
    */
-  async ensureCache(source, depth, resolve) {
-    const key = cacheKey(source);
-    const cachePath = import_node_path39.default.join(this.cacheDir, key);
-    const lockPath = `${cachePath}.lock`;
-    const cacheExists = (0, import_node_fs11.existsSync)(import_node_path39.default.join(cachePath, "HEAD"));
-    if (this.verbose) {
-      console.log(
-        `[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve ?? "remote"} cache=${cachePath}`
-      );
+  async tryLock(lockPath) {
+    for (let attempt = 0; attempt < 3; attempt++) {
+      try {
+        await (0, import_promises27.writeFile)(lockPath, String(process.pid), { flag: "wx" });
+        return true;
+      } catch (err) {
+        if (err.code !== "EEXIST") {
+          throw err;
+        }
+        try {
+          const pidStr = await (0, import_promises27.readFile)(lockPath, "utf-8");
+          const pid = Number.parseInt(pidStr.trim(), 10);
+          if (!Number.isNaN(pid)) {
+            try {
+              process.kill(pid, 0);
+              return false;
+            } catch {
+              await (0, import_promises27.unlink)(lockPath).catch(() => {
+              });
+              continue;
+            }
+          }
+        } catch {
+        }
+        return false;
+      }
+    }
+    return false;
+  }
+  /**
+   * Check if the stored fingerprint in metadata.json differs from the computed one.
+   * Returns true if drifted, false otherwise.
+   * Returns false (no drift) if metadata.json doesn't exist (first use).
+   */
+  async checkDrift(poolDir, fingerprint) {
+    const metadataPath = import_node_path39.default.join(poolDir, "metadata.json");
+    try {
+      const raw = await (0, import_promises27.readFile)(metadataPath, "utf-8");
+      const metadata = JSON.parse(raw);
+      return metadata.fingerprint !== fingerprint;
+    } catch {
+      return false;
     }
-    if (resolve === "local") {
-      if (cacheExists) {
-        if (this.verbose) {
-          console.log(`[repo] using existing local cache ${cachePath}`);
+  }
+  /** Write metadata.json with fingerprint, inputs, and timestamp. */
+  async writeMetadata(poolDir, fingerprint, templatePath, repos) {
+    const metadata = {
+      fingerprint,
+      templatePath,
+      repos,
+      createdAt: (/* @__PURE__ */ new Date()).toISOString()
+    };
+    await (0, import_promises27.writeFile)(import_node_path39.default.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
+  }
+  /** Remove all slot directories and their lock files from a pool directory. */
+  async removeAllSlots(poolDir) {
+    const entries = await (0, import_promises27.readdir)(poolDir);
+    for (const entry of entries) {
+      if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
+        const lockPath = import_node_path39.default.join(poolDir, `${entry}.lock`);
+        if ((0, import_node_fs11.existsSync)(lockPath)) {
+          try {
+            const pidStr = await (0, import_promises27.readFile)(lockPath, "utf-8");
+            const pid = Number.parseInt(pidStr.trim(), 10);
+            if (!Number.isNaN(pid)) {
+              try {
+                process.kill(pid, 0);
+                console.warn(`[workspace-pool] Skipping slot ${entry}: locked by PID ${pid}`);
+                continue;
+              } catch {
+              }
+            }
+          } catch {
+          }
         }
-        return cachePath;
+        await (0, import_promises27.rm)(import_node_path39.default.join(poolDir, entry), { recursive: true, force: true });
+        await (0, import_promises27.rm)(lockPath, { force: true }).catch(() => {
+        });
       }
-      const url = getSourceUrl(source);
-      throw new Error(
-        `No cache found for \`${url}\`. Run \`agentv cache add --url ${url} --from <local-path>\` to seed it.`
+    }
+    await (0, import_promises27.rm)(import_node_path39.default.join(poolDir, "metadata.json"), { force: true }).catch(() => {
+    });
+  }
+  /**
+   * Reset an existing slot for reuse:
+   * 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
+   * 2. Re-copy template files (skip repo directories)
+   */
+  async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
+    for (const repo of repos) {
+      const repoDir = import_node_path39.default.join(slotPath, repo.path);
+      if (!(0, import_node_fs11.existsSync)(repoDir)) {
+        continue;
+      }
+      if (poolReset === "none") {
+        continue;
+      }
+      const ref = repo.checkout?.ref ?? "HEAD";
+      await git(["reset", "--hard", ref], { cwd: repoDir });
+      const cleanFlag = poolReset === "strict" ? "-fdx" : "-fd";
+      await git(["clean", cleanFlag], { cwd: repoDir });
+    }
+    if (templatePath) {
+      const repoDirNames = new Set(
+        repos.map((r) => {
+          const normalized = r.path.replace(/^\.\//, "");
+          return normalized.split("/")[0];
+        })
       );
+      await copyDirectoryRecursive2(templatePath, slotPath, repoDirNames);
     }
-    await (0, import_promises27.mkdir)(this.cacheDir, { recursive: true });
-    const lockStartedAt = Date.now();
-    await acquireLock(lockPath);
+  }
+};
+// src/evaluation/workspace/repo-manager.ts
+var import_node_child_process8 = require("child_process");
+var import_node_path40 = __toESM(require("path"), 1);
+var import_node_util6 = require("util");
+var execFileAsync2 = (0, import_node_util6.promisify)(import_node_child_process8.execFile);
+var DEFAULT_TIMEOUT_MS2 = 3e5;
+function gitEnv2() {
+  const env = { ...process.env };
+  for (const key of Object.keys(env)) {
+    if (key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND") {
+      delete env[key];
+    }
+  }
+  return {
+    ...env,
+    GIT_TERMINAL_PROMPT: "0",
+    GIT_ASKPASS: "",
+    GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
+  };
+}
+function getSourceUrl(source) {
+  return source.type === "git" ? source.url : source.path;
+}
+async function git2(args, opts) {
+  const { stdout } = await execFileAsync2("git", args, {
+    cwd: opts?.cwd,
+    timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
+    env: gitEnv2(),
+    maxBuffer: 50 * 1024 * 1024
+    // 50MB
+  });
+  return stdout.trim();
+}
+var RepoManager = class {
+  verbose;
+  constructor(verbose = false) {
+    this.verbose = verbose;
+  }
+  async runGit(args, opts) {
+    const startedAt = Date.now();
     if (this.verbose) {
-      console.log(
-        `[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`
-      );
+      console.log(`[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`);
     }
     try {
-      if (cacheExists) {
-        if (this.verbose) {
-          console.log(`[repo] refreshing existing cache ${cachePath}`);
-        }
-        const fetchArgs = ["fetch", "--prune"];
-        if (depth) {
-          fetchArgs.push("--depth", String(depth));
-        }
-        await this.runGit(fetchArgs, { cwd: cachePath });
-      } else {
-        if (this.verbose) {
-          console.log(`[repo] creating new cache ${cachePath}`);
-        }
-        const cloneArgs = ["clone", "--mirror", "--bare"];
-        if (depth) {
-          cloneArgs.push("--depth", String(depth));
-        }
-        const sourceUrl = getSourceUrl(source);
-        const cloneUrl = depth && source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
-        cloneArgs.push(cloneUrl, cachePath);
-        await this.runGit(cloneArgs);
+      const output = await git2(args, opts);
+      if (this.verbose) {
+        console.log(`[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`);
       }
-    } finally {
-      await releaseLock(lockPath);
+      return output;
+    } catch (error) {
       if (this.verbose) {
-        console.log(`[repo] lock released path=${lockPath}`);
+        const message = error instanceof Error ? error.message : String(error);
+        console.log(
+          `[repo] git fail durationMs=${Date.now() - startedAt} args=${args.join(" ")} error=${message}`
+        );
       }
+      throw error;
     }
-    return cachePath;
   }
   /**
-   * Clone a repo from cache into the workspace at the configured path.
+   * Clone a repo directly from source into the workspace at the configured path.
    * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
    */
   async materialize(repo, workspacePath) {
-    const targetDir = import_node_path39.default.join(workspacePath, repo.path);
+    const targetDir = import_node_path40.default.join(workspacePath, repo.path);
+    const sourceUrl = getSourceUrl(repo.source);
     const startedAt = Date.now();
     if (this.verbose) {
       console.log(
-        `[repo] materialize start path=${repo.path} source=${getSourceUrl(repo.source)} workspace=${workspacePath}`
+        `[repo] materialize start path=${repo.path} source=${sourceUrl} workspace=${workspacePath}`
       );
     }
-    const cachePath = await this.ensureCache(
-      repo.source,
-      repo.clone?.depth,
-      repo.checkout?.resolve
-    );
     const cloneArgs = ["clone"];
     if (repo.clone?.depth) {
       cloneArgs.push("--depth", String(repo.clone.depth));
@@ -15709,7 +16056,7 @@ var RepoManager = class {
       cloneArgs.push("--filter", repo.clone.filter);
     }
     cloneArgs.push("--no-checkout");
-    const cloneUrl = repo.clone?.depth || repo.clone?.filter ? `file://${cachePath}` : cachePath;
+    const cloneUrl = (repo.clone?.depth || repo.clone?.filter) && repo.source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
     cloneArgs.push(cloneUrl, targetDir);
     await this.runGit(cloneArgs);
     if (repo.clone?.sparse?.length) {
@@ -15781,66 +16128,28 @@ var RepoManager = class {
     }
   }
   /** Reset repos in workspace to their checkout state. */
-  async reset(repos, workspacePath, strategy) {
-    if (strategy === "recreate") {
-      for (const repo of repos) {
-        const targetDir = import_node_path39.default.join(workspacePath, repo.path);
-        await (0, import_promises27.rm)(targetDir, { recursive: true, force: true });
-      }
-      await this.materializeAll(repos, workspacePath);
-      return;
-    }
+  async reset(repos, workspacePath, reset) {
+    const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
     for (const repo of repos) {
-      const targetDir = import_node_path39.default.join(workspacePath, repo.path);
+      const targetDir = import_node_path40.default.join(workspacePath, repo.path);
       await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
-      await this.runGit(["clean", "-fd"], { cwd: targetDir });
-    }
-  }
-  /**
-   * Seed the cache from a local repository, setting the remote to a given URL.
-   * Useful for avoiding slow network clones when a local clone already exists.
-   */
-  async seedCache(localPath, remoteUrl, opts) {
-    const source = { type: "git", url: remoteUrl };
-    const key = cacheKey(source);
-    const cachePath = import_node_path39.default.join(this.cacheDir, key);
-    const lockPath = `${cachePath}.lock`;
-    await (0, import_promises27.mkdir)(this.cacheDir, { recursive: true });
-    await acquireLock(lockPath);
-    try {
-      if ((0, import_node_fs11.existsSync)(import_node_path39.default.join(cachePath, "HEAD"))) {
-        if (!opts?.force) {
-          throw new Error(
-            `Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
-          );
-        }
-        await (0, import_promises27.rm)(cachePath, { recursive: true, force: true });
-      }
-      await git(["clone", "--mirror", "--bare", localPath, cachePath]);
-      await git(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
-    } finally {
-      await releaseLock(lockPath);
+      await this.runGit(["clean", cleanFlag], { cwd: targetDir });
     }
-    return cachePath;
-  }
-  /** Remove the entire cache directory. */
-  async cleanCache() {
-    await (0, import_promises27.rm)(this.cacheDir, { recursive: true, force: true });
   }
 };
 // src/evaluation/workspace/resolve.ts
 var import_promises28 = require("fs/promises");
-var import_node_path40 = __toESM(require("path"), 1);
+var import_node_path41 = __toESM(require("path"), 1);
 async function resolveWorkspaceTemplate(templatePath) {
   if (!templatePath) {
     return void 0;
   }
-  const resolved = import_node_path40.default.resolve(templatePath);
+  const resolved = import_node_path41.default.resolve(templatePath);
   const stats = await (0, import_promises28.stat)(resolved);
   if (stats.isFile()) {
     return {
-      dir: import_node_path40.default.dirname(resolved),
+      dir: import_node_path41.default.dirname(resolved),
       workspaceFile: resolved
     };
   }
@@ -15852,14 +16161,14 @@ async function resolveWorkspaceTemplate(templatePath) {
   if (workspaceFiles.length === 1) {
     return {
       dir: resolved,
-      workspaceFile: import_node_path40.default.join(resolved, workspaceFiles[0])
+      workspaceFile: import_node_path41.default.join(resolved, workspaceFiles[0])
     };
   }
   if (workspaceFiles.length > 1) {
     const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
     return {
       dir: resolved,
-      workspaceFile: conventionFile ? import_node_path40.default.join(resolved, conventionFile) : void 0
+      workspaceFile: conventionFile ? import_node_path41.default.join(resolved, conventionFile) : void 0
     };
   }
   return { dir: resolved };
@@ -15911,6 +16220,22 @@ function classifyQualityStatus(score) {
 function usesFileReferencePrompt(provider) {
   return isAgentProvider(provider) || provider.kind === "cli";
 }
+function toScriptConfig(hook, hookName, context2) {
+  const command = hook.command ?? hook.script;
+  if (!command || command.length === 0) {
+    throw new Error(`${hookName} hook in ${context2} requires command or script`);
+  }
+  return {
+    command,
+    ...hook.timeout_ms !== void 0 && { timeout_ms: hook.timeout_ms },
+    ...hook.timeoutMs !== void 0 && { timeoutMs: hook.timeoutMs },
+    ...hook.cwd !== void 0 && { cwd: hook.cwd },
+    ...hook.script !== void 0 && { script: hook.script }
+  };
+}
+function hasHookCommand(hook) {
+  return !!(hook?.command && hook.command.length > 0 || hook?.script && hook.script.length > 0);
+}
 function getWorkspaceTemplate(target) {
   const config = target.config;
   if ("workspaceTemplate" in config && typeof config.workspaceTemplate === "string") {
@@ -15941,7 +16266,15 @@ async function runEvaluation(options) {
     trials,
     streamCallbacks,
     totalBudgetUsd,
-    failOnError
+    failOnError,
+    poolWorkspaces,
+    poolMaxSlots: configPoolMaxSlots,
+    workspace: legacyWorkspacePath,
+    workspaceMode,
+    workspacePath,
+    workspaceClean,
+    retainOnSuccess,
+    retainOnFailure
   } = options;
   let useCache = options.useCache;
   if (trials && trials.count > 1 && useCache) {
@@ -16015,7 +16348,7 @@ async function runEvaluation(options) {
   ];
   const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveJudgeProvider);
   const typeRegistry = createBuiltinRegistry();
-  const discoveryBaseDir = evalFilePath ? import_node_path41.default.dirname(import_node_path41.default.resolve(evalFilePath)) : process.cwd();
+  const discoveryBaseDir = evalFilePath ? import_node_path42.default.dirname(import_node_path42.default.resolve(evalFilePath)) : process.cwd();
   const evalDir = discoveryBaseDir;
   await discoverAssertions(typeRegistry, discoveryBaseDir);
   const providerRegistry = createBuiltinProviderRegistry();
@@ -16077,13 +16410,29 @@ async function runEvaluation(options) {
     }
   };
   const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
-  const hasSharedWorkspace = !!(workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
+  const configuredMode = suiteWorkspace?.mode ?? workspaceMode;
+  const configuredStaticPath = suiteWorkspace?.static_path ?? workspacePath ?? legacyWorkspacePath;
+  const useStaticWorkspace = configuredMode === "static" || !!configuredStaticPath && !configuredMode;
+  if (useStaticWorkspace && isPerTestIsolation) {
+    throw new Error(
+      "static workspace mode is incompatible with isolation: per_test. Use isolation: shared (default)."
+    );
+  }
+  if (configuredMode === "static" && !configuredStaticPath) {
+    throw new Error("workspace.mode=static requires workspace.static_path or --workspace-path");
+  }
+  const hasSharedWorkspace = !!(useStaticWorkspace || workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation);
+  const poolEnabled = configuredMode === "pooled" ? true : configuredMode === "ephemeral" || useStaticWorkspace ? false : suiteWorkspace?.pool ?? poolWorkspaces ?? true;
+  const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
+  const finishCleanPolicy = suiteWorkspace?.hooks?.on_finish?.clean;
+  const resolvedRetainOnSuccess = (finishCleanPolicy === "always" || finishCleanPolicy === "on_success" ? "cleanup" : finishCleanPolicy === "on_failure" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
+  const resolvedRetainOnFailure = (finishCleanPolicy === "always" || finishCleanPolicy === "on_failure" ? "cleanup" : finishCleanPolicy === "on_success" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
   const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
-  const workers = hasSharedWorkspace ? 1 : requestedWorkers;
+  const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
   setupLog(
-    `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
+    `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
   );
-  if (hasSharedWorkspace && requestedWorkers > 1) {
+  if (hasSharedWorkspace && !usePool && requestedWorkers > 1) {
     console.warn(
       `Warning: Shared workspace requires sequential execution. Overriding workers from ${requestedWorkers} to 1.`
     );
@@ -16092,7 +16441,38 @@ async function runEvaluation(options) {
   let sharedWorkspacePath;
   let sharedBaselineCommit;
   let beforeAllOutput;
-  if (workspaceTemplate) {
+  let poolManager;
+  let poolSlot;
+  const poolSlots = [];
+  const availablePoolSlots = [];
+  const poolSlotBaselines = /* @__PURE__ */ new Map();
+  const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
+  if (useStaticWorkspace && configuredStaticPath) {
+    sharedWorkspacePath = configuredStaticPath;
+    setupLog(`using static workspace: ${configuredStaticPath}`);
+  } else if (usePool && suiteWorkspace?.repos) {
+    const slotsNeeded = workers;
+    setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
+    poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
+    const poolRepoManager = new RepoManager(verbose);
+    for (let i = 0; i < slotsNeeded; i++) {
+      const slot = await poolManager.acquireWorkspace({
+        templatePath: workspaceTemplate,
+        repos: suiteWorkspace.repos,
+        maxSlots: poolMaxSlots,
+        repoManager: poolRepoManager,
+        poolReset: (workspaceClean === "full" ? "strict" : workspaceClean === "standard" ? "fast" : null) ?? suiteWorkspace.hooks?.on_reuse?.reset ?? "fast"
+      });
+      poolSlots.push(slot);
+      setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
+    }
+    if (slotsNeeded === 1) {
+      poolSlot = poolSlots[0];
+      sharedWorkspacePath = poolSlot.path;
+    } else {
+      availablePoolSlots.push(...poolSlots);
+    }
+  } else if (workspaceTemplate) {
     setupLog(`creating shared workspace from template: ${workspaceTemplate}`);
     try {
       sharedWorkspacePath = await createTempWorkspace(workspaceTemplate, evalRunId, "shared");
@@ -16101,288 +16481,359 @@ async function runEvaluation(options) {
       const message = error instanceof Error ? error.message : String(error);
       throw new Error(`Failed to create shared workspace: ${message}`);
     }
+  } else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
+    sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
+    await (0, import_promises29.mkdir)(sharedWorkspacePath, { recursive: true });
+    setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
+  }
+  try {
     if (suiteWorkspaceFile && sharedWorkspacePath) {
-      const copiedWorkspaceFile = import_node_path41.default.join(sharedWorkspacePath, import_node_path41.default.basename(suiteWorkspaceFile));
+      const copiedWorkspaceFile = import_node_path42.default.join(sharedWorkspacePath, import_node_path42.default.basename(suiteWorkspaceFile));
       try {
         await (0, import_promises29.stat)(copiedWorkspaceFile);
         suiteWorkspaceFile = copiedWorkspaceFile;
       } catch {
       }
     }
-  } else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
-    sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
-    await (0, import_promises29.mkdir)(sharedWorkspacePath, { recursive: true });
-    setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
-  }
-  const repoManager = suiteWorkspace?.repos?.length ? new RepoManager(void 0, verbose) : void 0;
-  if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
-    setupLog(`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`);
-    try {
-      await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
-      setupLog("shared repo materialization complete");
-    } catch (error) {
-      const message = error instanceof Error ? error.message : String(error);
-      if (sharedWorkspacePath) {
-        await cleanupWorkspace(sharedWorkspacePath).catch(() => {
-        });
-      }
-      throw new Error(`Failed to materialize repos: ${message}`);
-    }
-  }
-  if (sharedWorkspacePath && suiteWorkspace?.before_all) {
-    const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
-    setupLog(
-      `running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
-    );
-    const scriptContext = {
-      workspacePath: sharedWorkspacePath,
-      testId: "__before_all__",
-      evalRunId,
-      evalDir
-    };
-    try {
-      beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
-      setupLog("shared before_all completed");
-    } catch (error) {
-      const message = error instanceof Error ? error.message : String(error);
-      if (sharedWorkspacePath) {
-        await cleanupWorkspace(sharedWorkspacePath).catch(() => {
-        });
-      }
-      throw new Error(`before_all script failed: ${message}`);
-    }
-  }
-  if (sharedWorkspacePath) {
-    try {
-      sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
-      setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
-    } catch {
-      setupLog("shared baseline initialization skipped (non-fatal)");
-    }
-  }
-  let nextWorkerId = 1;
-  const workerIdByEvalId = /* @__PURE__ */ new Map();
-  let beforeAllOutputAttached = false;
-  let cumulativeBudgetCost = 0;
-  let budgetExhausted = false;
-  let failOnErrorTriggered = false;
-  const promises = filteredEvalCases.map(
-    (evalCase) => limit(async () => {
-      const workerId = nextWorkerId++;
-      workerIdByEvalId.set(evalCase.id, workerId);
-      if (totalBudgetUsd !== void 0 && budgetExhausted) {
-        const budgetResult = {
-          timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
-          testId: evalCase.id,
-          dataset: evalCase.dataset,
-          score: 0,
-          hits: [],
-          misses: [],
-          answer: "",
-          target: target.name,
-          error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
-          budgetExceeded: true,
-          executionStatus: "execution_error",
-          failureStage: "setup",
-          failureReasonCode: "budget_exceeded",
-          executionError: {
-            message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
-            stage: "setup"
-          }
-        };
-        if (onProgress) {
-          await onProgress({
-            workerId,
-            testId: evalCase.id,
-            status: "failed",
-            completedAt: Date.now(),
-            error: budgetResult.error
+    const repoManager = suiteWorkspace?.repos?.length && !usePool && !useStaticWorkspace ? new RepoManager(verbose) : void 0;
+    if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
+      setupLog(
+        `materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
+      );
+      try {
+        await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
+        setupLog("shared repo materialization complete");
+      } catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        if (sharedWorkspacePath && !useStaticWorkspace) {
+          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
           });
         }
-        if (onResult) {
-          await onResult(budgetResult);
-        }
-        return budgetResult;
+        throw new Error(`Failed to materialize repos: ${message}`);
       }
-      if (failOnError === true && failOnErrorTriggered) {
-        const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
-        const haltResult = {
-          timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
-          testId: evalCase.id,
-          dataset: evalCase.dataset,
-          score: 0,
-          hits: [],
-          misses: [],
-          answer: "",
-          target: target.name,
-          error: errorMsg,
-          executionStatus: "execution_error",
-          failureStage: "setup",
-          failureReasonCode: "error_threshold_exceeded",
-          executionError: { message: errorMsg, stage: "setup" }
-        };
-        if (onProgress) {
-          await onProgress({
-            workerId,
-            testId: evalCase.id,
-            status: "failed",
-            completedAt: Date.now(),
-            error: haltResult.error
+    }
+    const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all_tests;
+    if (sharedWorkspacePath && hasHookCommand(suiteBeforeAllHook)) {
+      const beforeAllHook = suiteBeforeAllHook;
+      const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
+      setupLog(
+        `running shared before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
+      );
+      const scriptContext = {
+        workspacePath: sharedWorkspacePath,
+        testId: "__before_all__",
+        evalRunId,
+        evalDir
+      };
+      try {
+        beforeAllOutput = await executeWorkspaceScript(
+          toScriptConfig(beforeAllHook, "before_all_tests", "suite workspace"),
+          scriptContext
+        );
+        setupLog("shared before_all completed");
+      } catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        if (sharedWorkspacePath && !useStaticWorkspace) {
+          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
           });
         }
-        if (onResult) {
-          await onResult(haltResult);
-        }
-        return haltResult;
-      }
-      if (onProgress) {
-        await onProgress({
-          workerId,
-          testId: evalCase.id,
-          status: "running",
-          startedAt: Date.now()
-        });
+        throw new Error(`before_all script failed: ${message}`);
       }
-      try {
-        const judgeProvider = await resolveJudgeProvider(target);
-        const runCaseOptions = {
-          evalCase,
-          provider: primaryProvider,
-          target,
-          evaluators: evaluatorRegistry,
-          maxRetries,
-          agentTimeoutMs,
-          cache,
-          useCache,
-          now,
-          judgeProvider,
-          targetResolver,
-          availableTargets,
+    }
+    if (availablePoolSlots.length > 0 && hasHookCommand(suiteBeforeAllHook)) {
+      const beforeAllHook = suiteBeforeAllHook;
+      for (const slot of availablePoolSlots) {
+        setupLog(`running before_all on pool slot ${slot.index}`);
+        const scriptContext = {
+          workspacePath: slot.path,
+          testId: "__before_all__",
           evalRunId,
-          keepWorkspaces,
-          cleanupWorkspaces,
-          sharedWorkspacePath,
-          sharedBaselineCommit,
-          suiteWorkspaceFile,
-          streamCallbacks,
-          typeRegistry,
-          repoManager,
           evalDir
         };
-        let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
-        if (totalBudgetUsd !== void 0) {
-          let caseCost;
-          if (result.trials && result.trials.length > 0) {
-            const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
-            if (trialCostSum > 0) {
-              caseCost = trialCostSum;
+        try {
+          const output = await executeWorkspaceScript(
+            toScriptConfig(beforeAllHook, "before_all_tests", "suite workspace"),
+            scriptContext
+          );
+          if (!beforeAllOutput) beforeAllOutput = output;
+          setupLog(`before_all completed on pool slot ${slot.index}`);
+        } catch (error) {
+          const message = error instanceof Error ? error.message : String(error);
+          throw new Error(`before_all script failed on pool slot ${slot.index}: ${message}`);
+        }
+      }
+    }
+    if (sharedWorkspacePath) {
+      try {
+        sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
+        setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
+      } catch {
+        setupLog("shared baseline initialization skipped (non-fatal)");
+      }
+    }
+    if (availablePoolSlots.length > 0) {
+      for (const slot of availablePoolSlots) {
+        try {
+          const baseline = await initializeBaseline(slot.path);
+          poolSlotBaselines.set(slot.path, baseline);
+          setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
+        } catch {
+          setupLog(`pool slot ${slot.index} baseline initialization skipped (non-fatal)`);
+        }
+      }
+    }
+    let nextWorkerId = 1;
+    const workerIdByEvalId = /* @__PURE__ */ new Map();
+    let beforeAllOutputAttached = false;
+    let cumulativeBudgetCost = 0;
+    let budgetExhausted = false;
+    let failOnErrorTriggered = false;
+    const promises = filteredEvalCases.map(
+      (evalCase) => limit(async () => {
+        const workerId = nextWorkerId++;
+        workerIdByEvalId.set(evalCase.id, workerId);
+        if (totalBudgetUsd !== void 0 && budgetExhausted) {
+          const budgetResult = {
+            timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
+            testId: evalCase.id,
+            dataset: evalCase.dataset,
+            score: 0,
+            hits: [],
+            misses: [],
+            answer: "",
+            target: target.name,
+            error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
+            budgetExceeded: true,
+            executionStatus: "execution_error",
+            failureStage: "setup",
+            failureReasonCode: "budget_exceeded",
+            executionError: {
+              message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
+              stage: "setup"
             }
-          } else {
-            caseCost = result.costUsd;
+          };
+          if (onProgress) {
+            await onProgress({
+              workerId,
+              testId: evalCase.id,
+              status: "failed",
+              completedAt: Date.now(),
+              error: budgetResult.error
+            });
           }
-          if (caseCost !== void 0) {
-            cumulativeBudgetCost += caseCost;
-            if (cumulativeBudgetCost >= totalBudgetUsd) {
-              budgetExhausted = true;
-            }
+          if (onResult) {
+            await onResult(budgetResult);
           }
+          return budgetResult;
         }
-        if (failOnError === true && result.executionStatus === "execution_error") {
-          failOnErrorTriggered = true;
-        }
-        if (beforeAllOutput && !beforeAllOutputAttached) {
-          result = { ...result, beforeAllOutput };
-          beforeAllOutputAttached = true;
+        if (failOnError === true && failOnErrorTriggered) {
+          const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
+          const haltResult = {
+            timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
+            testId: evalCase.id,
+            dataset: evalCase.dataset,
+            score: 0,
+            hits: [],
+            misses: [],
+            answer: "",
+            target: target.name,
+            error: errorMsg,
+            executionStatus: "execution_error",
+            failureStage: "setup",
+            failureReasonCode: "error_threshold_exceeded",
+            executionError: { message: errorMsg, stage: "setup" }
+          };
+          if (onProgress) {
+            await onProgress({
+              workerId,
+              testId: evalCase.id,
+              status: "failed",
+              completedAt: Date.now(),
+              error: haltResult.error
+            });
+          }
+          if (onResult) {
+            await onResult(haltResult);
+          }
+          return haltResult;
         }
         if (onProgress) {
           await onProgress({
             workerId,
             testId: evalCase.id,
-            status: result.error ? "failed" : "completed",
-            startedAt: 0,
-            // Not used for completed status
-            completedAt: Date.now(),
-            error: result.error
+            status: "running",
+            startedAt: Date.now()
           });
         }
-        if (onResult) {
-          await onResult(result);
+        const testPoolSlot = availablePoolSlots.length > 0 ? availablePoolSlots.pop() : void 0;
+        const testWorkspacePath = testPoolSlot?.path ?? sharedWorkspacePath;
+        const testBaselineCommit = testPoolSlot ? poolSlotBaselines.get(testPoolSlot.path) : sharedBaselineCommit;
+        try {
+          const judgeProvider = await resolveJudgeProvider(target);
+          const runCaseOptions = {
+            evalCase,
+            provider: primaryProvider,
+            target,
+            evaluators: evaluatorRegistry,
+            maxRetries,
+            agentTimeoutMs,
+            cache,
+            useCache,
+            now,
+            judgeProvider,
+            targetResolver,
+            availableTargets,
+            evalRunId,
+            keepWorkspaces,
+            cleanupWorkspaces,
+            retainOnSuccess: resolvedRetainOnSuccess,
+            retainOnFailure: resolvedRetainOnFailure,
+            sharedWorkspacePath: testWorkspacePath,
+            sharedBaselineCommit: testBaselineCommit,
+            suiteWorkspaceFile,
+            streamCallbacks,
+            typeRegistry,
+            repoManager,
+            evalDir
+          };
+          let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
+          if (totalBudgetUsd !== void 0) {
+            let caseCost;
+            if (result.trials && result.trials.length > 0) {
+              const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
+              if (trialCostSum > 0) {
+                caseCost = trialCostSum;
+              }
+            } else {
+              caseCost = result.costUsd;
+            }
+            if (caseCost !== void 0) {
+              cumulativeBudgetCost += caseCost;
+              if (cumulativeBudgetCost >= totalBudgetUsd) {
+                budgetExhausted = true;
+              }
+            }
+          }
+          if (failOnError === true && result.executionStatus === "execution_error") {
+            failOnErrorTriggered = true;
+          }
+          if (beforeAllOutput && !beforeAllOutputAttached) {
+            result = { ...result, beforeAllOutput };
+            beforeAllOutputAttached = true;
+          }
+          if (onProgress) {
+            await onProgress({
+              workerId,
+              testId: evalCase.id,
+              status: result.error ? "failed" : "completed",
+              startedAt: 0,
+              // Not used for completed status
+              completedAt: Date.now(),
+              error: result.error
+            });
+          }
+          if (onResult) {
+            await onResult(result);
+          }
+          return result;
+        } catch (error) {
+          if (onProgress) {
+            await onProgress({
+              workerId,
+              testId: evalCase.id,
+              status: "failed",
+              completedAt: Date.now(),
+              error: error instanceof Error ? error.message : String(error)
+            });
+          }
+          throw error;
+        } finally {
+          if (testPoolSlot) {
+            availablePoolSlots.push(testPoolSlot);
+          }
         }
-        return result;
-      } catch (error) {
-        if (onProgress) {
-          await onProgress({
-            workerId,
-            testId: evalCase.id,
-            status: "failed",
-            completedAt: Date.now(),
-            error: error instanceof Error ? error.message : String(error)
-          });
+      })
+    );
+    const settled = await Promise.allSettled(promises);
+    const results = [];
+    for (let i = 0; i < settled.length; i++) {
+      const outcome = settled[i];
+      if (outcome.status === "fulfilled") {
+        results.push(outcome.value);
+      } else {
+        const evalCase = filteredEvalCases[i];
+        const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
+        const promptInputs = await buildPromptInputs(evalCase, formattingMode);
+        const errorResult = buildErrorResult(
+          evalCase,
+          target.name,
+          (now ?? (() => /* @__PURE__ */ new Date()))(),
+          outcome.reason,
+          promptInputs,
+          primaryProvider,
+          "agent",
+          "provider_error"
+        );
+        results.push(errorResult);
+        if (onResult) {
+          await onResult(errorResult);
         }
-        throw error;
       }
-    })
-  );
-  const settled = await Promise.allSettled(promises);
-  const results = [];
-  for (let i = 0; i < settled.length; i++) {
-    const outcome = settled[i];
-    if (outcome.status === "fulfilled") {
-      results.push(outcome.value);
-    } else {
-      const evalCase = filteredEvalCases[i];
-      const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
-      const promptInputs = await buildPromptInputs(evalCase, formattingMode);
-      const errorResult = buildErrorResult(
-        evalCase,
-        target.name,
-        (now ?? (() => /* @__PURE__ */ new Date()))(),
-        outcome.reason,
-        promptInputs,
-        primaryProvider,
-        "agent",
-        "provider_error"
-      );
-      results.push(errorResult);
-      if (onResult) {
-        await onResult(errorResult);
+    }
+    const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
+    const suiteAfterAllHook = suiteWorkspace?.hooks?.after_all_tests;
+    if (afterAllWorkspaces.length > 0 && hasHookCommand(suiteAfterAllHook)) {
+      const afterAllHook = suiteAfterAllHook;
+      for (const wsPath of afterAllWorkspaces) {
+        const scriptContext = {
+          workspacePath: wsPath,
+          testId: "__after_all__",
+          evalRunId,
+          evalDir
+        };
+        try {
+          const afterAllOutput = await executeWorkspaceScript(
+            toScriptConfig(afterAllHook, "after_all_tests", "suite workspace"),
+            scriptContext,
+            "warn"
+          );
+          if (afterAllOutput && results.length > 0 && wsPath === afterAllWorkspaces[0]) {
+            results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
+          }
+        } catch {
+        }
       }
     }
-  }
-  if (sharedWorkspacePath && suiteWorkspace?.after_all) {
-    const scriptContext = {
-      workspacePath: sharedWorkspacePath,
-      testId: "__after_all__",
-      evalRunId,
-      evalDir
-    };
-    try {
-      const afterAllOutput = await executeWorkspaceScript(
-        suiteWorkspace.after_all,
-        scriptContext,
-        "warn"
-      );
-      if (afterAllOutput && results.length > 0) {
-        results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
+    if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !useStaticWorkspace) {
+      const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
+      if (hasFailure) {
+        if (resolvedRetainOnFailure === "cleanup") {
+          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
+          });
+        }
+      } else if (resolvedRetainOnSuccess === "cleanup") {
+        await cleanupWorkspace(sharedWorkspacePath).catch(() => {
+        });
       }
-    } catch {
     }
-  }
-  if (sharedWorkspacePath) {
-    const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
     if (cleanupWorkspaces) {
-      await cleanupWorkspace(sharedWorkspacePath).catch(() => {
-      });
-    } else if (!hasFailure && !keepWorkspaces) {
-      await cleanupWorkspace(sharedWorkspacePath).catch(() => {
+      await cleanupEvalWorkspaces(evalRunId).catch(() => {
       });
     }
+    return results;
+  } finally {
+    if (poolManager) {
+      if (poolSlot) {
+        await poolManager.releaseSlot(poolSlot);
+      }
+      for (const slot of poolSlots) {
+        if (slot !== poolSlot) {
+          await poolManager.releaseSlot(slot).catch(() => {
+          });
+        }
+      }
+    }
   }
-  if (cleanupWorkspaces) {
-    await cleanupEvalWorkspaces(evalRunId).catch(() => {
-    });
-  }
-  return results;
 }
 async function runBatchEvaluation(options) {
   const {
@@ -16554,6 +17005,8 @@ async function runEvalCase(options) {
     evalRunId,
     keepWorkspaces,
     cleanupWorkspaces: forceCleanup,
+    retainOnSuccess,
+    retainOnFailure,
     sharedWorkspacePath,
     sharedBaselineCommit,
     suiteWorkspaceFile,
@@ -16565,10 +17018,10 @@ async function runEvalCase(options) {
   const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
   const promptInputs = await buildPromptInputs(evalCase, formattingMode);
   const typeRegistry = providedTypeRegistry ?? createBuiltinRegistry();
-  const cacheKey2 = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
+  const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
   let cachedResponse;
-  if (cacheKey2 && cache) {
-    cachedResponse = await cache.get(cacheKey2);
+  if (cacheKey && cache) {
+    cachedResponse = await cache.get(cacheKey);
   }
   const nowFn = now ?? (() => /* @__PURE__ */ new Date());
   let workspacePath = sharedWorkspacePath;
@@ -16599,7 +17052,7 @@ async function runEvalCase(options) {
         );
       }
       if (caseWorkspaceFile && workspacePath) {
-        const copiedFile = import_node_path41.default.join(workspacePath, import_node_path41.default.basename(caseWorkspaceFile));
+        const copiedFile = import_node_path42.default.join(workspacePath, import_node_path42.default.basename(caseWorkspaceFile));
         try {
           await (0, import_promises29.stat)(copiedFile);
           caseWorkspaceFile = copiedFile;
@@ -16607,12 +17060,12 @@ async function runEvalCase(options) {
         }
       }
     }
-    if (!workspacePath && (evalCase.workspace?.before_all || evalCase.workspace?.repos?.length) && evalRunId) {
+    if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
       workspacePath = getWorkspacePath(evalRunId, evalCase.id);
       await (0, import_promises29.mkdir)(workspacePath, { recursive: true });
     }
     if (evalCase.workspace?.repos?.length && workspacePath) {
-      const perCaseRepoManager = new RepoManager(void 0, setupDebug);
+      const perCaseRepoManager = new RepoManager(setupDebug);
       try {
         if (setupDebug) {
           console.log(
@@ -16637,11 +17090,13 @@ async function runEvalCase(options) {
         );
       }
     }
-    if (workspacePath && evalCase.workspace?.before_all) {
-      const beforeAllCommand = (evalCase.workspace.before_all.command ?? evalCase.workspace.before_all.script ?? []).join(" ");
+    const caseBeforeAllHook = evalCase.workspace?.hooks?.before_all_tests;
+    if (workspacePath && hasHookCommand(caseBeforeAllHook)) {
+      const beforeAllHook = caseBeforeAllHook;
+      const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
       if (setupDebug) {
         console.log(
-          `[setup] test=${evalCase.id} running before_all in cwd=${evalCase.workspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
+          `[setup] test=${evalCase.id} running before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
         );
       }
       const scriptContext = {
@@ -16654,7 +17109,7 @@ async function runEvalCase(options) {
       };
       try {
         beforeAllOutput = await executeWorkspaceScript(
-          evalCase.workspace.before_all,
+          toScriptConfig(beforeAllHook, "before_all_tests", `test '${evalCase.id}'`),
           scriptContext
         );
         if (setupDebug) {
@@ -16679,7 +17134,9 @@ async function runEvalCase(options) {
       }
     }
   }
-  if (workspacePath && evalCase.workspace?.before_each) {
+  const caseBeforeEachHook = evalCase.workspace?.hooks?.before_each_test;
+  if (workspacePath && hasHookCommand(caseBeforeEachHook)) {
+    const beforeEachHook = caseBeforeEachHook;
     const scriptContext = {
       workspacePath,
       testId: evalCase.id,
@@ -16690,7 +17147,7 @@ async function runEvalCase(options) {
     };
     try {
       beforeEachOutput = await executeWorkspaceScript(
-        evalCase.workspace.before_each,
+        toScriptConfig(beforeEachHook, "before_each_test", `test '${evalCase.id}'`),
         scriptContext
       );
     } catch (error) {
@@ -16778,8 +17235,8 @@ async function runEvalCase(options) {
     }
     return errorResult;
   }
-  if (cacheKey2 && cache && !cachedResponse) {
-    await cache.set(cacheKey2, providerResponse);
+  if (cacheKey && cache && !cachedResponse) {
+    await cache.set(cacheKey, providerResponse);
   }
   const output = providerResponse.output;
   const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
@@ -16807,17 +17264,19 @@ async function runEvalCase(options) {
     }
   }
   const providerError = extractProviderError(providerResponse);
-  if (repoManager && workspacePath && evalCase.workspace?.reset?.after_each && evalCase.workspace.reset.strategy && evalCase.workspace.reset.strategy !== "none" && evalCase.workspace.repos) {
+  if (repoManager && workspacePath && evalCase.workspace?.hooks?.after_each_test?.reset && evalCase.workspace.hooks.after_each_test.reset !== "none" && evalCase.workspace.repos) {
     try {
       await repoManager.reset(
         evalCase.workspace.repos,
         workspacePath,
-        evalCase.workspace.reset.strategy
+        evalCase.workspace.hooks.after_each_test.reset
       );
     } catch {
     }
   }
-  if (workspacePath && evalCase.workspace?.after_each) {
+  const caseAfterEachHook = evalCase.workspace?.hooks?.after_each_test;
+  if (workspacePath && hasHookCommand(caseAfterEachHook)) {
+    const afterEachHook = caseAfterEachHook;
     const scriptContext = {
       workspacePath,
       testId: evalCase.id,
@@ -16828,7 +17287,7 @@ async function runEvalCase(options) {
     };
     try {
       afterEachOutput = await executeWorkspaceScript(
-        evalCase.workspace.after_each,
+        toScriptConfig(afterEachHook, "after_each_test", `test '${evalCase.id}'`),
         scriptContext,
         "warn"
       );
@@ -16878,8 +17337,13 @@ async function runEvalCase(options) {
         await cleanupWorkspace(workspacePath).catch(() => {
         });
       } else if (isFailure) {
-        return { ...finalResult, workspacePath };
-      } else if (!keepWorkspaces) {
+        if ((retainOnFailure ?? "keep") === "cleanup") {
+          await cleanupWorkspace(workspacePath).catch(() => {
+          });
+        } else {
+          return { ...finalResult, workspacePath };
+        }
+      } else if ((retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup")) !== "keep") {
         await cleanupWorkspace(workspacePath).catch(() => {
         });
       }
@@ -16897,11 +17361,12 @@ async function runEvalCase(options) {
       "evaluator_error"
     );
     if (workspacePath && !isSharedWorkspace) {
-      if (forceCleanup) {
+      if (forceCleanup || (retainOnFailure ?? "keep") === "cleanup") {
         await cleanupWorkspace(workspacePath).catch(() => {
         });
+      } else {
+        return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
       }
-      return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
     }
     return { ...errorResult, beforeEachOutput, afterEachOutput };
   }
@@ -16920,7 +17385,9 @@ async function runEvalCaseWithTrials(options, trialsConfig) {
       useCache: false,
       // Force cleanup for intermediate trials
       cleanupWorkspaces: isLastDeclaredTrial ? options.cleanupWorkspaces : true,
-      keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false
+      keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false,
+      retainOnSuccess: isLastDeclaredTrial ? options.retainOnSuccess : "cleanup",
+      retainOnFailure: isLastDeclaredTrial ? options.retainOnFailure : "cleanup"
     };
     const result = await runEvalCase(trialOptions);
     allResults.push(result);
@@ -17209,7 +17676,7 @@ async function runEvaluatorList(options) {
     fileChanges,
     workspacePath
   };
-  const evalFileDir = evalCase.guideline_paths[0] ? import_node_path41.default.dirname(evalCase.guideline_paths[0]) : process.cwd();
+  const evalFileDir = evalCase.guideline_paths[0] ? import_node_path42.default.dirname(evalCase.guideline_paths[0]) : process.cwd();
   const dispatchContext = {
     judgeProvider,
     targetResolver,
@@ -17512,7 +17979,7 @@ function computeWeightedMean(entries) {
 // src/evaluation/evaluate.ts
 var import_node_fs12 = require("fs");
-var import_node_path42 = __toESM(require("path"), 1);
+var import_node_path43 = __toESM(require("path"), 1);
 async function evaluate(config) {
   const startTime = Date.now();
   if (config.tests && config.specFile) {
@@ -17534,13 +18001,13 @@ async function evaluate(config) {
   let evalCases;
   let testFilePath;
   if (config.specFile) {
-    testFilePath = import_node_path42.default.resolve(config.specFile);
+    testFilePath = import_node_path43.default.resolve(config.specFile);
     evalCases = await loadTests(testFilePath, repoRoot, {
       verbose: config.verbose,
       filter: config.filter
     });
   } else {
-    testFilePath = import_node_path42.default.join(process.cwd(), "__programmatic__.yaml");
+    testFilePath = import_node_path43.default.join(process.cwd(), "__programmatic__.yaml");
     evalCases = (config.tests ?? []).map((test) => {
       const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
       const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
@@ -17626,10 +18093,10 @@ function computeSummary(results, durationMs) {
 var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
 async function discoverDefaultTarget(repoRoot) {
   const cwd = process.cwd();
-  const chain = buildDirectoryChain2(import_node_path42.default.join(cwd, "_placeholder"), repoRoot);
+  const chain = buildDirectoryChain2(import_node_path43.default.join(cwd, "_placeholder"), repoRoot);
   for (const dir of chain) {
     for (const candidate of TARGET_FILE_CANDIDATES) {
-      const targetsPath = import_node_path42.default.join(dir, candidate);
+      const targetsPath = import_node_path43.default.join(dir, candidate);
       if (!(0, import_node_fs12.existsSync)(targetsPath)) continue;
       try {
         const definitions = await readTargetDefinitions(targetsPath);
@@ -17644,10 +18111,10 @@ async function discoverDefaultTarget(repoRoot) {
 async function loadEnvHierarchy(repoRoot) {
   const { readFileSync: readFileSync2 } = await import("fs");
   const cwd = process.cwd();
-  const chain = buildDirectoryChain2(import_node_path42.default.join(cwd, "_placeholder"), repoRoot);
+  const chain = buildDirectoryChain2(import_node_path43.default.join(cwd, "_placeholder"), repoRoot);
   const envFiles = [];
   for (const dir of chain) {
-    const envPath = import_node_path42.default.join(dir, ".env");
+    const envPath = import_node_path43.default.join(dir, ".env");
     if ((0, import_node_fs12.existsSync)(envPath)) envFiles.push(envPath);
   }
   for (let i = envFiles.length - 1; i >= 0; i--) {
@@ -17829,7 +18296,7 @@ function buildPrompt(criteria, question, referenceAnswer) {
 // src/evaluation/cache/response-cache.ts
 var import_promises30 = require("fs/promises");
-var import_node_path43 = __toESM(require("path"), 1);
+var import_node_path44 = __toESM(require("path"), 1);
 var DEFAULT_CACHE_PATH = ".agentv/cache";
 var ResponseCache = class {
   cachePath;
@@ -17847,13 +18314,13 @@ var ResponseCache = class {
   }
   async set(key, value) {
     const filePath = this.keyToPath(key);
-    const dir = import_node_path43.default.dirname(filePath);
+    const dir = import_node_path44.default.dirname(filePath);
     await (0, import_promises30.mkdir)(dir, { recursive: true });
     await (0, import_promises30.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
   }
   keyToPath(key) {
     const prefix = key.slice(0, 2);
-    return import_node_path43.default.join(this.cachePath, prefix, `${key}.json`);
+    return import_node_path44.default.join(this.cachePath, prefix, `${key}.json`);
   }
 };
 function shouldEnableCache(params) {
@@ -18340,6 +18807,7 @@ function createAgentKernel() {
   TokenUsageEvaluator,
   ToolTrajectoryEvaluator,
   WorkspaceCreationError,
+  WorkspacePoolManager,
   assembleLlmJudgePrompt,
   avgToolDurationMs,
   buildDirectoryChain,
@@ -18354,6 +18822,7 @@ function createAgentKernel() {
   cleanupEvalWorkspaces,
   cleanupWorkspace,
   computeTraceSummary,
+  computeWorkspaceFingerprint,
   consumeClaudeLogEntries,
   consumeCodexLogEntries,
   consumeCopilotCliLogEntries,
@@ -18386,11 +18855,11 @@ function createAgentKernel() {
   freeformEvaluationSchema,
   generateRubrics,
   getAgentvHome,
-  getGitCacheRoot,
   getHitCount,
   getSubagentsRoot,
   getTraceStateRoot,
   getWorkspacePath,
+  getWorkspacePoolRoot,
   getWorkspacesRoot,
   initializeBaseline,
   isEvaluatorKind,