npm - agentv - Versions diffs - 4.40.1 → 4.41.0-next.1 - Mend

agentv 4.40.1 → 4.41.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/dist/{chunk-BLXYBUU4.js → chunk-ENHX2CCS.js} RENAMED Viewed

@@ -493,8 +493,8 @@ function getErrorMap() {
 // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
 var makeIssue = (params) => {
-  const { data, path: path50, errorMaps, issueData } = params;
-  const fullPath = [...path50, ...issueData.path || []];
+  const { data, path: path51, errorMaps, issueData } = params;
+  const fullPath = [...path51, ...issueData.path || []];
   const fullIssue = {
     ...issueData,
     path: fullPath
@@ -610,11 +610,11 @@ var errorUtil;
 // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
 var ParseInputLazyPath = class {
-  constructor(parent, value, path50, key) {
+  constructor(parent, value, path51, key) {
     this._cachedPath = [];
     this.parent = parent;
     this.data = value;
-    this._path = path50;
+    this._path = path51;
     this._key = key;
   }
   get path() {
@@ -4056,7 +4056,7 @@ var coerce = {
 };
 var NEVER = INVALID;
-// ../../packages/core/dist/chunk-5JNFEE7J.js
+// ../../packages/core/dist/chunk-3EAL7M5J.js
 import { parse } from "yaml";
 import os from "node:os";
 import path from "node:path";
@@ -5252,17 +5252,22 @@ function resolveCopilotFlatProviderConfig(target, env) {
       optionalEnv: true
     }
   );
-  const wireApi = resolveOptionalString(target.wire_api, env, `${target.name} copilot wire API`, {
-    allowLiteral: true,
-    optionalEnv: true
-  });
+  const apiFormat = resolveOptionalString(
+    target.api_format,
+    env,
+    `${target.name} copilot API format`,
+    {
+      allowLiteral: true,
+      optionalEnv: true
+    }
+  );
   return {
     ...type ? { type } : {},
     baseUrl,
     ...apiKey ? { apiKey } : {},
     ...bearerToken ? { bearerToken } : {},
     ...apiVersion ? { apiVersion } : {},
-    ...wireApi ? { wireApi } : {}
+    ...apiFormat ? { wireApi: apiFormat } : {}
   };
 }
 function resolveCopilotCliConfig(target, env, _evalFilePath) {
@@ -6151,21 +6156,17 @@ async function expandFileReferences(tests, evalFileDir) {
   return expanded;
 }
-// ../../packages/core/dist/chunk-M6LF2BEU.js
-import path49 from "node:path";
+// ../../packages/core/dist/chunk-REU6TJT4.js
+import path50 from "node:path";
 import { pathToFileURL as pathToFileURL2 } from "node:url";
 import { existsSync as existsSync7 } from "node:fs";
-import path48 from "node:path";
+import path49 from "node:path";
 import micromatch4 from "micromatch";
 import { mkdir, readFile as readFile3, writeFile } from "node:fs/promises";
 import path5 from "node:path";
-import { execFile as execFile2 } from "node:child_process";
 import { createHash as createHash5, randomUUID as randomUUID10 } from "node:crypto";
-import { existsSync as existsSync6 } from "node:fs";
-import { copyFile as copyFile2, mkdir as mkdir17, readdir as readdir8, stat as stat9 } from "node:fs/promises";
-import path46 from "node:path";
+import path47 from "node:path";
 import { fileURLToPath as fileURLToPath5 } from "node:url";
-import { promisify as promisify6 } from "node:util";
 import micromatch3 from "micromatch";
 import { mkdtemp, rm, writeFile as writeFile2 } from "node:fs/promises";
 import { tmpdir } from "node:os";
@@ -6883,10 +6884,10 @@ function assignProp(target, prop, value) {
     configurable: true
   });
 }
-function getElementAtPath(obj, path50) {
-  if (!path50)
+function getElementAtPath(obj, path51) {
+  if (!path51)
     return obj;
-  return path50.reduce((acc, key) => acc?.[key], obj);
+  return path51.reduce((acc, key) => acc?.[key], obj);
 }
 function promiseAllObject(promisesObj) {
   const keys = Object.keys(promisesObj);
@@ -7206,11 +7207,11 @@ function aborted(x, startIndex = 0) {
   }
   return false;
 }
-function prefixIssues(path50, issues) {
+function prefixIssues(path51, issues) {
   return issues.map((iss) => {
     var _a;
     (_a = iss).path ?? (_a.path = []);
-    iss.path.unshift(path50);
+    iss.path.unshift(path51);
     return iss;
   });
 }
@@ -7347,7 +7348,7 @@ function treeifyError(error40, _mapper) {
     return issue2.message;
   };
   const result = { errors: [] };
-  const processError = (error41, path50 = []) => {
+  const processError = (error41, path51 = []) => {
     var _a, _b;
     for (const issue2 of error41.issues) {
       if (issue2.code === "invalid_union" && issue2.errors.length) {
@@ -7357,7 +7358,7 @@ function treeifyError(error40, _mapper) {
       } else if (issue2.code === "invalid_element") {
         processError({ issues: issue2.issues }, issue2.path);
       } else {
-        const fullpath = [...path50, ...issue2.path];
+        const fullpath = [...path51, ...issue2.path];
         if (fullpath.length === 0) {
           result.errors.push(mapper(issue2));
           continue;
@@ -7387,9 +7388,9 @@ function treeifyError(error40, _mapper) {
   processError(error40);
   return result;
 }
-function toDotPath(path50) {
+function toDotPath(path51) {
   const segs = [];
-  for (const seg of path50) {
+  for (const seg of path51) {
     if (typeof seg === "number")
       segs.push(`[${seg}]`);
     else if (typeof seg === "symbol")
@@ -18819,7 +18820,7 @@ var RequestError = class _RequestError extends Error {
   }
 };
-// ../../packages/core/dist/chunk-M6LF2BEU.js
+// ../../packages/core/dist/chunk-REU6TJT4.js
 import { exec as execCallback } from "node:child_process";
 import { readdirSync, statSync } from "node:fs";
 import { readFile as readFile32, readdir as readdir2, stat as stat2 } from "node:fs/promises";
@@ -18894,6 +18895,11 @@ import path33 from "node:path";
 import fg3 from "fast-glob";
 import { cp, mkdir as mkdir14, readdir as readdir5, rm as rm4, stat as stat6 } from "node:fs/promises";
 import path34 from "node:path";
+import { execFile as execFile2 } from "node:child_process";
+import { existsSync as existsSync6 } from "node:fs";
+import { copyFile as copyFile2, mkdir as mkdir17, readdir as readdir8, stat as stat8 } from "node:fs/promises";
+import path39 from "node:path";
+import { promisify as promisify6 } from "node:util";
 import { createHash as createHash3 } from "node:crypto";
 import { existsSync as existsSync3 } from "node:fs";
 import { cp as cp2, mkdir as mkdir15, readFile as readFile11, readdir as readdir6, rm as rm5, unlink, writeFile as writeFile9 } from "node:fs/promises";
@@ -18909,28 +18915,28 @@ import path36 from "node:path";
 import { stringify as stringifyYaml } from "yaml";
 import { readdir as readdir7, stat as stat7 } from "node:fs/promises";
 import path38 from "node:path";
-import { readFile as readFile18, stat as stat8 } from "node:fs/promises";
-import path45 from "node:path";
+import { readFile as readFile18, stat as stat9 } from "node:fs/promises";
+import path46 from "node:path";
 import micromatch2 from "micromatch";
 import { stringify as stringifyYaml2 } from "yaml";
 import { readFile as readFile12 } from "node:fs/promises";
-import path39 from "node:path";
+import path40 from "node:path";
 import { readFile as readFile13 } from "node:fs/promises";
-import path41 from "node:path";
+import path422 from "node:path";
 import { constants as constants4 } from "node:fs";
 import { access as access4 } from "node:fs/promises";
-import path40 from "node:path";
+import path41 from "node:path";
 import { fileURLToPath as fileURLToPath4 } from "node:url";
 import { readFile as readFile15 } from "node:fs/promises";
-import path422 from "node:path";
+import path43 from "node:path";
 import { readFile as readFile14 } from "node:fs/promises";
 import { readFile as readFile17 } from "node:fs/promises";
-import path44 from "node:path";
+import path45 from "node:path";
 import micromatch from "micromatch";
 import { readFile as readFile16 } from "node:fs/promises";
-import path43 from "node:path";
+import path44 from "node:path";
 import { mkdir as mkdir18, readFile as readFile20, writeFile as writeFile10 } from "node:fs/promises";
-import path47 from "node:path";
+import path48 from "node:path";
 import { readFile as readFile19 } from "node:fs/promises";
 var DEFAULT_CACHE_PATH = ".agentv/cache";
 var ResponseCache = class {
@@ -22462,115 +22468,115 @@ var FieldAccuracyGrader = class {
    * Evaluate a single field against the expected value.
    */
   evaluateField(fieldConfig, candidateData, expectedData) {
-    const { path: path50, match, required: required2 = true, weight = 1 } = fieldConfig;
-    const candidateValue = resolvePath(candidateData, path50);
-    const expectedValue = resolvePath(expectedData, path50);
+    const { path: path51, match, required: required2 = true, weight = 1 } = fieldConfig;
+    const candidateValue = resolvePath(candidateData, path51);
+    const expectedValue = resolvePath(expectedData, path51);
     if (expectedValue === void 0) {
       return {
-        path: path50,
+        path: path51,
         score: 1,
         // No expected value means no comparison needed
         weight,
         hit: true,
-        message: `${path50}: no expected value`
+        message: `${path51}: no expected value`
       };
     }
     if (candidateValue === void 0) {
       if (required2) {
         return {
-          path: path50,
+          path: path51,
           score: 0,
           weight,
           hit: false,
-          message: `${path50} (required, missing)`
+          message: `${path51} (required, missing)`
         };
       }
       return {
-        path: path50,
+        path: path51,
         score: 1,
         // Don't penalize missing optional fields
         weight: 0,
         // Zero weight means it won't affect the score
         hit: true,
-        message: `${path50}: optional field missing`
+        message: `${path51}: optional field missing`
       };
     }
     switch (match) {
       case "exact":
-        return this.compareExact(path50, candidateValue, expectedValue, weight);
+        return this.compareExact(path51, candidateValue, expectedValue, weight);
       case "numeric_tolerance":
         return this.compareNumericTolerance(
-          path50,
+          path51,
           candidateValue,
           expectedValue,
           fieldConfig,
           weight
         );
       case "date":
-        return this.compareDate(path50, candidateValue, expectedValue, fieldConfig, weight);
+        return this.compareDate(path51, candidateValue, expectedValue, fieldConfig, weight);
       default:
         return {
-          path: path50,
+          path: path51,
           score: 0,
           weight,
           hit: false,
-          message: `${path50}: unknown match type "${match}"`
+          message: `${path51}: unknown match type "${match}"`
         };
     }
   }
   /**
    * Exact equality comparison.
    */
-  compareExact(path50, candidateValue, expectedValue, weight) {
+  compareExact(path51, candidateValue, expectedValue, weight) {
     if (deepEqual(candidateValue, expectedValue)) {
       return {
-        path: path50,
+        path: path51,
         score: 1,
         weight,
         hit: true,
-        message: path50
+        message: path51
       };
     }
     if (typeof candidateValue !== typeof expectedValue) {
       return {
-        path: path50,
+        path: path51,
         score: 0,
         weight,
         hit: false,
-        message: `${path50} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
+        message: `${path51} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
       };
     }
     return {
-      path: path50,
+      path: path51,
       score: 0,
       weight,
       hit: false,
-      message: `${path50} (value mismatch)`
+      message: `${path51} (value mismatch)`
     };
   }
   /**
    * Numeric comparison with absolute or relative tolerance.
    */
-  compareNumericTolerance(path50, candidateValue, expectedValue, fieldConfig, weight) {
+  compareNumericTolerance(path51, candidateValue, expectedValue, fieldConfig, weight) {
     const { tolerance = 0, relative = false } = fieldConfig;
     const candidateNum = toNumber(candidateValue);
     const expectedNum = toNumber(expectedValue);
     if (candidateNum === null || expectedNum === null) {
       return {
-        path: path50,
+        path: path51,
         score: 0,
         weight,
         hit: false,
-        message: `${path50} (non-numeric value)`
+        message: `${path51} (non-numeric value)`
       };
     }
     if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
       return {
-        path: path50,
+        path: path51,
         score: 0,
         weight,
         hit: false,
-        message: `${path50} (invalid numeric value)`
+        message: `${path51} (invalid numeric value)`
       };
     }
     const diff = Math.abs(candidateNum - expectedNum);
@@ -22583,61 +22589,61 @@ var FieldAccuracyGrader = class {
     }
     if (withinTolerance) {
       return {
-        path: path50,
+        path: path51,
         score: 1,
         weight,
         hit: true,
-        message: `${path50} (within tolerance: diff=${diff.toFixed(2)})`
+        message: `${path51} (within tolerance: diff=${diff.toFixed(2)})`
       };
     }
     return {
-      path: path50,
+      path: path51,
       score: 0,
       weight,
       hit: false,
-      message: `${path50} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
+      message: `${path51} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
     };
   }
   /**
    * Date comparison with format normalization.
    */
-  compareDate(path50, candidateValue, expectedValue, fieldConfig, weight) {
+  compareDate(path51, candidateValue, expectedValue, fieldConfig, weight) {
     const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
     const candidateDate = parseDate(String(candidateValue), formats);
     const expectedDate = parseDate(String(expectedValue), formats);
     if (candidateDate === null) {
       return {
-        path: path50,
+        path: path51,
         score: 0,
         weight,
         hit: false,
-        message: `${path50} (unparseable candidate date)`
+        message: `${path51} (unparseable candidate date)`
       };
     }
     if (expectedDate === null) {
       return {
-        path: path50,
+        path: path51,
         score: 0,
         weight,
         hit: false,
-        message: `${path50} (unparseable expected date)`
+        message: `${path51} (unparseable expected date)`
       };
     }
     if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
       return {
-        path: path50,
+        path: path51,
         score: 1,
         weight,
         hit: true,
-        message: path50
+        message: path51
       };
     }
     return {
-      path: path50,
+      path: path51,
       score: 0,
       weight,
       hit: false,
-      message: `${path50} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
+      message: `${path51} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
     };
   }
   /**
@@ -22670,11 +22676,11 @@ var FieldAccuracyGrader = class {
     };
   }
 };
-function resolvePath(obj, path50) {
-  if (!path50 || !obj) {
+function resolvePath(obj, path51) {
+  if (!path51 || !obj) {
     return void 0;
   }
-  const parts = path50.split(/\.|\[|\]/).filter((p) => p.length > 0);
+  const parts = path51.split(/\.|\[|\]/).filter((p) => p.length > 0);
   let current = obj;
   for (const part of parts) {
     if (current === null || current === void 0) {
@@ -23205,8 +23211,8 @@ var TokenUsageGrader = class {
     };
   }
 };
-function getNestedValue(obj, path50) {
-  const parts = path50.split(".");
+function getNestedValue(obj, path51) {
+  const parts = path51.split(".");
   let current = obj;
   for (const part of parts) {
     if (current === null || current === void 0 || typeof current !== "object") {
@@ -33200,6 +33206,47 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
     await rm4(evalDir, { recursive: true, force: true });
   }
 }
+function interpolateArgs(args, context) {
+  const vars = {
+    workspace_path: context.workspacePath,
+    test_id: context.testId,
+    eval_run_id: context.evalRunId,
+    case_input: context.caseInput ?? "",
+    case_metadata: context.caseMetadata ? JSON.stringify(context.caseMetadata) : ""
+  };
+  return args.map((arg) => arg.replace(/\{\{(\w+)\}\}/g, (match, name) => vars[name] ?? match));
+}
+async function executeWorkspaceScript(config2, context, failureMode = "fatal") {
+  const stdin = JSON.stringify({
+    workspace_path: context.workspacePath,
+    test_id: context.testId,
+    eval_run_id: context.evalRunId,
+    case_input: context.caseInput ?? null,
+    case_metadata: context.caseMetadata ?? null
+  });
+  const timeoutMs = config2.timeout_ms ?? (failureMode === "fatal" ? 6e4 : 3e4);
+  const cwd = config2.cwd ?? context.workspaceFileDir ?? context.evalDir;
+  if (config2.script !== void 0 && config2.command === void 0) {
+    console.warn(
+      "\x1B[33mWarning: 'script' is deprecated in workspace config. Use 'command' instead.\x1B[0m"
+    );
+  }
+  const rawCommand = config2.command ?? config2.script ?? [];
+  const commandArray = interpolateArgs(rawCommand, context);
+  const result = await execFileWithStdin(commandArray, stdin, {
+    timeoutMs,
+    cwd
+  });
+  if (result.exitCode !== 0) {
+    const stderr = result.stderr.trim();
+    const message = stderr ? `${stderr}` : `Process exited with code ${result.exitCode}`;
+    if (failureMode === "fatal") {
+      throw new Error(`Script failed: ${message}`);
+    }
+    console.warn(`Script warning: ${message}`);
+  }
+  return result.stdout;
+}
 var GITHUB_SHORTHAND_RE = /^[A-Za-z0-9_.-]+\/[A-Za-z0-9_.-]+$/;
 function resolveRepoCloneUrl(repo) {
   const trimmed = repo.trim();
@@ -34184,46 +34231,919 @@ async function resolveWorkspaceTemplate(templatePath) {
   }
   return { dir: resolved };
 }
-function interpolateArgs(args, context) {
-  const vars = {
-    workspace_path: context.workspacePath,
-    test_id: context.testId,
-    eval_run_id: context.evalRunId,
-    case_input: context.caseInput ?? "",
-    case_metadata: context.caseMetadata ? JSON.stringify(context.caseMetadata) : ""
+var execFileAsync2 = promisify6(execFile2);
+var WORKSPACE_GIT_TIMEOUT_MS = 3e5;
+var WorkspaceSetupError = class extends Error {
+  failureStage;
+  failureReasonCode;
+  hookExecutions;
+  constructor(message, options) {
+    super(message);
+    this.name = "WorkspaceSetupError";
+    this.failureStage = options.failureStage;
+    this.failureReasonCode = options.failureReasonCode;
+    this.hookExecutions = options.hookExecutions ?? [];
+    if (options.cause !== void 0) {
+      this.cause = options.cause;
+    }
+  }
+};
+function toScriptConfig(hook, hookName, context) {
+  const command = hook.command ?? hook.script;
+  if (!command || command.length === 0) {
+    throw new Error(`${hookName} hook in ${context} requires command or script`);
+  }
+  return {
+    command,
+    ...hook.timeout_ms !== void 0 && { timeout_ms: hook.timeout_ms },
+    ...hook.timeoutMs !== void 0 && { timeoutMs: hook.timeoutMs },
+    ...hook.cwd !== void 0 && { cwd: hook.cwd },
+    ...hook.script !== void 0 && { script: hook.script }
   };
-  return args.map((arg) => arg.replace(/\{\{(\w+)\}\}/g, (match, name) => vars[name] ?? match));
 }
-async function executeWorkspaceScript(config2, context, failureMode = "fatal") {
-  const stdin = JSON.stringify({
-    workspace_path: context.workspacePath,
-    test_id: context.testId,
-    eval_run_id: context.evalRunId,
-    case_input: context.caseInput ?? null,
-    case_metadata: context.caseMetadata ?? null
-  });
-  const timeoutMs = config2.timeout_ms ?? (failureMode === "fatal" ? 6e4 : 3e4);
-  const cwd = config2.cwd ?? context.workspaceFileDir ?? context.evalDir;
-  if (config2.script !== void 0 && config2.command === void 0) {
+function hasHookCommand(hook) {
+  return !!(hook?.command && hook.command.length > 0 || hook?.script && hook.script.length > 0);
+}
+function hooksEnabled(workspace) {
+  return workspace?.hooks?.enabled !== false;
+}
+function workspaceGitEnv() {
+  const env = { ...process.env };
+  for (const key of Object.keys(env)) {
+    if (key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND") {
+      delete env[key];
+    }
+  }
+  return {
+    ...env,
+    GIT_TERMINAL_PROMPT: "0",
+    GIT_ASKPASS: "",
+    GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
+  };
+}
+async function resetWorkspaceRoot(workspacePath, resetMode, baselineRef) {
+  if (!existsSync6(path39.join(workspacePath, ".git"))) {
+    return false;
+  }
+  const cleanFlag = resetMode === "strict" ? "-fdx" : "-fd";
+  const opts = {
+    cwd: workspacePath,
+    timeout: WORKSPACE_GIT_TIMEOUT_MS,
+    env: workspaceGitEnv(),
+    maxBuffer: 50 * 1024 * 1024
+  };
+  await execFileAsync2("git", ["reset", "--hard", baselineRef ?? "HEAD"], opts);
+  await execFileAsync2("git", ["clean", cleanFlag], opts);
+  return true;
+}
+function commandForHook(hook) {
+  return hook?.command ?? hook?.script;
+}
+function hookExecution(options) {
+  const command = commandForHook(options.hook);
+  return {
+    scope: options.scope,
+    name: options.name,
+    status: options.status,
+    testId: options.testId,
+    ...options.workspacePath !== void 0 && { workspacePath: options.workspacePath },
+    ...command !== void 0 && { command },
+    ...options.hook?.cwd !== void 0 && { cwd: options.hook.cwd },
+    ...options.output !== void 0 && { output: options.output },
+    ...options.error !== void 0 && { error: options.error }
+  };
+}
+async function releasePoolSlots(setup) {
+  if (!setup.poolManager) {
+    return;
+  }
+  if (setup.poolSlot) {
+    await setup.poolManager.releaseSlot(setup.poolSlot);
+  }
+  for (const slot of setup.poolSlots) {
+    if (slot !== setup.poolSlot) {
+      await setup.poolManager.releaseSlot(slot).catch(() => {
+      });
+    }
+  }
+}
+async function releaseSharedWorkspaceSetup(setup) {
+  await releasePoolSlots(setup);
+}
+async function prepareSharedWorkspaceSetup(options) {
+  const {
+    evalRunId,
+    evalCases,
+    targetHooks,
+    evalDir,
+    verbose,
+    workers,
+    poolMaxSlots: configPoolMaxSlots,
+    workspacePath,
+    legacyWorkspacePath,
+    workspaceMode,
+    workspaceClean
+  } = options;
+  const suiteWorkspace = evalCases[0]?.workspace;
+  const rawTemplate = suiteWorkspace?.template;
+  const resolvedTemplate = await resolveWorkspaceTemplate(rawTemplate);
+  const workspaceTemplate = resolvedTemplate?.dir;
+  let suiteWorkspaceFile = resolvedTemplate?.workspaceFile;
+  const setupLog = (message) => {
+    if (verbose) {
+      console.log(`[setup] ${message}`);
+    }
+  };
+  const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
+  const cliWorkspacePath = workspacePath ?? legacyWorkspacePath;
+  const yamlWorkspacePath = suiteWorkspace?.path;
+  if (cliWorkspacePath && workspaceMode && workspaceMode !== "static") {
+    throw new Error("--workspace-path requires --workspace-mode static when both are provided");
+  }
+  let configuredMode = cliWorkspacePath ? "static" : workspaceMode ?? suiteWorkspace?.mode ?? (yamlWorkspacePath ? "static" : "pooled");
+  const configuredStaticPath = cliWorkspacePath ?? yamlWorkspacePath;
+  if (configuredMode === "static" && !configuredStaticPath) {
+    if (!suiteWorkspace?.repos?.length) {
+      setupLog("workspace.mode=static with no path and no repos \u2014 falling back to temp mode");
+      configuredMode = "temp";
+    } else {
+      throw new Error("workspace.mode=static requires workspace.path or --workspace-path");
+    }
+  }
+  const useStaticWorkspace = configuredMode === "static";
+  if (useStaticWorkspace && isPerTestIsolation) {
+    throw new Error(
+      "static workspace mode is incompatible with isolation: per_test. Use isolation: shared (default)."
+    );
+  }
+  if (configuredMode !== "static" && configuredStaticPath) {
+    throw new Error("workspace.path requires workspace.mode=static");
+  }
+  const hasSharedWorkspace = !!(useStaticWorkspace || !isPerTestIsolation && (workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length));
+  const poolEnabled = configuredMode === "pooled";
+  const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
+  setupLog(
+    `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} workers=${workers}`
+  );
+  if (hasSharedWorkspace && !usePool && workers > 1 && evalCases.length > 1) {
     console.warn(
-      "\x1B[33mWarning: 'script' is deprecated in workspace config. Use 'command' instead.\x1B[0m"
+      [
+        `Warning: This eval uses a shared workspace with ${workers} workers.`,
+        "If the agent under test makes file edits, concurrent runs may corrupt each other.",
+        "To limit concurrency, add this to your eval YAML:",
+        "",
+        "  execution:",
+        "    workers: 1",
+        "",
+        "Or pass --workers 1 on the command line."
+      ].join("\n")
     );
   }
-  const rawCommand = config2.command ?? config2.script ?? [];
-  const commandArray = interpolateArgs(rawCommand, context);
-  const result = await execFileWithStdin(commandArray, stdin, {
-    timeoutMs,
-    cwd
-  });
-  if (result.exitCode !== 0) {
-    const stderr = result.stderr.trim();
-    const message = stderr ? `${stderr}` : `Process exited with code ${result.exitCode}`;
-    if (failureMode === "fatal") {
-      throw new Error(`Script failed: ${message}`);
+  let sharedWorkspacePath;
+  let sharedBaselineCommit;
+  let beforeAllOutput;
+  let poolManager;
+  let poolSlot;
+  const poolSlots = [];
+  const availablePoolSlots = [];
+  const poolSlotBaselines = /* @__PURE__ */ new Map();
+  const hookExecutions = [];
+  const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
+  let staticMaterialised = false;
+  const isYamlConfiguredPath = !cliWorkspacePath && !!yamlWorkspacePath;
+  let repoManager;
+  try {
+    if (useStaticWorkspace && configuredStaticPath) {
+      const dirExists = await stat8(configuredStaticPath).then(
+        (s) => s.isDirectory(),
+        () => false
+      );
+      const isEmpty = dirExists ? (await readdir8(configuredStaticPath)).length === 0 : false;
+      if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
+        if (!dirExists) {
+          await mkdir17(configuredStaticPath, { recursive: true });
+        }
+        if (workspaceTemplate) {
+          await copyDirectoryRecursive(workspaceTemplate, configuredStaticPath);
+          setupLog(`copied template into static workspace: ${configuredStaticPath}`);
+        }
+        staticMaterialised = true;
+        setupLog(`materialised static workspace at: ${configuredStaticPath}`);
+      } else {
+        setupLog(`reusing existing static workspace: ${configuredStaticPath}`);
+      }
+      sharedWorkspacePath = configuredStaticPath;
+    } else if (!isPerTestIsolation && usePool && suiteWorkspace?.repos) {
+      const slotsNeeded = workers;
+      setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
+      poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
+      const poolRepoManager = new RepoManager(verbose);
+      for (let i = 0; i < slotsNeeded; i++) {
+        const slot = await poolManager.acquireWorkspace({
+          templatePath: workspaceTemplate,
+          repos: suiteWorkspace.repos,
+          maxSlots: poolMaxSlots,
+          repoManager: poolRepoManager,
+          poolReset: (workspaceClean === "full" ? "strict" : workspaceClean === "standard" ? "fast" : null) ?? "fast"
+        });
+        poolSlots.push(slot);
+        setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
+      }
+      if (slotsNeeded === 1) {
+        poolSlot = poolSlots[0];
+        sharedWorkspacePath = poolSlot.path;
+      } else {
+        availablePoolSlots.push(...poolSlots);
+      }
+    } else if (!isPerTestIsolation && workspaceTemplate) {
+      setupLog(`creating shared workspace from template: ${workspaceTemplate}`);
+      try {
+        sharedWorkspacePath = await createTempWorkspace(workspaceTemplate, evalRunId, "shared");
+        setupLog(`shared workspace created at: ${sharedWorkspacePath}`);
+      } catch (error40) {
+        const message = error40 instanceof Error ? error40.message : String(error40);
+        throw new WorkspaceSetupError(`Failed to create shared workspace: ${message}`, {
+          failureStage: "setup",
+          failureReasonCode: "template_error",
+          hookExecutions,
+          cause: error40
+        });
+      }
+    } else if (!isPerTestIsolation && (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length)) {
+      sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
+      await mkdir17(sharedWorkspacePath, { recursive: true });
+      setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
     }
-    console.warn(`Script warning: ${message}`);
+    if (suiteWorkspaceFile && sharedWorkspacePath) {
+      const copiedWorkspaceFile = path39.join(sharedWorkspacePath, path39.basename(suiteWorkspaceFile));
+      try {
+        await stat8(copiedWorkspaceFile);
+        suiteWorkspaceFile = copiedWorkspaceFile;
+      } catch {
+      }
+    }
+    const hasReposToMaterialize = !!suiteWorkspace?.repos?.length && !usePool && !isPerTestIsolation;
+    const needsRepoMaterialisation = hasReposToMaterialize && (!useStaticWorkspace || staticMaterialised);
+    const needsPerRepoCheck = hasReposToMaterialize && useStaticWorkspace && !staticMaterialised && isYamlConfiguredPath;
+    repoManager = needsRepoMaterialisation || needsPerRepoCheck ? new RepoManager(verbose) : void 0;
+    if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos) {
+      try {
+        if (needsPerRepoCheck) {
+          for (const repo of suiteWorkspace.repos) {
+            if (!repo.path || !repo.repo) continue;
+            const targetDir = path39.join(sharedWorkspacePath, repo.path);
+            if (existsSync6(targetDir)) {
+              setupLog(`reusing existing repo at: ${targetDir}`);
+              continue;
+            }
+            setupLog(`materializing missing repo: ${repo.path}`);
+            await repoManager.materialize(repo, sharedWorkspacePath);
+          }
+        } else {
+          setupLog(
+            `materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
+          );
+          await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
+        }
+        setupLog("shared repo materialization complete");
+      } catch (error40) {
+        const message = error40 instanceof Error ? error40.message : String(error40);
+        if (sharedWorkspacePath && !useStaticWorkspace) {
+          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
+          });
+        }
+        throw new WorkspaceSetupError(`Failed to materialize repos: ${message}`, {
+          failureStage: "repo_setup",
+          failureReasonCode: "clone_error",
+          hookExecutions,
+          cause: error40
+        });
+      }
+    }
+    const suiteDockerConfig = suiteWorkspace?.docker;
+    if (suiteDockerConfig) {
+      setupLog(`pulling Docker image: ${suiteDockerConfig.image}`);
+      const { DockerWorkspaceProvider } = await import("./docker-workspace-RPPXBT27-B4AQHVWA.js");
+      const dockerSetup = new DockerWorkspaceProvider(suiteDockerConfig);
+      if (!await dockerSetup.isDockerAvailable()) {
+        throw new Error(
+          "Docker workspace configured but Docker CLI is not available. Install Docker and ensure it is running."
+        );
+      }
+      await dockerSetup.pullImage();
+      setupLog("Docker image pull complete");
+    }
+    if (suiteWorkspace?.env) {
+      try {
+        await runPreflightChecks(suiteWorkspace.env, sharedWorkspacePath ?? void 0, setupLog);
+        setupLog("preflight checks passed");
+      } catch (error40) {
+        const message = error40 instanceof Error ? error40.message : String(error40);
+        if (sharedWorkspacePath && !useStaticWorkspace) {
+          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
+          });
+        }
+        throw new WorkspaceSetupError(message, {
+          failureStage: "setup",
+          failureReasonCode: "preflight_error",
+          hookExecutions,
+          cause: error40
+        });
+      }
+    }
+    const suiteHooksEnabled = hooksEnabled(suiteWorkspace);
+    const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all;
+    if (sharedWorkspacePath && suiteHooksEnabled && hasHookCommand(suiteBeforeAllHook)) {
+      const beforeAllHook = suiteBeforeAllHook;
+      const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
+      setupLog(
+        `running shared before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
+      );
+      const scriptContext = {
+        workspacePath: sharedWorkspacePath,
+        testId: "__before_all__",
+        evalRunId,
+        evalDir,
+        workspaceFileDir: suiteWorkspace?.workspaceFileDir
+      };
+      try {
+        beforeAllOutput = await executeWorkspaceScript(
+          toScriptConfig(beforeAllHook, "before_all", "suite workspace"),
+          scriptContext
+        );
+        hookExecutions.push(
+          hookExecution({
+            scope: "workspace",
+            name: "before_all",
+            status: "success",
+            testId: "__before_all__",
+            workspacePath: sharedWorkspacePath,
+            hook: beforeAllHook,
+            output: beforeAllOutput
+          })
+        );
+        setupLog("shared before_all completed");
+      } catch (error40) {
+        const message = error40 instanceof Error ? error40.message : String(error40);
+        hookExecutions.push(
+          hookExecution({
+            scope: "workspace",
+            name: "before_all",
+            status: "failed",
+            testId: "__before_all__",
+            workspacePath: sharedWorkspacePath,
+            hook: beforeAllHook,
+            error: message
+          })
+        );
+        if (sharedWorkspacePath && !useStaticWorkspace) {
+          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
+          });
+        }
+        throw new WorkspaceSetupError(`before_all script failed: ${message}`, {
+          failureStage: "setup",
+          failureReasonCode: "script_error",
+          hookExecutions,
+          cause: error40
+        });
+      }
+    }
+    if (availablePoolSlots.length > 0 && suiteHooksEnabled && hasHookCommand(suiteBeforeAllHook)) {
+      const beforeAllHook = suiteBeforeAllHook;
+      for (const slot of availablePoolSlots) {
+        setupLog(`running before_all on pool slot ${slot.index}`);
+        const scriptContext = {
+          workspacePath: slot.path,
+          testId: "__before_all__",
+          evalRunId,
+          evalDir,
+          workspaceFileDir: suiteWorkspace?.workspaceFileDir
+        };
+        try {
+          const output = await executeWorkspaceScript(
+            toScriptConfig(beforeAllHook, "before_all", "suite workspace"),
+            scriptContext
+          );
+          if (!beforeAllOutput) beforeAllOutput = output;
+          hookExecutions.push(
+            hookExecution({
+              scope: "workspace",
+              name: "before_all",
+              status: "success",
+              testId: "__before_all__",
+              workspacePath: slot.path,
+              hook: beforeAllHook,
+              output
+            })
+          );
+          setupLog(`before_all completed on pool slot ${slot.index}`);
+        } catch (error40) {
+          const message = error40 instanceof Error ? error40.message : String(error40);
+          hookExecutions.push(
+            hookExecution({
+              scope: "workspace",
+              name: "before_all",
+              status: "failed",
+              testId: "__before_all__",
+              workspacePath: slot.path,
+              hook: beforeAllHook,
+              error: message
+            })
+          );
+          throw new WorkspaceSetupError(
+            `before_all script failed on pool slot ${slot.index}: ${message}`,
+            {
+              failureStage: "setup",
+              failureReasonCode: "script_error",
+              hookExecutions,
+              cause: error40
+            }
+          );
+        }
+      }
+    }
+    const targetBeforeAllHook = targetHooks?.before_all;
+    if (sharedWorkspacePath && hasHookCommand(targetBeforeAllHook)) {
+      const beforeAllCommand = (targetBeforeAllHook.command ?? []).join(" ");
+      setupLog(`running target before_all command=${beforeAllCommand}`);
+      const scriptContext = {
+        workspacePath: sharedWorkspacePath,
+        testId: "__target_before_all__",
+        evalRunId,
+        evalDir,
+        workspaceFileDir: suiteWorkspace?.workspaceFileDir
+      };
+      try {
+        await executeWorkspaceScript(
+          toScriptConfig(targetBeforeAllHook, "before_all", "target hooks"),
+          scriptContext
+        );
+        hookExecutions.push(
+          hookExecution({
+            scope: "target",
+            name: "before_all",
+            status: "success",
+            testId: "__target_before_all__",
+            workspacePath: sharedWorkspacePath,
+            hook: targetBeforeAllHook
+          })
+        );
+        setupLog("target before_all completed");
+      } catch (error40) {
+        const message = error40 instanceof Error ? error40.message : String(error40);
+        hookExecutions.push(
+          hookExecution({
+            scope: "target",
+            name: "before_all",
+            status: "failed",
+            testId: "__target_before_all__",
+            workspacePath: sharedWorkspacePath,
+            hook: targetBeforeAllHook,
+            error: message
+          })
+        );
+        if (sharedWorkspacePath && !useStaticWorkspace) {
+          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
+          });
+        }
+        throw new WorkspaceSetupError(`target before_all hook failed: ${message}`, {
+          failureStage: "setup",
+          failureReasonCode: "script_error",
+          hookExecutions,
+          cause: error40
+        });
+      }
+    }
+    if (availablePoolSlots.length > 0 && hasHookCommand(targetBeforeAllHook)) {
+      for (const slot of availablePoolSlots) {
+        setupLog(`running target before_all on pool slot ${slot.index}`);
+        const scriptContext = {
+          workspacePath: slot.path,
+          testId: "__target_before_all__",
+          evalRunId,
+          evalDir,
+          workspaceFileDir: suiteWorkspace?.workspaceFileDir
+        };
+        try {
+          await executeWorkspaceScript(
+            toScriptConfig(targetBeforeAllHook, "before_all", "target hooks"),
+            scriptContext
+          );
+          hookExecutions.push(
+            hookExecution({
+              scope: "target",
+              name: "before_all",
+              status: "success",
+              testId: "__target_before_all__",
+              workspacePath: slot.path,
+              hook: targetBeforeAllHook
+            })
+          );
+        } catch (error40) {
+          const message = error40 instanceof Error ? error40.message : String(error40);
+          hookExecutions.push(
+            hookExecution({
+              scope: "target",
+              name: "before_all",
+              status: "failed",
+              testId: "__target_before_all__",
+              workspacePath: slot.path,
+              hook: targetBeforeAllHook,
+              error: message
+            })
+          );
+          throw new WorkspaceSetupError(
+            `target before_all hook failed on pool slot ${slot.index}: ${message}`,
+            {
+              failureStage: "setup",
+              failureReasonCode: "script_error",
+              hookExecutions,
+              cause: error40
+            }
+          );
+        }
+      }
+    }
+    if (sharedWorkspacePath) {
+      try {
+        sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
+        setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
+      } catch (error40) {
+        const message = error40 instanceof Error ? error40.message : String(error40);
+        setupLog(`shared baseline initialization failed (file_changes unavailable): ${message}`);
+      }
+    }
+    if (availablePoolSlots.length > 0) {
+      for (const slot of availablePoolSlots) {
+        try {
+          const baseline = await initializeBaseline(slot.path);
+          poolSlotBaselines.set(slot.path, baseline);
+          setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
+        } catch (error40) {
+          const message = error40 instanceof Error ? error40.message : String(error40);
+          setupLog(
+            `pool slot ${slot.index} baseline initialization failed (file_changes unavailable): ${message}`
+          );
+        }
+      }
+    }
+    return {
+      ...suiteWorkspace !== void 0 && { suiteWorkspace },
+      ...sharedWorkspacePath !== void 0 && { sharedWorkspacePath },
+      ...sharedBaselineCommit !== void 0 && { sharedBaselineCommit },
+      ...suiteWorkspaceFile !== void 0 && { suiteWorkspaceFile },
+      ...beforeAllOutput !== void 0 && { beforeAllOutput },
+      ...repoManager !== void 0 && { repoManager },
+      ...poolManager !== void 0 && { poolManager },
+      ...poolSlot !== void 0 && { poolSlot },
+      poolSlots,
+      availablePoolSlots,
+      poolSlotBaselines,
+      useStaticWorkspace,
+      configuredMode,
+      hookExecutions
+    };
+  } catch (error40) {
+    await releasePoolSlots({ poolManager, poolSlot, poolSlots }).catch(() => {
+    });
+    throw error40;
+  }
+}
+async function prepareEvalCaseWorkspace(options) { // Sets up the workspace for one eval case: reuse or create the directory, run setup hooks, pick a baseline commit.
+  const { // Failures caught below are rethrown as WorkspaceSetupError with the hookExecutions recorded so far; baseline failures are not rethrown.
+    evalCase, // case definition; this function reads id, workspace, metadata and question
+    evalRunId, // needed to create a per-case workspace; hooks receive "" when it is missing
+    sharedWorkspacePath, // when set, the case runs in this directory and the per-case creation branch is skipped
+    sharedBaselineCommit, // reused as the baseline unless a before_each hook succeeds
+    suiteWorkspaceFile, // fallback for caseWorkspaceFile in the returned object
+    repoManager, // only used for the before_each reset of the case's repos
+    evalDir, // forwarded to every hook's script context
+    cleanupWorkspaces: forceCleanup, // when truthy, the workspace is removed if the case before_all hook fails
+    targetHooks, // only targetHooks.before_each is read here
+    setupDebug // enables the [setup] console output and is passed to the per-case RepoManager
+  } = options;
+  let workspacePath = sharedWorkspacePath;
+  let beforeAllOutput;
+  let beforeEachOutput;
+  const isSharedWorkspace = !!sharedWorkspacePath; // returned to the caller unchanged
+  let caseWorkspaceFile;
+  const caseHooksEnabled = hooksEnabled(evalCase.workspace); // gates the case-level hooks and reset, not the target hook
+  const hookExecutions = []; // one record per hook attempted in this function, success or failure
+  if (!workspacePath) { // no shared workspace: build a per-case one (template, repos, skills files, before_all)
+    const rawCaseTemplate = evalCase.workspace?.template;
+    const resolvedCaseTemplate = await resolveWorkspaceTemplate(rawCaseTemplate);
+    const caseWorkspaceTemplate = resolvedCaseTemplate?.dir;
+    caseWorkspaceFile = resolvedCaseTemplate?.workspaceFile;
+    if (caseWorkspaceTemplate && evalRunId) { // a template directory is only instantiated when a run id is available
+      try {
+        workspacePath = await createTempWorkspace(caseWorkspaceTemplate, evalRunId, evalCase.id);
+      } catch (error40) {
+        const message = error40 instanceof Error ? error40.message : String(error40);
+        throw new WorkspaceSetupError(`Failed to create workspace: ${message}`, {
+          failureStage: "setup",
+          failureReasonCode: "template_error",
+          hookExecutions,
+          cause: error40
+        });
+      }
+      if (caseWorkspaceFile && workspacePath) { // prefer the same-named workspace file inside the new workspace, if it exists there
+        const copiedFile = path39.join(workspacePath, path39.basename(caseWorkspaceFile));
+        try {
+          await stat8(copiedFile);
+          caseWorkspaceFile = copiedFile;
+        } catch { // stat failed: keep the path that came from the resolved template
+        }
+      }
+    }
+    if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) { // no template workspace, but hooks or repos still need a directory
+      workspacePath = getWorkspacePath(evalRunId, evalCase.id);
+      await mkdir17(workspacePath, { recursive: true }); // not wrapped: a mkdir failure propagates as the raw error
+    }
+    if (evalCase.workspace?.repos?.length && workspacePath) { // materialize the case's repos with a RepoManager private to this case
+      const perCaseRepoManager = new RepoManager(setupDebug);
+      try {
+        if (setupDebug) {
+          console.log(
+            `[setup] test=${evalCase.id} materializing ${evalCase.workspace.repos.length} per-test repo(s) into ${workspacePath}`
+          );
+        }
+        await perCaseRepoManager.materializeAll(evalCase.workspace.repos, workspacePath);
+        if (setupDebug) {
+          console.log(`[setup] test=${evalCase.id} per-test repo materialization complete`);
+        }
+      } catch (error40) {
+        const message = error40 instanceof Error ? error40.message : String(error40);
+        throw new WorkspaceSetupError(`Failed to materialize repos: ${message}`, {
+          failureStage: "repo_setup",
+          failureReasonCode: "clone_error",
+          hookExecutions,
+          cause: error40
+        });
+      }
+    }
+    if (workspacePath && evalCase.metadata?.agent_skills_files) { // copy Agent Skills files into the workspace
+      const baseDir = evalCase.metadata.agent_skills_base_dir;
+      const files = evalCase.metadata.agent_skills_files;
+      if (baseDir && files.length > 0) {
+        for (const relPath of files) { // each file keeps its path relative to baseDir
+          const srcPath = path39.resolve(baseDir, relPath);
+          const destPath = path39.resolve(workspacePath, relPath); // NOTE(review): relPath is not checked for ".." segments — confirm it cannot point outside workspacePath
+          try {
+            await mkdir17(path39.dirname(destPath), { recursive: true });
+            await copyFile2(srcPath, destPath);
+          } catch (error40) { // covers mkdir and copy failures alike, despite the "not found" wording of the message
+            const message = error40 instanceof Error ? error40.message : String(error40);
+            throw new WorkspaceSetupError(
+              `Agent Skills eval file not found: ${relPath} (resolved from ${baseDir}): ${message}`,
+              {
+                failureStage: "setup",
+                failureReasonCode: "file_copy_error",
+                hookExecutions,
+                cause: error40
+              }
+            );
+          }
+        }
+      }
+    }
+    const caseBeforeAllHook = evalCase.workspace?.hooks?.before_all;
+    if (workspacePath && caseHooksEnabled && hasHookCommand(caseBeforeAllHook)) { // case-level before_all; never reached when a shared workspace is in use
+      const beforeAllHook = caseBeforeAllHook;
+      const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" "); // only used for the debug log line
+      if (setupDebug) {
+        console.log(
+          `[setup] test=${evalCase.id} running before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
+        );
+      }
+      const scriptContext = {
+        workspacePath,
+        testId: evalCase.id,
+        evalRunId: evalRunId ?? "",
+        caseInput: evalCase.question,
+        caseMetadata: evalCase.metadata,
+        evalDir,
+        workspaceFileDir: evalCase.workspace?.workspaceFileDir
+      };
+      try {
+        beforeAllOutput = await executeWorkspaceScript(
+          toScriptConfig(beforeAllHook, "before_all", `test '${evalCase.id}'`),
+          scriptContext
+        );
+        hookExecutions.push(
+          hookExecution({
+            scope: "workspace",
+            name: "before_all",
+            status: "success",
+            testId: evalCase.id,
+            workspacePath,
+            hook: beforeAllHook,
+            output: beforeAllOutput
+          })
+        );
+        if (setupDebug) {
+          console.log(`[setup] test=${evalCase.id} before_all completed`);
+        }
+      } catch (error40) {
+        const message = error40 instanceof Error ? error40.message : String(error40);
+        hookExecutions.push( // record the failure before throwing so it travels with the WorkspaceSetupError
+          hookExecution({
+            scope: "workspace",
+            name: "before_all",
+            status: "failed",
+            testId: evalCase.id,
+            workspacePath,
+            hook: beforeAllHook,
+            error: message
+          })
+        );
+        if (forceCleanup && workspacePath) { // best-effort removal: errors from the cleanup itself are ignored
+          await cleanupWorkspace(workspacePath).catch(() => {
+          });
+        }
+        throw new WorkspaceSetupError(`before_all script failed: ${message}`, {
+          failureStage: "setup",
+          failureReasonCode: "script_error",
+          hookExecutions,
+          cause: error40
+        });
+      }
+    }
+  }
+  let beforeEachNeedsFreshBaseline = false; // becomes true once any before_each hook (case or target) succeeds
+  if (caseHooksEnabled && workspacePath && evalCase.workspace?.hooks?.before_each?.reset && evalCase.workspace.hooks.before_each.reset !== "none") { // a reset mode other than "none" is configured: reset before running before_each
+    try {
+      if (repoManager && evalCase.workspace.repos?.length) { // repo-level reset needs both a repoManager and case repos
+        await repoManager.reset(
+          evalCase.workspace.repos,
+          workspacePath,
+          evalCase.workspace.hooks.before_each.reset
+        );
+      } else { // otherwise reset the workspace root, passing the shared baseline commit
+        await resetWorkspaceRoot(
+          workspacePath,
+          evalCase.workspace.hooks.before_each.reset,
+          sharedBaselineCommit
+        );
+      }
+    } catch (error40) {
+      const message = error40 instanceof Error ? error40.message : String(error40);
+      throw new WorkspaceSetupError(`before_each reset failed: ${message}`, {
+        failureStage: "setup",
+        failureReasonCode: "script_error",
+        hookExecutions,
+        cause: error40
+      });
+    }
+  }
+  const caseBeforeEachHook = evalCase.workspace?.hooks?.before_each;
+  if (workspacePath && caseHooksEnabled && hasHookCommand(caseBeforeEachHook)) { // case-level before_each; runs for shared and per-case workspaces
+    const beforeEachHook = caseBeforeEachHook;
+    const scriptContext = {
+      workspacePath,
+      testId: evalCase.id,
+      evalRunId: evalRunId ?? "",
+      caseInput: evalCase.question,
+      caseMetadata: evalCase.metadata,
+      evalDir,
+      workspaceFileDir: evalCase.workspace?.workspaceFileDir
+    };
+    try {
+      beforeEachOutput = await executeWorkspaceScript(
+        toScriptConfig(beforeEachHook, "before_each", `test '${evalCase.id}'`),
+        scriptContext
+      );
+      hookExecutions.push(
+        hookExecution({
+          scope: "workspace",
+          name: "before_each",
+          status: "success",
+          testId: evalCase.id,
+          workspacePath,
+          hook: beforeEachHook,
+          output: beforeEachOutput
+        })
+      );
+      beforeEachNeedsFreshBaseline = true;
+    } catch (error40) {
+      const message = error40 instanceof Error ? error40.message : String(error40);
+      hookExecutions.push(
+        hookExecution({
+          scope: "workspace",
+          name: "before_each",
+          status: "failed",
+          testId: evalCase.id,
+          workspacePath,
+          hook: beforeEachHook,
+          error: message
+        })
+      );
+      throw new WorkspaceSetupError(`before_each script failed: ${message}`, { // unlike before_all, no workspace cleanup is attempted here
+        failureStage: "setup",
+        failureReasonCode: "script_error",
+        hookExecutions,
+        cause: error40
+      });
+    }
+  }
+  const targetBeforeEachHook = targetHooks?.before_each;
+  if (workspacePath && hasHookCommand(targetBeforeEachHook)) { // target-level before_each; not gated by caseHooksEnabled
+    const scriptContext = {
+      workspacePath,
+      testId: evalCase.id,
+      evalRunId: evalRunId ?? "",
+      caseInput: evalCase.question,
+      caseMetadata: evalCase.metadata,
+      evalDir,
+      workspaceFileDir: evalCase.workspace?.workspaceFileDir
+    };
+    try {
+      await executeWorkspaceScript( // the target hook's output is not captured or returned
+        toScriptConfig(targetBeforeEachHook, "before_each", `target hook for '${evalCase.id}'`),
+        scriptContext
+      );
+      hookExecutions.push(
+        hookExecution({
+          scope: "target",
+          name: "before_each",
+          status: "success",
+          testId: evalCase.id,
+          workspacePath,
+          hook: targetBeforeEachHook
+        })
+      );
+      beforeEachNeedsFreshBaseline = true;
+    } catch (error40) {
+      const message = error40 instanceof Error ? error40.message : String(error40);
+      hookExecutions.push(
+        hookExecution({
+          scope: "target",
+          name: "before_each",
+          status: "failed",
+          testId: evalCase.id,
+          workspacePath,
+          hook: targetBeforeEachHook,
+          error: message
+        })
+      );
+      throw new WorkspaceSetupError(`target before_each hook failed: ${message}`, {
+        failureStage: "setup",
+        failureReasonCode: "script_error",
+        hookExecutions,
+        cause: error40
+      });
+    }
+  }
+  let baselineCommit = beforeEachNeedsFreshBaseline ? void 0 : sharedBaselineCommit; // a before_each hook ran: drop the shared baseline so a new one is taken
+  if (!baselineCommit && workspacePath) { // no usable baseline yet: initialize one for this workspace
+    try {
+      baselineCommit = await initializeBaseline(workspacePath);
+    } catch (error40) { // non-fatal: the result is returned without a baselineCommit
+      const message = error40 instanceof Error ? error40.message : String(error40);
+      if (setupDebug) {
+        console.warn(`[setup] test=${evalCase.id} baseline initialization failed: ${message}`);
+      }
+    }
+  }
+  return { // spread entries add their key only when the value is defined
+    ...workspacePath !== void 0 && { workspacePath },
+    caseWorkspaceFile: caseWorkspaceFile ?? suiteWorkspaceFile, // key is always present; value may be undefined
+    ...beforeAllOutput !== void 0 && { beforeAllOutput },
+    ...beforeEachOutput !== void 0 && { beforeEachOutput },
+    ...baselineCommit !== void 0 && { baselineCommit },
+    isSharedWorkspace,
+    hookExecutions
+  };
+}
+async function runPreflightChecks(env, cwd, log) {
+  const missing = [];
+  for (const cmd of env.required_commands ?? []) {
+    log(`preflight: checking command "${cmd}"`);
+    try {
+      if (process.platform === "win32") {
+        await execFileAsync2("where", [cmd], { cwd });
+      } else {
+        await execFileAsync2("sh", ["-c", `command -v ${cmd}`], { cwd });
+      }
+    } catch {
+      missing.push(`command: ${cmd}`);
+    }
+  }
+  for (const mod of env.required_python_modules ?? []) {
+    log(`preflight: checking Python module "${mod}"`);
+    try {
+      await execFileAsync2("python3", ["-c", `import ${mod}`], { cwd });
+    } catch {
+      missing.push(`python module: ${mod}`);
+    }
+  }
+  if (missing.length > 0) {
+    throw new Error(
+      `Preflight checks failed \u2014 missing dependencies:
+${missing.map((m) => `  \u2022 ${m}`).join("\n")}
+Install the missing dependencies before running this eval.`
+    );
   }
-  return result.stdout;
 }
 function flattenInputMessages(messages) {
   return messages.flatMap((message) => extractContentSegments(message.content));
@@ -34305,7 +35225,7 @@ async function loadTestsFromAgentSkills(filePath) {
   } catch {
     throw new Error(`Invalid Agent Skills evals.json: failed to parse JSON in '${filePath}'`);
   }
-  return parseAgentSkillsEvals(parsed, filePath, path39.dirname(path39.resolve(filePath)));
+  return parseAgentSkillsEvals(parsed, filePath, path40.dirname(path40.resolve(filePath)));
 }
 function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
   if (!isAgentSkillsFormat(parsed)) {
@@ -34343,7 +35263,7 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
       if (baseDir) {
         metadata.agent_skills_base_dir = baseDir;
         for (const file2 of evalCase.files) {
-          filePaths.push(path39.resolve(baseDir, file2));
+          filePaths.push(path40.resolve(baseDir, file2));
         }
       }
     }
@@ -34379,15 +35299,15 @@ function resolveToAbsolutePath(candidate) {
     if (candidate.startsWith("file:")) {
       return fileURLToPath4(candidate);
     }
-    return path40.resolve(candidate);
+    return path41.resolve(candidate);
   }
   throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
 }
 function buildDirectoryChain2(filePath, repoRoot) {
   const directories = [];
   const seen = /* @__PURE__ */ new Set();
-  const boundary = path40.resolve(repoRoot);
-  let current = path40.resolve(path40.dirname(filePath));
+  const boundary = path41.resolve(repoRoot);
+  let current = path41.resolve(path41.dirname(filePath));
   while (current !== void 0) {
     if (!seen.has(current)) {
       directories.push(current);
@@ -34396,7 +35316,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
     if (current === boundary) {
       break;
     }
-    const parent = path40.dirname(current);
+    const parent = path41.dirname(current);
     if (parent === current) {
       break;
     }
@@ -34410,16 +35330,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
 function buildSearchRoots2(evalPath, repoRoot) {
   const uniqueRoots = [];
   const addRoot = (root) => {
-    const normalized = path40.resolve(root);
+    const normalized = path41.resolve(root);
     if (!uniqueRoots.includes(normalized)) {
       uniqueRoots.push(normalized);
     }
   };
-  let currentDir = path40.dirname(evalPath);
+  let currentDir = path41.dirname(evalPath);
   let reachedBoundary = false;
   while (!reachedBoundary) {
     addRoot(currentDir);
-    const parentDir = path40.dirname(currentDir);
+    const parentDir = path41.dirname(currentDir);
     if (currentDir === repoRoot || parentDir === currentDir) {
       reachedBoundary = true;
     } else {
@@ -34437,16 +35357,16 @@ function trimLeadingSeparators2(value) {
 async function resolveFileReference3(rawValue, searchRoots) {
   const displayPath = trimLeadingSeparators2(rawValue);
   const potentialPaths = [];
-  if (path40.isAbsolute(rawValue)) {
-    potentialPaths.push(path40.normalize(rawValue));
+  if (path41.isAbsolute(rawValue)) {
+    potentialPaths.push(path41.normalize(rawValue));
   }
   for (const base of searchRoots) {
-    potentialPaths.push(path40.resolve(base, displayPath));
+    potentialPaths.push(path41.resolve(base, displayPath));
   }
   const attempted = [];
   const seen = /* @__PURE__ */ new Set();
   for (const candidate of potentialPaths) {
-    const absoluteCandidate = path40.resolve(candidate);
+    const absoluteCandidate = path41.resolve(candidate);
     if (seen.has(absoluteCandidate)) {
       continue;
     }
@@ -34467,9 +35387,9 @@ var DEFAULT_EVAL_PATTERNS = [
 ];
 async function loadConfig(evalFilePath, repoRoot) {
   const directories = buildDirectoryChain2(evalFilePath, repoRoot);
-  const globalConfigPath = path41.join(getAgentvConfigDir(), "config.yaml");
+  const globalConfigPath = path422.join(getAgentvConfigDir(), "config.yaml");
   for (const directory of directories) {
-    const configPath2 = path41.join(directory, ".agentv", "config.yaml");
+    const configPath2 = path422.join(directory, ".agentv", "config.yaml");
     if (!await fileExists3(configPath2)) {
       continue;
     }
@@ -35020,8 +35940,8 @@ function isTemplateReference(value) {
 }
 async function resolveAssertionTemplateReference(include, searchRoots) {
   const templateCandidates = isTemplateReference(include) ? [
-    path422.join(".agentv", "templates", `${include}.yaml`),
-    path422.join(".agentv", "templates", `${include}.yml`)
+    path43.join(".agentv", "templates", `${include}.yaml`),
+    path43.join(".agentv", "templates", `${include}.yml`)
   ] : [include];
   const attempted = [];
   for (const candidate of templateCandidates) {
@@ -35074,10 +35994,10 @@ ${resolved.attempted.map((attempt) => `  Tried: ${attempt}`).join("\n")}` : "";
       `Invalid assertion template file in '${evalId}': ${resolved.resolvedPath} is missing a top-level assertions array`
     );
   }
-  const templateDir = path422.dirname(resolved.resolvedPath);
+  const templateDir = path43.dirname(resolved.resolvedPath);
   const nestedSearchRoots = [
     templateDir,
-    ...searchRoots.filter((root) => path422.resolve(root) !== templateDir)
+    ...searchRoots.filter((root) => path43.resolve(root) !== templateDir)
   ];
   return await expandGraderEntries(assertions, nestedSearchRoots, evalId, {
     depth: nextDepth,
@@ -35138,7 +36058,7 @@ async function collectAssertionTemplateReferencesFromValue(value, searchRoots, e
         references.push({
           kind: "assertion_template",
           displayPath: resolved.displayPath,
-          ...resolved.resolvedPath ? { resolvedPath: path422.resolve(resolved.resolvedPath) } : {}
+          ...resolved.resolvedPath ? { resolvedPath: path43.resolve(resolved.resolvedPath) } : {}
         });
         if (resolved.resolvedPath) {
           if (includeContext.chain.includes(resolved.resolvedPath)) {
@@ -35148,10 +36068,10 @@ async function collectAssertionTemplateReferencesFromValue(value, searchRoots, e
           const content = await readFile15(resolved.resolvedPath, "utf8");
           const parsed = interpolateEnv(parseYamlValue(content), process.env);
           if (isJsonObject2(parsed) && Array.isArray(parsed.assertions)) {
-            const templateDir = path422.dirname(resolved.resolvedPath);
+            const templateDir = path43.dirname(resolved.resolvedPath);
             const nestedSearchRoots = [
               templateDir,
-              ...searchRoots.filter((root) => path422.resolve(root) !== templateDir)
+              ...searchRoots.filter((root) => path43.resolve(root) !== templateDir)
             ];
             references.push(
               ...await collectAssertionTemplateReferencesFromValue(
@@ -35337,7 +36257,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
       if (cwd) {
         const resolved = await resolveFileReference3(cwd, searchRoots);
         if (resolved.resolvedPath) {
-          resolvedCwd = path422.resolve(resolved.resolvedPath);
+          resolvedCwd = path43.resolve(resolved.resolvedPath);
         } else {
           logWarning2(
             `Code-grader evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
@@ -35523,7 +36443,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
             aggregatorPrompt = fileRef;
             const resolved = await resolveFileReference3(fileRef, searchRoots);
             if (resolved.resolvedPath) {
-              promptPath2 = path422.resolve(resolved.resolvedPath);
+              promptPath2 = path43.resolve(resolved.resolvedPath);
             } else {
               throw new Error(
                 `Composite aggregator in '${evalId}': prompt file not found: ${resolved.displayPath}`
@@ -36203,7 +37123,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
       const commandPath = commandArray[commandArray.length - 1];
       const resolved = await resolveFileReference3(commandPath, searchRoots);
       if (resolved.resolvedPath) {
-        resolvedPromptScript = [...commandArray.slice(0, -1), path422.resolve(resolved.resolvedPath)];
+        resolvedPromptScript = [...commandArray.slice(0, -1), path43.resolve(resolved.resolvedPath)];
       } else {
         throw new Error(
           `Grader '${name}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
@@ -36218,7 +37138,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
         prompt = fileRef;
         const resolved = await resolveFileReference3(fileRef, searchRoots);
         if (resolved.resolvedPath) {
-          promptPath = path422.resolve(resolved.resolvedPath);
+          promptPath = path43.resolve(resolved.resolvedPath);
           try {
             await validateCustomPromptContent(promptPath);
           } catch (error40) {
@@ -36376,7 +37296,7 @@ async function parsePreprocessors(rawValue, searchRoots, evaluatorName, evalId)
     preprocessors.push({
       type,
       command,
-      resolvedCommand: [...command.slice(0, -1), path422.resolve(resolved.resolvedPath)]
+      resolvedCommand: [...command.slice(0, -1), path43.resolve(resolved.resolvedPath)]
     });
   }
   return preprocessors;
@@ -36471,10 +37391,10 @@ async function resolveOptionalCommandSource(command, searchRoots) {
     return void 0;
   }
   const resolved = await resolveFileReference3(candidate, searchRoots);
-  return resolved.resolvedPath ? path422.resolve(resolved.resolvedPath) : void 0;
+  return resolved.resolvedPath ? path43.resolve(resolved.resolvedPath) : void 0;
 }
 function looksLikeFilePath(value) {
-  return path422.isAbsolute(value) || value.startsWith(".") || value.includes("/") || value.includes("\\") || /\.[cm]?[jt]sx?$|\.py$|\.sh$|\.bash$|\.rb$|\.go$|\.rs$/i.test(value);
+  return path43.isAbsolute(value) || value.startsWith(".") || value.includes("/") || value.includes("\\") || /\.[cm]?[jt]sx?$|\.py$|\.sh$|\.bash$|\.rb$|\.go$|\.rs$/i.test(value);
 }
 function parseCommandToArgv(command) {
   if (process.platform === "win32") {
@@ -36862,7 +37782,7 @@ var IMAGE_MEDIA_TYPES = {
   ".bmp": "image/bmp"
 };
 function detectImageMediaType(filePath) {
-  const ext = path43.extname(filePath).toLowerCase();
+  const ext = path44.extname(filePath).toLowerCase();
   return IMAGE_MEDIA_TYPES[ext];
 }
 var ANSI_YELLOW3 = "\x1B[33m";
@@ -36926,7 +37846,7 @@ async function processMessages(options) {
             ...cloneJsonObject(rawSegment),
             path: displayPath,
             text: fileContent,
-            resolvedPath: path43.resolve(resolvedPath)
+            resolvedPath: path44.resolve(resolvedPath)
           });
           if (verbose) {
             const label = messageType === "input" ? "[File]" : "[Expected Output File]";
@@ -37050,7 +37970,7 @@ async function processExpectedMessages(options) {
               type: "file",
               path: displayPath,
               text: fileContent,
-              resolvedPath: path43.resolve(resolvedPath)
+              resolvedPath: path44.resolve(resolvedPath)
             });
             if (verbose) {
               console.log(`  [Expected Output File] Found: ${displayPath}`);
@@ -37196,7 +38116,7 @@ function matchesFilter(id, filter) {
   return typeof filter === "string" ? micromatch.isMatch(id, filter) : filter.some((pattern) => micromatch.isMatch(id, pattern));
 }
 function detectFormat(filePath) {
-  const ext = path44.extname(filePath).toLowerCase();
+  const ext = path45.extname(filePath).toLowerCase();
   if (ext === ".jsonl") return "jsonl";
   if (ext === ".yaml" || ext === ".yml") return "yaml";
   if (ext === ".json") return "agent-skills-json";
@@ -37206,9 +38126,9 @@ function detectFormat(filePath) {
   );
 }
 async function loadSidecarMetadata(jsonlPath, verbose) {
-  const dir = path44.dirname(jsonlPath);
-  const base = path44.basename(jsonlPath, ".jsonl");
-  const sidecarPath = path44.join(dir, `${base}.yaml`);
+  const dir = path45.dirname(jsonlPath);
+  const base = path45.basename(jsonlPath, ".jsonl");
+  const sidecarPath = path45.join(dir, `${base}.yaml`);
   if (!await fileExists3(sidecarPath)) {
     if (verbose) {
       logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
@@ -37257,13 +38177,13 @@ function parseJsonlContent(content, filePath) {
 async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
   const verbose = options?.verbose ?? false;
   const filterPattern = options?.filter;
-  const absoluteTestPath = path44.resolve(evalFilePath);
+  const absoluteTestPath = path45.resolve(evalFilePath);
   const repoRootPath = resolveToAbsolutePath(repoRoot);
   const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
   const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
   const rawFile = await readFile17(absoluteTestPath, "utf8");
   const rawCases = parseJsonlContent(rawFile, evalFilePath);
-  const fallbackSuiteName = path44.basename(absoluteTestPath, ".jsonl") || "eval";
+  const fallbackSuiteName = path45.basename(absoluteTestPath, ".jsonl") || "eval";
   const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
   const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
   const globalExecution = sidecar.execution;
@@ -37660,7 +38580,7 @@ function interpolateRawEvalCase(raw, vars) {
 }
 async function readTestSuiteMetadata(testFilePath) {
   try {
-    const absolutePath = path45.resolve(testFilePath);
+    const absolutePath = path46.resolve(testFilePath);
     const content = await readFile18(absolutePath, "utf8");
     const parsed = interpolateEnv(parseYamlValue(content), process.env);
     if (!isJsonObject(parsed)) {
@@ -37685,7 +38605,7 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
     return { tests: await loadTestsFromAgentSkills(evalFilePath) };
   }
   if (format === "typescript") {
-    const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-NWH3B4HG-UXXCZKLP.js");
+    const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-ZVL6CGTE-TZYZX3QS.js");
     return loadTsEvalSuite2(evalFilePath, resolveToAbsolutePath(repoRoot), options);
   }
   const { tests, parsed, suiteWorkspacePath } = await loadTestsFromYaml(
@@ -37720,7 +38640,7 @@ async function loadTests(evalFilePath, repoRoot, options) {
     return loadTestsFromAgentSkills(evalFilePath);
   }
   if (format === "typescript") {
-    const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-NWH3B4HG-UXXCZKLP.js");
+    const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-ZVL6CGTE-TZYZX3QS.js");
     const suite = await loadTsEvalSuite2(evalFilePath, resolveToAbsolutePath(repoRoot), options);
     return suite.tests;
   }
@@ -37731,7 +38651,7 @@ var loadEvalCases = loadTests;
 async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
   const verbose = options?.verbose ?? false;
   const filterPattern = options?.filter;
-  const absoluteTestPath = path45.resolve(evalFilePath);
+  const absoluteTestPath = path46.resolve(evalFilePath);
   const repoRootPath = resolveToAbsolutePath(repoRoot);
   const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
   const config2 = await loadConfig(absoluteTestPath, repoRootPath);
@@ -37744,7 +38664,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
   }
   const suite = interpolated;
   const suiteNameFromFile = asString5(suite.name)?.trim();
-  const fallbackSuiteName = path45.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
+  const fallbackSuiteName = path46.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
   const suiteName = suiteNameFromFile && suiteNameFromFile.length > 0 ? suiteNameFromFile : fallbackSuiteName;
   const rawTestCases = resolveTests(suite);
   const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
@@ -37754,13 +38674,13 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
     "<suite>",
     absoluteTestPath
   );
-  const evalFileDir = path45.dirname(absoluteTestPath);
+  const evalFileDir = path46.dirname(absoluteTestPath);
   let expandedTestCases;
   if (typeof rawTestCases === "string") {
-    const externalPath = path45.resolve(evalFileDir, rawTestCases);
+    const externalPath = path46.resolve(evalFileDir, rawTestCases);
     let isDir = false;
     try {
-      const pathStat = await stat8(externalPath);
+      const pathStat = await stat9(externalPath);
       isDir = pathStat.isDirectory();
     } catch {
     }
@@ -38061,7 +38981,7 @@ function collectInputSourceReferences(inputMessages) {
       references.push({
         kind: "input_file",
         displayPath,
-        ...typeof segment.resolvedPath === "string" ? { resolvedPath: path45.resolve(segment.resolvedPath) } : {}
+        ...typeof segment.resolvedPath === "string" ? { resolvedPath: path46.resolve(segment.resolvedPath) } : {}
       });
     }
   }
@@ -38134,7 +39054,7 @@ function collectSingleGraderSourceReferences(evaluator) {
       references.push({
         kind: "code_grader_command",
         displayPath: evaluator.aggregator.path,
-        resolvedPath: path45.resolve(evaluator.aggregator.cwd ?? "", evaluator.aggregator.path),
+        resolvedPath: path46.resolve(evaluator.aggregator.cwd ?? "", evaluator.aggregator.path),
         graderName: evaluator.name
       });
     } else if (evaluator.aggregator.type === "llm-grader" && evaluator.aggregator.promptPath) {
@@ -38167,9 +39087,9 @@ function dedupeSourceReferences(references) {
   return deduped;
 }
 function toPortableRelativePath(root, candidate) {
-  const relative = path45.relative(root, candidate);
-  if (relative && !relative.startsWith("..") && !path45.isAbsolute(relative)) {
-    return relative.split(path45.sep).join("/");
+  const relative = path46.relative(root, candidate);
+  if (relative && !relative.startsWith("..") && !path46.isAbsolute(relative)) {
+    return relative.split(path46.sep).join("/");
   }
   return void 0;
 }
@@ -38223,8 +39143,8 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
   if (!command) return void 0;
   const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
   let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
-  if (cwd && !path45.isAbsolute(cwd)) {
-    cwd = path45.resolve(evalFileDir, cwd);
+  if (cwd && !path46.isAbsolute(cwd)) {
+    cwd = path46.resolve(evalFileDir, cwd);
   }
   const config2 = { command };
   if (timeoutMs !== void 0) {
@@ -38262,7 +39182,7 @@ function parseWorkspaceHooksConfig(raw, evalFileDir) {
 }
 async function resolveWorkspaceConfig(raw, evalFileDir) {
   if (typeof raw === "string") {
-    const workspaceFilePath = path45.resolve(evalFileDir, raw);
+    const workspaceFilePath = path46.resolve(evalFileDir, raw);
     let content;
     try {
       content = await readFile18(workspaceFilePath, "utf8");
@@ -38275,7 +39195,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
         `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
       );
     }
-    const workspaceFileDir = path45.dirname(workspaceFilePath);
+    const workspaceFileDir = path46.dirname(workspaceFilePath);
     const resolvedWorkspace = parseWorkspaceConfig(parsed, workspaceFileDir);
     if (resolvedWorkspace) {
       return { ...resolvedWorkspace, workspaceFileDir };
@@ -38309,8 +39229,8 @@ function parseWorkspaceConfig(raw, evalFileDir) {
     throw new Error("workspace.static has been removed. Use workspace.mode='static'.");
   }
   let template = typeof obj.template === "string" ? obj.template : void 0;
-  if (template && !path45.isAbsolute(template)) {
-    template = path45.resolve(evalFileDir, template);
+  if (template && !path46.isAbsolute(template)) {
+    template = path46.resolve(evalFileDir, template);
   }
   const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
   const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
@@ -38447,8 +39367,6 @@ ${detailBlock}${ANSI_RESET6}`);
     console.error(`${ANSI_RED3}Error: ${message}${ANSI_RESET6}`);
   }
 }
-var execFileAsync2 = promisify6(execFile2);
-var WORKSPACE_GIT_TIMEOUT_MS = 3e5;
 function pathFromRoot(root) {
   return root instanceof URL ? fileURLToPath5(root) : String(root);
 }
@@ -38470,53 +39388,84 @@ function buildSkippedEvaluatorError(scores) {
 function usesFileReferencePrompt(provider) {
   return isAgentProvider(provider) || provider.kind === "cli";
 }
-function toScriptConfig(hook, hookName, context) {
-  const command = hook.command ?? hook.script;
-  if (!command || command.length === 0) {
-    throw new Error(`${hookName} hook in ${context} requires command or script`);
+function createEvaluationRuntime(options) {
+  const {
+    target,
+    targets,
+    env,
+    providerFactory,
+    evalFilePath,
+    graderTarget: cliGraderTarget,
+    model: cliModel
+  } = options;
+  const resolvedTargetsByName = /* @__PURE__ */ new Map();
+  resolvedTargetsByName.set(target.name, target);
+  const targetDefinitions = /* @__PURE__ */ new Map();
+  for (const definition of targets ?? []) {
+    targetDefinitions.set(definition.name, definition);
   }
-  return {
-    command,
-    ...hook.timeout_ms !== void 0 && { timeout_ms: hook.timeout_ms },
-    ...hook.timeoutMs !== void 0 && { timeoutMs: hook.timeoutMs },
-    ...hook.cwd !== void 0 && { cwd: hook.cwd },
-    ...hook.script !== void 0 && { script: hook.script }
+  const envLookup = env ?? process.env;
+  const providerCache = /* @__PURE__ */ new Map();
+  const getOrCreateProvider = (resolved) => {
+    const existing = providerCache.get(resolved.name);
+    if (existing) {
+      return existing;
+    }
+    const factory = providerFactory ?? createProvider;
+    const instance = factory(resolved);
+    providerCache.set(resolved.name, instance);
+    return instance;
   };
-}
-function hasHookCommand(hook) {
-  return !!(hook?.command && hook.command.length > 0 || hook?.script && hook.script.length > 0);
-}
-function hooksEnabled(workspace) {
-  return workspace?.hooks?.enabled !== false;
-}
-function workspaceGitEnv() {
-  const env = { ...process.env };
-  for (const key of Object.keys(env)) {
-    if (key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND") {
-      delete env[key];
+  const resolveTargetByName = (name) => {
+    if (resolvedTargetsByName.has(name)) {
+      return resolvedTargetsByName.get(name);
     }
-  }
-  return {
-    ...env,
-    GIT_TERMINAL_PROMPT: "0",
-    GIT_ASKPASS: "",
-    GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
+    const definition = resolveDelegatedTargetDefinition(name, targetDefinitions, envLookup);
+    if (!definition) {
+      return void 0;
+    }
+    const resolved = resolveTargetDefinition(definition, envLookup, evalFilePath ?? "");
+    resolvedTargetsByName.set(name, resolved);
+    return resolved;
   };
-}
-async function resetWorkspaceRoot(workspacePath, resetMode, baselineRef) {
-  if (!existsSync6(path46.join(workspacePath, ".git"))) {
-    return false;
-  }
-  const cleanFlag = resetMode === "strict" ? "-fdx" : "-fd";
-  const opts = {
-    cwd: workspacePath,
-    timeout: WORKSPACE_GIT_TIMEOUT_MS,
-    env: workspaceGitEnv(),
-    maxBuffer: 50 * 1024 * 1024
+  const resolveGraderProvider = async (targetContext) => {
+    if (cliGraderTarget) {
+      if (cliGraderTarget === "agentv") {
+        if (!cliModel) {
+          throw new Error('--grader-target "agentv" requires --model (e.g., "openai:gpt-5-mini")');
+        }
+        const { AgentvProvider: AgentvProvider2 } = await import("./agentv-provider-AYXH7WLW-NJRC6UQX.js");
+        return new AgentvProvider2("agentv", { model: cliModel, temperature: 0 });
+      }
+      const overrideTarget = resolveTargetByName(cliGraderTarget);
+      if (!overrideTarget) {
+        throw new Error(`--grader-target "${cliGraderTarget}" not found in targets`);
+      }
+      return getOrCreateProvider(overrideTarget);
+    }
+    const graderName = targetContext.graderTarget ?? targetContext.name;
+    const resolvedGrader = resolveTargetByName(graderName);
+    if (!resolvedGrader) {
+      if (!LLM_GRADER_CAPABLE_KINDS.includes(targetContext.kind)) {
+        return void 0;
+      }
+      return getOrCreateProvider(targetContext);
+    }
+    return getOrCreateProvider(resolvedGrader);
+  };
+  const targetResolver = (name) => {
+    const resolved = resolveTargetByName(name);
+    if (!resolved) {
+      return void 0;
+    }
+    return getOrCreateProvider(resolved);
+  };
+  return {
+    getOrCreateProvider,
+    resolveGraderProvider,
+    targetResolver,
+    availableTargets: [target.name, ...Array.from(targetDefinitions.keys())]
   };
-  await execFileAsync2("git", ["reset", "--hard", baselineRef ?? "HEAD"], opts);
-  await execFileAsync2("git", ["clean", cleanFlag], opts);
-  return true;
 }
 function validateDependencyGraph(tests) {
   const ids = /* @__PURE__ */ new Set();
@@ -38547,18 +39496,18 @@ function validateDependencyGraph(tests) {
   }
   const visited = /* @__PURE__ */ new Set();
   const visiting = /* @__PURE__ */ new Set();
-  function visit(id, path50) {
+  function visit(id, path51) {
     if (visiting.has(id)) {
-      const cycle = [...path50.slice(path50.indexOf(id)), id];
+      const cycle = [...path51.slice(path51.indexOf(id)), id];
       throw new Error(`Circular dependency detected: ${cycle.join(" \u2192 ")}`);
     }
     if (visited.has(id)) return;
     visiting.add(id);
-    path50.push(id);
+    path51.push(id);
     for (const dep of depMap.get(id) ?? []) {
-      visit(dep, path50);
+      visit(dep, path51);
     }
-    path50.pop();
+    path51.pop();
     visiting.delete(id);
     visited.add(id);
   }
@@ -38612,6 +39561,170 @@ function computeWaves(tests) {
   }
   return waves;
 }
+function createPreparedProvider(target) {
+  return {
+    id: `prepared:${target.name}`,
+    kind: target.kind,
+    targetName: target.name,
+    async invoke() {
+      throw new Error("Prepared grading does not invoke the target provider");
+    }
+  };
+}
+function withPreparedMetadata(evalCase, preparedAttempt) {
+  return {
+    ...evalCase.metadata,
+    preparedAttempt
+  };
+}
+async function gradePreparedEvalCase(options) {
+  const {
+    evalCase,
+    target,
+    targets,
+    env,
+    evaluators,
+    providerFactory,
+    agentTimeoutMs,
+    graderTarget,
+    model,
+    evalFilePath,
+    workspacePath,
+    baselineCommit,
+    response,
+    verbose,
+    threshold: caseThreshold,
+    preparedAttempt
+  } = options;
+  const nowFn = options.now ?? (() => /* @__PURE__ */ new Date());
+  const caseStartMs = Date.now();
+  const provider = createPreparedProvider(target);
+  const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
+  const promptInputs = await buildPromptInputs(evalCase, formattingMode);
+  const typeRegistry = createBuiltinRegistry();
+  const runtime = createEvaluationRuntime({
+    target,
+    targets,
+    env,
+    providerFactory,
+    evalFilePath,
+    graderTarget,
+    model
+  });
+  const evaluatorRegistry = buildEvaluatorRegistry(evaluators, runtime.resolveGraderProvider);
+  const discoveryBaseDir = evalFilePath ? path47.dirname(path47.resolve(evalFilePath)) : process.cwd();
+  await discoverAssertions(typeRegistry, discoveryBaseDir);
+  await discoverGraders(typeRegistry, discoveryBaseDir);
+  let fileChanges;
+  if (baselineCommit) {
+    try {
+      const diff = await captureFileChanges(workspacePath, baselineCommit);
+      if (diff.length > 0) {
+        fileChanges = diff;
+      }
+    } catch (error40) {
+      if (verbose) {
+        const message = error40 instanceof Error ? error40.message : String(error40);
+        console.warn(`Warning: failed to capture prepared workspace diff: ${message}`);
+      }
+    }
+  }
+  const candidate = response ?? "";
+  const input = buildResultInput(promptInputs);
+  const outputMessages = candidate.length > 0 ? [{ role: "assistant", content: candidate }] : [];
+  const resultTrace = buildTraceFromMessages({
+    input,
+    output: outputMessages,
+    finalOutput: candidate,
+    provider: provider.kind,
+    target: target.name,
+    testId: evalCase.id,
+    conversationId: evalCase.conversation_id
+  });
+  try {
+    const gradeStartedAt = nowFn();
+    const { score, scores } = await runEvaluatorsForCase({
+      evalCase,
+      candidate,
+      target,
+      provider,
+      evaluators: evaluatorRegistry,
+      typeRegistry,
+      attempt: 0,
+      promptInputs,
+      now: gradeStartedAt,
+      agentTimeoutMs,
+      targetResolver: runtime.targetResolver,
+      availableTargets: runtime.availableTargets,
+      fileChanges,
+      workspacePath,
+      dockerConfig: evalCase.workspace?.docker,
+      threshold: evalCase.threshold ?? caseThreshold
+    });
+    const timestamp = nowFn();
+    const effectiveThreshold = evalCase.threshold ?? caseThreshold;
+    const graderTokens = aggregateEvaluatorTokenUsage(scores);
+    const evalRun = {
+      durationMs: Date.now() - caseStartMs,
+      ...graderTokens ? { tokenUsage: graderTokens } : {}
+    };
+    const skippedEvaluatorError = buildSkippedEvaluatorError(scores);
+    const executionStatus = skippedEvaluatorError ? "execution_error" : classifyQualityStatus(score.score, effectiveThreshold);
+    const baseResult = {
+      timestamp: timestamp.toISOString(),
+      testId: evalCase.id,
+      suite: evalCase.suite,
+      category: evalCase.category,
+      conversationId: evalCase.conversation_id,
+      score: skippedEvaluatorError ? 0 : score.score,
+      assertions: score.assertions,
+      target: target.name,
+      input,
+      output: candidate,
+      scores,
+      trace: resultTrace,
+      fileChanges,
+      workspacePath,
+      evalRun,
+      metadata: withPreparedMetadata(evalCase, preparedAttempt),
+      executionStatus
+    };
+    if (!skippedEvaluatorError) {
+      return baseResult;
+    }
+    return {
+      ...baseResult,
+      trace: appendErrorEventToTrace(baseResult.trace, skippedEvaluatorError, {
+        failure_stage: "evaluator",
+        failure_reason_code: "evaluator_error"
+      }),
+      error: skippedEvaluatorError,
+      failureStage: "evaluator",
+      failureReasonCode: "evaluator_error",
+      executionError: { message: skippedEvaluatorError, stage: "evaluator" }
+    };
+  } catch (error40) {
+    const evalRun = { durationMs: Date.now() - caseStartMs };
+    const errorResult = buildErrorResult(
+      evalCase,
+      target.name,
+      nowFn(),
+      error40,
+      promptInputs,
+      provider,
+      "evaluator",
+      "evaluator_error",
+      verbose
+    );
+    return {
+      ...errorResult,
+      evalRun,
+      fileChanges,
+      workspacePath,
+      metadata: withPreparedMetadata(evalCase, preparedAttempt)
+    };
+  }
+}
 async function runEvaluation(options) {
   const {
     testFilePath: evalFilePath,
@@ -38667,80 +39780,24 @@ async function runEvaluation(options) {
     }
     return [];
   }
-  const resolvedTargetsByName = /* @__PURE__ */ new Map();
-  resolvedTargetsByName.set(target.name, target);
-  const targetDefinitions = /* @__PURE__ */ new Map();
-  for (const definition of targets ?? []) {
-    targetDefinitions.set(definition.name, definition);
-  }
-  const envLookup = env ?? process.env;
-  const providerCache = /* @__PURE__ */ new Map();
-  const getOrCreateProvider = (resolved) => {
-    const existing = providerCache.get(resolved.name);
-    if (existing) {
-      return existing;
-    }
-    const factory = providerFactory ?? createProvider;
-    const instance = factory(resolved);
-    providerCache.set(resolved.name, instance);
-    return instance;
-  };
-  const resolveTargetByName = (name) => {
-    if (resolvedTargetsByName.has(name)) {
-      return resolvedTargetsByName.get(name);
-    }
-    const definition = resolveDelegatedTargetDefinition(name, targetDefinitions, envLookup);
-    if (!definition) {
-      return void 0;
-    }
-    const resolved = resolveTargetDefinition(definition, envLookup, evalFilePath);
-    resolvedTargetsByName.set(name, resolved);
-    return resolved;
-  };
-  const resolveGraderProvider = async (targetContext) => {
-    if (cliGraderTarget) {
-      if (cliGraderTarget === "agentv") {
-        if (!cliModel) {
-          throw new Error('--grader-target "agentv" requires --model (e.g., "openai:gpt-5-mini")');
-        }
-        const { AgentvProvider: AgentvProvider2 } = await import("./agentv-provider-AYXH7WLW-NJRC6UQX.js");
-        return new AgentvProvider2("agentv", { model: cliModel, temperature: 0 });
-      }
-      const overrideTarget = resolveTargetByName(cliGraderTarget);
-      if (!overrideTarget) {
-        throw new Error(`--grader-target "${cliGraderTarget}" not found in targets`);
-      }
-      return getOrCreateProvider(overrideTarget);
-    }
-    const graderName = targetContext.graderTarget ?? targetContext.name;
-    const resolvedGrader = resolveTargetByName(graderName);
-    if (!resolvedGrader) {
-      if (!LLM_GRADER_CAPABLE_KINDS.includes(targetContext.kind)) {
-        return void 0;
-      }
-      return getOrCreateProvider(targetContext);
-    }
-    return getOrCreateProvider(resolvedGrader);
-  };
+  const runtime = createEvaluationRuntime({
+    target,
+    targets,
+    env,
+    providerFactory,
+    evalFilePath,
+    graderTarget: cliGraderTarget,
+    model: cliModel
+  });
+  const { getOrCreateProvider, resolveGraderProvider, targetResolver, availableTargets } = runtime;
   if (isAgentProvider(getOrCreateProvider(target)) && !target.graderTarget && !cliGraderTarget) {
     throw new Error(
       `Target "${target.name}" is an agent provider ("${target.kind}") with no grader_target \u2014 agent providers cannot return structured JSON for grading. Set grader_target to an LLM provider (e.g., azure-llm).`
     );
   }
-  const targetResolver = (name) => {
-    const resolved = resolveTargetByName(name);
-    if (!resolved) {
-      return void 0;
-    }
-    return getOrCreateProvider(resolved);
-  };
-  const availableTargets = [
-    target.name,
-    ...Array.from(targetDefinitions.keys())
-  ];
   const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
   const typeRegistry = createBuiltinRegistry();
-  const discoveryBaseDir = evalFilePath ? path46.dirname(path46.resolve(evalFilePath)) : process.cwd();
+  const discoveryBaseDir = evalFilePath ? path47.dirname(path47.resolve(evalFilePath)) : process.cwd();
   const evalDir = discoveryBaseDir;
   await discoverAssertions(typeRegistry, discoveryBaseDir);
   await discoverGraders(typeRegistry, discoveryBaseDir);
@@ -38796,132 +39853,38 @@ async function runEvaluation(options) {
       }
     }
   }
-  const suiteWorkspace = filteredEvalCases[0]?.workspace;
-  const rawTemplate = suiteWorkspace?.template;
-  const resolvedTemplate = await resolveWorkspaceTemplate(rawTemplate);
-  const workspaceTemplate = resolvedTemplate?.dir;
-  let suiteWorkspaceFile = resolvedTemplate?.workspaceFile;
-  const setupLog = (message) => {
-    if (verbose) {
-      console.log(`[setup] ${message}`);
-    }
-  };
-  const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
-  const cliWorkspacePath = workspacePath ?? legacyWorkspacePath;
-  const yamlWorkspacePath = suiteWorkspace?.path;
-  if (cliWorkspacePath && workspaceMode && workspaceMode !== "static") {
-    throw new Error("--workspace-path requires --workspace-mode static when both are provided");
-  }
-  let configuredMode = cliWorkspacePath ? "static" : workspaceMode ?? suiteWorkspace?.mode ?? (yamlWorkspacePath ? "static" : "pooled");
-  const configuredStaticPath = cliWorkspacePath ?? yamlWorkspacePath;
-  if (configuredMode === "static" && !configuredStaticPath) {
-    if (!suiteWorkspace?.repos?.length) {
-      setupLog("workspace.mode=static with no path and no repos \u2014 falling back to temp mode");
-      configuredMode = "temp";
-    } else {
-      throw new Error("workspace.mode=static requires workspace.path or --workspace-path");
-    }
-  }
-  const useStaticWorkspace = configuredMode === "static";
-  if (useStaticWorkspace && isPerTestIsolation) {
-    throw new Error(
-      "static workspace mode is incompatible with isolation: per_test. Use isolation: shared (default)."
-    );
-  }
-  if (configuredMode !== "static" && configuredStaticPath) {
-    throw new Error("workspace.path requires workspace.mode=static");
-  }
-  const hasSharedWorkspace = !!(useStaticWorkspace || !isPerTestIsolation && (workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length));
-  const poolEnabled = configuredMode === "pooled";
-  const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
   const resolvedRetainOnSuccess = retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
   const resolvedRetainOnFailure = retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
   const workers = options.maxConcurrency ?? target.workers ?? 1;
-  setupLog(
-    `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} workers=${workers}`
-  );
-  if (hasSharedWorkspace && !usePool && workers > 1 && filteredEvalCases.length > 1) {
-    console.warn(
-      [
-        `Warning: This eval uses a shared workspace with ${workers} workers.`,
-        "If the agent under test makes file edits, concurrent runs may corrupt each other.",
-        "To limit concurrency, add this to your eval YAML:",
-        "",
-        "  execution:",
-        "    workers: 1",
-        "",
-        "Or pass --workers 1 on the command line."
-      ].join("\n")
-    );
-  }
   const limit = pLimit(workers);
-  let sharedWorkspacePath;
-  let sharedBaselineCommit;
-  let beforeAllOutput;
-  let poolManager;
-  let poolSlot;
-  const poolSlots = [];
-  const availablePoolSlots = [];
-  const poolSlotBaselines = /* @__PURE__ */ new Map();
-  const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
-  let staticMaterialised = false;
-  const isYamlConfiguredPath = !cliWorkspacePath && !!yamlWorkspacePath;
-  if (useStaticWorkspace && configuredStaticPath) {
-    const dirExists = await stat9(configuredStaticPath).then(
-      (s) => s.isDirectory(),
-      () => false
-    );
-    const isEmpty = dirExists ? (await readdir8(configuredStaticPath)).length === 0 : false;
-    if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
-      if (!dirExists) {
-        await mkdir17(configuredStaticPath, { recursive: true });
-      }
-      if (workspaceTemplate) {
-        await copyDirectoryRecursive(workspaceTemplate, configuredStaticPath);
-        setupLog(`copied template into static workspace: ${configuredStaticPath}`);
-      }
-      staticMaterialised = true;
-      setupLog(`materialised static workspace at: ${configuredStaticPath}`);
-    } else {
-      setupLog(`reusing existing static workspace: ${configuredStaticPath}`);
-    }
-    sharedWorkspacePath = configuredStaticPath;
-  } else if (!isPerTestIsolation && usePool && suiteWorkspace?.repos) {
-    const slotsNeeded = workers;
-    setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
-    poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
-    const poolRepoManager = new RepoManager(verbose);
-    for (let i = 0; i < slotsNeeded; i++) {
-      const slot = await poolManager.acquireWorkspace({
-        templatePath: workspaceTemplate,
-        repos: suiteWorkspace.repos,
-        maxSlots: poolMaxSlots,
-        repoManager: poolRepoManager,
-        poolReset: (workspaceClean === "full" ? "strict" : workspaceClean === "standard" ? "fast" : null) ?? "fast"
-      });
-      poolSlots.push(slot);
-      setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
-    }
-    if (slotsNeeded === 1) {
-      poolSlot = poolSlots[0];
-      sharedWorkspacePath = poolSlot.path;
-    } else {
-      availablePoolSlots.push(...poolSlots);
-    }
-  } else if (!isPerTestIsolation && workspaceTemplate) {
-    setupLog(`creating shared workspace from template: ${workspaceTemplate}`);
-    try {
-      sharedWorkspacePath = await createTempWorkspace(workspaceTemplate, evalRunId, "shared");
-      setupLog(`shared workspace created at: ${sharedWorkspacePath}`);
-    } catch (error40) {
-      const message = error40 instanceof Error ? error40.message : String(error40);
-      throw new Error(`Failed to create shared workspace: ${message}`);
-    }
-  } else if (!isPerTestIsolation && (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length)) {
-    sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
-    await mkdir17(sharedWorkspacePath, { recursive: true });
-    setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
-  }
+  const sharedSetup = await prepareSharedWorkspaceSetup({
+    evalRunId,
+    evalCases: filteredEvalCases,
+    targetHooks: options.targetHooks,
+    evalDir,
+    verbose,
+    workers,
+    poolMaxSlots: configPoolMaxSlots,
+    workspacePath,
+    legacyWorkspacePath,
+    workspaceMode,
+    workspaceClean
+  });
+  const {
+    suiteWorkspace,
+    sharedWorkspacePath,
+    sharedBaselineCommit,
+    suiteWorkspaceFile,
+    beforeAllOutput,
+    repoManager,
+    poolSlot,
+    poolSlots,
+    availablePoolSlots,
+    poolSlotBaselines,
+    useStaticWorkspace
+  } = sharedSetup;
+  const targetHooks = options.targetHooks;
+  const suiteHooksEnabled = hooksEnabled(suiteWorkspace);
   try {
     let toDependencyResult2 = function(r) {
       return {
@@ -38959,198 +39922,6 @@ async function runEvaluation(options) {
       return result.costUsd;
     };
     var toDependencyResult = toDependencyResult2, checkDependencies = checkDependencies2, extractEvaluationCostUsd = extractEvaluationCostUsd2;
-    if (suiteWorkspaceFile && sharedWorkspacePath) {
-      const copiedWorkspaceFile = path46.join(sharedWorkspacePath, path46.basename(suiteWorkspaceFile));
-      try {
-        await stat9(copiedWorkspaceFile);
-        suiteWorkspaceFile = copiedWorkspaceFile;
-      } catch {
-      }
-    }
-    const hasReposToMaterialize = !!suiteWorkspace?.repos?.length && !usePool && !isPerTestIsolation;
-    const needsRepoMaterialisation = hasReposToMaterialize && (!useStaticWorkspace || staticMaterialised);
-    const needsPerRepoCheck = hasReposToMaterialize && useStaticWorkspace && !staticMaterialised && isYamlConfiguredPath;
-    const repoManager = needsRepoMaterialisation || needsPerRepoCheck ? new RepoManager(verbose) : void 0;
-    if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos) {
-      try {
-        if (needsPerRepoCheck) {
-          for (const repo of suiteWorkspace.repos) {
-            if (!repo.path || !repo.repo) continue;
-            const targetDir = path46.join(sharedWorkspacePath, repo.path);
-            if (existsSync6(targetDir)) {
-              setupLog(`reusing existing repo at: ${targetDir}`);
-              continue;
-            }
-            setupLog(`materializing missing repo: ${repo.path}`);
-            await repoManager.materialize(repo, sharedWorkspacePath);
-          }
-        } else {
-          setupLog(
-            `materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
-          );
-          await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
-        }
-        setupLog("shared repo materialization complete");
-      } catch (error40) {
-        const message = error40 instanceof Error ? error40.message : String(error40);
-        if (sharedWorkspacePath && !useStaticWorkspace) {
-          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
-          });
-        }
-        throw new Error(`Failed to materialize repos: ${message}`);
-      }
-    }
-    const suiteDockerConfig = suiteWorkspace?.docker;
-    if (suiteDockerConfig) {
-      setupLog(`pulling Docker image: ${suiteDockerConfig.image}`);
-      const { DockerWorkspaceProvider } = await import("./docker-workspace-RPPXBT27-B4AQHVWA.js");
-      const dockerSetup = new DockerWorkspaceProvider(suiteDockerConfig);
-      if (!await dockerSetup.isDockerAvailable()) {
-        throw new Error(
-          "Docker workspace configured but Docker CLI is not available. Install Docker and ensure it is running."
-        );
-      }
-      await dockerSetup.pullImage();
-      setupLog("Docker image pull complete");
-    }
-    if (suiteWorkspace?.env) {
-      try {
-        await runPreflightChecks(suiteWorkspace.env, sharedWorkspacePath ?? void 0, setupLog);
-        setupLog("preflight checks passed");
-      } catch (error40) {
-        const message = error40 instanceof Error ? error40.message : String(error40);
-        if (sharedWorkspacePath && !useStaticWorkspace) {
-          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
-          });
-        }
-        throw new Error(message);
-      }
-    }
-    const suiteHooksEnabled = hooksEnabled(suiteWorkspace);
-    const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all;
-    if (sharedWorkspacePath && suiteHooksEnabled && hasHookCommand(suiteBeforeAllHook)) {
-      const beforeAllHook = suiteBeforeAllHook;
-      const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
-      setupLog(
-        `running shared before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
-      );
-      const scriptContext = {
-        workspacePath: sharedWorkspacePath,
-        testId: "__before_all__",
-        evalRunId,
-        evalDir,
-        workspaceFileDir: suiteWorkspace?.workspaceFileDir
-      };
-      try {
-        beforeAllOutput = await executeWorkspaceScript(
-          toScriptConfig(beforeAllHook, "before_all", "suite workspace"),
-          scriptContext
-        );
-        setupLog("shared before_all completed");
-      } catch (error40) {
-        const message = error40 instanceof Error ? error40.message : String(error40);
-        if (sharedWorkspacePath && !useStaticWorkspace) {
-          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
-          });
-        }
-        throw new Error(`before_all script failed: ${message}`);
-      }
-    }
-    if (availablePoolSlots.length > 0 && suiteHooksEnabled && hasHookCommand(suiteBeforeAllHook)) {
-      const beforeAllHook = suiteBeforeAllHook;
-      for (const slot of availablePoolSlots) {
-        setupLog(`running before_all on pool slot ${slot.index}`);
-        const scriptContext = {
-          workspacePath: slot.path,
-          testId: "__before_all__",
-          evalRunId,
-          evalDir,
-          workspaceFileDir: suiteWorkspace?.workspaceFileDir
-        };
-        try {
-          const output = await executeWorkspaceScript(
-            toScriptConfig(beforeAllHook, "before_all", "suite workspace"),
-            scriptContext
-          );
-          if (!beforeAllOutput) beforeAllOutput = output;
-          setupLog(`before_all completed on pool slot ${slot.index}`);
-        } catch (error40) {
-          const message = error40 instanceof Error ? error40.message : String(error40);
-          throw new Error(`before_all script failed on pool slot ${slot.index}: ${message}`);
-        }
-      }
-    }
-    const targetHooks = options.targetHooks;
-    const targetBeforeAllHook = targetHooks?.before_all;
-    if (sharedWorkspacePath && hasHookCommand(targetBeforeAllHook)) {
-      const beforeAllCommand = (targetBeforeAllHook.command ?? []).join(" ");
-      setupLog(`running target before_all command=${beforeAllCommand}`);
-      const scriptContext = {
-        workspacePath: sharedWorkspacePath,
-        testId: "__target_before_all__",
-        evalRunId,
-        evalDir,
-        workspaceFileDir: suiteWorkspace?.workspaceFileDir
-      };
-      try {
-        await executeWorkspaceScript(
-          toScriptConfig(targetBeforeAllHook, "before_all", "target hooks"),
-          scriptContext
-        );
-        setupLog("target before_all completed");
-      } catch (error40) {
-        const message = error40 instanceof Error ? error40.message : String(error40);
-        if (sharedWorkspacePath && !useStaticWorkspace) {
-          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
-          });
-        }
-        throw new Error(`target before_all hook failed: ${message}`);
-      }
-    }
-    if (availablePoolSlots.length > 0 && hasHookCommand(targetBeforeAllHook)) {
-      for (const slot of availablePoolSlots) {
-        setupLog(`running target before_all on pool slot ${slot.index}`);
-        const scriptContext = {
-          workspacePath: slot.path,
-          testId: "__target_before_all__",
-          evalRunId,
-          evalDir,
-          workspaceFileDir: suiteWorkspace?.workspaceFileDir
-        };
-        try {
-          await executeWorkspaceScript(
-            toScriptConfig(targetBeforeAllHook, "before_all", "target hooks"),
-            scriptContext
-          );
-        } catch (error40) {
-          const message = error40 instanceof Error ? error40.message : String(error40);
-          throw new Error(`target before_all hook failed on pool slot ${slot.index}: ${message}`);
-        }
-      }
-    }
-    if (sharedWorkspacePath) {
-      try {
-        sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
-        setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
-      } catch (error40) {
-        const message = error40 instanceof Error ? error40.message : String(error40);
-        setupLog(`shared baseline initialization failed (file_changes unavailable): ${message}`);
-      }
-    }
-    if (availablePoolSlots.length > 0) {
-      for (const slot of availablePoolSlots) {
-        try {
-          const baseline = await initializeBaseline(slot.path);
-          poolSlotBaselines.set(slot.path, baseline);
-          setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
-        } catch (error40) {
-          const message = error40 instanceof Error ? error40.message : String(error40);
-          setupLog(
-            `pool slot ${slot.index} baseline initialization failed (file_changes unavailable): ${message}`
-          );
-        }
-      }
-    }
     let nextWorkerId = 1;
     const workerIdByEvalId = /* @__PURE__ */ new Map();
     let beforeAllOutputAttached = false;
@@ -39554,17 +40325,7 @@ async function runEvaluation(options) {
     }
     return results;
   } finally {
-    if (poolManager) {
-      if (poolSlot) {
-        await poolManager.releaseSlot(poolSlot);
-      }
-      for (const slot of poolSlots) {
-        if (slot !== poolSlot) {
-          await poolManager.releaseSlot(slot).catch(() => {
-          });
-        }
-      }
-    }
+    await releaseSharedWorkspaceSetup(sharedSetup);
   }
 }
 async function runBatchEvaluation(options) {
@@ -39812,257 +40573,45 @@ async function runEvalCase(options) {
     cachedResponse = await cache.get(cacheKey);
   }
   const nowFn = now ?? (() => /* @__PURE__ */ new Date());
-  let workspacePath = sharedWorkspacePath;
-  let beforeAllOutput;
-  let beforeEachOutput;
   let afterEachOutput;
-  const isSharedWorkspace = !!sharedWorkspacePath;
-  let caseWorkspaceFile;
   const caseHooksEnabled = hooksEnabled(evalCase.workspace);
-  if (!workspacePath) {
-    const rawCaseTemplate = evalCase.workspace?.template;
-    const resolvedCaseTemplate = await resolveWorkspaceTemplate(rawCaseTemplate);
-    const caseWorkspaceTemplate = resolvedCaseTemplate?.dir;
-    caseWorkspaceFile = resolvedCaseTemplate?.workspaceFile;
-    if (caseWorkspaceTemplate && evalRunId) {
-      try {
-        workspacePath = await createTempWorkspace(caseWorkspaceTemplate, evalRunId, evalCase.id);
-      } catch (error40) {
-        const message = error40 instanceof Error ? error40.message : String(error40);
-        return buildErrorResult(
-          evalCase,
-          target.name,
-          nowFn(),
-          new Error(`Failed to create workspace: ${message}`),
-          promptInputs,
-          provider,
-          "setup",
-          "template_error",
-          verbose
-        );
-      }
-      if (caseWorkspaceFile && workspacePath) {
-        const copiedFile = path46.join(workspacePath, path46.basename(caseWorkspaceFile));
-        try {
-          await stat9(copiedFile);
-          caseWorkspaceFile = copiedFile;
-        } catch {
-        }
-      }
-    }
-    if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
-      workspacePath = getWorkspacePath(evalRunId, evalCase.id);
-      await mkdir17(workspacePath, { recursive: true });
-    }
-    if (evalCase.workspace?.repos?.length && workspacePath) {
-      const perCaseRepoManager = new RepoManager(setupDebug);
-      try {
-        if (setupDebug) {
-          console.log(
-            `[setup] test=${evalCase.id} materializing ${evalCase.workspace.repos.length} per-test repo(s) into ${workspacePath}`
-          );
-        }
-        await perCaseRepoManager.materializeAll(evalCase.workspace.repos, workspacePath);
-        if (setupDebug) {
-          console.log(`[setup] test=${evalCase.id} per-test repo materialization complete`);
-        }
-      } catch (error40) {
-        const message = error40 instanceof Error ? error40.message : String(error40);
-        return buildErrorResult(
-          evalCase,
-          target.name,
-          nowFn(),
-          new Error(`Failed to materialize repos: ${message}`),
-          promptInputs,
-          provider,
-          "repo_setup",
-          "clone_error",
-          verbose
-        );
-      }
-    }
-    if (workspacePath && evalCase.metadata?.agent_skills_files) {
-      const baseDir = evalCase.metadata.agent_skills_base_dir;
-      const files = evalCase.metadata.agent_skills_files;
-      if (baseDir && files.length > 0) {
-        for (const relPath of files) {
-          const srcPath = path46.resolve(baseDir, relPath);
-          const destPath = path46.resolve(workspacePath, relPath);
-          try {
-            await mkdir17(path46.dirname(destPath), { recursive: true });
-            await copyFile2(srcPath, destPath);
-          } catch (error40) {
-            const message = error40 instanceof Error ? error40.message : String(error40);
-            return buildErrorResult(
-              evalCase,
-              target.name,
-              nowFn(),
-              new Error(
-                `Agent Skills eval file not found: ${relPath} (resolved from ${baseDir}): ${message}`
-              ),
-              promptInputs,
-              provider,
-              "setup",
-              "file_copy_error",
-              verbose
-            );
-          }
-        }
-      }
-    }
-    const caseBeforeAllHook = evalCase.workspace?.hooks?.before_all;
-    if (workspacePath && caseHooksEnabled && hasHookCommand(caseBeforeAllHook)) {
-      const beforeAllHook = caseBeforeAllHook;
-      const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
-      if (setupDebug) {
-        console.log(
-          `[setup] test=${evalCase.id} running before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
-        );
-      }
-      const scriptContext = {
-        workspacePath,
-        testId: evalCase.id,
-        evalRunId: evalRunId ?? "",
-        caseInput: evalCase.question,
-        caseMetadata: evalCase.metadata,
-        evalDir,
-        workspaceFileDir: evalCase.workspace?.workspaceFileDir
-      };
-      try {
-        beforeAllOutput = await executeWorkspaceScript(
-          toScriptConfig(beforeAllHook, "before_all", `test '${evalCase.id}'`),
-          scriptContext
-        );
-        if (setupDebug) {
-          console.log(`[setup] test=${evalCase.id} before_all completed`);
-        }
-      } catch (error40) {
-        const message = error40 instanceof Error ? error40.message : String(error40);
-        if (forceCleanup && workspacePath) {
-          await cleanupWorkspace(workspacePath).catch(() => {
-          });
-        }
-        return buildErrorResult(
-          evalCase,
-          target.name,
-          nowFn(),
-          new Error(`before_all script failed: ${message}`),
-          promptInputs,
-          provider,
-          "setup",
-          "script_error",
-          verbose
-        );
-      }
-    }
-  }
-  let beforeEachNeedsFreshBaseline = false;
-  if (caseHooksEnabled && workspacePath && evalCase.workspace?.hooks?.before_each?.reset && evalCase.workspace.hooks.before_each.reset !== "none") {
-    try {
-      if (repoManager && evalCase.workspace.repos?.length) {
-        await repoManager.reset(
-          evalCase.workspace.repos,
-          workspacePath,
-          evalCase.workspace.hooks.before_each.reset
-        );
-      } else {
-        await resetWorkspaceRoot(
-          workspacePath,
-          evalCase.workspace.hooks.before_each.reset,
-          sharedBaselineCommit
-        );
-      }
-    } catch (error40) {
-      const message = error40 instanceof Error ? error40.message : String(error40);
-      return buildErrorResult(
-        evalCase,
-        target.name,
-        nowFn(),
-        new Error(`before_each reset failed: ${message}`),
-        promptInputs,
-        provider,
-        "setup",
-        "script_error",
-        verbose
-      );
-    }
-  }
-  const caseBeforeEachHook = evalCase.workspace?.hooks?.before_each;
-  if (workspacePath && caseHooksEnabled && hasHookCommand(caseBeforeEachHook)) {
-    const beforeEachHook = caseBeforeEachHook;
-    const scriptContext = {
-      workspacePath,
-      testId: evalCase.id,
-      evalRunId: evalRunId ?? "",
-      caseInput: evalCase.question,
-      caseMetadata: evalCase.metadata,
-      evalDir,
-      workspaceFileDir: evalCase.workspace?.workspaceFileDir
-    };
-    try {
-      beforeEachOutput = await executeWorkspaceScript(
-        toScriptConfig(beforeEachHook, "before_each", `test '${evalCase.id}'`),
-        scriptContext
-      );
-      beforeEachNeedsFreshBaseline = true;
-    } catch (error40) {
-      const message = error40 instanceof Error ? error40.message : String(error40);
-      return buildErrorResult(
-        evalCase,
-        target.name,
-        nowFn(),
-        new Error(`before_each script failed: ${message}`),
-        promptInputs,
-        provider,
-        "setup",
-        "script_error",
-        verbose
-      );
-    }
-  }
-  const targetBeforeEachHook = options.targetHooks?.before_each;
-  if (workspacePath && hasHookCommand(targetBeforeEachHook)) {
-    const scriptContext = {
-      workspacePath,
-      testId: evalCase.id,
-      evalRunId: evalRunId ?? "",
-      caseInput: evalCase.question,
-      caseMetadata: evalCase.metadata,
+  let workspaceSetup;
+  try {
+    workspaceSetup = await prepareEvalCaseWorkspace({
+      evalCase,
+      targetName: target.name,
+      evalRunId,
+      sharedWorkspacePath,
+      sharedBaselineCommit,
+      suiteWorkspaceFile,
+      repoManager,
       evalDir,
-      workspaceFileDir: evalCase.workspace?.workspaceFileDir
-    };
-    try {
-      await executeWorkspaceScript(
-        toScriptConfig(targetBeforeEachHook, "before_each", `target hook for '${evalCase.id}'`),
-        scriptContext
-      );
-      beforeEachNeedsFreshBaseline = true;
-    } catch (error40) {
-      const message = error40 instanceof Error ? error40.message : String(error40);
-      return buildErrorResult(
-        evalCase,
-        target.name,
-        nowFn(),
-        new Error(`target before_each hook failed: ${message}`),
-        promptInputs,
-        provider,
-        "setup",
-        "script_error",
-        verbose
-      );
-    }
-  }
-  let baselineCommit = beforeEachNeedsFreshBaseline ? void 0 : sharedBaselineCommit;
-  if (!baselineCommit && workspacePath) {
-    try {
-      baselineCommit = await initializeBaseline(workspacePath);
-    } catch (error40) {
-      const message = error40 instanceof Error ? error40.message : String(error40);
-      if (verbose) {
-        console.warn(`[setup] test=${evalCase.id} baseline initialization failed: ${message}`);
-      }
-    }
+      cleanupWorkspaces: forceCleanup,
+      targetHooks: options.targetHooks,
+      setupDebug
+    });
+  } catch (error40) {
+    const setupError = error40 instanceof WorkspaceSetupError ? error40 : void 0;
+    return buildErrorResult(
+      evalCase,
+      target.name,
+      nowFn(),
+      error40,
+      promptInputs,
+      provider,
+      setupError?.failureStage ?? "setup",
+      setupError?.failureReasonCode ?? "script_error",
+      verbose
+    );
   }
+  const {
+    workspacePath,
+    beforeAllOutput,
+    beforeEachOutput,
+    baselineCommit,
+    isSharedWorkspace,
+    caseWorkspaceFile
+  } = workspaceSetup;
   if (evalCase.mode === "conversation" && evalCase.turns?.length) {
     const conversationResult = await runConversationMode({
       evalCase,
@@ -40769,7 +41318,7 @@ async function runEvaluatorList(options) {
     dockerConfig,
     dependencyResults
   };
-  const evalFileDir = evalCase.file_paths[0] ? path46.dirname(evalCase.file_paths[0]) : process.cwd();
+  const evalFileDir = evalCase.file_paths[0] ? path47.dirname(evalCase.file_paths[0]) : process.cwd();
   const dispatchContext = {
     graderProvider,
     targetResolver,
@@ -41431,38 +41980,6 @@ function computeWeightedMean(entries) {
   }
   return totalWeight > 0 ? weightedSum / totalWeight : 0;
 }
-async function runPreflightChecks(env, cwd, log) {
-  const execFileAsync3 = promisify6(execFile2);
-  const missing = [];
-  for (const cmd of env.required_commands ?? []) {
-    log(`preflight: checking command "${cmd}"`);
-    try {
-      if (process.platform === "win32") {
-        await execFileAsync3("where", [cmd], { cwd });
-      } else {
-        await execFileAsync3("sh", ["-c", `command -v ${cmd}`], { cwd });
-      }
-    } catch {
-      missing.push(`command: ${cmd}`);
-    }
-  }
-  for (const mod of env.required_python_modules ?? []) {
-    log(`preflight: checking Python module "${mod}"`);
-    try {
-      await execFileAsync3("python3", ["-c", `import ${mod}`], { cwd });
-    } catch {
-      missing.push(`python module: ${mod}`);
-    }
-  }
-  if (missing.length > 0) {
-    throw new Error(
-      `Preflight checks failed \u2014 missing dependencies:
-${missing.map((m) => `  \u2022 ${m}`).join("\n")}
-Install the missing dependencies before running this eval.`
-    );
-  }
-}
 function createFunctionProvider(taskFn) {
   return {
     id: "function-provider",
@@ -41837,22 +42354,22 @@ function deduplicateByTestIdTarget(results) {
   return deduped;
 }
 async function aggregateRunDir(runDir, options) {
-  const indexPath = path47.join(runDir, RESULT_INDEX_FILENAME);
+  const indexPath = path48.join(runDir, RESULT_INDEX_FILENAME);
   const content = await readFile20(indexPath, "utf8");
   const allResults = parseJsonlResults(content);
   const results = deduplicateByTestIdTarget(allResults);
   const timing = buildTimingArtifact(results);
-  const timingPath = path47.join(runDir, "timing.json");
+  const timingPath = path48.join(runDir, "timing.json");
   await writeFile10(timingPath, `${JSON.stringify(timing, null, 2)}
 `, "utf8");
-  const plannedTestCount = options?.plannedTestCount ?? await readPlannedTestCount(path47.join(runDir, "benchmark.json"));
+  const plannedTestCount = options?.plannedTestCount ?? await readPlannedTestCount(path48.join(runDir, "benchmark.json"));
   const benchmark = buildBenchmarkArtifact(
     results,
     options?.evalFile,
     options?.experiment,
     plannedTestCount
   );
-  const benchmarkPath = path47.join(runDir, "benchmark.json");
+  const benchmarkPath = path48.join(runDir, "benchmark.json");
   await writeFile10(benchmarkPath, `${JSON.stringify(benchmark, null, 2)}
 `, "utf8");
   const targetSet = new Set(results.map((r) => r.target ?? "unknown"));
@@ -41991,17 +42508,37 @@ function toIndexRerunSource(value) {
     source_timestamp: value.sourceTimestamp
   });
 }
+function toIndexPreparedAttempt(value) {
+  if (!isRecord4(value)) {
+    return void 0;
+  }
+  return dropUndefined5({
+    source: value.source,
+    manifest_path: value.manifestPath,
+    prepared_dir: value.preparedDir,
+    workspace_path: value.workspacePath,
+    prompt_path: value.promptPath,
+    target: value.target,
+    prepared_at: value.preparedAt,
+    setup_status: value.setupStatus,
+    baseline_status: value.baselineStatus,
+    baseline_commit: value.baselineCommit
+  });
+}
 function toIndexMetadata(metadata) {
   if (!metadata) {
     return void 0;
   }
   const rerunSource = toIndexRerunSource(metadata.rerunSource);
-  if (!rerunSource) {
+  const preparedAttempt = toIndexPreparedAttempt(metadata.preparedAttempt);
+  if (!rerunSource && !preparedAttempt) {
     return { ...metadata };
   }
+  const reservedKeys = /* @__PURE__ */ new Set(["rerunSource", "preparedAttempt"]);
   return {
-    ...Object.fromEntries(Object.entries(metadata).filter(([key]) => key !== "rerunSource")),
-    rerun_source: rerunSource
+    ...Object.fromEntries(Object.entries(metadata).filter(([key]) => !reservedKeys.has(key))),
+    ...rerunSource ? { rerun_source: rerunSource } : {},
+    ...preparedAttempt ? { prepared_attempt: preparedAttempt } : {}
   };
 }
 function buildGradingArtifact(result) {
@@ -42147,7 +42684,7 @@ async function writeInitialBenchmarkArtifact(runDir, options) {
     options.experiment,
     options.plannedTestCount
   );
-  const benchmarkPath = path47.join(runDir, "benchmark.json");
+  const benchmarkPath = path48.join(runDir, "benchmark.json");
   await writeFile10(benchmarkPath, `${JSON.stringify(stub, null, 2)}
 `, "utf8");
 }
@@ -42197,7 +42734,7 @@ function buildArtifactSubdir(result) {
     segments.push(safeArtifactPathSegment(evalSet, "default"));
   }
   segments.push(safeTestId(result.testId));
-  return path47.posix.join(...segments);
+  return path48.posix.join(...segments);
 }
 function formatOutputMarkdown(output) {
   return output.map((msg) => `@[${msg.role}]:
@@ -42213,7 +42750,7 @@ function extractInput(result) {
   return null;
 }
 function toRelativeArtifactPath(outputDir, filePath) {
-  return path47.relative(outputDir, filePath).split(path47.sep).join("/");
+  return path48.relative(outputDir, filePath).split(path48.sep).join("/");
 }
 function findResultSourceTest(result, testByTestId) {
   return testByTestId.get(result.testId ?? "unknown");
@@ -42229,7 +42766,7 @@ async function writeTraceEnvelopeSidecar(params) {
   const hasTranscript = resultHasExecutionTraceTranscript(params.result);
   const envelope = buildTraceEnvelopeFromEvaluationResult(params.result, {
     evalPath: params.evalPath,
-    runId: path47.basename(params.outputDir),
+    runId: path48.basename(params.outputDir),
     experiment: params.experiment,
     source: { path: RESULT_INDEX_FILENAME },
     capture: { content: "full", redactionLevel: "none", redactedFields: [] },
@@ -42241,7 +42778,7 @@ async function writeTraceEnvelopeSidecar(params) {
     }
   });
   await writeFile10(
-    path47.join(params.outputsDir, "execution-trace.json"),
+    path48.join(params.outputsDir, "execution-trace.json"),
     `${JSON.stringify(toTraceEnvelopeWire(envelope), null, 2)}
 `,
     "utf8"
@@ -42305,13 +42842,13 @@ function buildResultIndexArtifact(result, extraIndexFields) {
     failure_reason_code: result.failureReasonCode,
     workspace_path: result.workspacePath,
     artifact_dir: artifactSubdir,
-    grading_path: path47.posix.join(artifactSubdir, "grading.json"),
-    timing_path: path47.posix.join(artifactSubdir, "timing.json"),
-    input_path: input ? path47.posix.join(artifactSubdir, "input.md") : void 0,
-    output_path: hasAnswer ? path47.posix.join(artifactSubdir, "outputs", "answer.md") : void 0,
-    answer_path: hasAnswer ? path47.posix.join(artifactSubdir, "outputs", "answer.md") : void 0,
-    transcript_path: hasTranscript ? path47.posix.join(artifactSubdir, "outputs", "transcript.jsonl") : void 0,
-    response_path: hasAnswer ? path47.posix.join(artifactSubdir, "outputs", "response.md") : void 0,
+    grading_path: path48.posix.join(artifactSubdir, "grading.json"),
+    timing_path: path48.posix.join(artifactSubdir, "timing.json"),
+    input_path: input ? path48.posix.join(artifactSubdir, "input.md") : void 0,
+    output_path: hasAnswer ? path48.posix.join(artifactSubdir, "outputs", "answer.md") : void 0,
+    answer_path: hasAnswer ? path48.posix.join(artifactSubdir, "outputs", "answer.md") : void 0,
+    transcript_path: hasTranscript ? path48.posix.join(artifactSubdir, "outputs", "transcript.jsonl") : void 0,
+    response_path: hasAnswer ? path48.posix.join(artifactSubdir, "outputs", "response.md") : void 0,
     ...extraIndexFields,
     metadata: toIndexMetadata(result.metadata)
   };
@@ -42351,7 +42888,7 @@ async function rewriteExistingIndexRecords(outputDir, replacements) {
   if (replacements.length === 0) {
     return;
   }
-  const indexPath = path47.join(outputDir, RESULT_INDEX_FILENAME);
+  const indexPath = path48.join(outputDir, RESULT_INDEX_FILENAME);
   const content = await readFile20(indexPath, "utf8").catch(() => void 0);
   if (content === void 0) {
     return;
@@ -42520,29 +43057,29 @@ async function writePerTestArtifacts(results, outputDir, options) {
     const grading = buildGradingArtifact(result);
     const timing = buildTimingArtifact([result]);
     const artifactSubdir = buildArtifactSubdir(result);
-    const testDir = path47.join(outputDir, artifactSubdir);
+    const testDir = path48.join(outputDir, artifactSubdir);
     await mkdir18(testDir, { recursive: true });
     await writeFile10(
-      path47.join(testDir, "grading.json"),
+      path48.join(testDir, "grading.json"),
       `${JSON.stringify(grading, null, 2)}
 `,
       "utf8"
     );
     await writeFile10(
-      path47.join(testDir, "timing.json"),
+      path48.join(testDir, "timing.json"),
       `${JSON.stringify(timing, null, 2)}
 `,
       "utf8"
     );
     const input = extractInput(result);
     if (input) {
-      await writeFile10(path47.join(testDir, "input.md"), input, "utf8");
+      await writeFile10(path48.join(testDir, "input.md"), input, "utf8");
     }
-    const outputsDir = path47.join(testDir, "outputs");
+    const outputsDir = path48.join(testDir, "outputs");
     await mkdir18(outputsDir, { recursive: true });
     if (result.output.length > 0) {
-      await writeFile10(path47.join(outputsDir, "answer.md"), result.output, "utf8");
-      await writeFile10(path47.join(outputsDir, "response.md"), result.output, "utf8");
+      await writeFile10(path48.join(outputsDir, "answer.md"), result.output, "utf8");
+      await writeFile10(path48.join(outputsDir, "response.md"), result.output, "utf8");
     }
     const envelope = await writeTraceEnvelopeSidecar({
       result,
@@ -42552,7 +43089,7 @@ async function writePerTestArtifacts(results, outputDir, options) {
       experiment: options?.experiment
     });
     if (hasTranscriptProjection(result, envelope)) {
-      await writeTranscriptJsonl(path47.join(outputsDir, "transcript.jsonl"), result, envelope);
+      await writeTranscriptJsonl(path48.join(outputsDir, "transcript.jsonl"), result, envelope);
     }
     const extraIndexFields = await collectAdditionalIndexFields(
       result,
@@ -42570,9 +43107,9 @@ async function writePerTestArtifacts(results, outputDir, options) {
 }
 async function writeArtifactsFromResults(results, outputDir, options) {
   const testArtifactDir = outputDir;
-  const timingPath = path47.join(outputDir, "timing.json");
-  const benchmarkPath = path47.join(outputDir, "benchmark.json");
-  const indexPath = path47.join(outputDir, RESULT_INDEX_FILENAME);
+  const timingPath = path48.join(outputDir, "timing.json");
+  const benchmarkPath = path48.join(outputDir, "benchmark.json");
+  const indexPath = path48.join(outputDir, RESULT_INDEX_FILENAME);
   await mkdir18(outputDir, { recursive: true });
   const indexRecords = [];
   const testByTestId = new Map((options?.sourceTests ?? []).map((test) => [test.id, test]));
@@ -42580,23 +43117,23 @@ async function writeArtifactsFromResults(results, outputDir, options) {
     const grading = buildGradingArtifact(result);
     const timing2 = buildTimingArtifact([result]);
     const artifactSubdir = buildArtifactSubdir(result);
-    const testDir = path47.join(outputDir, artifactSubdir);
-    const gradingPath = path47.join(testDir, "grading.json");
-    const perTestTimingPath = path47.join(testDir, "timing.json");
+    const testDir = path48.join(outputDir, artifactSubdir);
+    const gradingPath = path48.join(testDir, "grading.json");
+    const perTestTimingPath = path48.join(testDir, "timing.json");
     await mkdir18(testDir, { recursive: true });
     await writeFile10(gradingPath, `${JSON.stringify(grading, null, 2)}
 `, "utf8");
     await writeFile10(perTestTimingPath, `${JSON.stringify(timing2, null, 2)}
 `, "utf8");
     const input = extractInput(result);
-    const inputPath = input ? path47.join(testDir, "input.md") : void 0;
+    const inputPath = input ? path48.join(testDir, "input.md") : void 0;
     if (inputPath && input) {
       await writeFile10(inputPath, input, "utf8");
     }
-    const outputsDir = path47.join(testDir, "outputs");
+    const outputsDir = path48.join(testDir, "outputs");
     await mkdir18(outputsDir, { recursive: true });
-    const answerPath = result.output.length > 0 ? path47.join(outputsDir, "answer.md") : void 0;
-    const responsePath = result.output.length > 0 ? path47.join(outputsDir, "response.md") : void 0;
+    const answerPath = result.output.length > 0 ? path48.join(outputsDir, "answer.md") : void 0;
+    const responsePath = result.output.length > 0 ? path48.join(outputsDir, "response.md") : void 0;
     if (answerPath && responsePath) {
       await writeFile10(answerPath, result.output, "utf8");
       await writeFile10(responsePath, result.output, "utf8");
@@ -42608,7 +43145,7 @@ async function writeArtifactsFromResults(results, outputDir, options) {
       evalPath: resolveEnvelopeEvalPath(result, testByTestId, options?.evalFile),
       experiment: options?.experiment
     });
-    const transcriptPath = hasTranscriptProjection(result, envelope) ? path47.join(outputsDir, "transcript.jsonl") : void 0;
+    const transcriptPath = hasTranscriptProjection(result, envelope) ? path48.join(outputsDir, "transcript.jsonl") : void 0;
     if (transcriptPath) {
       await writeTranscriptJsonl(transcriptPath, result, envelope);
     }
@@ -42649,7 +43186,7 @@ async function writeArtifactsFromResults(results, outputDir, options) {
 `, "utf8");
   await writeJsonlFile(indexPath, indexRecords);
   await writeFile10(
-    path47.join(outputDir, "transcript.jsonl"),
+    path48.join(outputDir, "transcript.jsonl"),
     buildTranscriptMessageLines(results),
     "utf8"
   );
@@ -42700,7 +43237,7 @@ async function evaluate(config2) {
     cliNoCache: false,
     yamlCache: config2.cache === void 0 ? materialized.cache : void 0
   });
-  const cache = cacheEnabled ? new ResponseCache(materialized.cachePath ? path48.resolve(materialized.cachePath) : void 0) : void 0;
+  const cache = cacheEnabled ? new ResponseCache(materialized.cachePath ? path49.resolve(materialized.cachePath) : void 0) : void 0;
   const results = await runEvaluation({
     testFilePath,
     repoRoot,
@@ -42723,7 +43260,7 @@ async function evaluate(config2) {
   });
   const allResults = collectedResults.length > 0 ? collectedResults : [...results];
   const durationMs = Date.now() - startTime;
-  const outputDir = config2.outputDir ? path48.resolve(config2.outputDir) : void 0;
+  const outputDir = config2.outputDir ? path49.resolve(config2.outputDir) : void 0;
   const artifacts = outputDir ? await writeArtifactsFromResults(allResults, outputDir, {
     evalFile: config2.specFile ? testFilePath : "",
     experiment: config2.experiment,
@@ -42743,7 +43280,7 @@ async function evaluate(config2) {
 async function materializeEvalConfig(config2, options) {
   const baseDir = options?.baseDir ?? process.cwd();
   const repoRoot = options?.repoRoot ?? await findGitRoot(baseDir) ?? baseDir;
-  const testFilePath = config2.specFile ? path48.resolve(baseDir, config2.specFile) : path48.join(baseDir, "__programmatic__.yaml");
+  const testFilePath = config2.specFile ? path49.resolve(baseDir, config2.specFile) : path49.join(baseDir, "__programmatic__.yaml");
   const effectiveFilter = options?.filter ?? config2.filter;
   if (config2.specFile) {
     const suite = await loadTestSuite(testFilePath, repoRoot, {
@@ -42820,7 +43357,7 @@ function convertAssertions(entries) {
 }
 function buildInlineEvalTests(config2, options) {
   const suiteWorkspace = config2.beforeAll ? { hooks: { before_all: toBeforeAllHook(config2.beforeAll) } } : void 0;
-  const derivedSuiteName = path48.basename(options.testFilePath).replace(/\.eval\.[cm]?ts$/i, "").replace(/\.[cm]?ts$/i, "");
+  const derivedSuiteName = path49.basename(options.testFilePath).replace(/\.eval\.[cm]?ts$/i, "").replace(/\.[cm]?ts$/i, "");
   const suiteName = config2.metadata?.name ?? (derivedSuiteName || "eval");
   return (config2.tests ?? []).filter((test) => !options.filter || matchesFilter4(test.id, options.filter)).map((test) => {
     const isConversation = test.mode === "conversation" || test.turns && test.turns.length > 0;
@@ -42916,10 +43453,10 @@ function computeSummary(results, durationMs, threshold = DEFAULT_THRESHOLD) {
 var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
 async function discoverDefaultTarget(repoRoot) {
   const cwd = process.cwd();
-  const chain = buildDirectoryChain(path48.join(cwd, "_placeholder"), repoRoot);
+  const chain = buildDirectoryChain(path49.join(cwd, "_placeholder"), repoRoot);
   for (const dir of chain) {
     for (const candidate of TARGET_FILE_CANDIDATES) {
-      const targetsPath = path48.join(dir, candidate);
+      const targetsPath = path49.join(dir, candidate);
       if (!existsSync7(targetsPath)) continue;
       try {
         const definitions = await readTargetDefinitions(targetsPath);
@@ -42936,7 +43473,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
   const chain = buildDirectoryChain(startPath, repoRoot);
   const envFiles = [];
   for (const dir of chain) {
-    const envPath = path48.join(dir, ".env");
+    const envPath = path49.join(dir, ".env");
     if (existsSync7(envPath)) envFiles.push(envPath);
   }
   for (let i = 0; i < envFiles.length; i++) {
@@ -42962,7 +43499,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
 }
 var EXPORT_NAMES = ["default", "config", "evalConfig"];
 async function loadTsEvalFile(filePath) {
-  const absolutePath = path49.resolve(filePath);
+  const absolutePath = path50.resolve(filePath);
   const moduleUrl = pathToFileURL2(absolutePath).href;
   const module = await import(moduleUrl);
   let config2;
@@ -42984,7 +43521,7 @@ async function loadTsEvalSuite(filePath, repoRoot, options) {
   const { config: config2, filePath: absolutePath } = await loadTsEvalFile(filePath);
   const materialized = await materializeEvalConfig(config2, {
     repoRoot,
-    baseDir: path49.dirname(absolutePath),
+    baseDir: path50.dirname(absolutePath),
     filter: options?.filter,
     category: options?.category
   });
@@ -43046,6 +43583,7 @@ export {
   buildDirectoryChain,
   buildSearchRoots,
   resolveFileReference,
+  AGENT_PROVIDER_KINDS,
   KNOWN_PROVIDERS,
   PROVIDER_ALIASES,
   extractLastAssistantContent,
@@ -43230,6 +43768,7 @@ export {
   createTempWorkspace,
   cleanupWorkspace,
   cleanupEvalWorkspaces,
+  executeWorkspaceScript,
   resolveRepoCloneUrl,
   normalizeRepoIdentity,
   computeWorkspaceFingerprint,
@@ -43246,7 +43785,9 @@ export {
   discoverProjects,
   RepoManager,
   resolveWorkspaceTemplate,
-  executeWorkspaceScript,
+  releaseSharedWorkspaceSetup,
+  prepareSharedWorkspaceSetup,
+  prepareEvalCaseWorkspace,
   isAgentSkillsFormat,
   parseAgentSkillsEvals,
   DEFAULT_EVAL_PATTERNS,
@@ -43271,6 +43812,7 @@ export {
   loadEvalCases,
   loadTestById,
   loadEvalCaseById,
+  gradePreparedEvalCase,
   runEvaluation,
   runEvalCase,
   toTranscriptJsonLines,
@@ -43300,4 +43842,4 @@ export {
   loadTsEvalFile,
   loadTsEvalSuite
 };
-//# sourceMappingURL=chunk-BLXYBUU4.js.map
+//# sourceMappingURL=chunk-ENHX2CCS.js.map