npm - @agentv/core - Versions diffs - 2.14.3 → 2.15.0 - Mend

@agentv/core 2.14.3 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/index.js (changed)

@@ -612,6 +612,17 @@ function parseExecutionDefaults(raw, configPath) {
   } else if (otelFile !== void 0) {
     logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
   }
+  if (typeof obj.pool_workspaces === "boolean") {
+    result.pool_workspaces = obj.pool_workspaces;
+  } else if (obj.pool_workspaces !== void 0) {
+    logWarning(`Invalid execution.pool_workspaces in ${configPath}, expected boolean`);
+  }
+  const poolSlots = obj.pool_slots;
+  if (typeof poolSlots === "number" && Number.isInteger(poolSlots) && poolSlots >= 1 && poolSlots <= 50) {
+    result.pool_slots = poolSlots;
+  } else if (poolSlots !== void 0) {
+    logWarning(`Invalid execution.pool_slots in ${configPath}, expected integer 1-50`);
+  }
   return Object.keys(result).length > 0 ? result : void 0;
 }
 function logWarning(message) {
@@ -2053,6 +2064,7 @@ async function processMessages(options) {
     repoRootPath,
     guidelinePatterns,
     guidelinePaths,
+    treatFileSegmentsAsGuidelines,
     textParts,
     messageType,
     verbose
@@ -2100,16 +2112,20 @@ async function processMessages(options) {
         }
         try {
           const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
-          if (messageType === "input" && guidelinePatterns && guidelinePaths) {
-            const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
-            if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
-              guidelinePaths.push(path5.resolve(resolvedPath));
-              if (verbose) {
-                console.log(`  [Guideline] Found: ${displayPath}`);
-                console.log(`    Resolved to: ${resolvedPath}`);
-              }
-              continue;
+          const classifyAsGuideline = shouldTreatAsGuideline({
+            messageType,
+            resolvedPath,
+            repoRootPath,
+            guidelinePatterns,
+            treatFileSegmentsAsGuidelines
+          });
+          if (classifyAsGuideline && guidelinePaths) {
+            guidelinePaths.push(path5.resolve(resolvedPath));
+            if (verbose) {
+              console.log(`  [Guideline] Found: ${displayPath}`);
+              console.log(`    Resolved to: ${resolvedPath}`);
             }
+            continue;
           }
           segments.push({
             type: "file",
@@ -2138,6 +2154,26 @@ async function processMessages(options) {
   }
   return segments;
 }
+function shouldTreatAsGuideline(options) { // Decides whether a resolved file reference is classified as a guideline; always returns false for non-"input" messages.
+  const {
+    messageType,
+    resolvedPath,
+    repoRootPath,
+    guidelinePatterns,
+    treatFileSegmentsAsGuidelines
+  } = options;
+  if (messageType !== "input") { // Only "input" messages can produce guidelines.
+    return false;
+  }
+  if (treatFileSegmentsAsGuidelines) { // Explicit override: classify as a guideline without consulting the patterns.
+    return true;
+  }
+  if (!guidelinePatterns || guidelinePatterns.length === 0) { // No patterns supplied, so nothing can match.
+    return false;
+  }
+  const relativeToRepo = path5.relative(repoRootPath, resolvedPath); // Patterns are tested against the repo-relative path.
+  return isGuidelineFile(relativeToRepo, guidelinePatterns);
+}
 function asString3(value) { // Returns value unchanged when it is a string, otherwise undefined.
   return typeof value === "string" ? value : void 0;
 }
@@ -2476,6 +2512,8 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
         for (const guidelinePath of testCase.guideline_paths) {
           console.log(`    - ${guidelinePath}`);
         }
+      } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
+        console.log("  No guidelines found (guideline_patterns not configured)");
       } else {
         console.log("  No guidelines found");
       }
@@ -2845,7 +2883,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
   } else {
     throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
   }
-  const suiteWorkspace = parseWorkspaceConfig(suite.workspace, evalFileDir);
+  const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
   const suiteInputMessages = expandInputShorthand(suite.input);
   const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
   const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
@@ -2881,12 +2919,24 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
     }
     const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
     const skipDefaults = caseExecution?.skip_defaults === true;
-    const inputMessages = suiteInputMessages && !skipDefaults ? [...suiteInputMessages, ...testInputMessages] : testInputMessages;
+    const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
+    const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
     const hasExpectedMessages = expectedMessages.length > 0;
     const guidelinePaths = [];
     const inputTextParts = [];
-    const inputSegments = await processMessages({
-      messages: inputMessages,
+    const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
+      messages: effectiveSuiteInputMessages,
+      searchRoots,
+      repoRootPath,
+      guidelinePatterns,
+      guidelinePaths,
+      treatFileSegmentsAsGuidelines: true,
+      textParts: inputTextParts,
+      messageType: "input",
+      verbose
+    }) : [];
+    const testInputSegments = await processMessages({
+      messages: testInputMessages,
       searchRoots,
       repoRootPath,
       guidelinePatterns,
@@ -2895,6 +2945,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       messageType: "input",
       verbose
     });
+    const inputSegments = [...suiteInputSegments, ...testInputSegments];
     const outputSegments = hasExpectedMessages ? await processExpectedMessages({
       messages: expectedMessages,
       searchRoots,
@@ -2942,7 +2993,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       ...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
       ...userFilePaths
     ];
-    const caseWorkspace = parseWorkspaceConfig(evalcase.workspace, evalFileDir);
+    const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
     const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
     const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
     const caseTargets = extractTargetsFromTestCase(evalcase);
@@ -2973,6 +3024,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
         for (const guidelinePath of testCase.guideline_paths) {
           console.log(`    - ${guidelinePath}`);
         }
+      } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
+        console.log("  No guidelines found (guideline_patterns not configured)");
       } else {
         console.log("  No guidelines found");
       }
@@ -3072,6 +3125,26 @@ function parseResetConfig(raw) {
     ...afterEach !== void 0 && { after_each: afterEach }
   };
 }
+async function resolveWorkspaceConfig(raw, evalFileDir) { // Resolves a workspace setting given either inline or as a string path to an external workspace file; returns whatever parseWorkspaceConfig returns.
+  if (typeof raw === "string") { // A string is treated as a file path resolved against evalFileDir.
+    const workspaceFilePath = path8.resolve(evalFileDir, raw);
+    let content;
+    try {
+      content = await readFile7(workspaceFilePath, "utf8");
+    } catch { // NOTE(review): every read failure (not only a missing file) is reported as "not found" and the original error is dropped.
+      throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
+    }
+    const parsed = parse2(content); // presumably a YAML parser, judging by the error message below; confirm against the import.
+    if (!isJsonObject(parsed)) { // Anything other than an object at the top level is rejected.
+      throw new Error(
+        `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
+      );
+    }
+    const workspaceFileDir = path8.dirname(workspaceFilePath); // The external file is parsed with its own directory as the base, not evalFileDir.
+    return parseWorkspaceConfig(parsed, workspaceFileDir);
+  }
+  return parseWorkspaceConfig(raw, evalFileDir); // Non-string values are passed straight through.
+}
 function parseWorkspaceConfig(raw, evalFileDir) {
   if (!isJsonObject(raw)) return void 0;
   const obj = raw;
@@ -7174,6 +7247,9 @@ function getSubagentsRoot() {
 function getTraceStateRoot() { // Returns the "trace-state" path under the directory reported by getAgentvHome().
   return path21.join(getAgentvHome(), "trace-state");
 }
+function getWorkspacePoolRoot() { // Returns the "workspace-pool" path under the directory reported by getAgentvHome().
+  return path21.join(getAgentvHome(), "workspace-pool");
+}
 // src/evaluation/providers/vscode/dispatch/constants.ts
 var DEFAULT_LOCK_NAME = "subagent.lock";
@@ -7996,8 +8072,6 @@ var AGENTV_REQUEST_TEMPLATE = `[[ ## task ## ]]
 **IMPORTANT**: Follow these exact steps:
 1. Create and write your complete response to: {{responseFileTmp}}
-    - All intended file outputs/changes MUST be written in your response file.
-    - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
 2. When completely finished, run these PowerShell commands to signal completion:
 \`\`\`
 Move-Item -LiteralPath '{{responseFileTmp}}' -Destination '{{responseFileFinal}}'
@@ -8014,8 +8088,6 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
 **IMPORTANT**: Follow these exact steps:
 1. Create and write your complete response to: {{responseFileTmp}}
-    - All intended file outputs/changes MUST be written in your response file.
-    - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
 2. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
 3. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
 `;
@@ -8628,16 +8700,16 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
   });
 }
 async function execShellWithStdin(command, stdinPayload, options = {}) {
-  const { mkdir: mkdir14, readFile: readFile12, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
+  const { mkdir: mkdir15, readFile: readFile13, rm: rm7, writeFile: writeFile10 } = await import("node:fs/promises");
   const { tmpdir: tmpdir3 } = await import("node:os");
-  const path41 = await import("node:path");
+  const path42 = await import("node:path");
   const { randomUUID: randomUUID8 } = await import("node:crypto");
-  const dir = path41.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
-  await mkdir14(dir, { recursive: true });
-  const stdinPath = path41.join(dir, "stdin.txt");
-  const stdoutPath = path41.join(dir, "stdout.txt");
-  const stderrPath = path41.join(dir, "stderr.txt");
-  await writeFile9(stdinPath, stdinPayload, "utf8");
+  const dir = path42.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
+  await mkdir15(dir, { recursive: true });
+  const stdinPath = path42.join(dir, "stdin.txt");
+  const stdoutPath = path42.join(dir, "stdout.txt");
+  const stderrPath = path42.join(dir, "stderr.txt");
+  await writeFile10(stdinPath, stdinPayload, "utf8");
   const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
   const { spawn: spawn4 } = await import("node:child_process");
   try {
@@ -8666,11 +8738,11 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
         resolve(code ?? 0);
       });
     });
-    const stdout = (await readFile12(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
-    const stderr = (await readFile12(stderrPath, "utf8")).replace(/\r\n/g, "\n");
+    const stdout = (await readFile13(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
+    const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
     return { stdout, stderr, exitCode };
   } finally {
-    await rm6(dir, { recursive: true, force: true });
+    await rm7(dir, { recursive: true, force: true });
   }
 }
@@ -8988,7 +9060,7 @@ var CodeEvaluator = class {
       outputPath,
       guidelineFiles: context.evalCase.guideline_paths,
       inputFiles: context.evalCase.file_paths.filter(
-        (path41) => !context.evalCase.guideline_paths.includes(path41)
+        (path42) => !context.evalCase.guideline_paths.includes(path42)
       ),
       input: context.evalCase.input,
       trace: context.trace ?? null,
@@ -9238,6 +9310,8 @@ ${context.fileChanges}`;
       };
     } catch (e) {
       const message = e instanceof Error ? e.message : String(e);
+      const evalName = context.evaluator?.name ?? "llm-judge";
+      console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
       return {
         score: 0,
         verdict: "skip",
@@ -9266,24 +9340,39 @@ ${context.fileChanges}`;
       systemPrompt,
       target: judgeProvider.targetName
     };
-    const { data, tokenUsage } = await this.runWithRetry({
-      context,
-      judgeProvider,
-      systemPrompt,
-      userPrompt: prompt,
-      schema: rubricEvaluationSchema
-    });
-    const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
-    return {
-      score,
-      verdict,
-      hits,
-      misses,
-      expectedAspectCount: rubrics.length,
-      reasoning: data.overall_reasoning,
-      evaluatorRawRequest,
-      tokenUsage
-    };
+    try {
+      const { data, tokenUsage } = await this.runWithRetry({
+        context,
+        judgeProvider,
+        systemPrompt,
+        userPrompt: prompt,
+        schema: rubricEvaluationSchema
+      });
+      const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
+      return {
+        score,
+        verdict,
+        hits,
+        misses,
+        expectedAspectCount: rubrics.length,
+        reasoning: data.overall_reasoning,
+        evaluatorRawRequest,
+        tokenUsage
+      };
+    } catch (e) {
+      const message = e instanceof Error ? e.message : String(e);
+      const evalName = context.evaluator?.name ?? "llm-judge";
+      console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
+      return {
+        score: 0,
+        verdict: "skip",
+        hits: [],
+        misses: [`Judge parse failure after 3 attempts: ${message}`],
+        expectedAspectCount: rubrics.length,
+        reasoning: `Judge parse failure after 3 attempts: ${message}`,
+        evaluatorRawRequest
+      };
+    }
   }
   /**
    * Evaluate using score-range rubrics (analytic rubric scoring).
@@ -9297,25 +9386,40 @@ ${context.fileChanges}`;
       systemPrompt,
       target: judgeProvider.targetName
     };
-    const { data, tokenUsage } = await this.runWithRetry({
-      context,
-      judgeProvider,
-      systemPrompt,
-      userPrompt: prompt,
-      schema: scoreRangeEvaluationSchema
-    });
-    const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
-    return {
-      score,
-      verdict,
-      hits,
-      misses,
-      expectedAspectCount: rubrics.length,
-      reasoning: data.overall_reasoning,
-      evaluatorRawRequest,
-      details,
-      tokenUsage
-    };
+    try {
+      const { data, tokenUsage } = await this.runWithRetry({
+        context,
+        judgeProvider,
+        systemPrompt,
+        userPrompt: prompt,
+        schema: scoreRangeEvaluationSchema
+      });
+      const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
+      return {
+        score,
+        verdict,
+        hits,
+        misses,
+        expectedAspectCount: rubrics.length,
+        reasoning: data.overall_reasoning,
+        evaluatorRawRequest,
+        details,
+        tokenUsage
+      };
+    } catch (e) {
+      const message = e instanceof Error ? e.message : String(e);
+      const evalName = context.evaluator?.name ?? "llm-judge";
+      console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
+      return {
+        score: 0,
+        verdict: "skip",
+        hits: [],
+        misses: [`Judge parse failure after 3 attempts: ${message}`],
+        expectedAspectCount: rubrics.length,
+        reasoning: `Judge parse failure after 3 attempts: ${message}`,
+        evaluatorRawRequest
+      };
+    }
   }
   /**
    * Build prompt for score-range rubric evaluation.
@@ -9601,19 +9705,13 @@ var CompositeEvaluator = class {
   runWeightedAverage(results, weights) {
     let totalWeight = 0;
     let weightedSum = 0;
+    let evaluatedCount = 0;
     const allHits = [];
     const allMisses = [];
     const reasoningParts = [];
     const scores = [];
     for (const member of results) {
       const weight = weights?.[member.id] ?? 1;
-      totalWeight += weight;
-      weightedSum += member.result.score * weight;
-      allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
-      allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
-      if (member.result.reasoning) {
-        reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
-      }
       scores.push({
         name: member.id,
         type: member.type,
@@ -9628,6 +9726,32 @@ var CompositeEvaluator = class {
         details: member.result.details,
         tokenUsage: member.result.tokenUsage
       });
+      if (member.result.verdict === "skip") {
+        continue;
+      }
+      evaluatedCount++;
+      totalWeight += weight;
+      weightedSum += member.result.score * weight;
+      allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
+      allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
+      if (member.result.reasoning) {
+        reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
+      }
+    }
+    if (evaluatedCount === 0 && results.length > 0) {
+      return {
+        score: 0,
+        verdict: "skip",
+        hits: [],
+        misses: [],
+        expectedAspectCount: 1,
+        reasoning: "All evaluators skipped (infrastructure failure)",
+        evaluatorRawRequest: {
+          aggregator: "weighted_average",
+          ...weights ? { weights } : {}
+        },
+        scores
+      };
     }
     const finalScore = totalWeight > 0 ? weightedSum / totalWeight : 0;
     return {
@@ -9651,19 +9775,8 @@ var CompositeEvaluator = class {
     const reasoningParts = [];
     let passingCount = 0;
     let borderlineCount = 0;
+    let evaluatedCount = 0;
     for (const member of results) {
-      const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
-      if (isPassing) {
-        passingCount++;
-        if (member.result.verdict === "borderline") {
-          borderlineCount++;
-        }
-      }
-      allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
-      allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
-      if (member.result.reasoning) {
-        reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
-      }
       scores.push({
         name: member.id,
         type: member.type,
@@ -9677,8 +9790,39 @@ var CompositeEvaluator = class {
         details: member.result.details,
         tokenUsage: member.result.tokenUsage
       });
+      if (member.result.verdict === "skip") {
+        continue;
+      }
+      evaluatedCount++;
+      const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
+      if (isPassing) {
+        passingCount++;
+        if (member.result.verdict === "borderline") {
+          borderlineCount++;
+        }
+      }
+      allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
+      allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
+      if (member.result.reasoning) {
+        reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
+      }
+    }
+    if (evaluatedCount === 0 && results.length > 0) {
+      return {
+        score: 0,
+        verdict: "skip",
+        hits: [],
+        misses: [],
+        expectedAspectCount: 1,
+        reasoning: "All evaluators skipped (infrastructure failure)",
+        evaluatorRawRequest: {
+          aggregator: "threshold",
+          threshold
+        },
+        scores
+      };
     }
-    const totalCount = results.length;
+    const totalCount = evaluatedCount;
     const score = totalCount > 0 ? passingCount / totalCount : 0;
     const pass = score >= threshold;
     if (pass && borderlineCount > 0) {
@@ -10186,115 +10330,115 @@ var FieldAccuracyEvaluator = class {
    * Evaluate a single field against the expected value.
    */
   evaluateField(fieldConfig, candidateData, expectedData) {
-    const { path: path41, match, required = true, weight = 1 } = fieldConfig;
-    const candidateValue = resolvePath(candidateData, path41);
-    const expectedValue = resolvePath(expectedData, path41);
+    const { path: path42, match, required = true, weight = 1 } = fieldConfig;
+    const candidateValue = resolvePath(candidateData, path42);
+    const expectedValue = resolvePath(expectedData, path42);
     if (expectedValue === void 0) {
       return {
-        path: path41,
+        path: path42,
         score: 1,
         // No expected value means no comparison needed
         weight,
         hit: true,
-        message: `${path41}: no expected value`
+        message: `${path42}: no expected value`
       };
     }
     if (candidateValue === void 0) {
       if (required) {
         return {
-          path: path41,
+          path: path42,
           score: 0,
           weight,
           hit: false,
-          message: `${path41} (required, missing)`
+          message: `${path42} (required, missing)`
         };
       }
       return {
-        path: path41,
+        path: path42,
         score: 1,
         // Don't penalize missing optional fields
         weight: 0,
         // Zero weight means it won't affect the score
         hit: true,
-        message: `${path41}: optional field missing`
+        message: `${path42}: optional field missing`
       };
     }
     switch (match) {
       case "exact":
-        return this.compareExact(path41, candidateValue, expectedValue, weight);
+        return this.compareExact(path42, candidateValue, expectedValue, weight);
       case "numeric_tolerance":
         return this.compareNumericTolerance(
-          path41,
+          path42,
           candidateValue,
           expectedValue,
           fieldConfig,
           weight
         );
       case "date":
-        return this.compareDate(path41, candidateValue, expectedValue, fieldConfig, weight);
+        return this.compareDate(path42, candidateValue, expectedValue, fieldConfig, weight);
       default:
         return {
-          path: path41,
+          path: path42,
           score: 0,
           weight,
           hit: false,
-          message: `${path41}: unknown match type "${match}"`
+          message: `${path42}: unknown match type "${match}"`
         };
     }
   }
   /**
    * Exact equality comparison.
    */
-  compareExact(path41, candidateValue, expectedValue, weight) {
+  compareExact(path42, candidateValue, expectedValue, weight) {
     if (deepEqual(candidateValue, expectedValue)) {
       return {
-        path: path41,
+        path: path42,
         score: 1,
         weight,
         hit: true,
-        message: path41
+        message: path42
       };
     }
     if (typeof candidateValue !== typeof expectedValue) {
       return {
-        path: path41,
+        path: path42,
         score: 0,
         weight,
         hit: false,
-        message: `${path41} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
+        message: `${path42} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
       };
     }
     return {
-      path: path41,
+      path: path42,
       score: 0,
       weight,
       hit: false,
-      message: `${path41} (value mismatch)`
+      message: `${path42} (value mismatch)`
     };
   }
   /**
    * Numeric comparison with absolute or relative tolerance.
    */
-  compareNumericTolerance(path41, candidateValue, expectedValue, fieldConfig, weight) {
+  compareNumericTolerance(path42, candidateValue, expectedValue, fieldConfig, weight) {
     const { tolerance = 0, relative = false } = fieldConfig;
     const candidateNum = toNumber2(candidateValue);
     const expectedNum = toNumber2(expectedValue);
     if (candidateNum === null || expectedNum === null) {
       return {
-        path: path41,
+        path: path42,
         score: 0,
         weight,
         hit: false,
-        message: `${path41} (non-numeric value)`
+        message: `${path42} (non-numeric value)`
       };
     }
     if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
       return {
-        path: path41,
+        path: path42,
         score: 0,
         weight,
         hit: false,
-        message: `${path41} (invalid numeric value)`
+        message: `${path42} (invalid numeric value)`
       };
     }
     const diff = Math.abs(candidateNum - expectedNum);
@@ -10307,61 +10451,61 @@ var FieldAccuracyEvaluator = class {
     }
     if (withinTolerance) {
       return {
-        path: path41,
+        path: path42,
         score: 1,
         weight,
         hit: true,
-        message: `${path41} (within tolerance: diff=${diff.toFixed(2)})`
+        message: `${path42} (within tolerance: diff=${diff.toFixed(2)})`
       };
     }
     return {
-      path: path41,
+      path: path42,
       score: 0,
       weight,
       hit: false,
-      message: `${path41} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
+      message: `${path42} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
     };
   }
   /**
    * Date comparison with format normalization.
    */
-  compareDate(path41, candidateValue, expectedValue, fieldConfig, weight) {
+  compareDate(path42, candidateValue, expectedValue, fieldConfig, weight) {
     const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
     const candidateDate = parseDate(String(candidateValue), formats);
     const expectedDate = parseDate(String(expectedValue), formats);
     if (candidateDate === null) {
       return {
-        path: path41,
+        path: path42,
         score: 0,
         weight,
         hit: false,
-        message: `${path41} (unparseable candidate date)`
+        message: `${path42} (unparseable candidate date)`
       };
     }
     if (expectedDate === null) {
       return {
-        path: path41,
+        path: path42,
         score: 0,
         weight,
         hit: false,
-        message: `${path41} (unparseable expected date)`
+        message: `${path42} (unparseable expected date)`
       };
     }
     if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
       return {
-        path: path41,
+        path: path42,
         score: 1,
         weight,
         hit: true,
-        message: path41
+        message: path42
       };
     }
     return {
-      path: path41,
+      path: path42,
       score: 0,
       weight,
       hit: false,
-      message: `${path41} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
+      message: `${path42} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
     };
   }
   /**
@@ -10402,11 +10546,11 @@ var FieldAccuracyEvaluator = class {
     };
   }
 };
-function resolvePath(obj, path41) {
-  if (!path41 || !obj) {
+function resolvePath(obj, path42) {
+  if (!path42 || !obj) {
     return void 0;
   }
-  const parts = path41.split(/\.|\[|\]/).filter((p) => p.length > 0);
+  const parts = path42.split(/\.|\[|\]/).filter((p) => p.length > 0);
   let current = obj;
   for (const part of parts) {
     if (current === null || current === void 0) {
@@ -11224,8 +11368,8 @@ var TokenUsageEvaluator = class {
 };
 // src/evaluation/evaluators/tool-trajectory.ts
-function getNestedValue(obj, path41) {
-  const parts = path41.split(".");
+function getNestedValue(obj, path42) {
+  const parts = path42.split(".");
   let current = obj;
   for (const part of parts) {
     if (current === null || current === void 0 || typeof current !== "object") {
@@ -11786,9 +11930,9 @@ function runEqualsAssertion(output, value) {
 }
 // src/evaluation/orchestrator.ts
-import { createHash as createHash2, randomUUID as randomUUID7 } from "node:crypto";
-import { mkdir as mkdir12, stat as stat7 } from "node:fs/promises";
-import path38 from "node:path";
+import { createHash as createHash3, randomUUID as randomUUID7 } from "node:crypto";
+import { mkdir as mkdir13, stat as stat7 } from "node:fs/promises";
+import path39 from "node:path";
 import micromatch4 from "micromatch";
 // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
@@ -12658,17 +12802,283 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
   }
 }
-// src/evaluation/workspace/repo-manager.ts
+// src/evaluation/workspace/pool-manager.ts
 import { execFile } from "node:child_process";
 import { createHash } from "node:crypto";
 import { existsSync as existsSync2 } from "node:fs";
-import { mkdir as mkdir11, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
+import { cp as cp2, mkdir as mkdir11, readFile as readFile11, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
 import path36 from "node:path";
 import { promisify as promisify5 } from "node:util";
 var execFileAsync = promisify5(execFile);
+function gitEnv() {
+  const env = { ...process.env };
+  for (const key of Object.keys(env)) {
+    if (key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND") {
+      delete env[key];
+    }
+  }
+  return {
+    ...env,
+    GIT_TERMINAL_PROMPT: "0",
+    GIT_ASKPASS: "",
+    GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
+  };
+}
+async function git(args, opts) {
+  const { stdout } = await execFileAsync("git", args, {
+    cwd: opts?.cwd,
+    timeout: opts?.timeout ?? 3e5,
+    env: gitEnv(),
+    maxBuffer: 50 * 1024 * 1024
+  });
+  return stdout.trim();
+}
+function normalizeRepoForFingerprint(repo) {
+  const source = repo.source.type === "git" ? { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") } : { type: "local", path: repo.source.path };
+  const result = {
+    path: repo.path,
+    source,
+    ref: repo.checkout?.ref ?? "HEAD"
+  };
+  if (repo.clone?.depth !== void 0) {
+    result.depth = repo.clone.depth;
+  }
+  if (repo.clone?.filter !== void 0) {
+    result.filter = repo.clone.filter;
+  }
+  if (repo.clone?.sparse?.length) {
+    result.sparse = [...repo.clone.sparse].sort();
+  }
+  return result;
+}
+function computeWorkspaceFingerprint(templatePath, repos) {
+  const canonical = {
+    templatePath: templatePath ?? null,
+    repos: [...repos].sort((a, b) => a.path.localeCompare(b.path)).map(normalizeRepoForFingerprint)
+  };
+  return createHash("sha256").update(JSON.stringify(canonical)).digest("hex");
+}
+async function copyDirectoryRecursive2(src, dest, skipDirs) {
+  await mkdir11(dest, { recursive: true });
+  const entries = await readdir4(src, { withFileTypes: true });
+  for (const entry of entries) {
+    const srcPath = path36.join(src, entry.name);
+    const destPath = path36.join(dest, entry.name);
+    if (entry.name === ".git") {
+      continue;
+    }
+    if (entry.isDirectory()) {
+      if (skipDirs?.has(entry.name)) {
+        continue;
+      }
+      await copyDirectoryRecursive2(srcPath, destPath, skipDirs);
+    } else {
+      await cp2(srcPath, destPath, { preserveTimestamps: true, force: true });
+    }
+  }
+}
+var WorkspacePoolManager = class {
+  poolRoot;
+  constructor(poolRoot) {
+    this.poolRoot = poolRoot ?? getWorkspacePoolRoot();
+  }
+  /**
+   * Acquire a workspace slot from the pool.
+   *
+   * 1. Compute fingerprint from template + repos
+   * 2. Check drift (compare stored metadata.json fingerprint vs computed)
+   * 3. If drift: warn, remove all slots, rematerialize
+   * 4. Acquire a slot (try-lock slot-0, slot-1, ..., up to maxSlots)
+   * 5. If slot exists: reset repos, re-copy template files (skip repo directories)
+   * 6. If new slot: copy template, materialize all repos, write metadata.json
+   * 7. Return the slot (with path, index, isExisting)
+   */
+  async acquireWorkspace(options) {
+    const { templatePath, repos, maxSlots, repoManager } = options;
+    const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
+    const poolDir = path36.join(this.poolRoot, fingerprint);
+    await mkdir11(poolDir, { recursive: true });
+    const drifted = await this.checkDrift(poolDir, fingerprint);
+    if (drifted) {
+      console.warn(
+        `[workspace-pool] Drift detected for fingerprint ${fingerprint.slice(0, 12)}... Removing stale slots.`
+      );
+      await this.removeAllSlots(poolDir);
+    }
+    for (let i = 0; i < maxSlots; i++) {
+      const slotPath = path36.join(poolDir, `slot-${i}`);
+      const lockPath = `${slotPath}.lock`;
+      const locked = await this.tryLock(lockPath);
+      if (!locked) {
+        continue;
+      }
+      const slotExists = existsSync2(slotPath);
+      if (slotExists) {
+        await this.resetSlot(slotPath, templatePath, repos);
+        return {
+          index: i,
+          path: slotPath,
+          isExisting: true,
+          lockPath,
+          fingerprint,
+          poolDir
+        };
+      }
+      await mkdir11(slotPath, { recursive: true });
+      if (templatePath) {
+        await copyDirectoryRecursive2(templatePath, slotPath);
+      }
+      if (repos.length > 0) {
+        await repoManager.materializeAll(repos, slotPath);
+      }
+      await this.writeMetadata(poolDir, fingerprint, templatePath ?? null, repos);
+      return {
+        index: i,
+        path: slotPath,
+        isExisting: false,
+        lockPath,
+        fingerprint,
+        poolDir
+      };
+    }
+    throw new Error(
+      `All ${maxSlots} pool slots are locked for fingerprint ${fingerprint.slice(0, 12)}...`
+    );
+  }
+  /** Remove lock file to release a slot. */
+  async releaseSlot(slot) {
+    try {
+      await unlink(slot.lockPath);
+    } catch {
+    }
+  }
+  /**
+   * Try to acquire a PID-based lock file.
+   * On EEXIST, read PID and check if process is alive. If dead, stale lock — remove and retry.
+   * Returns true if lock acquired, false if slot is actively locked.
+   * Uses a bounded loop (max 3 attempts) to avoid unbounded recursion.
+   */
+  async tryLock(lockPath) {
+    for (let attempt = 0; attempt < 3; attempt++) {
+      try {
+        await writeFile7(lockPath, String(process.pid), { flag: "wx" });
+        return true;
+      } catch (err) {
+        if (err.code !== "EEXIST") {
+          throw err;
+        }
+        try {
+          const pidStr = await readFile11(lockPath, "utf-8");
+          const pid = Number.parseInt(pidStr.trim(), 10);
+          if (!Number.isNaN(pid)) {
+            try {
+              process.kill(pid, 0);
+              return false;
+            } catch {
+              await unlink(lockPath).catch(() => {
+              });
+              continue;
+            }
+          }
+        } catch {
+        }
+        return false;
+      }
+    }
+    return false;
+  }
+  /**
+   * Check if the stored fingerprint in metadata.json differs from the computed one.
+   * Returns true if drifted, false otherwise.
+   * Returns false (no drift) if metadata.json doesn't exist (first use).
+   */
+  async checkDrift(poolDir, fingerprint) {
+    const metadataPath = path36.join(poolDir, "metadata.json");
+    try {
+      const raw = await readFile11(metadataPath, "utf-8");
+      const metadata = JSON.parse(raw);
+      return metadata.fingerprint !== fingerprint;
+    } catch {
+      return false;
+    }
+  }
+  /** Write metadata.json with fingerprint, inputs, and timestamp. */
+  async writeMetadata(poolDir, fingerprint, templatePath, repos) {
+    const metadata = {
+      fingerprint,
+      templatePath,
+      repos,
+      createdAt: (/* @__PURE__ */ new Date()).toISOString()
+    };
+    await writeFile7(path36.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
+  }
+  /** Remove all slot directories and their lock files from a pool directory. */
+  async removeAllSlots(poolDir) {
+    const entries = await readdir4(poolDir);
+    for (const entry of entries) {
+      if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
+        const lockPath = path36.join(poolDir, `${entry}.lock`);
+        if (existsSync2(lockPath)) {
+          try {
+            const pidStr = await readFile11(lockPath, "utf-8");
+            const pid = Number.parseInt(pidStr.trim(), 10);
+            if (!Number.isNaN(pid)) {
+              try {
+                process.kill(pid, 0);
+                console.warn(`[workspace-pool] Skipping slot ${entry}: locked by PID ${pid}`);
+                continue;
+              } catch {
+              }
+            }
+          } catch {
+          }
+        }
+        await rm5(path36.join(poolDir, entry), { recursive: true, force: true });
+        await rm5(lockPath, { force: true }).catch(() => {
+        });
+      }
+    }
+    await rm5(path36.join(poolDir, "metadata.json"), { force: true }).catch(() => {
+    });
+  }
+  /**
+   * Reset an existing slot for reuse:
+   * 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
+   * 2. Re-copy template files (skip repo directories)
+   */
+  async resetSlot(slotPath, templatePath, repos) {
+    for (const repo of repos) {
+      const repoDir = path36.join(slotPath, repo.path);
+      if (!existsSync2(repoDir)) {
+        continue;
+      }
+      const ref = repo.checkout?.ref ?? "HEAD";
+      await git(["reset", "--hard", ref], { cwd: repoDir });
+      await git(["clean", "-fd"], { cwd: repoDir });
+    }
+    if (templatePath) {
+      const repoDirNames = new Set(
+        repos.map((r) => {
+          const normalized = r.path.replace(/^\.\//, "");
+          return normalized.split("/")[0];
+        })
+      );
+      await copyDirectoryRecursive2(templatePath, slotPath, repoDirNames);
+    }
+  }
+};
+// src/evaluation/workspace/repo-manager.ts
+import { execFile as execFile2 } from "node:child_process";
+import { createHash as createHash2 } from "node:crypto";
+import { existsSync as existsSync3 } from "node:fs";
+import { mkdir as mkdir12, rm as rm6, unlink as unlink2, writeFile as writeFile8 } from "node:fs/promises";
+import path37 from "node:path";
+import { promisify as promisify6 } from "node:util";
+var execFileAsync2 = promisify6(execFile2);
 var DEFAULT_TIMEOUT_MS2 = 3e5;
 var LOCK_TIMEOUT_MS = 6e4;
-function gitEnv() {
+function gitEnv2() {
   const env = { ...process.env };
   for (const key of Object.keys(env)) {
     if (key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND") {
@@ -12684,16 +13094,16 @@ function gitEnv() {
 }
 function cacheKey(source) {
   const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
-  return createHash("sha256").update(raw).digest("hex");
+  return createHash2("sha256").update(raw).digest("hex");
 }
 function getSourceUrl(source) {
   return source.type === "git" ? source.url : source.path;
 }
-async function git(args, opts) {
-  const { stdout } = await execFileAsync("git", args, {
+async function git2(args, opts) {
+  const { stdout } = await execFileAsync2("git", args, {
     cwd: opts?.cwd,
     timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
-    env: gitEnv(),
+    env: gitEnv2(),
     maxBuffer: 50 * 1024 * 1024
     // 50MB
   });
@@ -12703,7 +13113,7 @@ async function acquireLock(lockPath) {
   const start = Date.now();
   while (Date.now() - start < LOCK_TIMEOUT_MS) {
     try {
-      await writeFile7(lockPath, String(process.pid), { flag: "wx" });
+      await writeFile8(lockPath, String(process.pid), { flag: "wx" });
       return;
     } catch (err) {
       if (err.code === "EEXIST") {
@@ -12717,7 +13127,7 @@ async function acquireLock(lockPath) {
 }
 async function releaseLock(lockPath) {
   try {
-    await unlink(lockPath);
+    await unlink2(lockPath);
   } catch {
   }
 }
@@ -12731,16 +13141,12 @@ var RepoManager = class {
   async runGit(args, opts) {
     const startedAt = Date.now();
     if (this.verbose) {
-      console.log(
-        `[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`
-      );
+      console.log(`[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`);
     }
     try {
-      const output = await git(args, opts);
+      const output = await git2(args, opts);
       if (this.verbose) {
-        console.log(
-          `[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`
-        );
+        console.log(`[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`);
       }
       return output;
     } catch (error) {
@@ -12760,9 +13166,9 @@ var RepoManager = class {
    */
   async ensureCache(source, depth, resolve) {
     const key = cacheKey(source);
-    const cachePath = path36.join(this.cacheDir, key);
+    const cachePath = path37.join(this.cacheDir, key);
     const lockPath = `${cachePath}.lock`;
-    const cacheExists = existsSync2(path36.join(cachePath, "HEAD"));
+    const cacheExists = existsSync3(path37.join(cachePath, "HEAD"));
     if (this.verbose) {
       console.log(
         `[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve ?? "remote"} cache=${cachePath}`
@@ -12780,13 +13186,11 @@ var RepoManager = class {
         `No cache found for \`${url}\`. Run \`agentv cache add --url ${url} --from <local-path>\` to seed it.`
       );
     }
-    await mkdir11(this.cacheDir, { recursive: true });
+    await mkdir12(this.cacheDir, { recursive: true });
     const lockStartedAt = Date.now();
     await acquireLock(lockPath);
     if (this.verbose) {
-      console.log(
-        `[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`
-      );
+      console.log(`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`);
     }
     try {
       if (cacheExists) {
@@ -12824,7 +13228,7 @@ var RepoManager = class {
    * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
    */
   async materialize(repo, workspacePath) {
-    const targetDir = path36.join(workspacePath, repo.path);
+    const targetDir = path37.join(workspacePath, repo.path);
     const startedAt = Date.now();
     if (this.verbose) {
       console.log(
@@ -12919,14 +13323,14 @@ var RepoManager = class {
   async reset(repos, workspacePath, strategy) {
     if (strategy === "recreate") {
       for (const repo of repos) {
-        const targetDir = path36.join(workspacePath, repo.path);
-        await rm5(targetDir, { recursive: true, force: true });
+        const targetDir = path37.join(workspacePath, repo.path);
+        await rm6(targetDir, { recursive: true, force: true });
       }
       await this.materializeAll(repos, workspacePath);
       return;
     }
     for (const repo of repos) {
-      const targetDir = path36.join(workspacePath, repo.path);
+      const targetDir = path37.join(workspacePath, repo.path);
       await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
       await this.runGit(["clean", "-fd"], { cwd: targetDir });
     }
@@ -12938,21 +13342,21 @@ var RepoManager = class {
   async seedCache(localPath, remoteUrl, opts) {
     const source = { type: "git", url: remoteUrl };
     const key = cacheKey(source);
-    const cachePath = path36.join(this.cacheDir, key);
+    const cachePath = path37.join(this.cacheDir, key);
     const lockPath = `${cachePath}.lock`;
-    await mkdir11(this.cacheDir, { recursive: true });
+    await mkdir12(this.cacheDir, { recursive: true });
     await acquireLock(lockPath);
     try {
-      if (existsSync2(path36.join(cachePath, "HEAD"))) {
+      if (existsSync3(path37.join(cachePath, "HEAD"))) {
         if (!opts?.force) {
           throw new Error(
             `Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
           );
         }
-        await rm5(cachePath, { recursive: true, force: true });
+        await rm6(cachePath, { recursive: true, force: true });
       }
-      await git(["clone", "--mirror", "--bare", localPath, cachePath]);
-      await git(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
+      await git2(["clone", "--mirror", "--bare", localPath, cachePath]);
+      await git2(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
     } finally {
       await releaseLock(lockPath);
     }
@@ -12960,41 +13364,41 @@ var RepoManager = class {
   }
   /** Remove the entire cache directory. */
   async cleanCache() {
-    await rm5(this.cacheDir, { recursive: true, force: true });
+    await rm6(this.cacheDir, { recursive: true, force: true });
   }
 };
 // src/evaluation/workspace/resolve.ts
-import { readdir as readdir4, stat as stat6 } from "node:fs/promises";
-import path37 from "node:path";
+import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
+import path38 from "node:path";
 async function resolveWorkspaceTemplate(templatePath) {
   if (!templatePath) {
     return void 0;
   }
-  const resolved = path37.resolve(templatePath);
+  const resolved = path38.resolve(templatePath);
   const stats = await stat6(resolved);
   if (stats.isFile()) {
     return {
-      dir: path37.dirname(resolved),
+      dir: path38.dirname(resolved),
       workspaceFile: resolved
     };
   }
   if (!stats.isDirectory()) {
     throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
   }
-  const entries = await readdir4(resolved);
+  const entries = await readdir5(resolved);
   const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
   if (workspaceFiles.length === 1) {
     return {
       dir: resolved,
-      workspaceFile: path37.join(resolved, workspaceFiles[0])
+      workspaceFile: path38.join(resolved, workspaceFiles[0])
     };
   }
   if (workspaceFiles.length > 1) {
     const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
     return {
       dir: resolved,
-      workspaceFile: conventionFile ? path37.join(resolved, conventionFile) : void 0
+      workspaceFile: conventionFile ? path38.join(resolved, conventionFile) : void 0
     };
   }
   return { dir: resolved };
@@ -13076,7 +13480,10 @@ async function runEvaluation(options) {
     trials,
     streamCallbacks,
     totalBudgetUsd,
-    failOnError
+    failOnError,
+    poolWorkspaces,
+    poolMaxSlots: configPoolMaxSlots,
+    workspace: userWorkspacePath
   } = options;
   let useCache = options.useCache;
   if (trials && trials.count > 1 && useCache) {
@@ -13150,7 +13557,7 @@ async function runEvaluation(options) {
   ];
   const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveJudgeProvider);
   const typeRegistry = createBuiltinRegistry();
-  const discoveryBaseDir = evalFilePath ? path38.dirname(path38.resolve(evalFilePath)) : process.cwd();
+  const discoveryBaseDir = evalFilePath ? path39.dirname(path39.resolve(evalFilePath)) : process.cwd();
   const evalDir = discoveryBaseDir;
   await discoverAssertions(typeRegistry, discoveryBaseDir);
   const providerRegistry = createBuiltinProviderRegistry();
@@ -13212,13 +13619,19 @@ async function runEvaluation(options) {
     }
   };
   const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
-  const hasSharedWorkspace = !!(workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
+  if (userWorkspacePath && isPerTestIsolation) {
+    throw new Error(
+      "--workspace is incompatible with isolation: per_test. Use isolation: shared (default)."
+    );
+  }
+  const hasSharedWorkspace = !!(userWorkspacePath || workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
+  const usePool = poolWorkspaces === true && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !userWorkspacePath;
   const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
-  const workers = hasSharedWorkspace ? 1 : requestedWorkers;
+  const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
   setupLog(
-    `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
+    `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
   );
-  if (hasSharedWorkspace && requestedWorkers > 1) {
+  if (hasSharedWorkspace && !usePool && requestedWorkers > 1) {
     console.warn(
       `Warning: Shared workspace requires sequential execution. Overriding workers from ${requestedWorkers} to 1.`
     );
@@ -13227,7 +13640,37 @@ async function runEvaluation(options) {
   let sharedWorkspacePath;
   let sharedBaselineCommit;
   let beforeAllOutput;
-  if (workspaceTemplate) {
+  let poolManager;
+  let poolSlot;
+  const poolSlots = [];
+  const availablePoolSlots = [];
+  const poolSlotBaselines = /* @__PURE__ */ new Map();
+  const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
+  if (userWorkspacePath) {
+    sharedWorkspacePath = userWorkspacePath;
+    setupLog(`using user-provided workspace: ${userWorkspacePath}`);
+  } else if (usePool && suiteWorkspace?.repos) {
+    const slotsNeeded = workers;
+    setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
+    poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
+    const poolRepoManager = new RepoManager(void 0, verbose);
+    for (let i = 0; i < slotsNeeded; i++) {
+      const slot = await poolManager.acquireWorkspace({
+        templatePath: workspaceTemplate,
+        repos: suiteWorkspace.repos,
+        maxSlots: poolMaxSlots,
+        repoManager: poolRepoManager
+      });
+      poolSlots.push(slot);
+      setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
+    }
+    if (slotsNeeded === 1) {
+      poolSlot = poolSlots[0];
+      sharedWorkspacePath = poolSlot.path;
+    } else {
+      availablePoolSlots.push(...poolSlots);
+    }
+  } else if (workspaceTemplate) {
     setupLog(`creating shared workspace from template: ${workspaceTemplate}`);
     try {
       sharedWorkspacePath = await createTempWorkspace(workspaceTemplate, evalRunId, "shared");
@@ -13236,288 +13679,344 @@ async function runEvaluation(options) {
       const message = error instanceof Error ? error.message : String(error);
       throw new Error(`Failed to create shared workspace: ${message}`);
     }
+  } else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
+    sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
+    await mkdir13(sharedWorkspacePath, { recursive: true });
+    setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
+  }
+  try {
     if (suiteWorkspaceFile && sharedWorkspacePath) {
-      const copiedWorkspaceFile = path38.join(sharedWorkspacePath, path38.basename(suiteWorkspaceFile));
+      const copiedWorkspaceFile = path39.join(sharedWorkspacePath, path39.basename(suiteWorkspaceFile));
       try {
         await stat7(copiedWorkspaceFile);
         suiteWorkspaceFile = copiedWorkspaceFile;
       } catch {
       }
     }
-  } else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
-    sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
-    await mkdir12(sharedWorkspacePath, { recursive: true });
-    setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
-  }
-  const repoManager = suiteWorkspace?.repos?.length ? new RepoManager(void 0, verbose) : void 0;
-  if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
-    setupLog(`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`);
-    try {
-      await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
-      setupLog("shared repo materialization complete");
-    } catch (error) {
-      const message = error instanceof Error ? error.message : String(error);
-      if (sharedWorkspacePath) {
-        await cleanupWorkspace(sharedWorkspacePath).catch(() => {
-        });
-      }
-      throw new Error(`Failed to materialize repos: ${message}`);
-    }
-  }
-  if (sharedWorkspacePath && suiteWorkspace?.before_all) {
-    const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
-    setupLog(
-      `running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
-    );
-    const scriptContext = {
-      workspacePath: sharedWorkspacePath,
-      testId: "__before_all__",
-      evalRunId,
-      evalDir
-    };
-    try {
-      beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
-      setupLog("shared before_all completed");
-    } catch (error) {
-      const message = error instanceof Error ? error.message : String(error);
-      if (sharedWorkspacePath) {
-        await cleanupWorkspace(sharedWorkspacePath).catch(() => {
-        });
-      }
-      throw new Error(`before_all script failed: ${message}`);
-    }
-  }
-  if (sharedWorkspacePath) {
-    try {
-      sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
-      setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
-    } catch {
-      setupLog("shared baseline initialization skipped (non-fatal)");
-    }
-  }
-  let nextWorkerId = 1;
-  const workerIdByEvalId = /* @__PURE__ */ new Map();
-  let beforeAllOutputAttached = false;
-  let cumulativeBudgetCost = 0;
-  let budgetExhausted = false;
-  let failOnErrorTriggered = false;
-  const promises = filteredEvalCases.map(
-    (evalCase) => limit(async () => {
-      const workerId = nextWorkerId++;
-      workerIdByEvalId.set(evalCase.id, workerId);
-      if (totalBudgetUsd !== void 0 && budgetExhausted) {
-        const budgetResult = {
-          timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
-          testId: evalCase.id,
-          dataset: evalCase.dataset,
-          score: 0,
-          hits: [],
-          misses: [],
-          answer: "",
-          target: target.name,
-          error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
-          budgetExceeded: true,
-          executionStatus: "execution_error",
-          failureStage: "setup",
-          failureReasonCode: "budget_exceeded",
-          executionError: {
-            message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
-            stage: "setup"
-          }
-        };
-        if (onProgress) {
-          await onProgress({
-            workerId,
-            testId: evalCase.id,
-            status: "failed",
-            completedAt: Date.now(),
-            error: budgetResult.error
+    const repoManager = suiteWorkspace?.repos?.length && !usePool && !userWorkspacePath ? new RepoManager(void 0, verbose) : void 0;
+    if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
+      setupLog(
+        `materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
+      );
+      try {
+        await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
+        setupLog("shared repo materialization complete");
+      } catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        if (sharedWorkspacePath && !userWorkspacePath) {
+          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
           });
         }
-        if (onResult) {
-          await onResult(budgetResult);
-        }
-        return budgetResult;
+        throw new Error(`Failed to materialize repos: ${message}`);
       }
-      if (failOnError === true && failOnErrorTriggered) {
-        const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
-        const haltResult = {
-          timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
-          testId: evalCase.id,
-          dataset: evalCase.dataset,
-          score: 0,
-          hits: [],
-          misses: [],
-          answer: "",
-          target: target.name,
-          error: errorMsg,
-          executionStatus: "execution_error",
-          failureStage: "setup",
-          failureReasonCode: "error_threshold_exceeded",
-          executionError: { message: errorMsg, stage: "setup" }
-        };
-        if (onProgress) {
-          await onProgress({
-            workerId,
-            testId: evalCase.id,
-            status: "failed",
-            completedAt: Date.now(),
-            error: haltResult.error
+    }
+    if (sharedWorkspacePath && suiteWorkspace?.before_all) {
+      const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
+      setupLog(
+        `running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
+      );
+      const scriptContext = {
+        workspacePath: sharedWorkspacePath,
+        testId: "__before_all__",
+        evalRunId,
+        evalDir
+      };
+      try {
+        beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
+        setupLog("shared before_all completed");
+      } catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        if (sharedWorkspacePath && !userWorkspacePath) {
+          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
           });
         }
-        if (onResult) {
-          await onResult(haltResult);
-        }
-        return haltResult;
-      }
-      if (onProgress) {
-        await onProgress({
-          workerId,
-          testId: evalCase.id,
-          status: "running",
-          startedAt: Date.now()
-        });
+        throw new Error(`before_all script failed: ${message}`);
       }
-      try {
-        const judgeProvider = await resolveJudgeProvider(target);
-        const runCaseOptions = {
-          evalCase,
-          provider: primaryProvider,
-          target,
-          evaluators: evaluatorRegistry,
-          maxRetries,
-          agentTimeoutMs,
-          cache,
-          useCache,
-          now,
-          judgeProvider,
-          targetResolver,
-          availableTargets,
+    }
+    if (availablePoolSlots.length > 0 && suiteWorkspace?.before_all) {
+      for (const slot of availablePoolSlots) {
+        setupLog(`running before_all on pool slot ${slot.index}`);
+        const scriptContext = {
+          workspacePath: slot.path,
+          testId: "__before_all__",
           evalRunId,
-          keepWorkspaces,
-          cleanupWorkspaces,
-          sharedWorkspacePath,
-          sharedBaselineCommit,
-          suiteWorkspaceFile,
-          streamCallbacks,
-          typeRegistry,
-          repoManager,
           evalDir
         };
-        let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
-        if (totalBudgetUsd !== void 0) {
-          let caseCost;
-          if (result.trials && result.trials.length > 0) {
-            const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
-            if (trialCostSum > 0) {
-              caseCost = trialCostSum;
+        try {
+          const output = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
+          if (!beforeAllOutput) beforeAllOutput = output;
+          setupLog(`before_all completed on pool slot ${slot.index}`);
+        } catch (error) {
+          const message = error instanceof Error ? error.message : String(error);
+          throw new Error(`before_all script failed on pool slot ${slot.index}: ${message}`);
+        }
+      }
+    }
+    if (sharedWorkspacePath) {
+      try {
+        sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
+        setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
+      } catch {
+        setupLog("shared baseline initialization skipped (non-fatal)");
+      }
+    }
+    if (availablePoolSlots.length > 0) {
+      for (const slot of availablePoolSlots) {
+        try {
+          const baseline = await initializeBaseline(slot.path);
+          poolSlotBaselines.set(slot.path, baseline);
+          setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
+        } catch {
+          setupLog(`pool slot ${slot.index} baseline initialization skipped (non-fatal)`);
+        }
+      }
+    }
+    let nextWorkerId = 1;
+    const workerIdByEvalId = /* @__PURE__ */ new Map();
+    let beforeAllOutputAttached = false;
+    let cumulativeBudgetCost = 0;
+    let budgetExhausted = false;
+    let failOnErrorTriggered = false;
+    const promises = filteredEvalCases.map(
+      (evalCase) => limit(async () => {
+        const workerId = nextWorkerId++;
+        workerIdByEvalId.set(evalCase.id, workerId);
+        if (totalBudgetUsd !== void 0 && budgetExhausted) {
+          const budgetResult = {
+            timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
+            testId: evalCase.id,
+            dataset: evalCase.dataset,
+            score: 0,
+            hits: [],
+            misses: [],
+            answer: "",
+            target: target.name,
+            error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
+            budgetExceeded: true,
+            executionStatus: "execution_error",
+            failureStage: "setup",
+            failureReasonCode: "budget_exceeded",
+            executionError: {
+              message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
+              stage: "setup"
             }
-          } else {
-            caseCost = result.costUsd;
+          };
+          if (onProgress) {
+            await onProgress({
+              workerId,
+              testId: evalCase.id,
+              status: "failed",
+              completedAt: Date.now(),
+              error: budgetResult.error
+            });
           }
-          if (caseCost !== void 0) {
-            cumulativeBudgetCost += caseCost;
-            if (cumulativeBudgetCost >= totalBudgetUsd) {
-              budgetExhausted = true;
-            }
+          if (onResult) {
+            await onResult(budgetResult);
           }
+          return budgetResult;
         }
-        if (failOnError === true && result.executionStatus === "execution_error") {
-          failOnErrorTriggered = true;
-        }
-        if (beforeAllOutput && !beforeAllOutputAttached) {
-          result = { ...result, beforeAllOutput };
-          beforeAllOutputAttached = true;
+        if (failOnError === true && failOnErrorTriggered) {
+          const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
+          const haltResult = {
+            timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
+            testId: evalCase.id,
+            dataset: evalCase.dataset,
+            score: 0,
+            hits: [],
+            misses: [],
+            answer: "",
+            target: target.name,
+            error: errorMsg,
+            executionStatus: "execution_error",
+            failureStage: "setup",
+            failureReasonCode: "error_threshold_exceeded",
+            executionError: { message: errorMsg, stage: "setup" }
+          };
+          if (onProgress) {
+            await onProgress({
+              workerId,
+              testId: evalCase.id,
+              status: "failed",
+              completedAt: Date.now(),
+              error: haltResult.error
+            });
+          }
+          if (onResult) {
+            await onResult(haltResult);
+          }
+          return haltResult;
         }
         if (onProgress) {
           await onProgress({
             workerId,
             testId: evalCase.id,
-            status: result.error ? "failed" : "completed",
-            startedAt: 0,
-            // Not used for completed status
-            completedAt: Date.now(),
-            error: result.error
+            status: "running",
+            startedAt: Date.now()
           });
         }
-        if (onResult) {
-          await onResult(result);
+        const testPoolSlot = availablePoolSlots.length > 0 ? availablePoolSlots.pop() : void 0;
+        const testWorkspacePath = testPoolSlot?.path ?? sharedWorkspacePath;
+        const testBaselineCommit = testPoolSlot ? poolSlotBaselines.get(testPoolSlot.path) : sharedBaselineCommit;
+        try {
+          const judgeProvider = await resolveJudgeProvider(target);
+          const runCaseOptions = {
+            evalCase,
+            provider: primaryProvider,
+            target,
+            evaluators: evaluatorRegistry,
+            maxRetries,
+            agentTimeoutMs,
+            cache,
+            useCache,
+            now,
+            judgeProvider,
+            targetResolver,
+            availableTargets,
+            evalRunId,
+            keepWorkspaces,
+            cleanupWorkspaces,
+            sharedWorkspacePath: testWorkspacePath,
+            sharedBaselineCommit: testBaselineCommit,
+            suiteWorkspaceFile,
+            streamCallbacks,
+            typeRegistry,
+            repoManager,
+            evalDir
+          };
+          let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
+          if (totalBudgetUsd !== void 0) {
+            let caseCost;
+            if (result.trials && result.trials.length > 0) {
+              const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
+              if (trialCostSum > 0) {
+                caseCost = trialCostSum;
+              }
+            } else {
+              caseCost = result.costUsd;
+            }
+            if (caseCost !== void 0) {
+              cumulativeBudgetCost += caseCost;
+              if (cumulativeBudgetCost >= totalBudgetUsd) {
+                budgetExhausted = true;
+              }
+            }
+          }
+          if (failOnError === true && result.executionStatus === "execution_error") {
+            failOnErrorTriggered = true;
+          }
+          if (beforeAllOutput && !beforeAllOutputAttached) {
+            result = { ...result, beforeAllOutput };
+            beforeAllOutputAttached = true;
+          }
+          if (onProgress) {
+            await onProgress({
+              workerId,
+              testId: evalCase.id,
+              status: result.error ? "failed" : "completed",
+              startedAt: 0,
+              // Not used for completed status
+              completedAt: Date.now(),
+              error: result.error
+            });
+          }
+          if (onResult) {
+            await onResult(result);
+          }
+          return result;
+        } catch (error) {
+          if (onProgress) {
+            await onProgress({
+              workerId,
+              testId: evalCase.id,
+              status: "failed",
+              completedAt: Date.now(),
+              error: error instanceof Error ? error.message : String(error)
+            });
+          }
+          throw error;
+        } finally {
+          if (testPoolSlot) {
+            availablePoolSlots.push(testPoolSlot);
+          }
         }
-        return result;
-      } catch (error) {
-        if (onProgress) {
-          await onProgress({
-            workerId,
-            testId: evalCase.id,
-            status: "failed",
-            completedAt: Date.now(),
-            error: error instanceof Error ? error.message : String(error)
-          });
+      })
+    );
+    const settled = await Promise.allSettled(promises);
+    const results = [];
+    for (let i = 0; i < settled.length; i++) {
+      const outcome = settled[i];
+      if (outcome.status === "fulfilled") {
+        results.push(outcome.value);
+      } else {
+        const evalCase = filteredEvalCases[i];
+        const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
+        const promptInputs = await buildPromptInputs(evalCase, formattingMode);
+        const errorResult = buildErrorResult(
+          evalCase,
+          target.name,
+          (now ?? (() => /* @__PURE__ */ new Date()))(),
+          outcome.reason,
+          promptInputs,
+          primaryProvider,
+          "agent",
+          "provider_error"
+        );
+        results.push(errorResult);
+        if (onResult) {
+          await onResult(errorResult);
         }
-        throw error;
       }
-    })
-  );
-  const settled = await Promise.allSettled(promises);
-  const results = [];
-  for (let i = 0; i < settled.length; i++) {
-    const outcome = settled[i];
-    if (outcome.status === "fulfilled") {
-      results.push(outcome.value);
-    } else {
-      const evalCase = filteredEvalCases[i];
-      const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
-      const promptInputs = await buildPromptInputs(evalCase, formattingMode);
-      const errorResult = buildErrorResult(
-        evalCase,
-        target.name,
-        (now ?? (() => /* @__PURE__ */ new Date()))(),
-        outcome.reason,
-        promptInputs,
-        primaryProvider,
-        "agent",
-        "provider_error"
-      );
-      results.push(errorResult);
-      if (onResult) {
-        await onResult(errorResult);
+    }
+    const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
+    if (afterAllWorkspaces.length > 0 && suiteWorkspace?.after_all) {
+      for (const wsPath of afterAllWorkspaces) {
+        const scriptContext = {
+          workspacePath: wsPath,
+          testId: "__after_all__",
+          evalRunId,
+          evalDir
+        };
+        try {
+          const afterAllOutput = await executeWorkspaceScript(
+            suiteWorkspace.after_all,
+            scriptContext,
+            "warn"
+          );
+          if (afterAllOutput && results.length > 0 && wsPath === afterAllWorkspaces[0]) {
+            results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
+          }
+        } catch {
+        }
       }
     }
-  }
-  if (sharedWorkspacePath && suiteWorkspace?.after_all) {
-    const scriptContext = {
-      workspacePath: sharedWorkspacePath,
-      testId: "__after_all__",
-      evalRunId,
-      evalDir
-    };
-    try {
-      const afterAllOutput = await executeWorkspaceScript(
-        suiteWorkspace.after_all,
-        scriptContext,
-        "warn"
-      );
-      if (afterAllOutput && results.length > 0) {
-        results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
+    if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !userWorkspacePath) {
+      const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
+      if (cleanupWorkspaces) {
+        await cleanupWorkspace(sharedWorkspacePath).catch(() => {
+        });
+      } else if (!hasFailure && !keepWorkspaces) {
+        await cleanupWorkspace(sharedWorkspacePath).catch(() => {
+        });
       }
-    } catch {
     }
-  }
-  if (sharedWorkspacePath) {
-    const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
     if (cleanupWorkspaces) {
-      await cleanupWorkspace(sharedWorkspacePath).catch(() => {
-      });
-    } else if (!hasFailure && !keepWorkspaces) {
-      await cleanupWorkspace(sharedWorkspacePath).catch(() => {
+      await cleanupEvalWorkspaces(evalRunId).catch(() => {
       });
     }
+    return results;
+  } finally {
+    if (poolManager) {
+      if (poolSlot) {
+        await poolManager.releaseSlot(poolSlot);
+      }
+      for (const slot of poolSlots) {
+        if (slot !== poolSlot) {
+          await poolManager.releaseSlot(slot).catch(() => {
+          });
+        }
+      }
+    }
   }
-  if (cleanupWorkspaces) {
-    await cleanupEvalWorkspaces(evalRunId).catch(() => {
-    });
-  }
-  return results;
 }
 async function runBatchEvaluation(options) {
   const {
@@ -13734,7 +14233,7 @@ async function runEvalCase(options) {
         );
       }
       if (caseWorkspaceFile && workspacePath) {
-        const copiedFile = path38.join(workspacePath, path38.basename(caseWorkspaceFile));
+        const copiedFile = path39.join(workspacePath, path39.basename(caseWorkspaceFile));
         try {
           await stat7(copiedFile);
           caseWorkspaceFile = copiedFile;
@@ -13744,7 +14243,7 @@ async function runEvalCase(options) {
     }
     if (!workspacePath && (evalCase.workspace?.before_all || evalCase.workspace?.repos?.length) && evalRunId) {
       workspacePath = getWorkspacePath(evalRunId, evalCase.id);
-      await mkdir12(workspacePath, { recursive: true });
+      await mkdir13(workspacePath, { recursive: true });
     }
     if (evalCase.workspace?.repos?.length && workspacePath) {
       const perCaseRepoManager = new RepoManager(void 0, setupDebug);
@@ -14344,7 +14843,7 @@ async function runEvaluatorList(options) {
     fileChanges,
     workspacePath
   };
-  const evalFileDir = evalCase.guideline_paths[0] ? path38.dirname(evalCase.guideline_paths[0]) : process.cwd();
+  const evalFileDir = evalCase.guideline_paths[0] ? path39.dirname(evalCase.guideline_paths[0]) : process.cwd();
   const dispatchContext = {
     judgeProvider,
     targetResolver,
@@ -14578,7 +15077,7 @@ function extractProviderError(response) {
   return trimmed.length > 0 ? trimmed : void 0;
 }
 function createCacheKey(provider, target, evalCase, promptInputs) {
-  const hash = createHash2("sha256");
+  const hash = createHash3("sha256");
   hash.update(provider.id);
   hash.update(target.name);
   hash.update(evalCase.id);
@@ -14646,8 +15145,8 @@ function computeWeightedMean(entries) {
 }
 // src/evaluation/evaluate.ts
-import { existsSync as existsSync3 } from "node:fs";
-import path39 from "node:path";
+import { existsSync as existsSync4 } from "node:fs";
+import path40 from "node:path";
 async function evaluate(config) {
   const startTime = Date.now();
   if (config.tests && config.specFile) {
@@ -14669,13 +15168,13 @@ async function evaluate(config) {
   let evalCases;
   let testFilePath;
   if (config.specFile) {
-    testFilePath = path39.resolve(config.specFile);
+    testFilePath = path40.resolve(config.specFile);
     evalCases = await loadTests(testFilePath, repoRoot, {
       verbose: config.verbose,
       filter: config.filter
     });
   } else {
-    testFilePath = path39.join(process.cwd(), "__programmatic__.yaml");
+    testFilePath = path40.join(process.cwd(), "__programmatic__.yaml");
     evalCases = (config.tests ?? []).map((test) => {
       const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
       const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
@@ -14761,11 +15260,11 @@ function computeSummary(results, durationMs) {
 var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
 async function discoverDefaultTarget(repoRoot) {
   const cwd = process.cwd();
-  const chain = buildDirectoryChain(path39.join(cwd, "_placeholder"), repoRoot);
+  const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
   for (const dir of chain) {
     for (const candidate of TARGET_FILE_CANDIDATES) {
-      const targetsPath = path39.join(dir, candidate);
-      if (!existsSync3(targetsPath)) continue;
+      const targetsPath = path40.join(dir, candidate);
+      if (!existsSync4(targetsPath)) continue;
       try {
         const definitions = await readTargetDefinitions(targetsPath);
         const defaultTarget = definitions.find((d) => d.name === "default");
@@ -14779,11 +15278,11 @@ async function discoverDefaultTarget(repoRoot) {
 async function loadEnvHierarchy(repoRoot) {
   const { readFileSync: readFileSync2 } = await import("node:fs");
   const cwd = process.cwd();
-  const chain = buildDirectoryChain(path39.join(cwd, "_placeholder"), repoRoot);
+  const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
   const envFiles = [];
   for (const dir of chain) {
-    const envPath = path39.join(dir, ".env");
-    if (existsSync3(envPath)) envFiles.push(envPath);
+    const envPath = path40.join(dir, ".env");
+    if (existsSync4(envPath)) envFiles.push(envPath);
   }
   for (let i = envFiles.length - 1; i >= 0; i--) {
     try {
@@ -14861,12 +15360,12 @@ var CONFIG_FILE_NAMES = [
   ".agentv/config.js"
 ];
 async function loadTsConfig(projectRoot) {
-  const { existsSync: existsSync4 } = await import("node:fs");
+  const { existsSync: existsSync5 } = await import("node:fs");
   const { pathToFileURL } = await import("node:url");
   const { join: join2 } = await import("node:path");
   for (const fileName of CONFIG_FILE_NAMES) {
     const filePath = join2(projectRoot, fileName);
-    if (!existsSync4(filePath)) {
+    if (!existsSync5(filePath)) {
       continue;
     }
     try {
@@ -14963,8 +15462,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
 }
 // src/evaluation/cache/response-cache.ts
-import { mkdir as mkdir13, readFile as readFile11, writeFile as writeFile8 } from "node:fs/promises";
-import path40 from "node:path";
+import { mkdir as mkdir14, readFile as readFile12, writeFile as writeFile9 } from "node:fs/promises";
+import path41 from "node:path";
 var DEFAULT_CACHE_PATH = ".agentv/cache";
 var ResponseCache = class {
   cachePath;
@@ -14974,7 +15473,7 @@ var ResponseCache = class {
   async get(key) {
     const filePath = this.keyToPath(key);
     try {
-      const data = await readFile11(filePath, "utf8");
+      const data = await readFile12(filePath, "utf8");
       return JSON.parse(data);
     } catch {
       return void 0;
@@ -14982,13 +15481,13 @@ var ResponseCache = class {
   }
   async set(key, value) {
     const filePath = this.keyToPath(key);
-    const dir = path40.dirname(filePath);
-    await mkdir13(dir, { recursive: true });
-    await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
+    const dir = path41.dirname(filePath);
+    await mkdir14(dir, { recursive: true });
+    await writeFile9(filePath, JSON.stringify(value, null, 2), "utf8");
   }
   keyToPath(key) {
     const prefix = key.slice(0, 2);
-    return path40.join(this.cachePath, prefix, `${key}.json`);
+    return path41.join(this.cachePath, prefix, `${key}.json`);
   }
 };
 function shouldEnableCache(params) {
@@ -15470,6 +15969,7 @@ export {
   TokenUsageEvaluator,
   ToolTrajectoryEvaluator,
   WorkspaceCreationError,
+  WorkspacePoolManager,
   assembleLlmJudgePrompt,
   avgToolDurationMs,
   buildDirectoryChain,
@@ -15484,6 +15984,7 @@ export {
   cleanupEvalWorkspaces,
   cleanupWorkspace,
   computeTraceSummary,
+  computeWorkspaceFingerprint,
   consumeClaudeLogEntries,
   consumeCodexLogEntries,
   consumeCopilotCliLogEntries,
@@ -15521,6 +16022,7 @@ export {
   getSubagentsRoot,
   getTraceStateRoot,
   getWorkspacePath,
+  getWorkspacePoolRoot,
   getWorkspacesRoot,
   initializeBaseline,
   isEvaluatorKind,