npm - @agentv/core - Versions diffs - 2.14.3 → 2.15.0 - Mend

@agentv/core 2.14.3 → 2.15.0

This diff compares the contents of two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/index.cjs CHANGED Viewed

@@ -1244,12 +1244,12 @@ function serializeAttributeValue(value) {
   if (Array.isArray(value)) return { arrayValue: { values: value.map(serializeAttributeValue) } };
   return { stringValue: String(value) };
 }
-var import_promises31, import_node_path44, OtlpJsonFileExporter;
+var import_promises32, import_node_path45, OtlpJsonFileExporter;
 var init_otlp_json_file_exporter = __esm({
   "src/observability/otlp-json-file-exporter.ts"() {
     "use strict";
-    import_promises31 = require("fs/promises");
-    import_node_path44 = require("path");
+    import_promises32 = require("fs/promises");
+    import_node_path45 = require("path");
     OtlpJsonFileExporter = class {
       // biome-ignore lint/suspicious/noExplicitAny: serialized span data
       spans = [];
@@ -1288,7 +1288,7 @@ var init_otlp_json_file_exporter = __esm({
       }
       async flush() {
         if (this.spans.length === 0) return;
-        await (0, import_promises31.mkdir)((0, import_node_path44.dirname)(this.filePath), { recursive: true });
+        await (0, import_promises32.mkdir)((0, import_node_path45.dirname)(this.filePath), { recursive: true });
         const otlpJson = {
           resourceSpans: [
             {
@@ -1302,8 +1302,8 @@ var init_otlp_json_file_exporter = __esm({
             }
           ]
         };
-        const { writeFile: writeFile9 } = await import("fs/promises");
-        await writeFile9(this.filePath, JSON.stringify(otlpJson, null, 2));
+        const { writeFile: writeFile10 } = await import("fs/promises");
+        await writeFile10(this.filePath, JSON.stringify(otlpJson, null, 2));
       }
     };
   }
@@ -1319,13 +1319,13 @@ function hrTimeDiffMs(start, end) {
   const diffNano = end[1] - start[1];
   return Math.round(diffSec * 1e3 + diffNano / 1e6);
 }
-var import_node_fs13, import_promises32, import_node_path45, SimpleTraceFileExporter;
+var import_node_fs14, import_promises33, import_node_path46, SimpleTraceFileExporter;
 var init_simple_trace_file_exporter = __esm({
   "src/observability/simple-trace-file-exporter.ts"() {
     "use strict";
-    import_node_fs13 = require("fs");
-    import_promises32 = require("fs/promises");
-    import_node_path45 = require("path");
+    import_node_fs14 = require("fs");
+    import_promises33 = require("fs/promises");
+    import_node_path46 = require("path");
     SimpleTraceFileExporter = class {
       stream = null;
       filePath;
@@ -1338,8 +1338,8 @@ var init_simple_trace_file_exporter = __esm({
       async ensureStream() {
         if (!this.streamReady) {
           this.streamReady = (async () => {
-            await (0, import_promises32.mkdir)((0, import_node_path45.dirname)(this.filePath), { recursive: true });
-            this.stream = (0, import_node_fs13.createWriteStream)(this.filePath, { flags: "w" });
+            await (0, import_promises33.mkdir)((0, import_node_path46.dirname)(this.filePath), { recursive: true });
+            this.stream = (0, import_node_fs14.createWriteStream)(this.filePath, { flags: "w" });
             return this.stream;
           })();
         }
@@ -1457,6 +1457,7 @@ __export(index_exports, {
   TokenUsageEvaluator: () => TokenUsageEvaluator,
   ToolTrajectoryEvaluator: () => ToolTrajectoryEvaluator,
   WorkspaceCreationError: () => WorkspaceCreationError,
+  WorkspacePoolManager: () => WorkspacePoolManager,
   assembleLlmJudgePrompt: () => assembleLlmJudgePrompt,
   avgToolDurationMs: () => avgToolDurationMs,
   buildDirectoryChain: () => buildDirectoryChain2,
@@ -1471,6 +1472,7 @@ __export(index_exports, {
   cleanupEvalWorkspaces: () => cleanupEvalWorkspaces,
   cleanupWorkspace: () => cleanupWorkspace,
   computeTraceSummary: () => computeTraceSummary,
+  computeWorkspaceFingerprint: () => computeWorkspaceFingerprint,
   consumeClaudeLogEntries: () => consumeClaudeLogEntries,
   consumeCodexLogEntries: () => consumeCodexLogEntries,
   consumeCopilotCliLogEntries: () => consumeCopilotCliLogEntries,
@@ -1508,6 +1510,7 @@ __export(index_exports, {
   getSubagentsRoot: () => getSubagentsRoot,
   getTraceStateRoot: () => getTraceStateRoot,
   getWorkspacePath: () => getWorkspacePath,
+  getWorkspacePoolRoot: () => getWorkspacePoolRoot,
   getWorkspacesRoot: () => getWorkspacesRoot,
   initializeBaseline: () => initializeBaseline,
   isEvaluatorKind: () => isEvaluatorKind,
@@ -2236,6 +2239,17 @@ function parseExecutionDefaults(raw, configPath) {
   } else if (otelFile !== void 0) {
     logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
   }
+  if (typeof obj.pool_workspaces === "boolean") {
+    result.pool_workspaces = obj.pool_workspaces;
+  } else if (obj.pool_workspaces !== void 0) {
+    logWarning(`Invalid execution.pool_workspaces in ${configPath}, expected boolean`);
+  }
+  const poolSlots = obj.pool_slots;
+  if (typeof poolSlots === "number" && Number.isInteger(poolSlots) && poolSlots >= 1 && poolSlots <= 50) {
+    result.pool_slots = poolSlots;
+  } else if (poolSlots !== void 0) {
+    logWarning(`Invalid execution.pool_slots in ${configPath}, expected integer 1-50`);
+  }
   return Object.keys(result).length > 0 ? result : void 0;
 }
 function logWarning(message) {
@@ -3677,6 +3691,7 @@ async function processMessages(options) {
     repoRootPath,
     guidelinePatterns,
     guidelinePaths,
+    treatFileSegmentsAsGuidelines,
     textParts,
     messageType,
     verbose
@@ -3724,16 +3739,20 @@ async function processMessages(options) {
         }
         try {
           const fileContent = (await (0, import_promises5.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
-          if (messageType === "input" && guidelinePatterns && guidelinePaths) {
-            const relativeToRepo = import_node_path5.default.relative(repoRootPath, resolvedPath);
-            if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
-              guidelinePaths.push(import_node_path5.default.resolve(resolvedPath));
-              if (verbose) {
-                console.log(`  [Guideline] Found: ${displayPath}`);
-                console.log(`    Resolved to: ${resolvedPath}`);
-              }
-              continue;
+          const classifyAsGuideline = shouldTreatAsGuideline({
+            messageType,
+            resolvedPath,
+            repoRootPath,
+            guidelinePatterns,
+            treatFileSegmentsAsGuidelines
+          });
+          if (classifyAsGuideline && guidelinePaths) {
+            guidelinePaths.push(import_node_path5.default.resolve(resolvedPath));
+            if (verbose) {
+              console.log(`  [Guideline] Found: ${displayPath}`);
+              console.log(`    Resolved to: ${resolvedPath}`);
             }
+            continue;
           }
           segments.push({
             type: "file",
@@ -3762,6 +3781,26 @@ async function processMessages(options) {
   }
   return segments;
 }
+function shouldTreatAsGuideline(options) {
+  const {
+    messageType,
+    resolvedPath,
+    repoRootPath,
+    guidelinePatterns,
+    treatFileSegmentsAsGuidelines
+  } = options;
+  if (messageType !== "input") {
+    return false;
+  }
+  if (treatFileSegmentsAsGuidelines) {
+    return true;
+  }
+  if (!guidelinePatterns || guidelinePatterns.length === 0) {
+    return false;
+  }
+  const relativeToRepo = import_node_path5.default.relative(repoRootPath, resolvedPath);
+  return isGuidelineFile(relativeToRepo, guidelinePatterns);
+}
 function asString3(value) {
   return typeof value === "string" ? value : void 0;
 }
@@ -4100,6 +4139,8 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
         for (const guidelinePath of testCase.guideline_paths) {
           console.log(`    - ${guidelinePath}`);
         }
+      } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
+        console.log("  No guidelines found (guideline_patterns not configured)");
       } else {
         console.log("  No guidelines found");
       }
@@ -4469,7 +4510,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
   } else {
     throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
   }
-  const suiteWorkspace = parseWorkspaceConfig(suite.workspace, evalFileDir);
+  const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
   const suiteInputMessages = expandInputShorthand(suite.input);
   const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
   const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
@@ -4505,12 +4546,24 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
     }
     const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
     const skipDefaults = caseExecution?.skip_defaults === true;
-    const inputMessages = suiteInputMessages && !skipDefaults ? [...suiteInputMessages, ...testInputMessages] : testInputMessages;
+    const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
+    const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
     const hasExpectedMessages = expectedMessages.length > 0;
     const guidelinePaths = [];
     const inputTextParts = [];
-    const inputSegments = await processMessages({
-      messages: inputMessages,
+    const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
+      messages: effectiveSuiteInputMessages,
+      searchRoots,
+      repoRootPath,
+      guidelinePatterns,
+      guidelinePaths,
+      treatFileSegmentsAsGuidelines: true,
+      textParts: inputTextParts,
+      messageType: "input",
+      verbose
+    }) : [];
+    const testInputSegments = await processMessages({
+      messages: testInputMessages,
       searchRoots,
       repoRootPath,
       guidelinePatterns,
@@ -4519,6 +4572,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       messageType: "input",
       verbose
     });
+    const inputSegments = [...suiteInputSegments, ...testInputSegments];
     const outputSegments = hasExpectedMessages ? await processExpectedMessages({
       messages: expectedMessages,
       searchRoots,
@@ -4566,7 +4620,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       ...guidelinePaths.map((guidelinePath) => import_node_path8.default.resolve(guidelinePath)),
       ...userFilePaths
     ];
-    const caseWorkspace = parseWorkspaceConfig(evalcase.workspace, evalFileDir);
+    const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
     const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
     const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
     const caseTargets = extractTargetsFromTestCase(evalcase);
@@ -4597,6 +4651,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
         for (const guidelinePath of testCase.guideline_paths) {
           console.log(`    - ${guidelinePath}`);
         }
+      } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
+        console.log("  No guidelines found (guideline_patterns not configured)");
       } else {
         console.log("  No guidelines found");
       }
@@ -4696,6 +4752,26 @@ function parseResetConfig(raw) {
     ...afterEach !== void 0 && { after_each: afterEach }
   };
 }
+async function resolveWorkspaceConfig(raw, evalFileDir) {
+  if (typeof raw === "string") {
+    const workspaceFilePath = import_node_path8.default.resolve(evalFileDir, raw);
+    let content;
+    try {
+      content = await (0, import_promises8.readFile)(workspaceFilePath, "utf8");
+    } catch {
+      throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
+    }
+    const parsed = (0, import_yaml4.parse)(content);
+    if (!isJsonObject(parsed)) {
+      throw new Error(
+        `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
+      );
+    }
+    const workspaceFileDir = import_node_path8.default.dirname(workspaceFilePath);
+    return parseWorkspaceConfig(parsed, workspaceFileDir);
+  }
+  return parseWorkspaceConfig(raw, evalFileDir);
+}
 function parseWorkspaceConfig(raw, evalFileDir) {
   if (!isJsonObject(raw)) return void 0;
   const obj = raw;
@@ -9493,8 +9569,8 @@ function resolveCliConfig(target, env, evalFilePath) {
   const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
   if (!parseResult.success) {
     const firstError = parseResult.error.errors[0];
-    const path43 = firstError?.path.join(".") || "";
-    const prefix = path43 ? `${target.name} ${path43}: ` : `${target.name}: `;
+    const path44 = firstError?.path.join(".") || "";
+    const prefix = path44 ? `${target.name} ${path44}: ` : `${target.name}: `;
     throw new Error(`${prefix}${firstError?.message}`);
   }
   const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
@@ -10010,6 +10086,9 @@ function getSubagentsRoot() {
 function getTraceStateRoot() {
   return import_node_path23.default.join(getAgentvHome(), "trace-state");
 }
+function getWorkspacePoolRoot() {
+  return import_node_path23.default.join(getAgentvHome(), "workspace-pool");
+}
 // src/evaluation/providers/vscode/dispatch/constants.ts
 var DEFAULT_LOCK_NAME = "subagent.lock";
@@ -10832,8 +10911,6 @@ var AGENTV_REQUEST_TEMPLATE = `[[ ## task ## ]]
 **IMPORTANT**: Follow these exact steps:
 1. Create and write your complete response to: {{responseFileTmp}}
-    - All intended file outputs/changes MUST be written in your response file.
-    - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
 2. When completely finished, run these PowerShell commands to signal completion:
 \`\`\`
 Move-Item -LiteralPath '{{responseFileTmp}}' -Destination '{{responseFileFinal}}'
@@ -10850,8 +10927,6 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
 **IMPORTANT**: Follow these exact steps:
 1. Create and write your complete response to: {{responseFileTmp}}
-    - All intended file outputs/changes MUST be written in your response file.
-    - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
 2. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
 3. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
 `;
@@ -11464,16 +11539,16 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
   });
 }
 async function execShellWithStdin(command, stdinPayload, options = {}) {
-  const { mkdir: mkdir16, readFile: readFile13, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
+  const { mkdir: mkdir17, readFile: readFile14, rm: rm7, writeFile: writeFile10 } = await import("fs/promises");
   const { tmpdir: tmpdir3 } = await import("os");
-  const path43 = await import("path");
+  const path44 = await import("path");
   const { randomUUID: randomUUID8 } = await import("crypto");
-  const dir = path43.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
-  await mkdir16(dir, { recursive: true });
-  const stdinPath = path43.join(dir, "stdin.txt");
-  const stdoutPath = path43.join(dir, "stdout.txt");
-  const stderrPath = path43.join(dir, "stderr.txt");
-  await writeFile9(stdinPath, stdinPayload, "utf8");
+  const dir = path44.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
+  await mkdir17(dir, { recursive: true });
+  const stdinPath = path44.join(dir, "stdin.txt");
+  const stdoutPath = path44.join(dir, "stdout.txt");
+  const stderrPath = path44.join(dir, "stderr.txt");
+  await writeFile10(stdinPath, stdinPayload, "utf8");
   const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
   const { spawn: spawn4 } = await import("child_process");
   try {
@@ -11502,11 +11577,11 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
         resolve(code ?? 0);
       });
     });
-    const stdout = (await readFile13(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
-    const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
+    const stdout = (await readFile14(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
+    const stderr = (await readFile14(stderrPath, "utf8")).replace(/\r\n/g, "\n");
     return { stdout, stderr, exitCode };
   } finally {
-    await rm6(dir, { recursive: true, force: true });
+    await rm7(dir, { recursive: true, force: true });
   }
 }
@@ -11824,7 +11899,7 @@ var CodeEvaluator = class {
       outputPath,
       guidelineFiles: context2.evalCase.guideline_paths,
       inputFiles: context2.evalCase.file_paths.filter(
-        (path43) => !context2.evalCase.guideline_paths.includes(path43)
+        (path44) => !context2.evalCase.guideline_paths.includes(path44)
       ),
       input: context2.evalCase.input,
       trace: context2.trace ?? null,
@@ -12103,6 +12178,8 @@ ${context2.fileChanges}`;
       };
     } catch (e) {
       const message = e instanceof Error ? e.message : String(e);
+      const evalName = context2.evaluator?.name ?? "llm-judge";
+      console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
       return {
         score: 0,
         verdict: "skip",
@@ -12131,24 +12208,39 @@ ${context2.fileChanges}`;
       systemPrompt,
       target: judgeProvider.targetName
     };
-    const { data, tokenUsage } = await this.runWithRetry({
-      context: context2,
-      judgeProvider,
-      systemPrompt,
-      userPrompt: prompt,
-      schema: rubricEvaluationSchema
-    });
-    const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
-    return {
-      score,
-      verdict,
-      hits,
-      misses,
-      expectedAspectCount: rubrics.length,
-      reasoning: data.overall_reasoning,
-      evaluatorRawRequest,
-      tokenUsage
-    };
+    try {
+      const { data, tokenUsage } = await this.runWithRetry({
+        context: context2,
+        judgeProvider,
+        systemPrompt,
+        userPrompt: prompt,
+        schema: rubricEvaluationSchema
+      });
+      const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
+      return {
+        score,
+        verdict,
+        hits,
+        misses,
+        expectedAspectCount: rubrics.length,
+        reasoning: data.overall_reasoning,
+        evaluatorRawRequest,
+        tokenUsage
+      };
+    } catch (e) {
+      const message = e instanceof Error ? e.message : String(e);
+      const evalName = context2.evaluator?.name ?? "llm-judge";
+      console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
+      return {
+        score: 0,
+        verdict: "skip",
+        hits: [],
+        misses: [`Judge parse failure after 3 attempts: ${message}`],
+        expectedAspectCount: rubrics.length,
+        reasoning: `Judge parse failure after 3 attempts: ${message}`,
+        evaluatorRawRequest
+      };
+    }
   }
   /**
    * Evaluate using score-range rubrics (analytic rubric scoring).
@@ -12162,25 +12254,40 @@ ${context2.fileChanges}`;
       systemPrompt,
       target: judgeProvider.targetName
     };
-    const { data, tokenUsage } = await this.runWithRetry({
-      context: context2,
-      judgeProvider,
-      systemPrompt,
-      userPrompt: prompt,
-      schema: scoreRangeEvaluationSchema
-    });
-    const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
-    return {
-      score,
-      verdict,
-      hits,
-      misses,
-      expectedAspectCount: rubrics.length,
-      reasoning: data.overall_reasoning,
-      evaluatorRawRequest,
-      details,
-      tokenUsage
-    };
+    try {
+      const { data, tokenUsage } = await this.runWithRetry({
+        context: context2,
+        judgeProvider,
+        systemPrompt,
+        userPrompt: prompt,
+        schema: scoreRangeEvaluationSchema
+      });
+      const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
+      return {
+        score,
+        verdict,
+        hits,
+        misses,
+        expectedAspectCount: rubrics.length,
+        reasoning: data.overall_reasoning,
+        evaluatorRawRequest,
+        details,
+        tokenUsage
+      };
+    } catch (e) {
+      const message = e instanceof Error ? e.message : String(e);
+      const evalName = context2.evaluator?.name ?? "llm-judge";
+      console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
+      return {
+        score: 0,
+        verdict: "skip",
+        hits: [],
+        misses: [`Judge parse failure after 3 attempts: ${message}`],
+        expectedAspectCount: rubrics.length,
+        reasoning: `Judge parse failure after 3 attempts: ${message}`,
+        evaluatorRawRequest
+      };
+    }
   }
   /**
    * Build prompt for score-range rubric evaluation.
@@ -12466,19 +12573,13 @@ var CompositeEvaluator = class {
   runWeightedAverage(results, weights) {
     let totalWeight = 0;
     let weightedSum = 0;
+    let evaluatedCount = 0;
     const allHits = [];
     const allMisses = [];
     const reasoningParts = [];
     const scores = [];
     for (const member of results) {
       const weight = weights?.[member.id] ?? 1;
-      totalWeight += weight;
-      weightedSum += member.result.score * weight;
-      allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
-      allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
-      if (member.result.reasoning) {
-        reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
-      }
       scores.push({
         name: member.id,
         type: member.type,
@@ -12493,6 +12594,32 @@ var CompositeEvaluator = class {
         details: member.result.details,
         tokenUsage: member.result.tokenUsage
       });
+      if (member.result.verdict === "skip") {
+        continue;
+      }
+      evaluatedCount++;
+      totalWeight += weight;
+      weightedSum += member.result.score * weight;
+      allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
+      allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
+      if (member.result.reasoning) {
+        reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
+      }
+    }
+    if (evaluatedCount === 0 && results.length > 0) {
+      return {
+        score: 0,
+        verdict: "skip",
+        hits: [],
+        misses: [],
+        expectedAspectCount: 1,
+        reasoning: "All evaluators skipped (infrastructure failure)",
+        evaluatorRawRequest: {
+          aggregator: "weighted_average",
+          ...weights ? { weights } : {}
+        },
+        scores
+      };
     }
     const finalScore = totalWeight > 0 ? weightedSum / totalWeight : 0;
     return {
@@ -12516,19 +12643,8 @@ var CompositeEvaluator = class {
     const reasoningParts = [];
     let passingCount = 0;
     let borderlineCount = 0;
+    let evaluatedCount = 0;
     for (const member of results) {
-      const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
-      if (isPassing) {
-        passingCount++;
-        if (member.result.verdict === "borderline") {
-          borderlineCount++;
-        }
-      }
-      allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
-      allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
-      if (member.result.reasoning) {
-        reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
-      }
       scores.push({
         name: member.id,
         type: member.type,
@@ -12542,8 +12658,39 @@ var CompositeEvaluator = class {
         details: member.result.details,
         tokenUsage: member.result.tokenUsage
       });
+      if (member.result.verdict === "skip") {
+        continue;
+      }
+      evaluatedCount++;
+      const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
+      if (isPassing) {
+        passingCount++;
+        if (member.result.verdict === "borderline") {
+          borderlineCount++;
+        }
+      }
+      allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
+      allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
+      if (member.result.reasoning) {
+        reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
+      }
+    }
+    if (evaluatedCount === 0 && results.length > 0) {
+      return {
+        score: 0,
+        verdict: "skip",
+        hits: [],
+        misses: [],
+        expectedAspectCount: 1,
+        reasoning: "All evaluators skipped (infrastructure failure)",
+        evaluatorRawRequest: {
+          aggregator: "threshold",
+          threshold
+        },
+        scores
+      };
     }
-    const totalCount = results.length;
+    const totalCount = evaluatedCount;
     const score = totalCount > 0 ? passingCount / totalCount : 0;
     const pass = score >= threshold;
     if (pass && borderlineCount > 0) {
@@ -13051,115 +13198,115 @@ var FieldAccuracyEvaluator = class {
    * Evaluate a single field against the expected value.
    */
   evaluateField(fieldConfig, candidateData, expectedData) {
-    const { path: path43, match, required = true, weight = 1 } = fieldConfig;
-    const candidateValue = resolvePath(candidateData, path43);
-    const expectedValue = resolvePath(expectedData, path43);
+    const { path: path44, match, required = true, weight = 1 } = fieldConfig;
+    const candidateValue = resolvePath(candidateData, path44);
+    const expectedValue = resolvePath(expectedData, path44);
     if (expectedValue === void 0) {
       return {
-        path: path43,
+        path: path44,
         score: 1,
         // No expected value means no comparison needed
         weight,
         hit: true,
-        message: `${path43}: no expected value`
+        message: `${path44}: no expected value`
       };
     }
     if (candidateValue === void 0) {
       if (required) {
         return {
-          path: path43,
+          path: path44,
           score: 0,
           weight,
           hit: false,
-          message: `${path43} (required, missing)`
+          message: `${path44} (required, missing)`
         };
       }
       return {
-        path: path43,
+        path: path44,
         score: 1,
         // Don't penalize missing optional fields
         weight: 0,
         // Zero weight means it won't affect the score
         hit: true,
-        message: `${path43}: optional field missing`
+        message: `${path44}: optional field missing`
       };
     }
     switch (match) {
       case "exact":
-        return this.compareExact(path43, candidateValue, expectedValue, weight);
+        return this.compareExact(path44, candidateValue, expectedValue, weight);
       case "numeric_tolerance":
         return this.compareNumericTolerance(
-          path43,
+          path44,
           candidateValue,
           expectedValue,
           fieldConfig,
           weight
         );
       case "date":
-        return this.compareDate(path43, candidateValue, expectedValue, fieldConfig, weight);
+        return this.compareDate(path44, candidateValue, expectedValue, fieldConfig, weight);
       default:
         return {
-          path: path43,
+          path: path44,
           score: 0,
           weight,
           hit: false,
-          message: `${path43}: unknown match type "${match}"`
+          message: `${path44}: unknown match type "${match}"`
         };
     }
   }
   /**
    * Exact equality comparison.
    */
-  compareExact(path43, candidateValue, expectedValue, weight) {
+  compareExact(path44, candidateValue, expectedValue, weight) {
     if (deepEqual(candidateValue, expectedValue)) {
       return {
-        path: path43,
+        path: path44,
         score: 1,
         weight,
         hit: true,
-        message: path43
+        message: path44
       };
     }
     if (typeof candidateValue !== typeof expectedValue) {
       return {
-        path: path43,
+        path: path44,
         score: 0,
         weight,
         hit: false,
-        message: `${path43} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
+        message: `${path44} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
       };
     }
     return {
-      path: path43,
+      path: path44,
       score: 0,
       weight,
       hit: false,
-      message: `${path43} (value mismatch)`
+      message: `${path44} (value mismatch)`
     };
   }
   /**
    * Numeric comparison with absolute or relative tolerance.
    */
-  compareNumericTolerance(path43, candidateValue, expectedValue, fieldConfig, weight) {
+  compareNumericTolerance(path44, candidateValue, expectedValue, fieldConfig, weight) {
     const { tolerance = 0, relative = false } = fieldConfig;
     const candidateNum = toNumber2(candidateValue);
     const expectedNum = toNumber2(expectedValue);
     if (candidateNum === null || expectedNum === null) {
       return {
-        path: path43,
+        path: path44,
         score: 0,
         weight,
         hit: false,
-        message: `${path43} (non-numeric value)`
+        message: `${path44} (non-numeric value)`
       };
     }
     if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
       return {
-        path: path43,
+        path: path44,
         score: 0,
         weight,
         hit: false,
-        message: `${path43} (invalid numeric value)`
+        message: `${path44} (invalid numeric value)`
       };
     }
     const diff = Math.abs(candidateNum - expectedNum);
@@ -13172,61 +13319,61 @@ var FieldAccuracyEvaluator = class {
     }
     if (withinTolerance) {
       return {
-        path: path43,
+        path: path44,
         score: 1,
         weight,
         hit: true,
-        message: `${path43} (within tolerance: diff=${diff.toFixed(2)})`
+        message: `${path44} (within tolerance: diff=${diff.toFixed(2)})`
       };
     }
     return {
-      path: path43,
+      path: path44,
       score: 0,
       weight,
       hit: false,
-      message: `${path43} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
+      message: `${path44} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
     };
   }
   /**
    * Date comparison with format normalization.
    */
-  compareDate(path43, candidateValue, expectedValue, fieldConfig, weight) {
+  compareDate(path44, candidateValue, expectedValue, fieldConfig, weight) {
     const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
     const candidateDate = parseDate(String(candidateValue), formats);
     const expectedDate = parseDate(String(expectedValue), formats);
     if (candidateDate === null) {
       return {
-        path: path43,
+        path: path44,
         score: 0,
         weight,
         hit: false,
-        message: `${path43} (unparseable candidate date)`
+        message: `${path44} (unparseable candidate date)`
       };
     }
     if (expectedDate === null) {
       return {
-        path: path43,
+        path: path44,
         score: 0,
         weight,
         hit: false,
-        message: `${path43} (unparseable expected date)`
+        message: `${path44} (unparseable expected date)`
       };
     }
     if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
       return {
-        path: path43,
+        path: path44,
         score: 1,
         weight,
         hit: true,
-        message: path43
+        message: path44
       };
     }
     return {
-      path: path43,
+      path: path44,
       score: 0,
       weight,
       hit: false,
-      message: `${path43} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
+      message: `${path44} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
     };
   }
   /**
@@ -13267,11 +13414,11 @@ var FieldAccuracyEvaluator = class {
     };
   }
 };
-function resolvePath(obj, path43) {
-  if (!path43 || !obj) {
+function resolvePath(obj, path44) {
+  if (!path44 || !obj) {
     return void 0;
   }
-  const parts = path43.split(/\.|\[|\]/).filter((p) => p.length > 0);
+  const parts = path44.split(/\.|\[|\]/).filter((p) => p.length > 0);
   let current = obj;
   for (const part of parts) {
     if (current === null || current === void 0) {
@@ -14089,8 +14236,8 @@ var TokenUsageEvaluator = class {
 };
 // src/evaluation/evaluators/tool-trajectory.ts
-function getNestedValue(obj, path43) {
-  const parts = path43.split(".");
+function getNestedValue(obj, path44) {
+  const parts = path44.split(".");
   let current = obj;
   for (const part of parts) {
     if (current === null || current === void 0 || typeof current !== "object") {
@@ -14651,9 +14798,9 @@ function runEqualsAssertion(output, value) {
 }
 // src/evaluation/orchestrator.ts
-var import_node_crypto9 = require("crypto");
-var import_promises29 = require("fs/promises");
-var import_node_path41 = __toESM(require("path"), 1);
+var import_node_crypto10 = require("crypto");
+var import_promises30 = require("fs/promises");
+var import_node_path42 = __toESM(require("path"), 1);
 var import_micromatch4 = __toESM(require("micromatch"), 1);
 // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
@@ -15523,7 +15670,7 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
   }
 }
-// src/evaluation/workspace/repo-manager.ts
+// src/evaluation/workspace/pool-manager.ts
 var import_node_child_process7 = require("child_process");
 var import_node_crypto8 = require("crypto");
 var import_node_fs11 = require("fs");
@@ -15531,8 +15678,6 @@ var import_promises27 = require("fs/promises");
 var import_node_path39 = __toESM(require("path"), 1);
 var import_node_util5 = require("util");
 var execFileAsync = (0, import_node_util5.promisify)(import_node_child_process7.execFile);
-var DEFAULT_TIMEOUT_MS2 = 3e5;
-var LOCK_TIMEOUT_MS = 6e4;
 function gitEnv() {
   const env = { ...process.env };
   for (const key of Object.keys(env)) {
@@ -15547,75 +15692,339 @@ function gitEnv() {
     GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
   };
 }
-function cacheKey(source) {
-  const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
-  return (0, import_node_crypto8.createHash)("sha256").update(raw).digest("hex");
-}
-function getSourceUrl(source) {
-  return source.type === "git" ? source.url : source.path;
-}
 async function git(args, opts) {
   const { stdout } = await execFileAsync("git", args, {
     cwd: opts?.cwd,
-    timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
+    timeout: opts?.timeout ?? 3e5,
     env: gitEnv(),
     maxBuffer: 50 * 1024 * 1024
-    // 50MB
   });
   return stdout.trim();
 }
-async function acquireLock(lockPath) {
-  const start = Date.now();
-  while (Date.now() - start < LOCK_TIMEOUT_MS) {
-    try {
-      await (0, import_promises27.writeFile)(lockPath, String(process.pid), { flag: "wx" });
-      return;
-    } catch (err) {
-      if (err.code === "EEXIST") {
-        await new Promise((r) => setTimeout(r, 200));
+function normalizeRepoForFingerprint(repo) {
+  const source = repo.source.type === "git" ? { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") } : { type: "local", path: repo.source.path };
+  const result = {
+    path: repo.path,
+    source,
+    ref: repo.checkout?.ref ?? "HEAD"
+  };
+  if (repo.clone?.depth !== void 0) {
+    result.depth = repo.clone.depth;
+  }
+  if (repo.clone?.filter !== void 0) {
+    result.filter = repo.clone.filter;
+  }
+  if (repo.clone?.sparse?.length) {
+    result.sparse = [...repo.clone.sparse].sort();
+  }
+  return result;
+}
+function computeWorkspaceFingerprint(templatePath, repos) {
+  const canonical = {
+    templatePath: templatePath ?? null,
+    repos: [...repos].sort((a, b) => a.path.localeCompare(b.path)).map(normalizeRepoForFingerprint)
+  };
+  return (0, import_node_crypto8.createHash)("sha256").update(JSON.stringify(canonical)).digest("hex");
+}
+async function copyDirectoryRecursive2(src, dest, skipDirs) {
+  await (0, import_promises27.mkdir)(dest, { recursive: true });
+  const entries = await (0, import_promises27.readdir)(src, { withFileTypes: true });
+  for (const entry of entries) {
+    const srcPath = import_node_path39.default.join(src, entry.name);
+    const destPath = import_node_path39.default.join(dest, entry.name);
+    if (entry.name === ".git") {
+      continue;
+    }
+    if (entry.isDirectory()) {
+      if (skipDirs?.has(entry.name)) {
         continue;
       }
-      throw err;
+      await copyDirectoryRecursive2(srcPath, destPath, skipDirs);
+    } else {
+      await (0, import_promises27.cp)(srcPath, destPath, { preserveTimestamps: true, force: true });
     }
   }
-  throw new Error(`Timed out waiting for lock: ${lockPath}`);
-}
-async function releaseLock(lockPath) {
-  try {
-    await (0, import_promises27.unlink)(lockPath);
-  } catch {
-  }
 }
-var RepoManager = class {
-  cacheDir;
-  verbose;
-  constructor(cacheDir, verbose = false) {
-    this.cacheDir = cacheDir ?? getGitCacheRoot();
-    this.verbose = verbose;
+var WorkspacePoolManager = class {
+  poolRoot;
+  constructor(poolRoot) {
+    this.poolRoot = poolRoot ?? getWorkspacePoolRoot();
   }
-  async runGit(args, opts) {
-    const startedAt = Date.now();
-    if (this.verbose) {
-      console.log(
-        `[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`
+  /**
+   * Acquire a workspace slot from the pool.
+   *
+   * 1. Compute fingerprint from template + repos
+   * 2. Check drift (compare stored metadata.json fingerprint vs computed)
+   * 3. If drift: warn, remove all slots, rematerialize
+   * 4. Acquire a slot (try-lock slot-0, slot-1, ..., up to maxSlots)
+   * 5. If slot exists: reset repos, re-copy template files (skip repo directories)
+   * 6. If new slot: copy template, materialize all repos, write metadata.json
+   * 7. Return the slot (with path, index, isExisting)
+   */
+  async acquireWorkspace(options) {
+    const { templatePath, repos, maxSlots, repoManager } = options;
+    const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
+    const poolDir = import_node_path39.default.join(this.poolRoot, fingerprint);
+    await (0, import_promises27.mkdir)(poolDir, { recursive: true });
+    const drifted = await this.checkDrift(poolDir, fingerprint);
+    if (drifted) {
+      console.warn(
+        `[workspace-pool] Drift detected for fingerprint ${fingerprint.slice(0, 12)}... Removing stale slots.`
       );
+      await this.removeAllSlots(poolDir);
     }
-    try {
-      const output = await git(args, opts);
-      if (this.verbose) {
-        console.log(
-          `[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`
-        );
+    for (let i = 0; i < maxSlots; i++) {
+      const slotPath = import_node_path39.default.join(poolDir, `slot-${i}`);
+      const lockPath = `${slotPath}.lock`;
+      const locked = await this.tryLock(lockPath);
+      if (!locked) {
+        continue;
       }
-      return output;
-    } catch (error) {
-      if (this.verbose) {
-        const message = error instanceof Error ? error.message : String(error);
-        console.log(
-          `[repo] git fail durationMs=${Date.now() - startedAt} args=${args.join(" ")} error=${message}`
-        );
+      const slotExists = (0, import_node_fs11.existsSync)(slotPath);
+      if (slotExists) {
+        await this.resetSlot(slotPath, templatePath, repos);
+        return {
+          index: i,
+          path: slotPath,
+          isExisting: true,
+          lockPath,
+          fingerprint,
+          poolDir
+        };
       }
-      throw error;
+      await (0, import_promises27.mkdir)(slotPath, { recursive: true });
+      if (templatePath) {
+        await copyDirectoryRecursive2(templatePath, slotPath);
+      }
+      if (repos.length > 0) {
+        await repoManager.materializeAll(repos, slotPath);
+      }
+      await this.writeMetadata(poolDir, fingerprint, templatePath ?? null, repos);
+      return {
+        index: i,
+        path: slotPath,
+        isExisting: false,
+        lockPath,
+        fingerprint,
+        poolDir
+      };
+    }
+    throw new Error(
+      `All ${maxSlots} pool slots are locked for fingerprint ${fingerprint.slice(0, 12)}...`
+    );
+  }
+  /** Remove lock file to release a slot. */
+  async releaseSlot(slot) {
+    try {
+      await (0, import_promises27.unlink)(slot.lockPath);
+    } catch {
+    }
+  }
+  /**
+   * Try to acquire a PID-based lock file.
+   * On EEXIST, read PID and check if process is alive. If dead, stale lock — remove and retry.
+   * Returns true if lock acquired, false if slot is actively locked.
+   * Uses a bounded loop (max 3 attempts) to avoid unbounded recursion.
+   */
+  async tryLock(lockPath) {
+    for (let attempt = 0; attempt < 3; attempt++) {
+      try {
+        await (0, import_promises27.writeFile)(lockPath, String(process.pid), { flag: "wx" });
+        return true;
+      } catch (err) {
+        if (err.code !== "EEXIST") {
+          throw err;
+        }
+        try {
+          const pidStr = await (0, import_promises27.readFile)(lockPath, "utf-8");
+          const pid = Number.parseInt(pidStr.trim(), 10);
+          if (!Number.isNaN(pid)) {
+            try {
+              process.kill(pid, 0);
+              return false;
+            } catch {
+              await (0, import_promises27.unlink)(lockPath).catch(() => {
+              });
+              continue;
+            }
+          }
+        } catch {
+        }
+        return false;
+      }
+    }
+    return false;
+  }
+  /**
+   * Check if the stored fingerprint in metadata.json differs from the computed one.
+   * Returns true if drifted, false otherwise.
+   * Returns false (no drift) if metadata.json doesn't exist (first use).
+   */
+  async checkDrift(poolDir, fingerprint) {
+    const metadataPath = import_node_path39.default.join(poolDir, "metadata.json");
+    try {
+      const raw = await (0, import_promises27.readFile)(metadataPath, "utf-8");
+      const metadata = JSON.parse(raw);
+      return metadata.fingerprint !== fingerprint;
+    } catch {
+      return false;
+    }
+  }
+  /** Write metadata.json with fingerprint, inputs, and timestamp. */
+  async writeMetadata(poolDir, fingerprint, templatePath, repos) {
+    const metadata = {
+      fingerprint,
+      templatePath,
+      repos,
+      createdAt: (/* @__PURE__ */ new Date()).toISOString()
+    };
+    await (0, import_promises27.writeFile)(import_node_path39.default.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
+  }
+  /** Remove all slot directories and their lock files from a pool directory. */
+  async removeAllSlots(poolDir) {
+    const entries = await (0, import_promises27.readdir)(poolDir);
+    for (const entry of entries) {
+      if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
+        const lockPath = import_node_path39.default.join(poolDir, `${entry}.lock`);
+        if ((0, import_node_fs11.existsSync)(lockPath)) {
+          try {
+            const pidStr = await (0, import_promises27.readFile)(lockPath, "utf-8");
+            const pid = Number.parseInt(pidStr.trim(), 10);
+            if (!Number.isNaN(pid)) {
+              try {
+                process.kill(pid, 0);
+                console.warn(`[workspace-pool] Skipping slot ${entry}: locked by PID ${pid}`);
+                continue;
+              } catch {
+              }
+            }
+          } catch {
+          }
+        }
+        await (0, import_promises27.rm)(import_node_path39.default.join(poolDir, entry), { recursive: true, force: true });
+        await (0, import_promises27.rm)(lockPath, { force: true }).catch(() => {
+        });
+      }
+    }
+    await (0, import_promises27.rm)(import_node_path39.default.join(poolDir, "metadata.json"), { force: true }).catch(() => {
+    });
+  }
+  /**
+   * Reset an existing slot for reuse:
+   * 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
+   * 2. Re-copy template files (skip repo directories)
+   */
+  async resetSlot(slotPath, templatePath, repos) {
+    for (const repo of repos) {
+      const repoDir = import_node_path39.default.join(slotPath, repo.path);
+      if (!(0, import_node_fs11.existsSync)(repoDir)) {
+        continue;
+      }
+      const ref = repo.checkout?.ref ?? "HEAD";
+      await git(["reset", "--hard", ref], { cwd: repoDir });
+      await git(["clean", "-fd"], { cwd: repoDir });
+    }
+    if (templatePath) {
+      const repoDirNames = new Set(
+        repos.map((r) => {
+          const normalized = r.path.replace(/^\.\//, "");
+          return normalized.split("/")[0];
+        })
+      );
+      await copyDirectoryRecursive2(templatePath, slotPath, repoDirNames);
+    }
+  }
+};
+// src/evaluation/workspace/repo-manager.ts
+var import_node_child_process8 = require("child_process");
+var import_node_crypto9 = require("crypto");
+var import_node_fs12 = require("fs");
+var import_promises28 = require("fs/promises");
+var import_node_path40 = __toESM(require("path"), 1);
+var import_node_util6 = require("util");
+var execFileAsync2 = (0, import_node_util6.promisify)(import_node_child_process8.execFile);
+var DEFAULT_TIMEOUT_MS2 = 3e5;
+var LOCK_TIMEOUT_MS = 6e4;
+function gitEnv2() {
+  const env = { ...process.env };
+  for (const key of Object.keys(env)) {
+    if (key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND") {
+      delete env[key];
+    }
+  }
+  return {
+    ...env,
+    GIT_TERMINAL_PROMPT: "0",
+    GIT_ASKPASS: "",
+    GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
+  };
+}
+function cacheKey(source) {
+  const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
+  return (0, import_node_crypto9.createHash)("sha256").update(raw).digest("hex");
+}
+function getSourceUrl(source) {
+  return source.type === "git" ? source.url : source.path;
+}
+async function git2(args, opts) {
+  const { stdout } = await execFileAsync2("git", args, {
+    cwd: opts?.cwd,
+    timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
+    env: gitEnv2(),
+    maxBuffer: 50 * 1024 * 1024
+    // 50MB
+  });
+  return stdout.trim();
+}
+async function acquireLock(lockPath) {
+  const start = Date.now();
+  while (Date.now() - start < LOCK_TIMEOUT_MS) {
+    try {
+      await (0, import_promises28.writeFile)(lockPath, String(process.pid), { flag: "wx" });
+      return;
+    } catch (err) {
+      if (err.code === "EEXIST") {
+        await new Promise((r) => setTimeout(r, 200));
+        continue;
+      }
+      throw err;
+    }
+  }
+  throw new Error(`Timed out waiting for lock: ${lockPath}`);
+}
+async function releaseLock(lockPath) {
+  try {
+    await (0, import_promises28.unlink)(lockPath);
+  } catch {
+  }
+}
+var RepoManager = class {
+  cacheDir;
+  verbose;
+  constructor(cacheDir, verbose = false) {
+    this.cacheDir = cacheDir ?? getGitCacheRoot();
+    this.verbose = verbose;
+  }
+  async runGit(args, opts) {
+    const startedAt = Date.now();
+    if (this.verbose) {
+      console.log(`[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`);
+    }
+    try {
+      const output = await git2(args, opts);
+      if (this.verbose) {
+        console.log(`[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`);
+      }
+      return output;
+    } catch (error) {
+      if (this.verbose) {
+        const message = error instanceof Error ? error.message : String(error);
+        console.log(
+          `[repo] git fail durationMs=${Date.now() - startedAt} args=${args.join(" ")} error=${message}`
+        );
+      }
+      throw error;
     }
   }
   /**
@@ -15625,9 +16034,9 @@ var RepoManager = class {
    */
   async ensureCache(source, depth, resolve) {
     const key = cacheKey(source);
-    const cachePath = import_node_path39.default.join(this.cacheDir, key);
+    const cachePath = import_node_path40.default.join(this.cacheDir, key);
     const lockPath = `${cachePath}.lock`;
-    const cacheExists = (0, import_node_fs11.existsSync)(import_node_path39.default.join(cachePath, "HEAD"));
+    const cacheExists = (0, import_node_fs12.existsSync)(import_node_path40.default.join(cachePath, "HEAD"));
     if (this.verbose) {
       console.log(
         `[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve ?? "remote"} cache=${cachePath}`
@@ -15645,13 +16054,11 @@ var RepoManager = class {
         `No cache found for \`${url}\`. Run \`agentv cache add --url ${url} --from <local-path>\` to seed it.`
       );
     }
-    await (0, import_promises27.mkdir)(this.cacheDir, { recursive: true });
+    await (0, import_promises28.mkdir)(this.cacheDir, { recursive: true });
     const lockStartedAt = Date.now();
     await acquireLock(lockPath);
     if (this.verbose) {
-      console.log(
-        `[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`
-      );
+      console.log(`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`);
     }
     try {
       if (cacheExists) {
@@ -15689,7 +16096,7 @@ var RepoManager = class {
    * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
    */
   async materialize(repo, workspacePath) {
-    const targetDir = import_node_path39.default.join(workspacePath, repo.path);
+    const targetDir = import_node_path40.default.join(workspacePath, repo.path);
     const startedAt = Date.now();
     if (this.verbose) {
       console.log(
@@ -15784,14 +16191,14 @@ var RepoManager = class {
   async reset(repos, workspacePath, strategy) {
     if (strategy === "recreate") {
       for (const repo of repos) {
-        const targetDir = import_node_path39.default.join(workspacePath, repo.path);
-        await (0, import_promises27.rm)(targetDir, { recursive: true, force: true });
+        const targetDir = import_node_path40.default.join(workspacePath, repo.path);
+        await (0, import_promises28.rm)(targetDir, { recursive: true, force: true });
       }
       await this.materializeAll(repos, workspacePath);
       return;
     }
     for (const repo of repos) {
-      const targetDir = import_node_path39.default.join(workspacePath, repo.path);
+      const targetDir = import_node_path40.default.join(workspacePath, repo.path);
       await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
       await this.runGit(["clean", "-fd"], { cwd: targetDir });
     }
@@ -15803,21 +16210,21 @@ var RepoManager = class {
   async seedCache(localPath, remoteUrl, opts) {
     const source = { type: "git", url: remoteUrl };
     const key = cacheKey(source);
-    const cachePath = import_node_path39.default.join(this.cacheDir, key);
+    const cachePath = import_node_path40.default.join(this.cacheDir, key);
     const lockPath = `${cachePath}.lock`;
-    await (0, import_promises27.mkdir)(this.cacheDir, { recursive: true });
+    await (0, import_promises28.mkdir)(this.cacheDir, { recursive: true });
     await acquireLock(lockPath);
     try {
-      if ((0, import_node_fs11.existsSync)(import_node_path39.default.join(cachePath, "HEAD"))) {
+      if ((0, import_node_fs12.existsSync)(import_node_path40.default.join(cachePath, "HEAD"))) {
         if (!opts?.force) {
           throw new Error(
             `Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
           );
         }
-        await (0, import_promises27.rm)(cachePath, { recursive: true, force: true });
+        await (0, import_promises28.rm)(cachePath, { recursive: true, force: true });
       }
-      await git(["clone", "--mirror", "--bare", localPath, cachePath]);
-      await git(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
+      await git2(["clone", "--mirror", "--bare", localPath, cachePath]);
+      await git2(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
     } finally {
       await releaseLock(lockPath);
     }
@@ -15825,41 +16232,41 @@ var RepoManager = class {
   }
   /** Remove the entire cache directory. */
   async cleanCache() {
-    await (0, import_promises27.rm)(this.cacheDir, { recursive: true, force: true });
+    await (0, import_promises28.rm)(this.cacheDir, { recursive: true, force: true });
   }
 };
 // src/evaluation/workspace/resolve.ts
-var import_promises28 = require("fs/promises");
-var import_node_path40 = __toESM(require("path"), 1);
+var import_promises29 = require("fs/promises");
+var import_node_path41 = __toESM(require("path"), 1);
 async function resolveWorkspaceTemplate(templatePath) {
   if (!templatePath) {
     return void 0;
   }
-  const resolved = import_node_path40.default.resolve(templatePath);
-  const stats = await (0, import_promises28.stat)(resolved);
+  const resolved = import_node_path41.default.resolve(templatePath);
+  const stats = await (0, import_promises29.stat)(resolved);
   if (stats.isFile()) {
     return {
-      dir: import_node_path40.default.dirname(resolved),
+      dir: import_node_path41.default.dirname(resolved),
       workspaceFile: resolved
     };
   }
   if (!stats.isDirectory()) {
     throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
   }
-  const entries = await (0, import_promises28.readdir)(resolved);
+  const entries = await (0, import_promises29.readdir)(resolved);
   const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
   if (workspaceFiles.length === 1) {
     return {
       dir: resolved,
-      workspaceFile: import_node_path40.default.join(resolved, workspaceFiles[0])
+      workspaceFile: import_node_path41.default.join(resolved, workspaceFiles[0])
     };
   }
   if (workspaceFiles.length > 1) {
     const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
     return {
       dir: resolved,
-      workspaceFile: conventionFile ? import_node_path40.default.join(resolved, conventionFile) : void 0
+      workspaceFile: conventionFile ? import_node_path41.default.join(resolved, conventionFile) : void 0
     };
   }
   return { dir: resolved };
@@ -15941,7 +16348,10 @@ async function runEvaluation(options) {
     trials,
     streamCallbacks,
     totalBudgetUsd,
-    failOnError
+    failOnError,
+    poolWorkspaces,
+    poolMaxSlots: configPoolMaxSlots,
+    workspace: userWorkspacePath
   } = options;
   let useCache = options.useCache;
   if (trials && trials.count > 1 && useCache) {
@@ -15950,7 +16360,7 @@ async function runEvaluation(options) {
     );
     useCache = false;
   }
-  const evalRunId = (0, import_node_crypto9.randomUUID)();
+  const evalRunId = (0, import_node_crypto10.randomUUID)();
   const evalCases = preloadedEvalCases ?? await loadTests(evalFilePath, repoRoot, { verbose, filter });
   const filteredEvalCases = filterEvalCases(evalCases, filter);
   if (filteredEvalCases.length === 0) {
@@ -16015,7 +16425,7 @@ async function runEvaluation(options) {
   ];
   const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveJudgeProvider);
   const typeRegistry = createBuiltinRegistry();
-  const discoveryBaseDir = evalFilePath ? import_node_path41.default.dirname(import_node_path41.default.resolve(evalFilePath)) : process.cwd();
+  const discoveryBaseDir = evalFilePath ? import_node_path42.default.dirname(import_node_path42.default.resolve(evalFilePath)) : process.cwd();
   const evalDir = discoveryBaseDir;
   await discoverAssertions(typeRegistry, discoveryBaseDir);
   const providerRegistry = createBuiltinProviderRegistry();
@@ -16077,13 +16487,19 @@ async function runEvaluation(options) {
     }
   };
   const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
-  const hasSharedWorkspace = !!(workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
+  if (userWorkspacePath && isPerTestIsolation) {
+    throw new Error(
+      "--workspace is incompatible with isolation: per_test. Use isolation: shared (default)."
+    );
+  }
+  const hasSharedWorkspace = !!(userWorkspacePath || workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
+  const usePool = poolWorkspaces === true && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !userWorkspacePath;
   const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
-  const workers = hasSharedWorkspace ? 1 : requestedWorkers;
+  const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
   setupLog(
-    `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
+    `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
   );
-  if (hasSharedWorkspace && requestedWorkers > 1) {
+  if (hasSharedWorkspace && !usePool && requestedWorkers > 1) {
     console.warn(
       `Warning: Shared workspace requires sequential execution. Overriding workers from ${requestedWorkers} to 1.`
     );
@@ -16092,7 +16508,37 @@ async function runEvaluation(options) {
   let sharedWorkspacePath;
   let sharedBaselineCommit;
   let beforeAllOutput;
-  if (workspaceTemplate) {
+  let poolManager;
+  let poolSlot;
+  const poolSlots = [];
+  const availablePoolSlots = [];
+  const poolSlotBaselines = /* @__PURE__ */ new Map();
+  const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
+  if (userWorkspacePath) {
+    sharedWorkspacePath = userWorkspacePath;
+    setupLog(`using user-provided workspace: ${userWorkspacePath}`);
+  } else if (usePool && suiteWorkspace?.repos) {
+    const slotsNeeded = workers;
+    setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
+    poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
+    const poolRepoManager = new RepoManager(void 0, verbose);
+    for (let i = 0; i < slotsNeeded; i++) {
+      const slot = await poolManager.acquireWorkspace({
+        templatePath: workspaceTemplate,
+        repos: suiteWorkspace.repos,
+        maxSlots: poolMaxSlots,
+        repoManager: poolRepoManager
+      });
+      poolSlots.push(slot);
+      setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
+    }
+    if (slotsNeeded === 1) {
+      poolSlot = poolSlots[0];
+      sharedWorkspacePath = poolSlot.path;
+    } else {
+      availablePoolSlots.push(...poolSlots);
+    }
+  } else if (workspaceTemplate) {
     setupLog(`creating shared workspace from template: ${workspaceTemplate}`);
     try {
       sharedWorkspacePath = await createTempWorkspace(workspaceTemplate, evalRunId, "shared");
@@ -16101,288 +16547,344 @@ async function runEvaluation(options) {
       const message = error instanceof Error ? error.message : String(error);
       throw new Error(`Failed to create shared workspace: ${message}`);
     }
-    if (suiteWorkspaceFile && sharedWorkspacePath) {
-      const copiedWorkspaceFile = import_node_path41.default.join(sharedWorkspacePath, import_node_path41.default.basename(suiteWorkspaceFile));
-      try {
-        await (0, import_promises29.stat)(copiedWorkspaceFile);
-        suiteWorkspaceFile = copiedWorkspaceFile;
-      } catch {
-      }
-    }
   } else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
     sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
-    await (0, import_promises29.mkdir)(sharedWorkspacePath, { recursive: true });
+    await (0, import_promises30.mkdir)(sharedWorkspacePath, { recursive: true });
     setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
   }
-  const repoManager = suiteWorkspace?.repos?.length ? new RepoManager(void 0, verbose) : void 0;
-  if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
-    setupLog(`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`);
-    try {
-      await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
-      setupLog("shared repo materialization complete");
-    } catch (error) {
-      const message = error instanceof Error ? error.message : String(error);
-      if (sharedWorkspacePath) {
-        await cleanupWorkspace(sharedWorkspacePath).catch(() => {
-        });
-      }
-      throw new Error(`Failed to materialize repos: ${message}`);
-    }
-  }
-  if (sharedWorkspacePath && suiteWorkspace?.before_all) {
-    const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
-    setupLog(
-      `running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
-    );
-    const scriptContext = {
-      workspacePath: sharedWorkspacePath,
-      testId: "__before_all__",
-      evalRunId,
-      evalDir
-    };
-    try {
-      beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
-      setupLog("shared before_all completed");
-    } catch (error) {
-      const message = error instanceof Error ? error.message : String(error);
-      if (sharedWorkspacePath) {
-        await cleanupWorkspace(sharedWorkspacePath).catch(() => {
-        });
+  try {
+    if (suiteWorkspaceFile && sharedWorkspacePath) {
+      const copiedWorkspaceFile = import_node_path42.default.join(sharedWorkspacePath, import_node_path42.default.basename(suiteWorkspaceFile));
+      try {
+        await (0, import_promises30.stat)(copiedWorkspaceFile);
+        suiteWorkspaceFile = copiedWorkspaceFile;
+      } catch {
       }
-      throw new Error(`before_all script failed: ${message}`);
     }
-  }
-  if (sharedWorkspacePath) {
-    try {
-      sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
-      setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
-    } catch {
-      setupLog("shared baseline initialization skipped (non-fatal)");
-    }
-  }
-  let nextWorkerId = 1;
-  const workerIdByEvalId = /* @__PURE__ */ new Map();
-  let beforeAllOutputAttached = false;
-  let cumulativeBudgetCost = 0;
-  let budgetExhausted = false;
-  let failOnErrorTriggered = false;
-  const promises = filteredEvalCases.map(
-    (evalCase) => limit(async () => {
-      const workerId = nextWorkerId++;
-      workerIdByEvalId.set(evalCase.id, workerId);
-      if (totalBudgetUsd !== void 0 && budgetExhausted) {
-        const budgetResult = {
-          timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
-          testId: evalCase.id,
-          dataset: evalCase.dataset,
-          score: 0,
-          hits: [],
-          misses: [],
-          answer: "",
-          target: target.name,
-          error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
-          budgetExceeded: true,
-          executionStatus: "execution_error",
-          failureStage: "setup",
-          failureReasonCode: "budget_exceeded",
-          executionError: {
-            message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
-            stage: "setup"
-          }
-        };
-        if (onProgress) {
-          await onProgress({
-            workerId,
-            testId: evalCase.id,
-            status: "failed",
-            completedAt: Date.now(),
-            error: budgetResult.error
+    const repoManager = suiteWorkspace?.repos?.length && !usePool && !userWorkspacePath ? new RepoManager(void 0, verbose) : void 0;
+    if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
+      setupLog(
+        `materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
+      );
+      try {
+        await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
+        setupLog("shared repo materialization complete");
+      } catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        if (sharedWorkspacePath && !userWorkspacePath) {
+          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
           });
         }
-        if (onResult) {
-          await onResult(budgetResult);
-        }
-        return budgetResult;
+        throw new Error(`Failed to materialize repos: ${message}`);
       }
-      if (failOnError === true && failOnErrorTriggered) {
-        const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
-        const haltResult = {
-          timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
-          testId: evalCase.id,
-          dataset: evalCase.dataset,
-          score: 0,
-          hits: [],
-          misses: [],
-          answer: "",
-          target: target.name,
-          error: errorMsg,
-          executionStatus: "execution_error",
-          failureStage: "setup",
-          failureReasonCode: "error_threshold_exceeded",
-          executionError: { message: errorMsg, stage: "setup" }
-        };
-        if (onProgress) {
-          await onProgress({
-            workerId,
-            testId: evalCase.id,
-            status: "failed",
-            completedAt: Date.now(),
-            error: haltResult.error
+    }
+    if (sharedWorkspacePath && suiteWorkspace?.before_all) {
+      const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
+      setupLog(
+        `running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
+      );
+      const scriptContext = {
+        workspacePath: sharedWorkspacePath,
+        testId: "__before_all__",
+        evalRunId,
+        evalDir
+      };
+      try {
+        beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
+        setupLog("shared before_all completed");
+      } catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        if (sharedWorkspacePath && !userWorkspacePath) {
+          await cleanupWorkspace(sharedWorkspacePath).catch(() => {
           });
         }
-        if (onResult) {
-          await onResult(haltResult);
-        }
-        return haltResult;
+        throw new Error(`before_all script failed: ${message}`);
       }
-      if (onProgress) {
-        await onProgress({
-          workerId,
-          testId: evalCase.id,
-          status: "running",
-          startedAt: Date.now()
-        });
-      }
-      try {
-        const judgeProvider = await resolveJudgeProvider(target);
-        const runCaseOptions = {
-          evalCase,
-          provider: primaryProvider,
-          target,
-          evaluators: evaluatorRegistry,
-          maxRetries,
-          agentTimeoutMs,
-          cache,
-          useCache,
-          now,
-          judgeProvider,
-          targetResolver,
-          availableTargets,
+    }
+    if (availablePoolSlots.length > 0 && suiteWorkspace?.before_all) {
+      for (const slot of availablePoolSlots) {
+        setupLog(`running before_all on pool slot ${slot.index}`);
+        const scriptContext = {
+          workspacePath: slot.path,
+          testId: "__before_all__",
           evalRunId,
-          keepWorkspaces,
-          cleanupWorkspaces,
-          sharedWorkspacePath,
-          sharedBaselineCommit,
-          suiteWorkspaceFile,
-          streamCallbacks,
-          typeRegistry,
-          repoManager,
           evalDir
         };
-        let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
-        if (totalBudgetUsd !== void 0) {
-          let caseCost;
-          if (result.trials && result.trials.length > 0) {
-            const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
-            if (trialCostSum > 0) {
-              caseCost = trialCostSum;
+        try {
+          const output = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
+          if (!beforeAllOutput) beforeAllOutput = output;
+          setupLog(`before_all completed on pool slot ${slot.index}`);
+        } catch (error) {
+          const message = error instanceof Error ? error.message : String(error);
+          throw new Error(`before_all script failed on pool slot ${slot.index}: ${message}`);
+        }
+      }
+    }
+    if (sharedWorkspacePath) {
+      try {
+        sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
+        setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
+      } catch {
+        setupLog("shared baseline initialization skipped (non-fatal)");
+      }
+    }
+    if (availablePoolSlots.length > 0) {
+      for (const slot of availablePoolSlots) {
+        try {
+          const baseline = await initializeBaseline(slot.path);
+          poolSlotBaselines.set(slot.path, baseline);
+          setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
+        } catch {
+          setupLog(`pool slot ${slot.index} baseline initialization skipped (non-fatal)`);
+        }
+      }
+    }
+    let nextWorkerId = 1;
+    const workerIdByEvalId = /* @__PURE__ */ new Map();
+    let beforeAllOutputAttached = false;
+    let cumulativeBudgetCost = 0;
+    let budgetExhausted = false;
+    let failOnErrorTriggered = false;
+    const promises = filteredEvalCases.map(
+      (evalCase) => limit(async () => {
+        const workerId = nextWorkerId++;
+        workerIdByEvalId.set(evalCase.id, workerId);
+        if (totalBudgetUsd !== void 0 && budgetExhausted) {
+          const budgetResult = {
+            timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
+            testId: evalCase.id,
+            dataset: evalCase.dataset,
+            score: 0,
+            hits: [],
+            misses: [],
+            answer: "",
+            target: target.name,
+            error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
+            budgetExceeded: true,
+            executionStatus: "execution_error",
+            failureStage: "setup",
+            failureReasonCode: "budget_exceeded",
+            executionError: {
+              message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
+              stage: "setup"
             }
-          } else {
-            caseCost = result.costUsd;
+          };
+          if (onProgress) {
+            await onProgress({
+              workerId,
+              testId: evalCase.id,
+              status: "failed",
+              completedAt: Date.now(),
+              error: budgetResult.error
+            });
           }
-          if (caseCost !== void 0) {
-            cumulativeBudgetCost += caseCost;
-            if (cumulativeBudgetCost >= totalBudgetUsd) {
-              budgetExhausted = true;
-            }
+          if (onResult) {
+            await onResult(budgetResult);
           }
+          return budgetResult;
         }
-        if (failOnError === true && result.executionStatus === "execution_error") {
-          failOnErrorTriggered = true;
-        }
-        if (beforeAllOutput && !beforeAllOutputAttached) {
-          result = { ...result, beforeAllOutput };
-          beforeAllOutputAttached = true;
+        if (failOnError === true && failOnErrorTriggered) {
+          const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
+          const haltResult = {
+            timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
+            testId: evalCase.id,
+            dataset: evalCase.dataset,
+            score: 0,
+            hits: [],
+            misses: [],
+            answer: "",
+            target: target.name,
+            error: errorMsg,
+            executionStatus: "execution_error",
+            failureStage: "setup",
+            failureReasonCode: "error_threshold_exceeded",
+            executionError: { message: errorMsg, stage: "setup" }
+          };
+          if (onProgress) {
+            await onProgress({
+              workerId,
+              testId: evalCase.id,
+              status: "failed",
+              completedAt: Date.now(),
+              error: haltResult.error
+            });
+          }
+          if (onResult) {
+            await onResult(haltResult);
+          }
+          return haltResult;
         }
         if (onProgress) {
           await onProgress({
             workerId,
             testId: evalCase.id,
-            status: result.error ? "failed" : "completed",
-            startedAt: 0,
-            // Not used for completed status
-            completedAt: Date.now(),
-            error: result.error
+            status: "running",
+            startedAt: Date.now()
           });
         }
-        if (onResult) {
-          await onResult(result);
+        const testPoolSlot = availablePoolSlots.length > 0 ? availablePoolSlots.pop() : void 0;
+        const testWorkspacePath = testPoolSlot?.path ?? sharedWorkspacePath;
+        const testBaselineCommit = testPoolSlot ? poolSlotBaselines.get(testPoolSlot.path) : sharedBaselineCommit;
+        try {
+          const judgeProvider = await resolveJudgeProvider(target);
+          const runCaseOptions = {
+            evalCase,
+            provider: primaryProvider,
+            target,
+            evaluators: evaluatorRegistry,
+            maxRetries,
+            agentTimeoutMs,
+            cache,
+            useCache,
+            now,
+            judgeProvider,
+            targetResolver,
+            availableTargets,
+            evalRunId,
+            keepWorkspaces,
+            cleanupWorkspaces,
+            sharedWorkspacePath: testWorkspacePath,
+            sharedBaselineCommit: testBaselineCommit,
+            suiteWorkspaceFile,
+            streamCallbacks,
+            typeRegistry,
+            repoManager,
+            evalDir
+          };
+          let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
+          if (totalBudgetUsd !== void 0) {
+            let caseCost;
+            if (result.trials && result.trials.length > 0) {
+              const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
+              if (trialCostSum > 0) {
+                caseCost = trialCostSum;
+              }
+            } else {
+              caseCost = result.costUsd;
+            }
+            if (caseCost !== void 0) {
+              cumulativeBudgetCost += caseCost;
+              if (cumulativeBudgetCost >= totalBudgetUsd) {
+                budgetExhausted = true;
+              }
+            }
+          }
+          if (failOnError === true && result.executionStatus === "execution_error") {
+            failOnErrorTriggered = true;
+          }
+          if (beforeAllOutput && !beforeAllOutputAttached) {
+            result = { ...result, beforeAllOutput };
+            beforeAllOutputAttached = true;
+          }
+          if (onProgress) {
+            await onProgress({
+              workerId,
+              testId: evalCase.id,
+              status: result.error ? "failed" : "completed",
+              startedAt: 0,
+              // Not used for completed status
+              completedAt: Date.now(),
+              error: result.error
+            });
+          }
+          if (onResult) {
+            await onResult(result);
+          }
+          return result;
+        } catch (error) {
+          if (onProgress) {
+            await onProgress({
+              workerId,
+              testId: evalCase.id,
+              status: "failed",
+              completedAt: Date.now(),
+              error: error instanceof Error ? error.message : String(error)
+            });
+          }
+          throw error;
+        } finally {
+          if (testPoolSlot) {
+            availablePoolSlots.push(testPoolSlot);
+          }
         }
-        return result;
-      } catch (error) {
-        if (onProgress) {
-          await onProgress({
-            workerId,
-            testId: evalCase.id,
-            status: "failed",
-            completedAt: Date.now(),
-            error: error instanceof Error ? error.message : String(error)
-          });
+      })
+    );
+    const settled = await Promise.allSettled(promises);
+    const results = [];
+    for (let i = 0; i < settled.length; i++) {
+      const outcome = settled[i];
+      if (outcome.status === "fulfilled") {
+        results.push(outcome.value);
+      } else {
+        const evalCase = filteredEvalCases[i];
+        const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
+        const promptInputs = await buildPromptInputs(evalCase, formattingMode);
+        const errorResult = buildErrorResult(
+          evalCase,
+          target.name,
+          (now ?? (() => /* @__PURE__ */ new Date()))(),
+          outcome.reason,
+          promptInputs,
+          primaryProvider,
+          "agent",
+          "provider_error"
+        );
+        results.push(errorResult);
+        if (onResult) {
+          await onResult(errorResult);
         }
-        throw error;
       }
-    })
-  );
-  const settled = await Promise.allSettled(promises);
-  const results = [];
-  for (let i = 0; i < settled.length; i++) {
-    const outcome = settled[i];
-    if (outcome.status === "fulfilled") {
-      results.push(outcome.value);
-    } else {
-      const evalCase = filteredEvalCases[i];
-      const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
-      const promptInputs = await buildPromptInputs(evalCase, formattingMode);
-      const errorResult = buildErrorResult(
-        evalCase,
-        target.name,
-        (now ?? (() => /* @__PURE__ */ new Date()))(),
-        outcome.reason,
-        promptInputs,
-        primaryProvider,
-        "agent",
-        "provider_error"
-      );
-      results.push(errorResult);
-      if (onResult) {
-        await onResult(errorResult);
+    }
+    const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
+    if (afterAllWorkspaces.length > 0 && suiteWorkspace?.after_all) {
+      for (const wsPath of afterAllWorkspaces) {
+        const scriptContext = {
+          workspacePath: wsPath,
+          testId: "__after_all__",
+          evalRunId,
+          evalDir
+        };
+        try {
+          const afterAllOutput = await executeWorkspaceScript(
+            suiteWorkspace.after_all,
+            scriptContext,
+            "warn"
+          );
+          if (afterAllOutput && results.length > 0 && wsPath === afterAllWorkspaces[0]) {
+            results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
+          }
+        } catch {
+        }
       }
     }
-  }
-  if (sharedWorkspacePath && suiteWorkspace?.after_all) {
-    const scriptContext = {
-      workspacePath: sharedWorkspacePath,
-      testId: "__after_all__",
-      evalRunId,
-      evalDir
-    };
-    try {
-      const afterAllOutput = await executeWorkspaceScript(
-        suiteWorkspace.after_all,
-        scriptContext,
-        "warn"
-      );
-      if (afterAllOutput && results.length > 0) {
-        results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
+    if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !userWorkspacePath) {
+      const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
+      if (cleanupWorkspaces) {
+        await cleanupWorkspace(sharedWorkspacePath).catch(() => {
+        });
+      } else if (!hasFailure && !keepWorkspaces) {
+        await cleanupWorkspace(sharedWorkspacePath).catch(() => {
+        });
       }
-    } catch {
     }
-  }
-  if (sharedWorkspacePath) {
-    const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
     if (cleanupWorkspaces) {
-      await cleanupWorkspace(sharedWorkspacePath).catch(() => {
-      });
-    } else if (!hasFailure && !keepWorkspaces) {
-      await cleanupWorkspace(sharedWorkspacePath).catch(() => {
+      await cleanupEvalWorkspaces(evalRunId).catch(() => {
       });
     }
+    return results;
+  } finally {
+    if (poolManager) {
+      if (poolSlot) {
+        await poolManager.releaseSlot(poolSlot);
+      }
+      for (const slot of poolSlots) {
+        if (slot !== poolSlot) {
+          await poolManager.releaseSlot(slot).catch(() => {
+          });
+        }
+      }
+    }
   }
-  if (cleanupWorkspaces) {
-    await cleanupEvalWorkspaces(evalRunId).catch(() => {
-    });
-  }
-  return results;
 }
 async function runBatchEvaluation(options) {
   const {
@@ -16599,9 +17101,9 @@ async function runEvalCase(options) {
         );
       }
       if (caseWorkspaceFile && workspacePath) {
-        const copiedFile = import_node_path41.default.join(workspacePath, import_node_path41.default.basename(caseWorkspaceFile));
+        const copiedFile = import_node_path42.default.join(workspacePath, import_node_path42.default.basename(caseWorkspaceFile));
         try {
-          await (0, import_promises29.stat)(copiedFile);
+          await (0, import_promises30.stat)(copiedFile);
           caseWorkspaceFile = copiedFile;
         } catch {
         }
@@ -16609,7 +17111,7 @@ async function runEvalCase(options) {
     }
     if (!workspacePath && (evalCase.workspace?.before_all || evalCase.workspace?.repos?.length) && evalRunId) {
       workspacePath = getWorkspacePath(evalRunId, evalCase.id);
-      await (0, import_promises29.mkdir)(workspacePath, { recursive: true });
+      await (0, import_promises30.mkdir)(workspacePath, { recursive: true });
     }
     if (evalCase.workspace?.repos?.length && workspacePath) {
       const perCaseRepoManager = new RepoManager(void 0, setupDebug);
@@ -17209,7 +17711,7 @@ async function runEvaluatorList(options) {
     fileChanges,
     workspacePath
   };
-  const evalFileDir = evalCase.guideline_paths[0] ? import_node_path41.default.dirname(evalCase.guideline_paths[0]) : process.cwd();
+  const evalFileDir = evalCase.guideline_paths[0] ? import_node_path42.default.dirname(evalCase.guideline_paths[0]) : process.cwd();
   const dispatchContext = {
     judgeProvider,
     targetResolver,
@@ -17443,7 +17945,7 @@ function extractProviderError(response) {
   return trimmed.length > 0 ? trimmed : void 0;
 }
 function createCacheKey(provider, target, evalCase, promptInputs) {
-  const hash = (0, import_node_crypto9.createHash)("sha256");
+  const hash = (0, import_node_crypto10.createHash)("sha256");
   hash.update(provider.id);
   hash.update(target.name);
   hash.update(evalCase.id);
@@ -17511,8 +18013,8 @@ function computeWeightedMean(entries) {
 }
 // src/evaluation/evaluate.ts
-var import_node_fs12 = require("fs");
-var import_node_path42 = __toESM(require("path"), 1);
+var import_node_fs13 = require("fs");
+var import_node_path43 = __toESM(require("path"), 1);
 async function evaluate(config) {
   const startTime = Date.now();
   if (config.tests && config.specFile) {
@@ -17534,13 +18036,13 @@ async function evaluate(config) {
   let evalCases;
   let testFilePath;
   if (config.specFile) {
-    testFilePath = import_node_path42.default.resolve(config.specFile);
+    testFilePath = import_node_path43.default.resolve(config.specFile);
     evalCases = await loadTests(testFilePath, repoRoot, {
       verbose: config.verbose,
       filter: config.filter
     });
   } else {
-    testFilePath = import_node_path42.default.join(process.cwd(), "__programmatic__.yaml");
+    testFilePath = import_node_path43.default.join(process.cwd(), "__programmatic__.yaml");
     evalCases = (config.tests ?? []).map((test) => {
       const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
       const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
@@ -17626,11 +18128,11 @@ function computeSummary(results, durationMs) {
 var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
 async function discoverDefaultTarget(repoRoot) {
   const cwd = process.cwd();
-  const chain = buildDirectoryChain2(import_node_path42.default.join(cwd, "_placeholder"), repoRoot);
+  const chain = buildDirectoryChain2(import_node_path43.default.join(cwd, "_placeholder"), repoRoot);
   for (const dir of chain) {
     for (const candidate of TARGET_FILE_CANDIDATES) {
-      const targetsPath = import_node_path42.default.join(dir, candidate);
-      if (!(0, import_node_fs12.existsSync)(targetsPath)) continue;
+      const targetsPath = import_node_path43.default.join(dir, candidate);
+      if (!(0, import_node_fs13.existsSync)(targetsPath)) continue;
       try {
         const definitions = await readTargetDefinitions(targetsPath);
         const defaultTarget = definitions.find((d) => d.name === "default");
@@ -17644,11 +18146,11 @@ async function discoverDefaultTarget(repoRoot) {
 async function loadEnvHierarchy(repoRoot) {
   const { readFileSync: readFileSync2 } = await import("fs");
   const cwd = process.cwd();
-  const chain = buildDirectoryChain2(import_node_path42.default.join(cwd, "_placeholder"), repoRoot);
+  const chain = buildDirectoryChain2(import_node_path43.default.join(cwd, "_placeholder"), repoRoot);
   const envFiles = [];
   for (const dir of chain) {
-    const envPath = import_node_path42.default.join(dir, ".env");
-    if ((0, import_node_fs12.existsSync)(envPath)) envFiles.push(envPath);
+    const envPath = import_node_path43.default.join(dir, ".env");
+    if ((0, import_node_fs13.existsSync)(envPath)) envFiles.push(envPath);
   }
   for (let i = envFiles.length - 1; i >= 0; i--) {
     try {
@@ -17726,12 +18228,12 @@ var CONFIG_FILE_NAMES = [
   ".agentv/config.js"
 ];
 async function loadTsConfig(projectRoot) {
-  const { existsSync: existsSync4 } = await import("fs");
+  const { existsSync: existsSync5 } = await import("fs");
   const { pathToFileURL } = await import("url");
   const { join: join2 } = await import("path");
   for (const fileName of CONFIG_FILE_NAMES) {
     const filePath = join2(projectRoot, fileName);
-    if (!existsSync4(filePath)) {
+    if (!existsSync5(filePath)) {
       continue;
     }
     try {
@@ -17828,8 +18330,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
 }
 // src/evaluation/cache/response-cache.ts
-var import_promises30 = require("fs/promises");
-var import_node_path43 = __toESM(require("path"), 1);
+var import_promises31 = require("fs/promises");
+var import_node_path44 = __toESM(require("path"), 1);
 var DEFAULT_CACHE_PATH = ".agentv/cache";
 var ResponseCache = class {
   cachePath;
@@ -17839,7 +18341,7 @@ var ResponseCache = class {
   async get(key) {
     const filePath = this.keyToPath(key);
     try {
-      const data = await (0, import_promises30.readFile)(filePath, "utf8");
+      const data = await (0, import_promises31.readFile)(filePath, "utf8");
       return JSON.parse(data);
     } catch {
       return void 0;
@@ -17847,13 +18349,13 @@ var ResponseCache = class {
   }
   async set(key, value) {
     const filePath = this.keyToPath(key);
-    const dir = import_node_path43.default.dirname(filePath);
-    await (0, import_promises30.mkdir)(dir, { recursive: true });
-    await (0, import_promises30.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
+    const dir = import_node_path44.default.dirname(filePath);
+    await (0, import_promises31.mkdir)(dir, { recursive: true });
+    await (0, import_promises31.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
   }
   keyToPath(key) {
     const prefix = key.slice(0, 2);
-    return import_node_path43.default.join(this.cachePath, prefix, `${key}.json`);
+    return import_node_path44.default.join(this.cachePath, prefix, `${key}.json`);
   }
 };
 function shouldEnableCache(params) {
@@ -18340,6 +18842,7 @@ function createAgentKernel() {
   TokenUsageEvaluator,
   ToolTrajectoryEvaluator,
   WorkspaceCreationError,
+  WorkspacePoolManager,
   assembleLlmJudgePrompt,
   avgToolDurationMs,
   buildDirectoryChain,
@@ -18354,6 +18857,7 @@ function createAgentKernel() {
   cleanupEvalWorkspaces,
   cleanupWorkspace,
   computeTraceSummary,
+  computeWorkspaceFingerprint,
   consumeClaudeLogEntries,
   consumeCodexLogEntries,
   consumeCopilotCliLogEntries,
@@ -18391,6 +18895,7 @@ function createAgentKernel() {
   getSubagentsRoot,
   getTraceStateRoot,
   getWorkspacePath,
+  getWorkspacePoolRoot,
   getWorkspacesRoot,
   initializeBaseline,
   isEvaluatorKind,