@agentv/core 2.14.3 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -612,6 +612,17 @@ function parseExecutionDefaults(raw, configPath) {
612
612
  } else if (otelFile !== void 0) {
613
613
  logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
614
614
  }
615
+ if (typeof obj.pool_workspaces === "boolean") {
616
+ result.pool_workspaces = obj.pool_workspaces;
617
+ } else if (obj.pool_workspaces !== void 0) {
618
+ logWarning(`Invalid execution.pool_workspaces in ${configPath}, expected boolean`);
619
+ }
620
+ const poolSlots = obj.pool_slots;
621
+ if (typeof poolSlots === "number" && Number.isInteger(poolSlots) && poolSlots >= 1 && poolSlots <= 50) {
622
+ result.pool_slots = poolSlots;
623
+ } else if (poolSlots !== void 0) {
624
+ logWarning(`Invalid execution.pool_slots in ${configPath}, expected integer 1-50`);
625
+ }
615
626
  return Object.keys(result).length > 0 ? result : void 0;
616
627
  }
617
628
  function logWarning(message) {
@@ -2053,6 +2064,7 @@ async function processMessages(options) {
2053
2064
  repoRootPath,
2054
2065
  guidelinePatterns,
2055
2066
  guidelinePaths,
2067
+ treatFileSegmentsAsGuidelines,
2056
2068
  textParts,
2057
2069
  messageType,
2058
2070
  verbose
@@ -2100,16 +2112,20 @@ async function processMessages(options) {
2100
2112
  }
2101
2113
  try {
2102
2114
  const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
2103
- if (messageType === "input" && guidelinePatterns && guidelinePaths) {
2104
- const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
2105
- if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
2106
- guidelinePaths.push(path5.resolve(resolvedPath));
2107
- if (verbose) {
2108
- console.log(` [Guideline] Found: ${displayPath}`);
2109
- console.log(` Resolved to: ${resolvedPath}`);
2110
- }
2111
- continue;
2115
+ const classifyAsGuideline = shouldTreatAsGuideline({
2116
+ messageType,
2117
+ resolvedPath,
2118
+ repoRootPath,
2119
+ guidelinePatterns,
2120
+ treatFileSegmentsAsGuidelines
2121
+ });
2122
+ if (classifyAsGuideline && guidelinePaths) {
2123
+ guidelinePaths.push(path5.resolve(resolvedPath));
2124
+ if (verbose) {
2125
+ console.log(` [Guideline] Found: ${displayPath}`);
2126
+ console.log(` Resolved to: ${resolvedPath}`);
2112
2127
  }
2128
+ continue;
2113
2129
  }
2114
2130
  segments.push({
2115
2131
  type: "file",
@@ -2138,6 +2154,26 @@ async function processMessages(options) {
2138
2154
  }
2139
2155
  return segments;
2140
2156
  }
2157
+ function shouldTreatAsGuideline(options) {
2158
+ const {
2159
+ messageType,
2160
+ resolvedPath,
2161
+ repoRootPath,
2162
+ guidelinePatterns,
2163
+ treatFileSegmentsAsGuidelines
2164
+ } = options;
2165
+ if (messageType !== "input") {
2166
+ return false;
2167
+ }
2168
+ if (treatFileSegmentsAsGuidelines) {
2169
+ return true;
2170
+ }
2171
+ if (!guidelinePatterns || guidelinePatterns.length === 0) {
2172
+ return false;
2173
+ }
2174
+ const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
2175
+ return isGuidelineFile(relativeToRepo, guidelinePatterns);
2176
+ }
2141
2177
  function asString3(value) {
2142
2178
  return typeof value === "string" ? value : void 0;
2143
2179
  }
@@ -2476,6 +2512,8 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
2476
2512
  for (const guidelinePath of testCase.guideline_paths) {
2477
2513
  console.log(` - ${guidelinePath}`);
2478
2514
  }
2515
+ } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
2516
+ console.log(" No guidelines found (guideline_patterns not configured)");
2479
2517
  } else {
2480
2518
  console.log(" No guidelines found");
2481
2519
  }
@@ -2845,7 +2883,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2845
2883
  } else {
2846
2884
  throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
2847
2885
  }
2848
- const suiteWorkspace = parseWorkspaceConfig(suite.workspace, evalFileDir);
2886
+ const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
2849
2887
  const suiteInputMessages = expandInputShorthand(suite.input);
2850
2888
  const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
2851
2889
  const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
@@ -2881,12 +2919,24 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2881
2919
  }
2882
2920
  const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
2883
2921
  const skipDefaults = caseExecution?.skip_defaults === true;
2884
- const inputMessages = suiteInputMessages && !skipDefaults ? [...suiteInputMessages, ...testInputMessages] : testInputMessages;
2922
+ const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
2923
+ const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
2885
2924
  const hasExpectedMessages = expectedMessages.length > 0;
2886
2925
  const guidelinePaths = [];
2887
2926
  const inputTextParts = [];
2888
- const inputSegments = await processMessages({
2889
- messages: inputMessages,
2927
+ const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
2928
+ messages: effectiveSuiteInputMessages,
2929
+ searchRoots,
2930
+ repoRootPath,
2931
+ guidelinePatterns,
2932
+ guidelinePaths,
2933
+ treatFileSegmentsAsGuidelines: true,
2934
+ textParts: inputTextParts,
2935
+ messageType: "input",
2936
+ verbose
2937
+ }) : [];
2938
+ const testInputSegments = await processMessages({
2939
+ messages: testInputMessages,
2890
2940
  searchRoots,
2891
2941
  repoRootPath,
2892
2942
  guidelinePatterns,
@@ -2895,6 +2945,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2895
2945
  messageType: "input",
2896
2946
  verbose
2897
2947
  });
2948
+ const inputSegments = [...suiteInputSegments, ...testInputSegments];
2898
2949
  const outputSegments = hasExpectedMessages ? await processExpectedMessages({
2899
2950
  messages: expectedMessages,
2900
2951
  searchRoots,
@@ -2942,7 +2993,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2942
2993
  ...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
2943
2994
  ...userFilePaths
2944
2995
  ];
2945
- const caseWorkspace = parseWorkspaceConfig(evalcase.workspace, evalFileDir);
2996
+ const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
2946
2997
  const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
2947
2998
  const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
2948
2999
  const caseTargets = extractTargetsFromTestCase(evalcase);
@@ -2973,6 +3024,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2973
3024
  for (const guidelinePath of testCase.guideline_paths) {
2974
3025
  console.log(` - ${guidelinePath}`);
2975
3026
  }
3027
+ } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
3028
+ console.log(" No guidelines found (guideline_patterns not configured)");
2976
3029
  } else {
2977
3030
  console.log(" No guidelines found");
2978
3031
  }
@@ -3072,6 +3125,26 @@ function parseResetConfig(raw) {
3072
3125
  ...afterEach !== void 0 && { after_each: afterEach }
3073
3126
  };
3074
3127
  }
3128
+ async function resolveWorkspaceConfig(raw, evalFileDir) {
3129
+ if (typeof raw === "string") {
3130
+ const workspaceFilePath = path8.resolve(evalFileDir, raw);
3131
+ let content;
3132
+ try {
3133
+ content = await readFile7(workspaceFilePath, "utf8");
3134
+ } catch {
3135
+ throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
3136
+ }
3137
+ const parsed = parse2(content);
3138
+ if (!isJsonObject(parsed)) {
3139
+ throw new Error(
3140
+ `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
3141
+ );
3142
+ }
3143
+ const workspaceFileDir = path8.dirname(workspaceFilePath);
3144
+ return parseWorkspaceConfig(parsed, workspaceFileDir);
3145
+ }
3146
+ return parseWorkspaceConfig(raw, evalFileDir);
3147
+ }
3075
3148
  function parseWorkspaceConfig(raw, evalFileDir) {
3076
3149
  if (!isJsonObject(raw)) return void 0;
3077
3150
  const obj = raw;
@@ -7174,6 +7247,9 @@ function getSubagentsRoot() {
7174
7247
  function getTraceStateRoot() {
7175
7248
  return path21.join(getAgentvHome(), "trace-state");
7176
7249
  }
7250
+ function getWorkspacePoolRoot() {
7251
+ return path21.join(getAgentvHome(), "workspace-pool");
7252
+ }
7177
7253
 
7178
7254
  // src/evaluation/providers/vscode/dispatch/constants.ts
7179
7255
  var DEFAULT_LOCK_NAME = "subagent.lock";
@@ -7996,8 +8072,6 @@ var AGENTV_REQUEST_TEMPLATE = `[[ ## task ## ]]
7996
8072
 
7997
8073
  **IMPORTANT**: Follow these exact steps:
7998
8074
  1. Create and write your complete response to: {{responseFileTmp}}
7999
- - All intended file outputs/changes MUST be written in your response file.
8000
- - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
8001
8075
  2. When completely finished, run these PowerShell commands to signal completion:
8002
8076
  \`\`\`
8003
8077
  Move-Item -LiteralPath '{{responseFileTmp}}' -Destination '{{responseFileFinal}}'
@@ -8014,8 +8088,6 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
8014
8088
 
8015
8089
  **IMPORTANT**: Follow these exact steps:
8016
8090
  1. Create and write your complete response to: {{responseFileTmp}}
8017
- - All intended file outputs/changes MUST be written in your response file.
8018
- - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
8019
8091
  2. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
8020
8092
  3. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
8021
8093
  `;
@@ -8628,16 +8700,16 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
8628
8700
  });
8629
8701
  }
8630
8702
  async function execShellWithStdin(command, stdinPayload, options = {}) {
8631
- const { mkdir: mkdir14, readFile: readFile12, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
8703
+ const { mkdir: mkdir15, readFile: readFile13, rm: rm7, writeFile: writeFile10 } = await import("node:fs/promises");
8632
8704
  const { tmpdir: tmpdir3 } = await import("node:os");
8633
- const path41 = await import("node:path");
8705
+ const path42 = await import("node:path");
8634
8706
  const { randomUUID: randomUUID8 } = await import("node:crypto");
8635
- const dir = path41.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
8636
- await mkdir14(dir, { recursive: true });
8637
- const stdinPath = path41.join(dir, "stdin.txt");
8638
- const stdoutPath = path41.join(dir, "stdout.txt");
8639
- const stderrPath = path41.join(dir, "stderr.txt");
8640
- await writeFile9(stdinPath, stdinPayload, "utf8");
8707
+ const dir = path42.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
8708
+ await mkdir15(dir, { recursive: true });
8709
+ const stdinPath = path42.join(dir, "stdin.txt");
8710
+ const stdoutPath = path42.join(dir, "stdout.txt");
8711
+ const stderrPath = path42.join(dir, "stderr.txt");
8712
+ await writeFile10(stdinPath, stdinPayload, "utf8");
8641
8713
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
8642
8714
  const { spawn: spawn4 } = await import("node:child_process");
8643
8715
  try {
@@ -8666,11 +8738,11 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
8666
8738
  resolve(code ?? 0);
8667
8739
  });
8668
8740
  });
8669
- const stdout = (await readFile12(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
8670
- const stderr = (await readFile12(stderrPath, "utf8")).replace(/\r\n/g, "\n");
8741
+ const stdout = (await readFile13(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
8742
+ const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
8671
8743
  return { stdout, stderr, exitCode };
8672
8744
  } finally {
8673
- await rm6(dir, { recursive: true, force: true });
8745
+ await rm7(dir, { recursive: true, force: true });
8674
8746
  }
8675
8747
  }
8676
8748
 
@@ -8988,7 +9060,7 @@ var CodeEvaluator = class {
8988
9060
  outputPath,
8989
9061
  guidelineFiles: context.evalCase.guideline_paths,
8990
9062
  inputFiles: context.evalCase.file_paths.filter(
8991
- (path41) => !context.evalCase.guideline_paths.includes(path41)
9063
+ (path42) => !context.evalCase.guideline_paths.includes(path42)
8992
9064
  ),
8993
9065
  input: context.evalCase.input,
8994
9066
  trace: context.trace ?? null,
@@ -9238,6 +9310,8 @@ ${context.fileChanges}`;
9238
9310
  };
9239
9311
  } catch (e) {
9240
9312
  const message = e instanceof Error ? e.message : String(e);
9313
+ const evalName = context.evaluator?.name ?? "llm-judge";
9314
+ console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
9241
9315
  return {
9242
9316
  score: 0,
9243
9317
  verdict: "skip",
@@ -9266,24 +9340,39 @@ ${context.fileChanges}`;
9266
9340
  systemPrompt,
9267
9341
  target: judgeProvider.targetName
9268
9342
  };
9269
- const { data, tokenUsage } = await this.runWithRetry({
9270
- context,
9271
- judgeProvider,
9272
- systemPrompt,
9273
- userPrompt: prompt,
9274
- schema: rubricEvaluationSchema
9275
- });
9276
- const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
9277
- return {
9278
- score,
9279
- verdict,
9280
- hits,
9281
- misses,
9282
- expectedAspectCount: rubrics.length,
9283
- reasoning: data.overall_reasoning,
9284
- evaluatorRawRequest,
9285
- tokenUsage
9286
- };
9343
+ try {
9344
+ const { data, tokenUsage } = await this.runWithRetry({
9345
+ context,
9346
+ judgeProvider,
9347
+ systemPrompt,
9348
+ userPrompt: prompt,
9349
+ schema: rubricEvaluationSchema
9350
+ });
9351
+ const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
9352
+ return {
9353
+ score,
9354
+ verdict,
9355
+ hits,
9356
+ misses,
9357
+ expectedAspectCount: rubrics.length,
9358
+ reasoning: data.overall_reasoning,
9359
+ evaluatorRawRequest,
9360
+ tokenUsage
9361
+ };
9362
+ } catch (e) {
9363
+ const message = e instanceof Error ? e.message : String(e);
9364
+ const evalName = context.evaluator?.name ?? "llm-judge";
9365
+ console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
9366
+ return {
9367
+ score: 0,
9368
+ verdict: "skip",
9369
+ hits: [],
9370
+ misses: [`Judge parse failure after 3 attempts: ${message}`],
9371
+ expectedAspectCount: rubrics.length,
9372
+ reasoning: `Judge parse failure after 3 attempts: ${message}`,
9373
+ evaluatorRawRequest
9374
+ };
9375
+ }
9287
9376
  }
9288
9377
  /**
9289
9378
  * Evaluate using score-range rubrics (analytic rubric scoring).
@@ -9297,25 +9386,40 @@ ${context.fileChanges}`;
9297
9386
  systemPrompt,
9298
9387
  target: judgeProvider.targetName
9299
9388
  };
9300
- const { data, tokenUsage } = await this.runWithRetry({
9301
- context,
9302
- judgeProvider,
9303
- systemPrompt,
9304
- userPrompt: prompt,
9305
- schema: scoreRangeEvaluationSchema
9306
- });
9307
- const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
9308
- return {
9309
- score,
9310
- verdict,
9311
- hits,
9312
- misses,
9313
- expectedAspectCount: rubrics.length,
9314
- reasoning: data.overall_reasoning,
9315
- evaluatorRawRequest,
9316
- details,
9317
- tokenUsage
9318
- };
9389
+ try {
9390
+ const { data, tokenUsage } = await this.runWithRetry({
9391
+ context,
9392
+ judgeProvider,
9393
+ systemPrompt,
9394
+ userPrompt: prompt,
9395
+ schema: scoreRangeEvaluationSchema
9396
+ });
9397
+ const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
9398
+ return {
9399
+ score,
9400
+ verdict,
9401
+ hits,
9402
+ misses,
9403
+ expectedAspectCount: rubrics.length,
9404
+ reasoning: data.overall_reasoning,
9405
+ evaluatorRawRequest,
9406
+ details,
9407
+ tokenUsage
9408
+ };
9409
+ } catch (e) {
9410
+ const message = e instanceof Error ? e.message : String(e);
9411
+ const evalName = context.evaluator?.name ?? "llm-judge";
9412
+ console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
9413
+ return {
9414
+ score: 0,
9415
+ verdict: "skip",
9416
+ hits: [],
9417
+ misses: [`Judge parse failure after 3 attempts: ${message}`],
9418
+ expectedAspectCount: rubrics.length,
9419
+ reasoning: `Judge parse failure after 3 attempts: ${message}`,
9420
+ evaluatorRawRequest
9421
+ };
9422
+ }
9319
9423
  }
9320
9424
  /**
9321
9425
  * Build prompt for score-range rubric evaluation.
@@ -9601,19 +9705,13 @@ var CompositeEvaluator = class {
9601
9705
  runWeightedAverage(results, weights) {
9602
9706
  let totalWeight = 0;
9603
9707
  let weightedSum = 0;
9708
+ let evaluatedCount = 0;
9604
9709
  const allHits = [];
9605
9710
  const allMisses = [];
9606
9711
  const reasoningParts = [];
9607
9712
  const scores = [];
9608
9713
  for (const member of results) {
9609
9714
  const weight = weights?.[member.id] ?? 1;
9610
- totalWeight += weight;
9611
- weightedSum += member.result.score * weight;
9612
- allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
9613
- allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
9614
- if (member.result.reasoning) {
9615
- reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
9616
- }
9617
9715
  scores.push({
9618
9716
  name: member.id,
9619
9717
  type: member.type,
@@ -9628,6 +9726,32 @@ var CompositeEvaluator = class {
9628
9726
  details: member.result.details,
9629
9727
  tokenUsage: member.result.tokenUsage
9630
9728
  });
9729
+ if (member.result.verdict === "skip") {
9730
+ continue;
9731
+ }
9732
+ evaluatedCount++;
9733
+ totalWeight += weight;
9734
+ weightedSum += member.result.score * weight;
9735
+ allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
9736
+ allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
9737
+ if (member.result.reasoning) {
9738
+ reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
9739
+ }
9740
+ }
9741
+ if (evaluatedCount === 0 && results.length > 0) {
9742
+ return {
9743
+ score: 0,
9744
+ verdict: "skip",
9745
+ hits: [],
9746
+ misses: [],
9747
+ expectedAspectCount: 1,
9748
+ reasoning: "All evaluators skipped (infrastructure failure)",
9749
+ evaluatorRawRequest: {
9750
+ aggregator: "weighted_average",
9751
+ ...weights ? { weights } : {}
9752
+ },
9753
+ scores
9754
+ };
9631
9755
  }
9632
9756
  const finalScore = totalWeight > 0 ? weightedSum / totalWeight : 0;
9633
9757
  return {
@@ -9651,19 +9775,8 @@ var CompositeEvaluator = class {
9651
9775
  const reasoningParts = [];
9652
9776
  let passingCount = 0;
9653
9777
  let borderlineCount = 0;
9778
+ let evaluatedCount = 0;
9654
9779
  for (const member of results) {
9655
- const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
9656
- if (isPassing) {
9657
- passingCount++;
9658
- if (member.result.verdict === "borderline") {
9659
- borderlineCount++;
9660
- }
9661
- }
9662
- allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
9663
- allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
9664
- if (member.result.reasoning) {
9665
- reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
9666
- }
9667
9780
  scores.push({
9668
9781
  name: member.id,
9669
9782
  type: member.type,
@@ -9677,8 +9790,39 @@ var CompositeEvaluator = class {
9677
9790
  details: member.result.details,
9678
9791
  tokenUsage: member.result.tokenUsage
9679
9792
  });
9793
+ if (member.result.verdict === "skip") {
9794
+ continue;
9795
+ }
9796
+ evaluatedCount++;
9797
+ const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
9798
+ if (isPassing) {
9799
+ passingCount++;
9800
+ if (member.result.verdict === "borderline") {
9801
+ borderlineCount++;
9802
+ }
9803
+ }
9804
+ allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
9805
+ allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
9806
+ if (member.result.reasoning) {
9807
+ reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
9808
+ }
9809
+ }
9810
+ if (evaluatedCount === 0 && results.length > 0) {
9811
+ return {
9812
+ score: 0,
9813
+ verdict: "skip",
9814
+ hits: [],
9815
+ misses: [],
9816
+ expectedAspectCount: 1,
9817
+ reasoning: "All evaluators skipped (infrastructure failure)",
9818
+ evaluatorRawRequest: {
9819
+ aggregator: "threshold",
9820
+ threshold
9821
+ },
9822
+ scores
9823
+ };
9680
9824
  }
9681
- const totalCount = results.length;
9825
+ const totalCount = evaluatedCount;
9682
9826
  const score = totalCount > 0 ? passingCount / totalCount : 0;
9683
9827
  const pass = score >= threshold;
9684
9828
  if (pass && borderlineCount > 0) {
@@ -10186,115 +10330,115 @@ var FieldAccuracyEvaluator = class {
10186
10330
  * Evaluate a single field against the expected value.
10187
10331
  */
10188
10332
  evaluateField(fieldConfig, candidateData, expectedData) {
10189
- const { path: path41, match, required = true, weight = 1 } = fieldConfig;
10190
- const candidateValue = resolvePath(candidateData, path41);
10191
- const expectedValue = resolvePath(expectedData, path41);
10333
+ const { path: path42, match, required = true, weight = 1 } = fieldConfig;
10334
+ const candidateValue = resolvePath(candidateData, path42);
10335
+ const expectedValue = resolvePath(expectedData, path42);
10192
10336
  if (expectedValue === void 0) {
10193
10337
  return {
10194
- path: path41,
10338
+ path: path42,
10195
10339
  score: 1,
10196
10340
  // No expected value means no comparison needed
10197
10341
  weight,
10198
10342
  hit: true,
10199
- message: `${path41}: no expected value`
10343
+ message: `${path42}: no expected value`
10200
10344
  };
10201
10345
  }
10202
10346
  if (candidateValue === void 0) {
10203
10347
  if (required) {
10204
10348
  return {
10205
- path: path41,
10349
+ path: path42,
10206
10350
  score: 0,
10207
10351
  weight,
10208
10352
  hit: false,
10209
- message: `${path41} (required, missing)`
10353
+ message: `${path42} (required, missing)`
10210
10354
  };
10211
10355
  }
10212
10356
  return {
10213
- path: path41,
10357
+ path: path42,
10214
10358
  score: 1,
10215
10359
  // Don't penalize missing optional fields
10216
10360
  weight: 0,
10217
10361
  // Zero weight means it won't affect the score
10218
10362
  hit: true,
10219
- message: `${path41}: optional field missing`
10363
+ message: `${path42}: optional field missing`
10220
10364
  };
10221
10365
  }
10222
10366
  switch (match) {
10223
10367
  case "exact":
10224
- return this.compareExact(path41, candidateValue, expectedValue, weight);
10368
+ return this.compareExact(path42, candidateValue, expectedValue, weight);
10225
10369
  case "numeric_tolerance":
10226
10370
  return this.compareNumericTolerance(
10227
- path41,
10371
+ path42,
10228
10372
  candidateValue,
10229
10373
  expectedValue,
10230
10374
  fieldConfig,
10231
10375
  weight
10232
10376
  );
10233
10377
  case "date":
10234
- return this.compareDate(path41, candidateValue, expectedValue, fieldConfig, weight);
10378
+ return this.compareDate(path42, candidateValue, expectedValue, fieldConfig, weight);
10235
10379
  default:
10236
10380
  return {
10237
- path: path41,
10381
+ path: path42,
10238
10382
  score: 0,
10239
10383
  weight,
10240
10384
  hit: false,
10241
- message: `${path41}: unknown match type "${match}"`
10385
+ message: `${path42}: unknown match type "${match}"`
10242
10386
  };
10243
10387
  }
10244
10388
  }
10245
10389
  /**
10246
10390
  * Exact equality comparison.
10247
10391
  */
10248
- compareExact(path41, candidateValue, expectedValue, weight) {
10392
+ compareExact(path42, candidateValue, expectedValue, weight) {
10249
10393
  if (deepEqual(candidateValue, expectedValue)) {
10250
10394
  return {
10251
- path: path41,
10395
+ path: path42,
10252
10396
  score: 1,
10253
10397
  weight,
10254
10398
  hit: true,
10255
- message: path41
10399
+ message: path42
10256
10400
  };
10257
10401
  }
10258
10402
  if (typeof candidateValue !== typeof expectedValue) {
10259
10403
  return {
10260
- path: path41,
10404
+ path: path42,
10261
10405
  score: 0,
10262
10406
  weight,
10263
10407
  hit: false,
10264
- message: `${path41} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
10408
+ message: `${path42} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
10265
10409
  };
10266
10410
  }
10267
10411
  return {
10268
- path: path41,
10412
+ path: path42,
10269
10413
  score: 0,
10270
10414
  weight,
10271
10415
  hit: false,
10272
- message: `${path41} (value mismatch)`
10416
+ message: `${path42} (value mismatch)`
10273
10417
  };
10274
10418
  }
10275
10419
  /**
10276
10420
  * Numeric comparison with absolute or relative tolerance.
10277
10421
  */
10278
- compareNumericTolerance(path41, candidateValue, expectedValue, fieldConfig, weight) {
10422
+ compareNumericTolerance(path42, candidateValue, expectedValue, fieldConfig, weight) {
10279
10423
  const { tolerance = 0, relative = false } = fieldConfig;
10280
10424
  const candidateNum = toNumber2(candidateValue);
10281
10425
  const expectedNum = toNumber2(expectedValue);
10282
10426
  if (candidateNum === null || expectedNum === null) {
10283
10427
  return {
10284
- path: path41,
10428
+ path: path42,
10285
10429
  score: 0,
10286
10430
  weight,
10287
10431
  hit: false,
10288
- message: `${path41} (non-numeric value)`
10432
+ message: `${path42} (non-numeric value)`
10289
10433
  };
10290
10434
  }
10291
10435
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
10292
10436
  return {
10293
- path: path41,
10437
+ path: path42,
10294
10438
  score: 0,
10295
10439
  weight,
10296
10440
  hit: false,
10297
- message: `${path41} (invalid numeric value)`
10441
+ message: `${path42} (invalid numeric value)`
10298
10442
  };
10299
10443
  }
10300
10444
  const diff = Math.abs(candidateNum - expectedNum);
@@ -10307,61 +10451,61 @@ var FieldAccuracyEvaluator = class {
10307
10451
  }
10308
10452
  if (withinTolerance) {
10309
10453
  return {
10310
- path: path41,
10454
+ path: path42,
10311
10455
  score: 1,
10312
10456
  weight,
10313
10457
  hit: true,
10314
- message: `${path41} (within tolerance: diff=${diff.toFixed(2)})`
10458
+ message: `${path42} (within tolerance: diff=${diff.toFixed(2)})`
10315
10459
  };
10316
10460
  }
10317
10461
  return {
10318
- path: path41,
10462
+ path: path42,
10319
10463
  score: 0,
10320
10464
  weight,
10321
10465
  hit: false,
10322
- message: `${path41} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
10466
+ message: `${path42} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
10323
10467
  };
10324
10468
  }
10325
10469
  /**
10326
10470
  * Date comparison with format normalization.
10327
10471
  */
10328
- compareDate(path41, candidateValue, expectedValue, fieldConfig, weight) {
10472
+ compareDate(path42, candidateValue, expectedValue, fieldConfig, weight) {
10329
10473
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
10330
10474
  const candidateDate = parseDate(String(candidateValue), formats);
10331
10475
  const expectedDate = parseDate(String(expectedValue), formats);
10332
10476
  if (candidateDate === null) {
10333
10477
  return {
10334
- path: path41,
10478
+ path: path42,
10335
10479
  score: 0,
10336
10480
  weight,
10337
10481
  hit: false,
10338
- message: `${path41} (unparseable candidate date)`
10482
+ message: `${path42} (unparseable candidate date)`
10339
10483
  };
10340
10484
  }
10341
10485
  if (expectedDate === null) {
10342
10486
  return {
10343
- path: path41,
10487
+ path: path42,
10344
10488
  score: 0,
10345
10489
  weight,
10346
10490
  hit: false,
10347
- message: `${path41} (unparseable expected date)`
10491
+ message: `${path42} (unparseable expected date)`
10348
10492
  };
10349
10493
  }
10350
10494
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
10351
10495
  return {
10352
- path: path41,
10496
+ path: path42,
10353
10497
  score: 1,
10354
10498
  weight,
10355
10499
  hit: true,
10356
- message: path41
10500
+ message: path42
10357
10501
  };
10358
10502
  }
10359
10503
  return {
10360
- path: path41,
10504
+ path: path42,
10361
10505
  score: 0,
10362
10506
  weight,
10363
10507
  hit: false,
10364
- message: `${path41} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
10508
+ message: `${path42} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
10365
10509
  };
10366
10510
  }
10367
10511
  /**
@@ -10402,11 +10546,11 @@ var FieldAccuracyEvaluator = class {
10402
10546
  };
10403
10547
  }
10404
10548
  };
10405
- function resolvePath(obj, path41) {
10406
- if (!path41 || !obj) {
10549
+ function resolvePath(obj, path42) {
10550
+ if (!path42 || !obj) {
10407
10551
  return void 0;
10408
10552
  }
10409
- const parts = path41.split(/\.|\[|\]/).filter((p) => p.length > 0);
10553
+ const parts = path42.split(/\.|\[|\]/).filter((p) => p.length > 0);
10410
10554
  let current = obj;
10411
10555
  for (const part of parts) {
10412
10556
  if (current === null || current === void 0) {
@@ -11224,8 +11368,8 @@ var TokenUsageEvaluator = class {
11224
11368
  };
11225
11369
 
11226
11370
  // src/evaluation/evaluators/tool-trajectory.ts
11227
- function getNestedValue(obj, path41) {
11228
- const parts = path41.split(".");
11371
+ function getNestedValue(obj, path42) {
11372
+ const parts = path42.split(".");
11229
11373
  let current = obj;
11230
11374
  for (const part of parts) {
11231
11375
  if (current === null || current === void 0 || typeof current !== "object") {
@@ -11786,9 +11930,9 @@ function runEqualsAssertion(output, value) {
11786
11930
  }
11787
11931
 
11788
11932
  // src/evaluation/orchestrator.ts
11789
- import { createHash as createHash2, randomUUID as randomUUID7 } from "node:crypto";
11790
- import { mkdir as mkdir12, stat as stat7 } from "node:fs/promises";
11791
- import path38 from "node:path";
11933
+ import { createHash as createHash3, randomUUID as randomUUID7 } from "node:crypto";
11934
+ import { mkdir as mkdir13, stat as stat7 } from "node:fs/promises";
11935
+ import path39 from "node:path";
11792
11936
  import micromatch4 from "micromatch";
11793
11937
 
11794
11938
  // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
@@ -12658,17 +12802,283 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
12658
12802
  }
12659
12803
  }
12660
12804
 
12661
- // src/evaluation/workspace/repo-manager.ts
12805
+ // src/evaluation/workspace/pool-manager.ts
12662
12806
  import { execFile } from "node:child_process";
12663
12807
  import { createHash } from "node:crypto";
12664
12808
  import { existsSync as existsSync2 } from "node:fs";
12665
- import { mkdir as mkdir11, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
12809
+ import { cp as cp2, mkdir as mkdir11, readFile as readFile11, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
12666
12810
  import path36 from "node:path";
12667
12811
  import { promisify as promisify5 } from "node:util";
12668
12812
  var execFileAsync = promisify5(execFile);
12813
/**
 * Build the environment used for spawned git processes.
 *
 * Starts from a copy of `process.env`, drops every inherited `GIT_*` variable
 * except `GIT_SSH_COMMAND`, then forces non-interactive settings on top.
 * The forced `GIT_SSH_COMMAND` below replaces any inherited value.
 * `process.env` itself is never modified.
 *
 * @returns {Record<string, string | undefined>} environment for child git processes
 */
function gitEnv() {
  const inherited = Object.entries(process.env).filter(
    ([name]) => !name.startsWith("GIT_") || name === "GIT_SSH_COMMAND"
  );
  return {
    ...Object.fromEntries(inherited),
    GIT_TERMINAL_PROMPT: "0",
    GIT_ASKPASS: "",
    GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
  };
}
12827
/**
 * Run `git` with the given arguments and return its trimmed stdout.
 *
 * @param {string[]} args - arguments passed to the git executable
 * @param {{ cwd?: string, timeout?: number }} [opts] - working directory and timeout in ms
 *   (timeout defaults to 300000 when not provided)
 * @returns {Promise<string>} stdout with surrounding whitespace removed
 */
async function git(args, opts) {
  const execOptions = {
    cwd: opts?.cwd,
    timeout: opts?.timeout ?? 3e5,
    env: gitEnv(),
    // Allow up to 50 MiB of buffered output before execFile rejects.
    maxBuffer: 50 * 1024 * 1024
  };
  const result = await execFileAsync("git", args, execOptions);
  return result.stdout.trim();
}
12836
/**
 * Reduce a repo config to the canonical fields that feed the workspace fingerprint.
 *
 * Git URLs are lower-cased and a trailing `.git` is removed; local sources keep
 * their path as-is. Optional clone settings are included only when present, and
 * sparse paths are sorted. Property insertion order is significant because the
 * result is serialized with JSON.stringify before hashing.
 *
 * @param {object} repo - repo config with `path`, `source`, optional `checkout` and `clone`
 * @returns {object} canonical representation of the repo
 */
function normalizeRepoForFingerprint(repo) {
  const { source: rawSource, checkout, clone } = repo;
  let source;
  if (rawSource.type === "git") {
    source = { type: "git", url: rawSource.url.toLowerCase().replace(/\.git$/, "") };
  } else {
    source = { type: "local", path: rawSource.path };
  }
  const normalized = {
    path: repo.path,
    source,
    ref: checkout?.ref ?? "HEAD"
  };
  const depth = clone?.depth;
  if (depth !== void 0) {
    normalized.depth = depth;
  }
  const filter = clone?.filter;
  if (filter !== void 0) {
    normalized.filter = filter;
  }
  const sparse = clone?.sparse;
  if (sparse?.length) {
    // Copy before sorting so the caller's array is left untouched.
    normalized.sparse = [...sparse].sort();
  }
  return normalized;
}
12854
/**
 * Compute a SHA-256 hex fingerprint for a workspace definition.
 *
 * The fingerprint covers the template path (or null) and the normalized repo
 * list, sorted by repo path so the caller's ordering does not matter.
 *
 * @param {string | null | undefined} templatePath - template location, if any
 * @param {object[]} repos - repo configs; the input array is not mutated
 * @returns {string} hex-encoded SHA-256 digest
 */
function computeWorkspaceFingerprint(templatePath, repos) {
  const orderedRepos = Array.from(repos);
  orderedRepos.sort((left, right) => left.path.localeCompare(right.path));
  const payload = JSON.stringify({
    templatePath: templatePath ?? null,
    repos: orderedRepos.map(normalizeRepoForFingerprint)
  });
  const hasher = createHash("sha256");
  hasher.update(payload);
  return hasher.digest("hex");
}
12861
/**
 * Recursively copy the contents of `src` into `dest`, creating `dest` if needed.
 *
 * Entries named `.git` are skipped at every depth. `skipDirs` names directories
 * to skip directly under `src` only: it is intentionally NOT forwarded to nested
 * levels, so a deeper directory that merely shares a name with a skipped
 * top-level directory (for example `docs/repo` when `repo` is skipped) is still
 * copied.
 *
 * @param {string} src - directory to copy from
 * @param {string} dest - directory to copy into
 * @param {Set<string>} [skipDirs] - names of directories directly under `src` to skip
 * @returns {Promise<void>}
 */
async function copyDirectoryRecursive2(src, dest, skipDirs) {
  await mkdir11(dest, { recursive: true });
  const entries = await readdir4(src, { withFileTypes: true });
  for (const entry of entries) {
    if (entry.name === ".git") {
      continue;
    }
    const srcPath = path36.join(src, entry.name);
    const destPath = path36.join(dest, entry.name);
    if (!entry.isDirectory()) {
      await cp2(srcPath, destPath, { preserveTimestamps: true, force: true });
      continue;
    }
    if (skipDirs?.has(entry.name)) {
      continue;
    }
    // No skip set below the top level: skipDirs only describes direct children of `src`.
    await copyDirectoryRecursive2(srcPath, destPath);
  }
}
12880
/**
 * Manages a pool of reusable workspace directories ("slots") on disk.
 *
 * Slots live under `<poolRoot>/<fingerprint>/slot-<n>` and are guarded by
 * PID-based lock files named `slot-<n>.lock` next to them. A `metadata.json`
 * in the fingerprint directory records the fingerprint the slots were built for.
 */
var WorkspacePoolManager = class {
  poolRoot;
  /**
   * @param {string} [poolRoot] - pool root directory; falls back to getWorkspacePoolRoot()
   */
  constructor(poolRoot) {
    this.poolRoot = poolRoot ?? getWorkspacePoolRoot();
  }
  /**
   * Acquire a workspace slot from the pool.
   *
   * 1. Compute fingerprint from template + repos
   * 2. Check drift (compare stored metadata.json fingerprint vs computed)
   * 3. If drift: warn, remove all slots, rematerialize
   * 4. Acquire a slot (try-lock slot-0, slot-1, ..., up to maxSlots)
   * 5. If slot exists: reset repos, re-copy template files (skip repo directories)
   * 6. If new slot: copy template, materialize all repos, write metadata.json
   * 7. Return the slot (with path, index, isExisting)
   *
   * If preparing the slot fails after its lock was taken, the lock is released
   * (and a partially built new slot is deleted) before the error is rethrown,
   * so the slot is not left locked by this still-running process.
   *
   * @param {{ templatePath?: string, repos: object[], maxSlots: number, repoManager: object }} options
   * @returns {Promise<{ index: number, path: string, isExisting: boolean, lockPath: string, fingerprint: string, poolDir: string }>}
   * @throws {Error} when every slot up to maxSlots is locked, or when slot preparation fails
   */
  async acquireWorkspace(options) {
    const { templatePath, repos, maxSlots, repoManager } = options;
    const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
    const poolDir = path36.join(this.poolRoot, fingerprint);
    await mkdir11(poolDir, { recursive: true });
    const drifted = await this.checkDrift(poolDir, fingerprint);
    if (drifted) {
      console.warn(
        `[workspace-pool] Drift detected for fingerprint ${fingerprint.slice(0, 12)}... Removing stale slots.`
      );
      await this.removeAllSlots(poolDir);
    }
    for (let i = 0; i < maxSlots; i++) {
      const slotPath = path36.join(poolDir, `slot-${i}`);
      const lockPath = `${slotPath}.lock`;
      const locked = await this.tryLock(lockPath);
      if (!locked) {
        continue;
      }
      const isExisting = existsSync2(slotPath);
      try {
        if (isExisting) {
          await this.resetSlot(slotPath, templatePath, repos);
        } else {
          await mkdir11(slotPath, { recursive: true });
          if (templatePath) {
            await copyDirectoryRecursive2(templatePath, slotPath);
          }
          if (repos.length > 0) {
            await repoManager.materializeAll(repos, slotPath);
          }
          await this.writeMetadata(poolDir, fingerprint, templatePath ?? null, repos);
        }
      } catch (err) {
        if (!isExisting) {
          // A half-built slot would be mistaken for a reusable one on the next acquire.
          await rm5(slotPath, { recursive: true, force: true }).catch(() => {
          });
        }
        // The lock holds our own (live) PID, so nobody could reclaim it while we run.
        await this.releaseSlot({ lockPath });
        throw err;
      }
      return {
        index: i,
        path: slotPath,
        isExisting,
        lockPath,
        fingerprint,
        poolDir
      };
    }
    throw new Error(
      `All ${maxSlots} pool slots are locked for fingerprint ${fingerprint.slice(0, 12)}...`
    );
  }
  /**
   * Remove lock file to release a slot. Best effort: a missing or
   * unremovable lock file is ignored.
   *
   * @param {{ lockPath: string }} slot - slot returned by acquireWorkspace
   */
  async releaseSlot(slot) {
    try {
      await unlink(slot.lockPath);
    } catch {
      // Deliberately ignored: releasing is best effort.
    }
  }
  /**
   * Report whether a process with the given PID currently exists.
   *
   * Signal 0 performs only the existence/permission check. EPERM means the
   * process exists but belongs to another user, so it counts as alive; any
   * other failure is treated as "not running".
   *
   * @param {number} pid
   * @returns {boolean}
   */
  isProcessAlive(pid) {
    try {
      process.kill(pid, 0);
      return true;
    } catch (err) {
      return err?.code === "EPERM";
    }
  }
  /**
   * Try to acquire a PID-based lock file.
   * On EEXIST, read PID and check if process is alive. If dead, stale lock — remove and retry.
   * Returns true if lock acquired, false if slot is actively locked (or the lock
   * file cannot be read or does not contain a PID).
   * Uses a bounded loop (max 3 attempts) to avoid unbounded recursion.
   *
   * @param {string} lockPath
   * @returns {Promise<boolean>}
   * @throws any lock-file creation error other than EEXIST
   */
  async tryLock(lockPath) {
    for (let attempt = 0; attempt < 3; attempt++) {
      try {
        // "wx" fails with EEXIST when the file already exists, making creation the lock.
        await writeFile7(lockPath, String(process.pid), { flag: "wx" });
        return true;
      } catch (err) {
        if (err.code !== "EEXIST") {
          throw err;
        }
      }
      let pid;
      try {
        const pidStr = await readFile11(lockPath, "utf-8");
        pid = Number.parseInt(pidStr.trim(), 10);
      } catch {
        // Unreadable lock: be conservative and treat the slot as taken.
        return false;
      }
      if (Number.isNaN(pid) || this.isProcessAlive(pid)) {
        return false;
      }
      // Stale lock left by a dead process: remove it and try again.
      await unlink(lockPath).catch(() => {
      });
    }
    return false;
  }
  /**
   * Check if the stored fingerprint in metadata.json differs from the computed one.
   * Returns true if drifted, false otherwise.
   * Returns false (no drift) if metadata.json doesn't exist (first use) or cannot be parsed.
   *
   * @param {string} poolDir
   * @param {string} fingerprint
   * @returns {Promise<boolean>}
   */
  async checkDrift(poolDir, fingerprint) {
    const metadataPath = path36.join(poolDir, "metadata.json");
    try {
      const raw = await readFile11(metadataPath, "utf-8");
      const metadata = JSON.parse(raw);
      return metadata.fingerprint !== fingerprint;
    } catch {
      return false;
    }
  }
  /** Write metadata.json with fingerprint, inputs, and timestamp. */
  async writeMetadata(poolDir, fingerprint, templatePath, repos) {
    const metadata = {
      fingerprint,
      templatePath,
      repos,
      createdAt: (/* @__PURE__ */ new Date()).toISOString()
    };
    await writeFile7(path36.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
  }
  /**
   * Remove all slot directories and their lock files from a pool directory,
   * then remove metadata.json. Slots whose lock file names a live process are
   * skipped with a warning.
   *
   * @param {string} poolDir
   */
  async removeAllSlots(poolDir) {
    const entries = await readdir4(poolDir);
    for (const entry of entries) {
      if (!entry.startsWith("slot-") || entry.endsWith(".lock")) {
        continue;
      }
      const lockPath = path36.join(poolDir, `${entry}.lock`);
      if (existsSync2(lockPath)) {
        let pid = Number.NaN;
        try {
          const pidStr = await readFile11(lockPath, "utf-8");
          pid = Number.parseInt(pidStr.trim(), 10);
        } catch {
          // Unreadable lock: fall through and remove the slot.
        }
        if (!Number.isNaN(pid) && this.isProcessAlive(pid)) {
          console.warn(`[workspace-pool] Skipping slot ${entry}: locked by PID ${pid}`);
          continue;
        }
      }
      await rm5(path36.join(poolDir, entry), { recursive: true, force: true });
      await rm5(lockPath, { force: true }).catch(() => {
      });
    }
    await rm5(path36.join(poolDir, "metadata.json"), { force: true }).catch(() => {
    });
  }
  /**
   * Reset an existing slot for reuse:
   * 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
   * 2. Re-copy template files (skip repo directories)
   *
   * Repos whose directory is missing from the slot are skipped.
   *
   * @param {string} slotPath
   * @param {string | undefined} templatePath
   * @param {object[]} repos
   */
  async resetSlot(slotPath, templatePath, repos) {
    for (const repo of repos) {
      const repoDir = path36.join(slotPath, repo.path);
      if (!existsSync2(repoDir)) {
        continue;
      }
      const ref = repo.checkout?.ref ?? "HEAD";
      await git(["reset", "--hard", ref], { cwd: repoDir });
      await git(["clean", "-fd"], { cwd: repoDir });
    }
    if (templatePath) {
      // First path segment of each repo path (after a leading "./") is the
      // top-level directory the template copy must not overwrite.
      const repoDirNames = new Set(
        repos.map((r) => {
          const normalized = r.path.replace(/^\.\//, "");
          return normalized.split("/")[0];
        })
      );
      await copyDirectoryRecursive2(templatePath, slotPath, repoDirNames);
    }
  }
};
13070
+
13071
+ // src/evaluation/workspace/repo-manager.ts
13072
+ import { execFile as execFile2 } from "node:child_process";
13073
+ import { createHash as createHash2 } from "node:crypto";
13074
+ import { existsSync as existsSync3 } from "node:fs";
13075
+ import { mkdir as mkdir12, rm as rm6, unlink as unlink2, writeFile as writeFile8 } from "node:fs/promises";
13076
+ import path37 from "node:path";
13077
+ import { promisify as promisify6 } from "node:util";
13078
+ var execFileAsync2 = promisify6(execFile2);
12669
13079
  var DEFAULT_TIMEOUT_MS2 = 3e5;
12670
13080
  var LOCK_TIMEOUT_MS = 6e4;
12671
- function gitEnv() {
13081
+ function gitEnv2() {
12672
13082
  const env = { ...process.env };
12673
13083
  for (const key of Object.keys(env)) {
12674
13084
  if (key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND") {
@@ -12684,16 +13094,16 @@ function gitEnv() {
12684
13094
  }
12685
13095
  function cacheKey(source) {
12686
13096
  const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
12687
- return createHash("sha256").update(raw).digest("hex");
13097
+ return createHash2("sha256").update(raw).digest("hex");
12688
13098
  }
12689
13099
  function getSourceUrl(source) {
12690
13100
  return source.type === "git" ? source.url : source.path;
12691
13101
  }
12692
- async function git(args, opts) {
12693
- const { stdout } = await execFileAsync("git", args, {
13102
+ async function git2(args, opts) {
13103
+ const { stdout } = await execFileAsync2("git", args, {
12694
13104
  cwd: opts?.cwd,
12695
13105
  timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
12696
- env: gitEnv(),
13106
+ env: gitEnv2(),
12697
13107
  maxBuffer: 50 * 1024 * 1024
12698
13108
  // 50MB
12699
13109
  });
@@ -12703,7 +13113,7 @@ async function acquireLock(lockPath) {
12703
13113
  const start = Date.now();
12704
13114
  while (Date.now() - start < LOCK_TIMEOUT_MS) {
12705
13115
  try {
12706
- await writeFile7(lockPath, String(process.pid), { flag: "wx" });
13116
+ await writeFile8(lockPath, String(process.pid), { flag: "wx" });
12707
13117
  return;
12708
13118
  } catch (err) {
12709
13119
  if (err.code === "EEXIST") {
@@ -12717,7 +13127,7 @@ async function acquireLock(lockPath) {
12717
13127
  }
12718
13128
  async function releaseLock(lockPath) {
12719
13129
  try {
12720
- await unlink(lockPath);
13130
+ await unlink2(lockPath);
12721
13131
  } catch {
12722
13132
  }
12723
13133
  }
@@ -12731,16 +13141,12 @@ var RepoManager = class {
12731
13141
  async runGit(args, opts) {
12732
13142
  const startedAt = Date.now();
12733
13143
  if (this.verbose) {
12734
- console.log(
12735
- `[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`
12736
- );
13144
+ console.log(`[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`);
12737
13145
  }
12738
13146
  try {
12739
- const output = await git(args, opts);
13147
+ const output = await git2(args, opts);
12740
13148
  if (this.verbose) {
12741
- console.log(
12742
- `[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`
12743
- );
13149
+ console.log(`[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`);
12744
13150
  }
12745
13151
  return output;
12746
13152
  } catch (error) {
@@ -12760,9 +13166,9 @@ var RepoManager = class {
12760
13166
  */
12761
13167
  async ensureCache(source, depth, resolve) {
12762
13168
  const key = cacheKey(source);
12763
- const cachePath = path36.join(this.cacheDir, key);
13169
+ const cachePath = path37.join(this.cacheDir, key);
12764
13170
  const lockPath = `${cachePath}.lock`;
12765
- const cacheExists = existsSync2(path36.join(cachePath, "HEAD"));
13171
+ const cacheExists = existsSync3(path37.join(cachePath, "HEAD"));
12766
13172
  if (this.verbose) {
12767
13173
  console.log(
12768
13174
  `[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve ?? "remote"} cache=${cachePath}`
@@ -12780,13 +13186,11 @@ var RepoManager = class {
12780
13186
  `No cache found for \`${url}\`. Run \`agentv cache add --url ${url} --from <local-path>\` to seed it.`
12781
13187
  );
12782
13188
  }
12783
- await mkdir11(this.cacheDir, { recursive: true });
13189
+ await mkdir12(this.cacheDir, { recursive: true });
12784
13190
  const lockStartedAt = Date.now();
12785
13191
  await acquireLock(lockPath);
12786
13192
  if (this.verbose) {
12787
- console.log(
12788
- `[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`
12789
- );
13193
+ console.log(`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`);
12790
13194
  }
12791
13195
  try {
12792
13196
  if (cacheExists) {
@@ -12824,7 +13228,7 @@ var RepoManager = class {
12824
13228
  * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
12825
13229
  */
12826
13230
  async materialize(repo, workspacePath) {
12827
- const targetDir = path36.join(workspacePath, repo.path);
13231
+ const targetDir = path37.join(workspacePath, repo.path);
12828
13232
  const startedAt = Date.now();
12829
13233
  if (this.verbose) {
12830
13234
  console.log(
@@ -12919,14 +13323,14 @@ var RepoManager = class {
12919
13323
  async reset(repos, workspacePath, strategy) {
12920
13324
  if (strategy === "recreate") {
12921
13325
  for (const repo of repos) {
12922
- const targetDir = path36.join(workspacePath, repo.path);
12923
- await rm5(targetDir, { recursive: true, force: true });
13326
+ const targetDir = path37.join(workspacePath, repo.path);
13327
+ await rm6(targetDir, { recursive: true, force: true });
12924
13328
  }
12925
13329
  await this.materializeAll(repos, workspacePath);
12926
13330
  return;
12927
13331
  }
12928
13332
  for (const repo of repos) {
12929
- const targetDir = path36.join(workspacePath, repo.path);
13333
+ const targetDir = path37.join(workspacePath, repo.path);
12930
13334
  await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
12931
13335
  await this.runGit(["clean", "-fd"], { cwd: targetDir });
12932
13336
  }
@@ -12938,21 +13342,21 @@ var RepoManager = class {
12938
13342
  async seedCache(localPath, remoteUrl, opts) {
12939
13343
  const source = { type: "git", url: remoteUrl };
12940
13344
  const key = cacheKey(source);
12941
- const cachePath = path36.join(this.cacheDir, key);
13345
+ const cachePath = path37.join(this.cacheDir, key);
12942
13346
  const lockPath = `${cachePath}.lock`;
12943
- await mkdir11(this.cacheDir, { recursive: true });
13347
+ await mkdir12(this.cacheDir, { recursive: true });
12944
13348
  await acquireLock(lockPath);
12945
13349
  try {
12946
- if (existsSync2(path36.join(cachePath, "HEAD"))) {
13350
+ if (existsSync3(path37.join(cachePath, "HEAD"))) {
12947
13351
  if (!opts?.force) {
12948
13352
  throw new Error(
12949
13353
  `Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
12950
13354
  );
12951
13355
  }
12952
- await rm5(cachePath, { recursive: true, force: true });
13356
+ await rm6(cachePath, { recursive: true, force: true });
12953
13357
  }
12954
- await git(["clone", "--mirror", "--bare", localPath, cachePath]);
12955
- await git(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
13358
+ await git2(["clone", "--mirror", "--bare", localPath, cachePath]);
13359
+ await git2(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
12956
13360
  } finally {
12957
13361
  await releaseLock(lockPath);
12958
13362
  }
@@ -12960,41 +13364,41 @@ var RepoManager = class {
12960
13364
  }
12961
13365
  /** Remove the entire cache directory. */
12962
13366
  async cleanCache() {
12963
- await rm5(this.cacheDir, { recursive: true, force: true });
13367
+ await rm6(this.cacheDir, { recursive: true, force: true });
12964
13368
  }
12965
13369
  };
12966
13370
 
12967
13371
  // src/evaluation/workspace/resolve.ts
12968
- import { readdir as readdir4, stat as stat6 } from "node:fs/promises";
12969
- import path37 from "node:path";
13372
+ import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
13373
+ import path38 from "node:path";
12970
13374
  async function resolveWorkspaceTemplate(templatePath) {
12971
13375
  if (!templatePath) {
12972
13376
  return void 0;
12973
13377
  }
12974
- const resolved = path37.resolve(templatePath);
13378
+ const resolved = path38.resolve(templatePath);
12975
13379
  const stats = await stat6(resolved);
12976
13380
  if (stats.isFile()) {
12977
13381
  return {
12978
- dir: path37.dirname(resolved),
13382
+ dir: path38.dirname(resolved),
12979
13383
  workspaceFile: resolved
12980
13384
  };
12981
13385
  }
12982
13386
  if (!stats.isDirectory()) {
12983
13387
  throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
12984
13388
  }
12985
- const entries = await readdir4(resolved);
13389
+ const entries = await readdir5(resolved);
12986
13390
  const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
12987
13391
  if (workspaceFiles.length === 1) {
12988
13392
  return {
12989
13393
  dir: resolved,
12990
- workspaceFile: path37.join(resolved, workspaceFiles[0])
13394
+ workspaceFile: path38.join(resolved, workspaceFiles[0])
12991
13395
  };
12992
13396
  }
12993
13397
  if (workspaceFiles.length > 1) {
12994
13398
  const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
12995
13399
  return {
12996
13400
  dir: resolved,
12997
- workspaceFile: conventionFile ? path37.join(resolved, conventionFile) : void 0
13401
+ workspaceFile: conventionFile ? path38.join(resolved, conventionFile) : void 0
12998
13402
  };
12999
13403
  }
13000
13404
  return { dir: resolved };
@@ -13076,7 +13480,10 @@ async function runEvaluation(options) {
13076
13480
  trials,
13077
13481
  streamCallbacks,
13078
13482
  totalBudgetUsd,
13079
- failOnError
13483
+ failOnError,
13484
+ poolWorkspaces,
13485
+ poolMaxSlots: configPoolMaxSlots,
13486
+ workspace: userWorkspacePath
13080
13487
  } = options;
13081
13488
  let useCache = options.useCache;
13082
13489
  if (trials && trials.count > 1 && useCache) {
@@ -13150,7 +13557,7 @@ async function runEvaluation(options) {
13150
13557
  ];
13151
13558
  const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveJudgeProvider);
13152
13559
  const typeRegistry = createBuiltinRegistry();
13153
- const discoveryBaseDir = evalFilePath ? path38.dirname(path38.resolve(evalFilePath)) : process.cwd();
13560
+ const discoveryBaseDir = evalFilePath ? path39.dirname(path39.resolve(evalFilePath)) : process.cwd();
13154
13561
  const evalDir = discoveryBaseDir;
13155
13562
  await discoverAssertions(typeRegistry, discoveryBaseDir);
13156
13563
  const providerRegistry = createBuiltinProviderRegistry();
@@ -13212,13 +13619,19 @@ async function runEvaluation(options) {
13212
13619
  }
13213
13620
  };
13214
13621
  const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
13215
- const hasSharedWorkspace = !!(workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
13622
+ if (userWorkspacePath && isPerTestIsolation) {
13623
+ throw new Error(
13624
+ "--workspace is incompatible with isolation: per_test. Use isolation: shared (default)."
13625
+ );
13626
+ }
13627
+ const hasSharedWorkspace = !!(userWorkspacePath || workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
13628
+ const usePool = poolWorkspaces === true && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !userWorkspacePath;
13216
13629
  const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
13217
- const workers = hasSharedWorkspace ? 1 : requestedWorkers;
13630
+ const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
13218
13631
  setupLog(
13219
- `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
13632
+ `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
13220
13633
  );
13221
- if (hasSharedWorkspace && requestedWorkers > 1) {
13634
+ if (hasSharedWorkspace && !usePool && requestedWorkers > 1) {
13222
13635
  console.warn(
13223
13636
  `Warning: Shared workspace requires sequential execution. Overriding workers from ${requestedWorkers} to 1.`
13224
13637
  );
@@ -13227,7 +13640,37 @@ async function runEvaluation(options) {
13227
13640
  let sharedWorkspacePath;
13228
13641
  let sharedBaselineCommit;
13229
13642
  let beforeAllOutput;
13230
- if (workspaceTemplate) {
13643
+ let poolManager;
13644
+ let poolSlot;
13645
+ const poolSlots = [];
13646
+ const availablePoolSlots = [];
13647
+ const poolSlotBaselines = /* @__PURE__ */ new Map();
13648
+ const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
13649
+ if (userWorkspacePath) {
13650
+ sharedWorkspacePath = userWorkspacePath;
13651
+ setupLog(`using user-provided workspace: ${userWorkspacePath}`);
13652
+ } else if (usePool && suiteWorkspace?.repos) {
13653
+ const slotsNeeded = workers;
13654
+ setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
13655
+ poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
13656
+ const poolRepoManager = new RepoManager(void 0, verbose);
13657
+ for (let i = 0; i < slotsNeeded; i++) {
13658
+ const slot = await poolManager.acquireWorkspace({
13659
+ templatePath: workspaceTemplate,
13660
+ repos: suiteWorkspace.repos,
13661
+ maxSlots: poolMaxSlots,
13662
+ repoManager: poolRepoManager
13663
+ });
13664
+ poolSlots.push(slot);
13665
+ setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
13666
+ }
13667
+ if (slotsNeeded === 1) {
13668
+ poolSlot = poolSlots[0];
13669
+ sharedWorkspacePath = poolSlot.path;
13670
+ } else {
13671
+ availablePoolSlots.push(...poolSlots);
13672
+ }
13673
+ } else if (workspaceTemplate) {
13231
13674
  setupLog(`creating shared workspace from template: ${workspaceTemplate}`);
13232
13675
  try {
13233
13676
  sharedWorkspacePath = await createTempWorkspace(workspaceTemplate, evalRunId, "shared");
@@ -13236,288 +13679,344 @@ async function runEvaluation(options) {
13236
13679
  const message = error instanceof Error ? error.message : String(error);
13237
13680
  throw new Error(`Failed to create shared workspace: ${message}`);
13238
13681
  }
13682
+ } else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
13683
+ sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
13684
+ await mkdir13(sharedWorkspacePath, { recursive: true });
13685
+ setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
13686
+ }
13687
+ try {
13239
13688
  if (suiteWorkspaceFile && sharedWorkspacePath) {
13240
- const copiedWorkspaceFile = path38.join(sharedWorkspacePath, path38.basename(suiteWorkspaceFile));
13689
+ const copiedWorkspaceFile = path39.join(sharedWorkspacePath, path39.basename(suiteWorkspaceFile));
13241
13690
  try {
13242
13691
  await stat7(copiedWorkspaceFile);
13243
13692
  suiteWorkspaceFile = copiedWorkspaceFile;
13244
13693
  } catch {
13245
13694
  }
13246
13695
  }
13247
- } else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
13248
- sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
13249
- await mkdir12(sharedWorkspacePath, { recursive: true });
13250
- setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
13251
- }
13252
- const repoManager = suiteWorkspace?.repos?.length ? new RepoManager(void 0, verbose) : void 0;
13253
- if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
13254
- setupLog(`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`);
13255
- try {
13256
- await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
13257
- setupLog("shared repo materialization complete");
13258
- } catch (error) {
13259
- const message = error instanceof Error ? error.message : String(error);
13260
- if (sharedWorkspacePath) {
13261
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13262
- });
13263
- }
13264
- throw new Error(`Failed to materialize repos: ${message}`);
13265
- }
13266
- }
13267
- if (sharedWorkspacePath && suiteWorkspace?.before_all) {
13268
- const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
13269
- setupLog(
13270
- `running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
13271
- );
13272
- const scriptContext = {
13273
- workspacePath: sharedWorkspacePath,
13274
- testId: "__before_all__",
13275
- evalRunId,
13276
- evalDir
13277
- };
13278
- try {
13279
- beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
13280
- setupLog("shared before_all completed");
13281
- } catch (error) {
13282
- const message = error instanceof Error ? error.message : String(error);
13283
- if (sharedWorkspacePath) {
13284
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13285
- });
13286
- }
13287
- throw new Error(`before_all script failed: ${message}`);
13288
- }
13289
- }
13290
- if (sharedWorkspacePath) {
13291
- try {
13292
- sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
13293
- setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
13294
- } catch {
13295
- setupLog("shared baseline initialization skipped (non-fatal)");
13296
- }
13297
- }
13298
- let nextWorkerId = 1;
13299
- const workerIdByEvalId = /* @__PURE__ */ new Map();
13300
- let beforeAllOutputAttached = false;
13301
- let cumulativeBudgetCost = 0;
13302
- let budgetExhausted = false;
13303
- let failOnErrorTriggered = false;
13304
- const promises = filteredEvalCases.map(
13305
- (evalCase) => limit(async () => {
13306
- const workerId = nextWorkerId++;
13307
- workerIdByEvalId.set(evalCase.id, workerId);
13308
- if (totalBudgetUsd !== void 0 && budgetExhausted) {
13309
- const budgetResult = {
13310
- timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
13311
- testId: evalCase.id,
13312
- dataset: evalCase.dataset,
13313
- score: 0,
13314
- hits: [],
13315
- misses: [],
13316
- answer: "",
13317
- target: target.name,
13318
- error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
13319
- budgetExceeded: true,
13320
- executionStatus: "execution_error",
13321
- failureStage: "setup",
13322
- failureReasonCode: "budget_exceeded",
13323
- executionError: {
13324
- message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
13325
- stage: "setup"
13326
- }
13327
- };
13328
- if (onProgress) {
13329
- await onProgress({
13330
- workerId,
13331
- testId: evalCase.id,
13332
- status: "failed",
13333
- completedAt: Date.now(),
13334
- error: budgetResult.error
13696
+ const repoManager = suiteWorkspace?.repos?.length && !usePool && !userWorkspacePath ? new RepoManager(void 0, verbose) : void 0;
13697
+ if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
13698
+ setupLog(
13699
+ `materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
13700
+ );
13701
+ try {
13702
+ await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
13703
+ setupLog("shared repo materialization complete");
13704
+ } catch (error) {
13705
+ const message = error instanceof Error ? error.message : String(error);
13706
+ if (sharedWorkspacePath && !userWorkspacePath) {
13707
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13335
13708
  });
13336
13709
  }
13337
- if (onResult) {
13338
- await onResult(budgetResult);
13339
- }
13340
- return budgetResult;
13710
+ throw new Error(`Failed to materialize repos: ${message}`);
13341
13711
  }
13342
- if (failOnError === true && failOnErrorTriggered) {
13343
- const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
13344
- const haltResult = {
13345
- timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
13346
- testId: evalCase.id,
13347
- dataset: evalCase.dataset,
13348
- score: 0,
13349
- hits: [],
13350
- misses: [],
13351
- answer: "",
13352
- target: target.name,
13353
- error: errorMsg,
13354
- executionStatus: "execution_error",
13355
- failureStage: "setup",
13356
- failureReasonCode: "error_threshold_exceeded",
13357
- executionError: { message: errorMsg, stage: "setup" }
13358
- };
13359
- if (onProgress) {
13360
- await onProgress({
13361
- workerId,
13362
- testId: evalCase.id,
13363
- status: "failed",
13364
- completedAt: Date.now(),
13365
- error: haltResult.error
13712
+ }
13713
+ if (sharedWorkspacePath && suiteWorkspace?.before_all) {
13714
+ const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
13715
+ setupLog(
13716
+ `running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
13717
+ );
13718
+ const scriptContext = {
13719
+ workspacePath: sharedWorkspacePath,
13720
+ testId: "__before_all__",
13721
+ evalRunId,
13722
+ evalDir
13723
+ };
13724
+ try {
13725
+ beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
13726
+ setupLog("shared before_all completed");
13727
+ } catch (error) {
13728
+ const message = error instanceof Error ? error.message : String(error);
13729
+ if (sharedWorkspacePath && !userWorkspacePath) {
13730
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13366
13731
  });
13367
13732
  }
13368
- if (onResult) {
13369
- await onResult(haltResult);
13370
- }
13371
- return haltResult;
13372
- }
13373
- if (onProgress) {
13374
- await onProgress({
13375
- workerId,
13376
- testId: evalCase.id,
13377
- status: "running",
13378
- startedAt: Date.now()
13379
- });
13733
+ throw new Error(`before_all script failed: ${message}`);
13380
13734
  }
13381
- try {
13382
- const judgeProvider = await resolveJudgeProvider(target);
13383
- const runCaseOptions = {
13384
- evalCase,
13385
- provider: primaryProvider,
13386
- target,
13387
- evaluators: evaluatorRegistry,
13388
- maxRetries,
13389
- agentTimeoutMs,
13390
- cache,
13391
- useCache,
13392
- now,
13393
- judgeProvider,
13394
- targetResolver,
13395
- availableTargets,
13735
+ }
13736
+ if (availablePoolSlots.length > 0 && suiteWorkspace?.before_all) {
13737
+ for (const slot of availablePoolSlots) {
13738
+ setupLog(`running before_all on pool slot ${slot.index}`);
13739
+ const scriptContext = {
13740
+ workspacePath: slot.path,
13741
+ testId: "__before_all__",
13396
13742
  evalRunId,
13397
- keepWorkspaces,
13398
- cleanupWorkspaces,
13399
- sharedWorkspacePath,
13400
- sharedBaselineCommit,
13401
- suiteWorkspaceFile,
13402
- streamCallbacks,
13403
- typeRegistry,
13404
- repoManager,
13405
13743
  evalDir
13406
13744
  };
13407
- let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
13408
- if (totalBudgetUsd !== void 0) {
13409
- let caseCost;
13410
- if (result.trials && result.trials.length > 0) {
13411
- const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
13412
- if (trialCostSum > 0) {
13413
- caseCost = trialCostSum;
13745
+ try {
13746
+ const output = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
13747
+ if (!beforeAllOutput) beforeAllOutput = output;
13748
+ setupLog(`before_all completed on pool slot ${slot.index}`);
13749
+ } catch (error) {
13750
+ const message = error instanceof Error ? error.message : String(error);
13751
+ throw new Error(`before_all script failed on pool slot ${slot.index}: ${message}`);
13752
+ }
13753
+ }
13754
+ }
13755
+ if (sharedWorkspacePath) {
13756
+ try {
13757
+ sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
13758
+ setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
13759
+ } catch {
13760
+ setupLog("shared baseline initialization skipped (non-fatal)");
13761
+ }
13762
+ }
13763
+ if (availablePoolSlots.length > 0) {
13764
+ for (const slot of availablePoolSlots) {
13765
+ try {
13766
+ const baseline = await initializeBaseline(slot.path);
13767
+ poolSlotBaselines.set(slot.path, baseline);
13768
+ setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
13769
+ } catch {
13770
+ setupLog(`pool slot ${slot.index} baseline initialization skipped (non-fatal)`);
13771
+ }
13772
+ }
13773
+ }
13774
+ let nextWorkerId = 1;
13775
+ const workerIdByEvalId = /* @__PURE__ */ new Map();
13776
+ let beforeAllOutputAttached = false;
13777
+ let cumulativeBudgetCost = 0;
13778
+ let budgetExhausted = false;
13779
+ let failOnErrorTriggered = false;
13780
+ const promises = filteredEvalCases.map(
13781
+ (evalCase) => limit(async () => {
13782
+ const workerId = nextWorkerId++;
13783
+ workerIdByEvalId.set(evalCase.id, workerId);
13784
+ if (totalBudgetUsd !== void 0 && budgetExhausted) {
13785
+ const budgetResult = {
13786
+ timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
13787
+ testId: evalCase.id,
13788
+ dataset: evalCase.dataset,
13789
+ score: 0,
13790
+ hits: [],
13791
+ misses: [],
13792
+ answer: "",
13793
+ target: target.name,
13794
+ error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
13795
+ budgetExceeded: true,
13796
+ executionStatus: "execution_error",
13797
+ failureStage: "setup",
13798
+ failureReasonCode: "budget_exceeded",
13799
+ executionError: {
13800
+ message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
13801
+ stage: "setup"
13414
13802
  }
13415
- } else {
13416
- caseCost = result.costUsd;
13803
+ };
13804
+ if (onProgress) {
13805
+ await onProgress({
13806
+ workerId,
13807
+ testId: evalCase.id,
13808
+ status: "failed",
13809
+ completedAt: Date.now(),
13810
+ error: budgetResult.error
13811
+ });
13417
13812
  }
13418
- if (caseCost !== void 0) {
13419
- cumulativeBudgetCost += caseCost;
13420
- if (cumulativeBudgetCost >= totalBudgetUsd) {
13421
- budgetExhausted = true;
13422
- }
13813
+ if (onResult) {
13814
+ await onResult(budgetResult);
13423
13815
  }
13816
+ return budgetResult;
13424
13817
  }
13425
- if (failOnError === true && result.executionStatus === "execution_error") {
13426
- failOnErrorTriggered = true;
13427
- }
13428
- if (beforeAllOutput && !beforeAllOutputAttached) {
13429
- result = { ...result, beforeAllOutput };
13430
- beforeAllOutputAttached = true;
13818
+ if (failOnError === true && failOnErrorTriggered) {
13819
+ const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
13820
+ const haltResult = {
13821
+ timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
13822
+ testId: evalCase.id,
13823
+ dataset: evalCase.dataset,
13824
+ score: 0,
13825
+ hits: [],
13826
+ misses: [],
13827
+ answer: "",
13828
+ target: target.name,
13829
+ error: errorMsg,
13830
+ executionStatus: "execution_error",
13831
+ failureStage: "setup",
13832
+ failureReasonCode: "error_threshold_exceeded",
13833
+ executionError: { message: errorMsg, stage: "setup" }
13834
+ };
13835
+ if (onProgress) {
13836
+ await onProgress({
13837
+ workerId,
13838
+ testId: evalCase.id,
13839
+ status: "failed",
13840
+ completedAt: Date.now(),
13841
+ error: haltResult.error
13842
+ });
13843
+ }
13844
+ if (onResult) {
13845
+ await onResult(haltResult);
13846
+ }
13847
+ return haltResult;
13431
13848
  }
13432
13849
  if (onProgress) {
13433
13850
  await onProgress({
13434
13851
  workerId,
13435
13852
  testId: evalCase.id,
13436
- status: result.error ? "failed" : "completed",
13437
- startedAt: 0,
13438
- // Not used for completed status
13439
- completedAt: Date.now(),
13440
- error: result.error
13853
+ status: "running",
13854
+ startedAt: Date.now()
13441
13855
  });
13442
13856
  }
13443
- if (onResult) {
13444
- await onResult(result);
13857
+ const testPoolSlot = availablePoolSlots.length > 0 ? availablePoolSlots.pop() : void 0;
13858
+ const testWorkspacePath = testPoolSlot?.path ?? sharedWorkspacePath;
13859
+ const testBaselineCommit = testPoolSlot ? poolSlotBaselines.get(testPoolSlot.path) : sharedBaselineCommit;
13860
+ try {
13861
+ const judgeProvider = await resolveJudgeProvider(target);
13862
+ const runCaseOptions = {
13863
+ evalCase,
13864
+ provider: primaryProvider,
13865
+ target,
13866
+ evaluators: evaluatorRegistry,
13867
+ maxRetries,
13868
+ agentTimeoutMs,
13869
+ cache,
13870
+ useCache,
13871
+ now,
13872
+ judgeProvider,
13873
+ targetResolver,
13874
+ availableTargets,
13875
+ evalRunId,
13876
+ keepWorkspaces,
13877
+ cleanupWorkspaces,
13878
+ sharedWorkspacePath: testWorkspacePath,
13879
+ sharedBaselineCommit: testBaselineCommit,
13880
+ suiteWorkspaceFile,
13881
+ streamCallbacks,
13882
+ typeRegistry,
13883
+ repoManager,
13884
+ evalDir
13885
+ };
13886
+ let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
13887
+ if (totalBudgetUsd !== void 0) {
13888
+ let caseCost;
13889
+ if (result.trials && result.trials.length > 0) {
13890
+ const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
13891
+ if (trialCostSum > 0) {
13892
+ caseCost = trialCostSum;
13893
+ }
13894
+ } else {
13895
+ caseCost = result.costUsd;
13896
+ }
13897
+ if (caseCost !== void 0) {
13898
+ cumulativeBudgetCost += caseCost;
13899
+ if (cumulativeBudgetCost >= totalBudgetUsd) {
13900
+ budgetExhausted = true;
13901
+ }
13902
+ }
13903
+ }
13904
+ if (failOnError === true && result.executionStatus === "execution_error") {
13905
+ failOnErrorTriggered = true;
13906
+ }
13907
+ if (beforeAllOutput && !beforeAllOutputAttached) {
13908
+ result = { ...result, beforeAllOutput };
13909
+ beforeAllOutputAttached = true;
13910
+ }
13911
+ if (onProgress) {
13912
+ await onProgress({
13913
+ workerId,
13914
+ testId: evalCase.id,
13915
+ status: result.error ? "failed" : "completed",
13916
+ startedAt: 0,
13917
+ // Not used for completed status
13918
+ completedAt: Date.now(),
13919
+ error: result.error
13920
+ });
13921
+ }
13922
+ if (onResult) {
13923
+ await onResult(result);
13924
+ }
13925
+ return result;
13926
+ } catch (error) {
13927
+ if (onProgress) {
13928
+ await onProgress({
13929
+ workerId,
13930
+ testId: evalCase.id,
13931
+ status: "failed",
13932
+ completedAt: Date.now(),
13933
+ error: error instanceof Error ? error.message : String(error)
13934
+ });
13935
+ }
13936
+ throw error;
13937
+ } finally {
13938
+ if (testPoolSlot) {
13939
+ availablePoolSlots.push(testPoolSlot);
13940
+ }
13445
13941
  }
13446
- return result;
13447
- } catch (error) {
13448
- if (onProgress) {
13449
- await onProgress({
13450
- workerId,
13451
- testId: evalCase.id,
13452
- status: "failed",
13453
- completedAt: Date.now(),
13454
- error: error instanceof Error ? error.message : String(error)
13455
- });
13942
+ })
13943
+ );
13944
+ const settled = await Promise.allSettled(promises);
13945
+ const results = [];
13946
+ for (let i = 0; i < settled.length; i++) {
13947
+ const outcome = settled[i];
13948
+ if (outcome.status === "fulfilled") {
13949
+ results.push(outcome.value);
13950
+ } else {
13951
+ const evalCase = filteredEvalCases[i];
13952
+ const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
13953
+ const promptInputs = await buildPromptInputs(evalCase, formattingMode);
13954
+ const errorResult = buildErrorResult(
13955
+ evalCase,
13956
+ target.name,
13957
+ (now ?? (() => /* @__PURE__ */ new Date()))(),
13958
+ outcome.reason,
13959
+ promptInputs,
13960
+ primaryProvider,
13961
+ "agent",
13962
+ "provider_error"
13963
+ );
13964
+ results.push(errorResult);
13965
+ if (onResult) {
13966
+ await onResult(errorResult);
13456
13967
  }
13457
- throw error;
13458
13968
  }
13459
- })
13460
- );
13461
- const settled = await Promise.allSettled(promises);
13462
- const results = [];
13463
- for (let i = 0; i < settled.length; i++) {
13464
- const outcome = settled[i];
13465
- if (outcome.status === "fulfilled") {
13466
- results.push(outcome.value);
13467
- } else {
13468
- const evalCase = filteredEvalCases[i];
13469
- const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
13470
- const promptInputs = await buildPromptInputs(evalCase, formattingMode);
13471
- const errorResult = buildErrorResult(
13472
- evalCase,
13473
- target.name,
13474
- (now ?? (() => /* @__PURE__ */ new Date()))(),
13475
- outcome.reason,
13476
- promptInputs,
13477
- primaryProvider,
13478
- "agent",
13479
- "provider_error"
13480
- );
13481
- results.push(errorResult);
13482
- if (onResult) {
13483
- await onResult(errorResult);
13969
+ }
13970
+ const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
13971
+ if (afterAllWorkspaces.length > 0 && suiteWorkspace?.after_all) {
13972
+ for (const wsPath of afterAllWorkspaces) {
13973
+ const scriptContext = {
13974
+ workspacePath: wsPath,
13975
+ testId: "__after_all__",
13976
+ evalRunId,
13977
+ evalDir
13978
+ };
13979
+ try {
13980
+ const afterAllOutput = await executeWorkspaceScript(
13981
+ suiteWorkspace.after_all,
13982
+ scriptContext,
13983
+ "warn"
13984
+ );
13985
+ if (afterAllOutput && results.length > 0 && wsPath === afterAllWorkspaces[0]) {
13986
+ results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
13987
+ }
13988
+ } catch {
13989
+ }
13484
13990
  }
13485
13991
  }
13486
- }
13487
- if (sharedWorkspacePath && suiteWorkspace?.after_all) {
13488
- const scriptContext = {
13489
- workspacePath: sharedWorkspacePath,
13490
- testId: "__after_all__",
13491
- evalRunId,
13492
- evalDir
13493
- };
13494
- try {
13495
- const afterAllOutput = await executeWorkspaceScript(
13496
- suiteWorkspace.after_all,
13497
- scriptContext,
13498
- "warn"
13499
- );
13500
- if (afterAllOutput && results.length > 0) {
13501
- results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
13992
+ if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !userWorkspacePath) {
13993
+ const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
13994
+ if (cleanupWorkspaces) {
13995
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13996
+ });
13997
+ } else if (!hasFailure && !keepWorkspaces) {
13998
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13999
+ });
13502
14000
  }
13503
- } catch {
13504
14001
  }
13505
- }
13506
- if (sharedWorkspacePath) {
13507
- const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
13508
14002
  if (cleanupWorkspaces) {
13509
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13510
- });
13511
- } else if (!hasFailure && !keepWorkspaces) {
13512
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
14003
+ await cleanupEvalWorkspaces(evalRunId).catch(() => {
13513
14004
  });
13514
14005
  }
14006
+ return results;
14007
+ } finally {
14008
+ if (poolManager) {
14009
+ if (poolSlot) {
14010
+ await poolManager.releaseSlot(poolSlot);
14011
+ }
14012
+ for (const slot of poolSlots) {
14013
+ if (slot !== poolSlot) {
14014
+ await poolManager.releaseSlot(slot).catch(() => {
14015
+ });
14016
+ }
14017
+ }
14018
+ }
13515
14019
  }
13516
- if (cleanupWorkspaces) {
13517
- await cleanupEvalWorkspaces(evalRunId).catch(() => {
13518
- });
13519
- }
13520
- return results;
13521
14020
  }
13522
14021
  async function runBatchEvaluation(options) {
13523
14022
  const {
@@ -13734,7 +14233,7 @@ async function runEvalCase(options) {
13734
14233
  );
13735
14234
  }
13736
14235
  if (caseWorkspaceFile && workspacePath) {
13737
- const copiedFile = path38.join(workspacePath, path38.basename(caseWorkspaceFile));
14236
+ const copiedFile = path39.join(workspacePath, path39.basename(caseWorkspaceFile));
13738
14237
  try {
13739
14238
  await stat7(copiedFile);
13740
14239
  caseWorkspaceFile = copiedFile;
@@ -13744,7 +14243,7 @@ async function runEvalCase(options) {
13744
14243
  }
13745
14244
  if (!workspacePath && (evalCase.workspace?.before_all || evalCase.workspace?.repos?.length) && evalRunId) {
13746
14245
  workspacePath = getWorkspacePath(evalRunId, evalCase.id);
13747
- await mkdir12(workspacePath, { recursive: true });
14246
+ await mkdir13(workspacePath, { recursive: true });
13748
14247
  }
13749
14248
  if (evalCase.workspace?.repos?.length && workspacePath) {
13750
14249
  const perCaseRepoManager = new RepoManager(void 0, setupDebug);
@@ -14344,7 +14843,7 @@ async function runEvaluatorList(options) {
14344
14843
  fileChanges,
14345
14844
  workspacePath
14346
14845
  };
14347
- const evalFileDir = evalCase.guideline_paths[0] ? path38.dirname(evalCase.guideline_paths[0]) : process.cwd();
14846
+ const evalFileDir = evalCase.guideline_paths[0] ? path39.dirname(evalCase.guideline_paths[0]) : process.cwd();
14348
14847
  const dispatchContext = {
14349
14848
  judgeProvider,
14350
14849
  targetResolver,
@@ -14578,7 +15077,7 @@ function extractProviderError(response) {
14578
15077
  return trimmed.length > 0 ? trimmed : void 0;
14579
15078
  }
14580
15079
  function createCacheKey(provider, target, evalCase, promptInputs) {
14581
- const hash = createHash2("sha256");
15080
+ const hash = createHash3("sha256");
14582
15081
  hash.update(provider.id);
14583
15082
  hash.update(target.name);
14584
15083
  hash.update(evalCase.id);
@@ -14646,8 +15145,8 @@ function computeWeightedMean(entries) {
14646
15145
  }
14647
15146
 
14648
15147
  // src/evaluation/evaluate.ts
14649
- import { existsSync as existsSync3 } from "node:fs";
14650
- import path39 from "node:path";
15148
+ import { existsSync as existsSync4 } from "node:fs";
15149
+ import path40 from "node:path";
14651
15150
  async function evaluate(config) {
14652
15151
  const startTime = Date.now();
14653
15152
  if (config.tests && config.specFile) {
@@ -14669,13 +15168,13 @@ async function evaluate(config) {
14669
15168
  let evalCases;
14670
15169
  let testFilePath;
14671
15170
  if (config.specFile) {
14672
- testFilePath = path39.resolve(config.specFile);
15171
+ testFilePath = path40.resolve(config.specFile);
14673
15172
  evalCases = await loadTests(testFilePath, repoRoot, {
14674
15173
  verbose: config.verbose,
14675
15174
  filter: config.filter
14676
15175
  });
14677
15176
  } else {
14678
- testFilePath = path39.join(process.cwd(), "__programmatic__.yaml");
15177
+ testFilePath = path40.join(process.cwd(), "__programmatic__.yaml");
14679
15178
  evalCases = (config.tests ?? []).map((test) => {
14680
15179
  const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
14681
15180
  const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
@@ -14761,11 +15260,11 @@ function computeSummary(results, durationMs) {
14761
15260
  var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
14762
15261
  async function discoverDefaultTarget(repoRoot) {
14763
15262
  const cwd = process.cwd();
14764
- const chain = buildDirectoryChain(path39.join(cwd, "_placeholder"), repoRoot);
15263
+ const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
14765
15264
  for (const dir of chain) {
14766
15265
  for (const candidate of TARGET_FILE_CANDIDATES) {
14767
- const targetsPath = path39.join(dir, candidate);
14768
- if (!existsSync3(targetsPath)) continue;
15266
+ const targetsPath = path40.join(dir, candidate);
15267
+ if (!existsSync4(targetsPath)) continue;
14769
15268
  try {
14770
15269
  const definitions = await readTargetDefinitions(targetsPath);
14771
15270
  const defaultTarget = definitions.find((d) => d.name === "default");
@@ -14779,11 +15278,11 @@ async function discoverDefaultTarget(repoRoot) {
14779
15278
  async function loadEnvHierarchy(repoRoot) {
14780
15279
  const { readFileSync: readFileSync2 } = await import("node:fs");
14781
15280
  const cwd = process.cwd();
14782
- const chain = buildDirectoryChain(path39.join(cwd, "_placeholder"), repoRoot);
15281
+ const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
14783
15282
  const envFiles = [];
14784
15283
  for (const dir of chain) {
14785
- const envPath = path39.join(dir, ".env");
14786
- if (existsSync3(envPath)) envFiles.push(envPath);
15284
+ const envPath = path40.join(dir, ".env");
15285
+ if (existsSync4(envPath)) envFiles.push(envPath);
14787
15286
  }
14788
15287
  for (let i = envFiles.length - 1; i >= 0; i--) {
14789
15288
  try {
@@ -14861,12 +15360,12 @@ var CONFIG_FILE_NAMES = [
14861
15360
  ".agentv/config.js"
14862
15361
  ];
14863
15362
  async function loadTsConfig(projectRoot) {
14864
- const { existsSync: existsSync4 } = await import("node:fs");
15363
+ const { existsSync: existsSync5 } = await import("node:fs");
14865
15364
  const { pathToFileURL } = await import("node:url");
14866
15365
  const { join: join2 } = await import("node:path");
14867
15366
  for (const fileName of CONFIG_FILE_NAMES) {
14868
15367
  const filePath = join2(projectRoot, fileName);
14869
- if (!existsSync4(filePath)) {
15368
+ if (!existsSync5(filePath)) {
14870
15369
  continue;
14871
15370
  }
14872
15371
  try {
@@ -14963,8 +15462,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
14963
15462
  }
14964
15463
 
14965
15464
  // src/evaluation/cache/response-cache.ts
14966
- import { mkdir as mkdir13, readFile as readFile11, writeFile as writeFile8 } from "node:fs/promises";
14967
- import path40 from "node:path";
15465
+ import { mkdir as mkdir14, readFile as readFile12, writeFile as writeFile9 } from "node:fs/promises";
15466
+ import path41 from "node:path";
14968
15467
  var DEFAULT_CACHE_PATH = ".agentv/cache";
14969
15468
  var ResponseCache = class {
14970
15469
  cachePath;
@@ -14974,7 +15473,7 @@ var ResponseCache = class {
14974
15473
  async get(key) {
14975
15474
  const filePath = this.keyToPath(key);
14976
15475
  try {
14977
- const data = await readFile11(filePath, "utf8");
15476
+ const data = await readFile12(filePath, "utf8");
14978
15477
  return JSON.parse(data);
14979
15478
  } catch {
14980
15479
  return void 0;
@@ -14982,13 +15481,13 @@ var ResponseCache = class {
14982
15481
  }
14983
15482
  async set(key, value) {
14984
15483
  const filePath = this.keyToPath(key);
14985
- const dir = path40.dirname(filePath);
14986
- await mkdir13(dir, { recursive: true });
14987
- await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
15484
+ const dir = path41.dirname(filePath);
15485
+ await mkdir14(dir, { recursive: true });
15486
+ await writeFile9(filePath, JSON.stringify(value, null, 2), "utf8");
14988
15487
  }
14989
15488
  keyToPath(key) {
14990
15489
  const prefix = key.slice(0, 2);
14991
- return path40.join(this.cachePath, prefix, `${key}.json`);
15490
+ return path41.join(this.cachePath, prefix, `${key}.json`);
14992
15491
  }
14993
15492
  };
14994
15493
  function shouldEnableCache(params) {
@@ -15470,6 +15969,7 @@ export {
15470
15969
  TokenUsageEvaluator,
15471
15970
  ToolTrajectoryEvaluator,
15472
15971
  WorkspaceCreationError,
15972
+ WorkspacePoolManager,
15473
15973
  assembleLlmJudgePrompt,
15474
15974
  avgToolDurationMs,
15475
15975
  buildDirectoryChain,
@@ -15484,6 +15984,7 @@ export {
15484
15984
  cleanupEvalWorkspaces,
15485
15985
  cleanupWorkspace,
15486
15986
  computeTraceSummary,
15987
+ computeWorkspaceFingerprint,
15487
15988
  consumeClaudeLogEntries,
15488
15989
  consumeCodexLogEntries,
15489
15990
  consumeCopilotCliLogEntries,
@@ -15521,6 +16022,7 @@ export {
15521
16022
  getSubagentsRoot,
15522
16023
  getTraceStateRoot,
15523
16024
  getWorkspacePath,
16025
+ getWorkspacePoolRoot,
15524
16026
  getWorkspacesRoot,
15525
16027
  initializeBaseline,
15526
16028
  isEvaluatorKind,