agentv 2.14.3 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -632,8 +632,8 @@ function getErrorMap() {
632
632
 
633
633
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
634
634
  var makeIssue = (params) => {
635
- const { data, path: path41, errorMaps, issueData } = params;
636
- const fullPath = [...path41, ...issueData.path || []];
635
+ const { data, path: path42, errorMaps, issueData } = params;
636
+ const fullPath = [...path42, ...issueData.path || []];
637
637
  const fullIssue = {
638
638
  ...issueData,
639
639
  path: fullPath
@@ -749,11 +749,11 @@ var errorUtil;
749
749
 
750
750
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
751
751
  var ParseInputLazyPath = class {
752
- constructor(parent, value, path41, key) {
752
+ constructor(parent, value, path42, key) {
753
753
  this._cachedPath = [];
754
754
  this.parent = parent;
755
755
  this.data = value;
756
- this._path = path41;
756
+ this._path = path42;
757
757
  this._key = key;
758
758
  }
759
759
  get path() {
@@ -6661,10 +6661,10 @@ function assignProp(target, prop, value) {
6661
6661
  configurable: true
6662
6662
  });
6663
6663
  }
6664
- function getElementAtPath(obj, path41) {
6665
- if (!path41)
6664
+ function getElementAtPath(obj, path42) {
6665
+ if (!path42)
6666
6666
  return obj;
6667
- return path41.reduce((acc, key) => acc?.[key], obj);
6667
+ return path42.reduce((acc, key) => acc?.[key], obj);
6668
6668
  }
6669
6669
  function promiseAllObject(promisesObj) {
6670
6670
  const keys = Object.keys(promisesObj);
@@ -6984,11 +6984,11 @@ function aborted(x, startIndex = 0) {
6984
6984
  }
6985
6985
  return false;
6986
6986
  }
6987
- function prefixIssues(path41, issues) {
6987
+ function prefixIssues(path42, issues) {
6988
6988
  return issues.map((iss) => {
6989
6989
  var _a17;
6990
6990
  (_a17 = iss).path ?? (_a17.path = []);
6991
- iss.path.unshift(path41);
6991
+ iss.path.unshift(path42);
6992
6992
  return iss;
6993
6993
  });
6994
6994
  }
@@ -7125,7 +7125,7 @@ function treeifyError(error40, _mapper) {
7125
7125
  return issue2.message;
7126
7126
  };
7127
7127
  const result = { errors: [] };
7128
- const processError = (error41, path41 = []) => {
7128
+ const processError = (error41, path42 = []) => {
7129
7129
  var _a17, _b8;
7130
7130
  for (const issue2 of error41.issues) {
7131
7131
  if (issue2.code === "invalid_union" && issue2.errors.length) {
@@ -7135,7 +7135,7 @@ function treeifyError(error40, _mapper) {
7135
7135
  } else if (issue2.code === "invalid_element") {
7136
7136
  processError({ issues: issue2.issues }, issue2.path);
7137
7137
  } else {
7138
- const fullpath = [...path41, ...issue2.path];
7138
+ const fullpath = [...path42, ...issue2.path];
7139
7139
  if (fullpath.length === 0) {
7140
7140
  result.errors.push(mapper(issue2));
7141
7141
  continue;
@@ -7165,9 +7165,9 @@ function treeifyError(error40, _mapper) {
7165
7165
  processError(error40);
7166
7166
  return result;
7167
7167
  }
7168
- function toDotPath(path41) {
7168
+ function toDotPath(path42) {
7169
7169
  const segs = [];
7170
- for (const seg of path41) {
7170
+ for (const seg of path42) {
7171
7171
  if (typeof seg === "number")
7172
7172
  segs.push(`[${seg}]`);
7173
7173
  else if (typeof seg === "symbol")
@@ -26720,14 +26720,14 @@ function createAzure(options = {}) {
26720
26720
  description: "Azure OpenAI resource name"
26721
26721
  });
26722
26722
  const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
26723
- const url2 = ({ path: path41, modelId }) => {
26723
+ const url2 = ({ path: path42, modelId }) => {
26724
26724
  var _a24;
26725
26725
  const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
26726
26726
  let fullUrl;
26727
26727
  if (options.useDeploymentBasedUrls) {
26728
- fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path41}`);
26728
+ fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path42}`);
26729
26729
  } else {
26730
- fullUrl = new URL(`${baseUrlPrefix}/v1${path41}`);
26730
+ fullUrl = new URL(`${baseUrlPrefix}/v1${path42}`);
26731
26731
  }
26732
26732
  fullUrl.searchParams.set("api-version", apiVersion);
26733
26733
  return fullUrl.toString();
@@ -33960,9 +33960,9 @@ import { randomBytes } from "node:crypto";
33960
33960
  import { createServer } from "node:http";
33961
33961
  import fs2 from "node:fs/promises";
33962
33962
  import path31 from "node:path";
33963
- import { createHash as createHash2, randomUUID as randomUUID7 } from "node:crypto";
33964
- import { mkdir as mkdir12, stat as stat7 } from "node:fs/promises";
33965
- import path38 from "node:path";
33963
+ import { createHash as createHash3, randomUUID as randomUUID7 } from "node:crypto";
33964
+ import { mkdir as mkdir13, stat as stat7 } from "node:fs/promises";
33965
+ import path39 from "node:path";
33966
33966
  import micromatch4 from "micromatch";
33967
33967
  import { readFileSync } from "node:fs";
33968
33968
  import path322 from "node:path";
@@ -33977,15 +33977,21 @@ import path35 from "node:path";
33977
33977
  import { execFile } from "node:child_process";
33978
33978
  import { createHash } from "node:crypto";
33979
33979
  import { existsSync as existsSync2 } from "node:fs";
33980
- import { mkdir as mkdir11, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
33980
+ import { cp as cp2, mkdir as mkdir11, readFile as readFile11, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
33981
33981
  import path36 from "node:path";
33982
33982
  import { promisify as promisify5 } from "node:util";
33983
- import { readdir as readdir4, stat as stat6 } from "node:fs/promises";
33984
- import path37 from "node:path";
33983
+ import { execFile as execFile2 } from "node:child_process";
33984
+ import { createHash as createHash2 } from "node:crypto";
33985
33985
  import { existsSync as existsSync3 } from "node:fs";
33986
- import path39 from "node:path";
33987
- import { mkdir as mkdir13, readFile as readFile11, writeFile as writeFile8 } from "node:fs/promises";
33986
+ import { mkdir as mkdir12, rm as rm6, unlink as unlink2, writeFile as writeFile8 } from "node:fs/promises";
33987
+ import path37 from "node:path";
33988
+ import { promisify as promisify6 } from "node:util";
33989
+ import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
33990
+ import path38 from "node:path";
33991
+ import { existsSync as existsSync4 } from "node:fs";
33988
33992
  import path40 from "node:path";
33993
+ import { mkdir as mkdir14, readFile as readFile12, writeFile as writeFile9 } from "node:fs/promises";
33994
+ import path41 from "node:path";
33989
33995
  function computeTraceSummary(messages) {
33990
33996
  const toolCallCounts = {};
33991
33997
  const toolDurations = {};
@@ -34546,6 +34552,17 @@ function parseExecutionDefaults(raw, configPath) {
34546
34552
  } else if (otelFile !== void 0) {
34547
34553
  logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
34548
34554
  }
34555
+ if (typeof obj.pool_workspaces === "boolean") {
34556
+ result.pool_workspaces = obj.pool_workspaces;
34557
+ } else if (obj.pool_workspaces !== void 0) {
34558
+ logWarning(`Invalid execution.pool_workspaces in ${configPath}, expected boolean`);
34559
+ }
34560
+ const poolSlots = obj.pool_slots;
34561
+ if (typeof poolSlots === "number" && Number.isInteger(poolSlots) && poolSlots >= 1 && poolSlots <= 50) {
34562
+ result.pool_slots = poolSlots;
34563
+ } else if (poolSlots !== void 0) {
34564
+ logWarning(`Invalid execution.pool_slots in ${configPath}, expected integer 1-50`);
34565
+ }
34549
34566
  return Object.keys(result).length > 0 ? result : void 0;
34550
34567
  }
34551
34568
  function logWarning(message) {
@@ -35961,6 +35978,7 @@ async function processMessages(options) {
35961
35978
  repoRootPath,
35962
35979
  guidelinePatterns,
35963
35980
  guidelinePaths,
35981
+ treatFileSegmentsAsGuidelines,
35964
35982
  textParts,
35965
35983
  messageType,
35966
35984
  verbose
@@ -36008,16 +36026,20 @@ async function processMessages(options) {
36008
36026
  }
36009
36027
  try {
36010
36028
  const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
36011
- if (messageType === "input" && guidelinePatterns && guidelinePaths) {
36012
- const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
36013
- if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
36014
- guidelinePaths.push(path5.resolve(resolvedPath));
36015
- if (verbose) {
36016
- console.log(` [Guideline] Found: ${displayPath}`);
36017
- console.log(` Resolved to: ${resolvedPath}`);
36018
- }
36019
- continue;
36029
+ const classifyAsGuideline = shouldTreatAsGuideline({
36030
+ messageType,
36031
+ resolvedPath,
36032
+ repoRootPath,
36033
+ guidelinePatterns,
36034
+ treatFileSegmentsAsGuidelines
36035
+ });
36036
+ if (classifyAsGuideline && guidelinePaths) {
36037
+ guidelinePaths.push(path5.resolve(resolvedPath));
36038
+ if (verbose) {
36039
+ console.log(` [Guideline] Found: ${displayPath}`);
36040
+ console.log(` Resolved to: ${resolvedPath}`);
36020
36041
  }
36042
+ continue;
36021
36043
  }
36022
36044
  segments.push({
36023
36045
  type: "file",
@@ -36046,6 +36068,26 @@ async function processMessages(options) {
36046
36068
  }
36047
36069
  return segments;
36048
36070
  }
36071
+ function shouldTreatAsGuideline(options) {
36072
+ const {
36073
+ messageType,
36074
+ resolvedPath,
36075
+ repoRootPath,
36076
+ guidelinePatterns,
36077
+ treatFileSegmentsAsGuidelines
36078
+ } = options;
36079
+ if (messageType !== "input") {
36080
+ return false;
36081
+ }
36082
+ if (treatFileSegmentsAsGuidelines) {
36083
+ return true;
36084
+ }
36085
+ if (!guidelinePatterns || guidelinePatterns.length === 0) {
36086
+ return false;
36087
+ }
36088
+ const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
36089
+ return isGuidelineFile(relativeToRepo, guidelinePatterns);
36090
+ }
36049
36091
  function asString3(value) {
36050
36092
  return typeof value === "string" ? value : void 0;
36051
36093
  }
@@ -36380,6 +36422,8 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
36380
36422
  for (const guidelinePath of testCase.guideline_paths) {
36381
36423
  console.log(` - ${guidelinePath}`);
36382
36424
  }
36425
+ } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
36426
+ console.log(" No guidelines found (guideline_patterns not configured)");
36383
36427
  } else {
36384
36428
  console.log(" No guidelines found");
36385
36429
  }
@@ -36740,7 +36784,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
36740
36784
  } else {
36741
36785
  throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
36742
36786
  }
36743
- const suiteWorkspace = parseWorkspaceConfig(suite.workspace, evalFileDir);
36787
+ const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
36744
36788
  const suiteInputMessages = expandInputShorthand(suite.input);
36745
36789
  const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
36746
36790
  const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
@@ -36776,12 +36820,24 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
36776
36820
  }
36777
36821
  const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
36778
36822
  const skipDefaults = caseExecution?.skip_defaults === true;
36779
- const inputMessages = suiteInputMessages && !skipDefaults ? [...suiteInputMessages, ...testInputMessages] : testInputMessages;
36823
+ const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
36824
+ const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
36780
36825
  const hasExpectedMessages = expectedMessages.length > 0;
36781
36826
  const guidelinePaths = [];
36782
36827
  const inputTextParts = [];
36783
- const inputSegments = await processMessages({
36784
- messages: inputMessages,
36828
+ const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
36829
+ messages: effectiveSuiteInputMessages,
36830
+ searchRoots,
36831
+ repoRootPath,
36832
+ guidelinePatterns,
36833
+ guidelinePaths,
36834
+ treatFileSegmentsAsGuidelines: true,
36835
+ textParts: inputTextParts,
36836
+ messageType: "input",
36837
+ verbose
36838
+ }) : [];
36839
+ const testInputSegments = await processMessages({
36840
+ messages: testInputMessages,
36785
36841
  searchRoots,
36786
36842
  repoRootPath,
36787
36843
  guidelinePatterns,
@@ -36790,6 +36846,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
36790
36846
  messageType: "input",
36791
36847
  verbose
36792
36848
  });
36849
+ const inputSegments = [...suiteInputSegments, ...testInputSegments];
36793
36850
  const outputSegments = hasExpectedMessages ? await processExpectedMessages({
36794
36851
  messages: expectedMessages,
36795
36852
  searchRoots,
@@ -36837,7 +36894,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
36837
36894
  ...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
36838
36895
  ...userFilePaths
36839
36896
  ];
36840
- const caseWorkspace = parseWorkspaceConfig(evalcase.workspace, evalFileDir);
36897
+ const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
36841
36898
  const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
36842
36899
  const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
36843
36900
  const caseTargets = extractTargetsFromTestCase(evalcase);
@@ -36868,6 +36925,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
36868
36925
  for (const guidelinePath of testCase.guideline_paths) {
36869
36926
  console.log(` - ${guidelinePath}`);
36870
36927
  }
36928
+ } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
36929
+ console.log(" No guidelines found (guideline_patterns not configured)");
36871
36930
  } else {
36872
36931
  console.log(" No guidelines found");
36873
36932
  }
@@ -36967,6 +37026,26 @@ function parseResetConfig(raw) {
36967
37026
  ...afterEach !== void 0 && { after_each: afterEach }
36968
37027
  };
36969
37028
  }
37029
+ async function resolveWorkspaceConfig(raw, evalFileDir) {
37030
+ if (typeof raw === "string") {
37031
+ const workspaceFilePath = path8.resolve(evalFileDir, raw);
37032
+ let content;
37033
+ try {
37034
+ content = await readFile7(workspaceFilePath, "utf8");
37035
+ } catch {
37036
+ throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
37037
+ }
37038
+ const parsed = parse22(content);
37039
+ if (!isJsonObject(parsed)) {
37040
+ throw new Error(
37041
+ `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
37042
+ );
37043
+ }
37044
+ const workspaceFileDir = path8.dirname(workspaceFilePath);
37045
+ return parseWorkspaceConfig(parsed, workspaceFileDir);
37046
+ }
37047
+ return parseWorkspaceConfig(raw, evalFileDir);
37048
+ }
36970
37049
  function parseWorkspaceConfig(raw, evalFileDir) {
36971
37050
  if (!isJsonObject(raw)) return void 0;
36972
37051
  const obj = raw;
@@ -40938,6 +41017,9 @@ function getSubagentsRoot() {
40938
41017
  function getTraceStateRoot() {
40939
41018
  return path21.join(getAgentvHome(), "trace-state");
40940
41019
  }
41020
+ function getWorkspacePoolRoot() {
41021
+ return path21.join(getAgentvHome(), "workspace-pool");
41022
+ }
40941
41023
  var DEFAULT_LOCK_NAME = "subagent.lock";
40942
41024
  var DEFAULT_ALIVE_FILENAME = ".alive";
40943
41025
  function getDefaultSubagentRoot(vscodeCmd = "code") {
@@ -41738,8 +41820,6 @@ var AGENTV_REQUEST_TEMPLATE = `[[ ## task ## ]]
41738
41820
 
41739
41821
  **IMPORTANT**: Follow these exact steps:
41740
41822
  1. Create and write your complete response to: {{responseFileTmp}}
41741
- - All intended file outputs/changes MUST be written in your response file.
41742
- - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
41743
41823
  2. When completely finished, run these PowerShell commands to signal completion:
41744
41824
  \`\`\`
41745
41825
  Move-Item -LiteralPath '{{responseFileTmp}}' -Destination '{{responseFileFinal}}'
@@ -41756,8 +41836,6 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
41756
41836
 
41757
41837
  **IMPORTANT**: Follow these exact steps:
41758
41838
  1. Create and write your complete response to: {{responseFileTmp}}
41759
- - All intended file outputs/changes MUST be written in your response file.
41760
- - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
41761
41839
  2. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
41762
41840
  3. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
41763
41841
  `;
@@ -42347,16 +42425,16 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
42347
42425
  });
42348
42426
  }
42349
42427
  async function execShellWithStdin(command, stdinPayload, options = {}) {
42350
- const { mkdir: mkdir14, readFile: readFile12, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
42428
+ const { mkdir: mkdir15, readFile: readFile13, rm: rm7, writeFile: writeFile10 } = await import("node:fs/promises");
42351
42429
  const { tmpdir: tmpdir3 } = await import("node:os");
42352
- const path41 = await import("node:path");
42430
+ const path42 = await import("node:path");
42353
42431
  const { randomUUID: randomUUID8 } = await import("node:crypto");
42354
- const dir = path41.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
42355
- await mkdir14(dir, { recursive: true });
42356
- const stdinPath = path41.join(dir, "stdin.txt");
42357
- const stdoutPath = path41.join(dir, "stdout.txt");
42358
- const stderrPath = path41.join(dir, "stderr.txt");
42359
- await writeFile9(stdinPath, stdinPayload, "utf8");
42432
+ const dir = path42.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
42433
+ await mkdir15(dir, { recursive: true });
42434
+ const stdinPath = path42.join(dir, "stdin.txt");
42435
+ const stdoutPath = path42.join(dir, "stdout.txt");
42436
+ const stderrPath = path42.join(dir, "stderr.txt");
42437
+ await writeFile10(stdinPath, stdinPayload, "utf8");
42360
42438
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
42361
42439
  const { spawn: spawn4 } = await import("node:child_process");
42362
42440
  try {
@@ -42385,11 +42463,11 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
42385
42463
  resolve2(code ?? 0);
42386
42464
  });
42387
42465
  });
42388
- const stdout = (await readFile12(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
42389
- const stderr = (await readFile12(stderrPath, "utf8")).replace(/\r\n/g, "\n");
42466
+ const stdout = (await readFile13(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
42467
+ const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
42390
42468
  return { stdout, stderr, exitCode };
42391
42469
  } finally {
42392
- await rm6(dir, { recursive: true, force: true });
42470
+ await rm7(dir, { recursive: true, force: true });
42393
42471
  }
42394
42472
  }
42395
42473
  var DEFAULT_MAX_CALLS = 50;
@@ -42699,7 +42777,7 @@ var CodeEvaluator = class {
42699
42777
  outputPath,
42700
42778
  guidelineFiles: context.evalCase.guideline_paths,
42701
42779
  inputFiles: context.evalCase.file_paths.filter(
42702
- (path41) => !context.evalCase.guideline_paths.includes(path41)
42780
+ (path42) => !context.evalCase.guideline_paths.includes(path42)
42703
42781
  ),
42704
42782
  input: context.evalCase.input,
42705
42783
  trace: context.trace ?? null,
@@ -42942,6 +43020,8 @@ ${context.fileChanges}`;
42942
43020
  };
42943
43021
  } catch (e) {
42944
43022
  const message = e instanceof Error ? e.message : String(e);
43023
+ const evalName = context.evaluator?.name ?? "llm-judge";
43024
+ console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
42945
43025
  return {
42946
43026
  score: 0,
42947
43027
  verdict: "skip",
@@ -42970,24 +43050,39 @@ ${context.fileChanges}`;
42970
43050
  systemPrompt,
42971
43051
  target: judgeProvider.targetName
42972
43052
  };
42973
- const { data, tokenUsage } = await this.runWithRetry({
42974
- context,
42975
- judgeProvider,
42976
- systemPrompt,
42977
- userPrompt: prompt,
42978
- schema: rubricEvaluationSchema
42979
- });
42980
- const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
42981
- return {
42982
- score,
42983
- verdict,
42984
- hits,
42985
- misses,
42986
- expectedAspectCount: rubrics.length,
42987
- reasoning: data.overall_reasoning,
42988
- evaluatorRawRequest,
42989
- tokenUsage
42990
- };
43053
+ try {
43054
+ const { data, tokenUsage } = await this.runWithRetry({
43055
+ context,
43056
+ judgeProvider,
43057
+ systemPrompt,
43058
+ userPrompt: prompt,
43059
+ schema: rubricEvaluationSchema
43060
+ });
43061
+ const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
43062
+ return {
43063
+ score,
43064
+ verdict,
43065
+ hits,
43066
+ misses,
43067
+ expectedAspectCount: rubrics.length,
43068
+ reasoning: data.overall_reasoning,
43069
+ evaluatorRawRequest,
43070
+ tokenUsage
43071
+ };
43072
+ } catch (e) {
43073
+ const message = e instanceof Error ? e.message : String(e);
43074
+ const evalName = context.evaluator?.name ?? "llm-judge";
43075
+ console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
43076
+ return {
43077
+ score: 0,
43078
+ verdict: "skip",
43079
+ hits: [],
43080
+ misses: [`Judge parse failure after 3 attempts: ${message}`],
43081
+ expectedAspectCount: rubrics.length,
43082
+ reasoning: `Judge parse failure after 3 attempts: ${message}`,
43083
+ evaluatorRawRequest
43084
+ };
43085
+ }
42991
43086
  }
42992
43087
  /**
42993
43088
  * Evaluate using score-range rubrics (analytic rubric scoring).
@@ -43001,25 +43096,40 @@ ${context.fileChanges}`;
43001
43096
  systemPrompt,
43002
43097
  target: judgeProvider.targetName
43003
43098
  };
43004
- const { data, tokenUsage } = await this.runWithRetry({
43005
- context,
43006
- judgeProvider,
43007
- systemPrompt,
43008
- userPrompt: prompt,
43009
- schema: scoreRangeEvaluationSchema
43010
- });
43011
- const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
43012
- return {
43013
- score,
43014
- verdict,
43015
- hits,
43016
- misses,
43017
- expectedAspectCount: rubrics.length,
43018
- reasoning: data.overall_reasoning,
43019
- evaluatorRawRequest,
43020
- details,
43021
- tokenUsage
43022
- };
43099
+ try {
43100
+ const { data, tokenUsage } = await this.runWithRetry({
43101
+ context,
43102
+ judgeProvider,
43103
+ systemPrompt,
43104
+ userPrompt: prompt,
43105
+ schema: scoreRangeEvaluationSchema
43106
+ });
43107
+ const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
43108
+ return {
43109
+ score,
43110
+ verdict,
43111
+ hits,
43112
+ misses,
43113
+ expectedAspectCount: rubrics.length,
43114
+ reasoning: data.overall_reasoning,
43115
+ evaluatorRawRequest,
43116
+ details,
43117
+ tokenUsage
43118
+ };
43119
+ } catch (e) {
43120
+ const message = e instanceof Error ? e.message : String(e);
43121
+ const evalName = context.evaluator?.name ?? "llm-judge";
43122
+ console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
43123
+ return {
43124
+ score: 0,
43125
+ verdict: "skip",
43126
+ hits: [],
43127
+ misses: [`Judge parse failure after 3 attempts: ${message}`],
43128
+ expectedAspectCount: rubrics.length,
43129
+ reasoning: `Judge parse failure after 3 attempts: ${message}`,
43130
+ evaluatorRawRequest
43131
+ };
43132
+ }
43023
43133
  }
43024
43134
  /**
43025
43135
  * Build prompt for score-range rubric evaluation.
@@ -43303,19 +43413,13 @@ var CompositeEvaluator = class {
43303
43413
  runWeightedAverage(results, weights) {
43304
43414
  let totalWeight = 0;
43305
43415
  let weightedSum = 0;
43416
+ let evaluatedCount = 0;
43306
43417
  const allHits = [];
43307
43418
  const allMisses = [];
43308
43419
  const reasoningParts = [];
43309
43420
  const scores = [];
43310
43421
  for (const member of results) {
43311
43422
  const weight = weights?.[member.id] ?? 1;
43312
- totalWeight += weight;
43313
- weightedSum += member.result.score * weight;
43314
- allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
43315
- allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
43316
- if (member.result.reasoning) {
43317
- reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
43318
- }
43319
43423
  scores.push({
43320
43424
  name: member.id,
43321
43425
  type: member.type,
@@ -43330,6 +43434,32 @@ var CompositeEvaluator = class {
43330
43434
  details: member.result.details,
43331
43435
  tokenUsage: member.result.tokenUsage
43332
43436
  });
43437
+ if (member.result.verdict === "skip") {
43438
+ continue;
43439
+ }
43440
+ evaluatedCount++;
43441
+ totalWeight += weight;
43442
+ weightedSum += member.result.score * weight;
43443
+ allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
43444
+ allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
43445
+ if (member.result.reasoning) {
43446
+ reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
43447
+ }
43448
+ }
43449
+ if (evaluatedCount === 0 && results.length > 0) {
43450
+ return {
43451
+ score: 0,
43452
+ verdict: "skip",
43453
+ hits: [],
43454
+ misses: [],
43455
+ expectedAspectCount: 1,
43456
+ reasoning: "All evaluators skipped (infrastructure failure)",
43457
+ evaluatorRawRequest: {
43458
+ aggregator: "weighted_average",
43459
+ ...weights ? { weights } : {}
43460
+ },
43461
+ scores
43462
+ };
43333
43463
  }
43334
43464
  const finalScore = totalWeight > 0 ? weightedSum / totalWeight : 0;
43335
43465
  return {
@@ -43353,19 +43483,8 @@ var CompositeEvaluator = class {
43353
43483
  const reasoningParts = [];
43354
43484
  let passingCount = 0;
43355
43485
  let borderlineCount = 0;
43486
+ let evaluatedCount = 0;
43356
43487
  for (const member of results) {
43357
- const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
43358
- if (isPassing) {
43359
- passingCount++;
43360
- if (member.result.verdict === "borderline") {
43361
- borderlineCount++;
43362
- }
43363
- }
43364
- allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
43365
- allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
43366
- if (member.result.reasoning) {
43367
- reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
43368
- }
43369
43488
  scores.push({
43370
43489
  name: member.id,
43371
43490
  type: member.type,
@@ -43379,8 +43498,39 @@ var CompositeEvaluator = class {
43379
43498
  details: member.result.details,
43380
43499
  tokenUsage: member.result.tokenUsage
43381
43500
  });
43501
+ if (member.result.verdict === "skip") {
43502
+ continue;
43503
+ }
43504
+ evaluatedCount++;
43505
+ const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
43506
+ if (isPassing) {
43507
+ passingCount++;
43508
+ if (member.result.verdict === "borderline") {
43509
+ borderlineCount++;
43510
+ }
43511
+ }
43512
+ allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
43513
+ allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
43514
+ if (member.result.reasoning) {
43515
+ reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
43516
+ }
43517
+ }
43518
+ if (evaluatedCount === 0 && results.length > 0) {
43519
+ return {
43520
+ score: 0,
43521
+ verdict: "skip",
43522
+ hits: [],
43523
+ misses: [],
43524
+ expectedAspectCount: 1,
43525
+ reasoning: "All evaluators skipped (infrastructure failure)",
43526
+ evaluatorRawRequest: {
43527
+ aggregator: "threshold",
43528
+ threshold
43529
+ },
43530
+ scores
43531
+ };
43382
43532
  }
43383
- const totalCount = results.length;
43533
+ const totalCount = evaluatedCount;
43384
43534
  const score = totalCount > 0 ? passingCount / totalCount : 0;
43385
43535
  const pass = score >= threshold;
43386
43536
  if (pass && borderlineCount > 0) {
@@ -43882,115 +44032,115 @@ var FieldAccuracyEvaluator = class {
43882
44032
  * Evaluate a single field against the expected value.
43883
44033
  */
43884
44034
  evaluateField(fieldConfig, candidateData, expectedData) {
43885
- const { path: path41, match, required: required2 = true, weight = 1 } = fieldConfig;
43886
- const candidateValue = resolvePath(candidateData, path41);
43887
- const expectedValue = resolvePath(expectedData, path41);
44035
+ const { path: path42, match, required: required2 = true, weight = 1 } = fieldConfig;
44036
+ const candidateValue = resolvePath(candidateData, path42);
44037
+ const expectedValue = resolvePath(expectedData, path42);
43888
44038
  if (expectedValue === void 0) {
43889
44039
  return {
43890
- path: path41,
44040
+ path: path42,
43891
44041
  score: 1,
43892
44042
  // No expected value means no comparison needed
43893
44043
  weight,
43894
44044
  hit: true,
43895
- message: `${path41}: no expected value`
44045
+ message: `${path42}: no expected value`
43896
44046
  };
43897
44047
  }
43898
44048
  if (candidateValue === void 0) {
43899
44049
  if (required2) {
43900
44050
  return {
43901
- path: path41,
44051
+ path: path42,
43902
44052
  score: 0,
43903
44053
  weight,
43904
44054
  hit: false,
43905
- message: `${path41} (required, missing)`
44055
+ message: `${path42} (required, missing)`
43906
44056
  };
43907
44057
  }
43908
44058
  return {
43909
- path: path41,
44059
+ path: path42,
43910
44060
  score: 1,
43911
44061
  // Don't penalize missing optional fields
43912
44062
  weight: 0,
43913
44063
  // Zero weight means it won't affect the score
43914
44064
  hit: true,
43915
- message: `${path41}: optional field missing`
44065
+ message: `${path42}: optional field missing`
43916
44066
  };
43917
44067
  }
43918
44068
  switch (match) {
43919
44069
  case "exact":
43920
- return this.compareExact(path41, candidateValue, expectedValue, weight);
44070
+ return this.compareExact(path42, candidateValue, expectedValue, weight);
43921
44071
  case "numeric_tolerance":
43922
44072
  return this.compareNumericTolerance(
43923
- path41,
44073
+ path42,
43924
44074
  candidateValue,
43925
44075
  expectedValue,
43926
44076
  fieldConfig,
43927
44077
  weight
43928
44078
  );
43929
44079
  case "date":
43930
- return this.compareDate(path41, candidateValue, expectedValue, fieldConfig, weight);
44080
+ return this.compareDate(path42, candidateValue, expectedValue, fieldConfig, weight);
43931
44081
  default:
43932
44082
  return {
43933
- path: path41,
44083
+ path: path42,
43934
44084
  score: 0,
43935
44085
  weight,
43936
44086
  hit: false,
43937
- message: `${path41}: unknown match type "${match}"`
44087
+ message: `${path42}: unknown match type "${match}"`
43938
44088
  };
43939
44089
  }
43940
44090
  }
43941
44091
  /**
43942
44092
  * Exact equality comparison.
43943
44093
  */
43944
- compareExact(path41, candidateValue, expectedValue, weight) {
44094
+ compareExact(path42, candidateValue, expectedValue, weight) {
43945
44095
  if (deepEqual(candidateValue, expectedValue)) {
43946
44096
  return {
43947
- path: path41,
44097
+ path: path42,
43948
44098
  score: 1,
43949
44099
  weight,
43950
44100
  hit: true,
43951
- message: path41
44101
+ message: path42
43952
44102
  };
43953
44103
  }
43954
44104
  if (typeof candidateValue !== typeof expectedValue) {
43955
44105
  return {
43956
- path: path41,
44106
+ path: path42,
43957
44107
  score: 0,
43958
44108
  weight,
43959
44109
  hit: false,
43960
- message: `${path41} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
44110
+ message: `${path42} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
43961
44111
  };
43962
44112
  }
43963
44113
  return {
43964
- path: path41,
44114
+ path: path42,
43965
44115
  score: 0,
43966
44116
  weight,
43967
44117
  hit: false,
43968
- message: `${path41} (value mismatch)`
44118
+ message: `${path42} (value mismatch)`
43969
44119
  };
43970
44120
  }
43971
44121
  /**
43972
44122
  * Numeric comparison with absolute or relative tolerance.
43973
44123
  */
43974
- compareNumericTolerance(path41, candidateValue, expectedValue, fieldConfig, weight) {
44124
+ compareNumericTolerance(path42, candidateValue, expectedValue, fieldConfig, weight) {
43975
44125
  const { tolerance = 0, relative = false } = fieldConfig;
43976
44126
  const candidateNum = toNumber2(candidateValue);
43977
44127
  const expectedNum = toNumber2(expectedValue);
43978
44128
  if (candidateNum === null || expectedNum === null) {
43979
44129
  return {
43980
- path: path41,
44130
+ path: path42,
43981
44131
  score: 0,
43982
44132
  weight,
43983
44133
  hit: false,
43984
- message: `${path41} (non-numeric value)`
44134
+ message: `${path42} (non-numeric value)`
43985
44135
  };
43986
44136
  }
43987
44137
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
43988
44138
  return {
43989
- path: path41,
44139
+ path: path42,
43990
44140
  score: 0,
43991
44141
  weight,
43992
44142
  hit: false,
43993
- message: `${path41} (invalid numeric value)`
44143
+ message: `${path42} (invalid numeric value)`
43994
44144
  };
43995
44145
  }
43996
44146
  const diff = Math.abs(candidateNum - expectedNum);
@@ -44003,61 +44153,61 @@ var FieldAccuracyEvaluator = class {
44003
44153
  }
44004
44154
  if (withinTolerance) {
44005
44155
  return {
44006
- path: path41,
44156
+ path: path42,
44007
44157
  score: 1,
44008
44158
  weight,
44009
44159
  hit: true,
44010
- message: `${path41} (within tolerance: diff=${diff.toFixed(2)})`
44160
+ message: `${path42} (within tolerance: diff=${diff.toFixed(2)})`
44011
44161
  };
44012
44162
  }
44013
44163
  return {
44014
- path: path41,
44164
+ path: path42,
44015
44165
  score: 0,
44016
44166
  weight,
44017
44167
  hit: false,
44018
- message: `${path41} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
44168
+ message: `${path42} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
44019
44169
  };
44020
44170
  }
44021
44171
  /**
44022
44172
  * Date comparison with format normalization.
44023
44173
  */
44024
- compareDate(path41, candidateValue, expectedValue, fieldConfig, weight) {
44174
+ compareDate(path42, candidateValue, expectedValue, fieldConfig, weight) {
44025
44175
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
44026
44176
  const candidateDate = parseDate(String(candidateValue), formats);
44027
44177
  const expectedDate = parseDate(String(expectedValue), formats);
44028
44178
  if (candidateDate === null) {
44029
44179
  return {
44030
- path: path41,
44180
+ path: path42,
44031
44181
  score: 0,
44032
44182
  weight,
44033
44183
  hit: false,
44034
- message: `${path41} (unparseable candidate date)`
44184
+ message: `${path42} (unparseable candidate date)`
44035
44185
  };
44036
44186
  }
44037
44187
  if (expectedDate === null) {
44038
44188
  return {
44039
- path: path41,
44189
+ path: path42,
44040
44190
  score: 0,
44041
44191
  weight,
44042
44192
  hit: false,
44043
- message: `${path41} (unparseable expected date)`
44193
+ message: `${path42} (unparseable expected date)`
44044
44194
  };
44045
44195
  }
44046
44196
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
44047
44197
  return {
44048
- path: path41,
44198
+ path: path42,
44049
44199
  score: 1,
44050
44200
  weight,
44051
44201
  hit: true,
44052
- message: path41
44202
+ message: path42
44053
44203
  };
44054
44204
  }
44055
44205
  return {
44056
- path: path41,
44206
+ path: path42,
44057
44207
  score: 0,
44058
44208
  weight,
44059
44209
  hit: false,
44060
- message: `${path41} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
44210
+ message: `${path42} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
44061
44211
  };
44062
44212
  }
44063
44213
  /**
@@ -44098,11 +44248,11 @@ var FieldAccuracyEvaluator = class {
44098
44248
  };
44099
44249
  }
44100
44250
  };
44101
- function resolvePath(obj, path41) {
44102
- if (!path41 || !obj) {
44251
+ function resolvePath(obj, path42) {
44252
+ if (!path42 || !obj) {
44103
44253
  return void 0;
44104
44254
  }
44105
- const parts = path41.split(/\.|\[|\]/).filter((p) => p.length > 0);
44255
+ const parts = path42.split(/\.|\[|\]/).filter((p) => p.length > 0);
44106
44256
  let current = obj;
44107
44257
  for (const part of parts) {
44108
44258
  if (current === null || current === void 0) {
@@ -44906,8 +45056,8 @@ var TokenUsageEvaluator = class {
44906
45056
  };
44907
45057
  }
44908
45058
  };
44909
- function getNestedValue(obj, path41) {
44910
- const parts = path41.split(".");
45059
+ function getNestedValue(obj, path42) {
45060
+ const parts = path42.split(".");
44911
45061
  let current = obj;
44912
45062
  for (const part of parts) {
44913
45063
  if (current === null || current === void 0 || typeof current !== "object") {
@@ -46302,9 +46452,267 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
46302
46452
  }
46303
46453
  }
46304
46454
  var execFileAsync = promisify5(execFile);
46455
/**
 * Build the environment passed to spawned git processes.
 *
 * - Every inherited `GIT_*` variable except `GIT_SSH_COMMAND` is removed.
 * - Prompts are disabled via `GIT_TERMINAL_PROMPT=0`, an empty `GIT_ASKPASS`
 *   and ssh `BatchMode=yes`.
 * - An inherited `GIT_SSH_COMMAND` is kept and extended with
 *   `-o BatchMode=yes`. Previously it was spared by the loop but then
 *   unconditionally overwritten, dropping any custom key or proxy options.
 *
 * `process.env` itself is not modified.
 *
 * @returns {Record<string, string | undefined>} environment for `execFile`.
 */
function gitEnv() {
  const env = { ...process.env };
  for (const key of Object.keys(env)) {
    if (key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND") {
      delete env[key];
    }
  }
  const inheritedSshCommand = env.GIT_SSH_COMMAND?.trim();
  const sshCommand = inheritedSshCommand ? `${inheritedSshCommand} -o BatchMode=yes` : "ssh -o BatchMode=yes";
  return {
    ...env,
    GIT_TERMINAL_PROMPT: "0",
    GIT_ASKPASS: "",
    GIT_SSH_COMMAND: sshCommand
  };
}
46469
/**
 * Run `git` with the given arguments and return its trimmed stdout.
 *
 * @param {string[]} args - arguments passed to the git executable.
 * @param {{ cwd?: string, timeout?: number }} [opts] - working directory and
 *   timeout in milliseconds (defaults to 300000).
 * @returns {Promise<string>} stdout with surrounding whitespace removed.
 */
async function git(args, opts) {
  const execOptions = {
    cwd: opts?.cwd,
    timeout: opts?.timeout ?? 3e5,
    env: gitEnv(),
    // 50 MB output cap
    maxBuffer: 50 * 1024 * 1024
  };
  const result = await execFileAsync("git", args, execOptions);
  return result.stdout.trim();
}
46478
/**
 * Reduce a repo config to the fields that feed the workspace fingerprint.
 *
 * Git URLs are lower-cased and stripped of a trailing `.git`; the ref
 * defaults to "HEAD"; `depth`, `filter` and (sorted) `sparse` are included
 * only when set. Key insertion order is significant because the result is
 * serialized with JSON.stringify before hashing.
 */
function normalizeRepoForFingerprint(repo) {
  let source;
  if (repo.source.type === "git") {
    source = { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") };
  } else {
    source = { type: "local", path: repo.source.path };
  }

  const normalized = {
    path: repo.path,
    source,
    ref: repo.checkout?.ref ?? "HEAD"
  };

  const cloneOptions = repo.clone;
  if (cloneOptions?.depth !== void 0) {
    normalized.depth = cloneOptions.depth;
  }
  if (cloneOptions?.filter !== void 0) {
    normalized.filter = cloneOptions.filter;
  }
  if (cloneOptions?.sparse?.length) {
    normalized.sparse = [...cloneOptions.sparse].sort();
  }
  return normalized;
}
46496
/**
 * Compute the SHA-256 hex fingerprint of a workspace definition.
 *
 * The hashed value is the JSON of `{ templatePath, repos }`, where a missing
 * template path becomes `null` and repos are sorted by `path` and normalized,
 * so the input order of `repos` does not affect the result.
 */
function computeWorkspaceFingerprint(templatePath, repos) {
  const orderedRepos = [...repos].sort((a, b) => a.path.localeCompare(b.path));
  const canonical = {
    templatePath: templatePath ?? null,
    repos: orderedRepos.map(normalizeRepoForFingerprint)
  };
  const hasher = createHash("sha256");
  hasher.update(JSON.stringify(canonical));
  return hasher.digest("hex");
}
46503
/**
 * Recursively copy `src` into `dest`, creating `dest` if needed.
 *
 * - Entries named `.git` are skipped at every depth.
 * - `skipDirs` names directories to skip among the direct children of `src`
 *   only. It is intentionally not forwarded into the recursion: callers pass
 *   the top-level path component of each repo, and applying those names at
 *   every depth silently dropped unrelated nested directories that happened
 *   to share a name.
 * - Files are copied with overwrite and preserved timestamps.
 *
 * @param {string} src - directory to copy from.
 * @param {string} dest - directory to copy into.
 * @param {Set<string>} [skipDirs] - top-level directory names to leave out.
 */
async function copyDirectoryRecursive2(src, dest, skipDirs) {
  await mkdir11(dest, { recursive: true });
  const entries = await readdir4(src, { withFileTypes: true });
  for (const entry of entries) {
    if (entry.name === ".git") {
      continue;
    }
    const srcPath = path36.join(src, entry.name);
    const destPath = path36.join(dest, entry.name);
    if (!entry.isDirectory()) {
      await cp2(srcPath, destPath, { preserveTimestamps: true, force: true });
      continue;
    }
    if (skipDirs?.has(entry.name)) {
      continue;
    }
    await copyDirectoryRecursive2(srcPath, destPath);
  }
}
46522
/**
 * Report whether the process that wrote a lock file still exists.
 *
 * `process.kill(pid, 0)` delivers no signal; it only checks that the target
 * can be signalled. An `EPERM` failure means the process exists but is owned
 * by another user, so it must still count as alive. Any other failure is
 * treated as "no such process".
 */
function isLockHolderAlive(pid) {
  try {
    process.kill(pid, 0);
    return true;
  } catch (err) {
    return err?.code === "EPERM";
  }
}

/**
 * Manages a pool of reusable workspace directories ("slots") on disk.
 *
 * Layout: `<poolRoot>/<fingerprint>/slot-N`, guarded by `slot-N.lock` files
 * containing the owning PID, plus one `metadata.json` per fingerprint.
 */
var WorkspacePoolManager = class {
  poolRoot;
  /**
   * @param {string} [poolRoot] - pool directory; defaults to getWorkspacePoolRoot().
   */
  constructor(poolRoot) {
    this.poolRoot = poolRoot ?? getWorkspacePoolRoot();
  }
  /**
   * Acquire a workspace slot from the pool.
   *
   * 1. Compute fingerprint from template + repos
   * 2. Check drift (compare stored metadata.json fingerprint vs computed)
   * 3. If drift: warn, remove all unlocked slots
   * 4. Try-lock slot-0, slot-1, ..., up to maxSlots
   * 5. If slot exists: reset repos, re-copy template files (skip repo directories)
   * 6. If new slot: copy template, materialize all repos, write metadata.json
   * 7. Return the slot (index, path, isExisting, lockPath, fingerprint, poolDir)
   *
   * If preparing the slot fails, the lock is released before the error is
   * rethrown, and a slot that was being created for the first time is removed
   * so a half-built directory is never reused as an "existing" slot.
   *
   * @throws {Error} when all `maxSlots` slots are locked, or when slot
   *   preparation fails.
   */
  async acquireWorkspace(options) {
    const { templatePath, repos, maxSlots, repoManager } = options;
    const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
    const poolDir = path36.join(this.poolRoot, fingerprint);
    await mkdir11(poolDir, { recursive: true });
    const drifted = await this.checkDrift(poolDir, fingerprint);
    if (drifted) {
      console.warn(
        `[workspace-pool] Drift detected for fingerprint ${fingerprint.slice(0, 12)}... Removing stale slots.`
      );
      await this.removeAllSlots(poolDir);
    }
    for (let i = 0; i < maxSlots; i++) {
      const slotPath = path36.join(poolDir, `slot-${i}`);
      const lockPath = `${slotPath}.lock`;
      const locked = await this.tryLock(lockPath);
      if (!locked) {
        continue;
      }
      const isExisting = existsSync2(slotPath);
      try {
        if (isExisting) {
          await this.resetSlot(slotPath, templatePath, repos);
        } else {
          await mkdir11(slotPath, { recursive: true });
          if (templatePath) {
            await copyDirectoryRecursive2(templatePath, slotPath);
          }
          if (repos.length > 0) {
            await repoManager.materializeAll(repos, slotPath);
          }
          await this.writeMetadata(poolDir, fingerprint, templatePath ?? null, repos);
        }
      } catch (err) {
        if (!isExisting) {
          // Best-effort cleanup of the partially created slot.
          await rm5(slotPath, { recursive: true, force: true }).catch(() => {
          });
        }
        await this.releaseSlot({ lockPath });
        throw err;
      }
      return {
        index: i,
        path: slotPath,
        isExisting,
        lockPath,
        fingerprint,
        poolDir
      };
    }
    throw new Error(
      `All ${maxSlots} pool slots are locked for fingerprint ${fingerprint.slice(0, 12)}...`
    );
  }
  /** Remove lock file to release a slot. Failures are ignored (best effort). */
  async releaseSlot(slot) {
    try {
      await unlink(slot.lockPath);
    } catch {
    }
  }
  /**
   * Try to acquire a PID-based lock file.
   *
   * The lock is created exclusively (`wx`). On EEXIST the stored PID is read:
   * - holder alive (including EPERM from the liveness probe): return false;
   * - holder gone: remove the stale lock and retry;
   * - lock unreadable or PID not a number: return false (the file may have
   *   just been created by another process that has not written its PID yet).
   *
   * Bounded to 3 attempts to avoid looping forever under contention.
   *
   * @returns {Promise<boolean>} true if this process now holds the lock.
   */
  async tryLock(lockPath) {
    for (let attempt = 0; attempt < 3; attempt++) {
      try {
        await writeFile7(lockPath, String(process.pid), { flag: "wx" });
        return true;
      } catch (err) {
        if (err.code !== "EEXIST") {
          throw err;
        }
      }
      let pid;
      try {
        const pidStr = await readFile11(lockPath, "utf-8");
        pid = Number.parseInt(pidStr.trim(), 10);
      } catch {
        return false;
      }
      if (Number.isNaN(pid) || isLockHolderAlive(pid)) {
        return false;
      }
      await unlink(lockPath).catch(() => {
      });
    }
    return false;
  }
  /**
   * Check if the stored fingerprint in metadata.json differs from the computed one.
   * Returns true if drifted, false otherwise.
   * Returns false (no drift) if metadata.json is missing or cannot be parsed.
   */
  async checkDrift(poolDir, fingerprint) {
    const metadataPath = path36.join(poolDir, "metadata.json");
    try {
      const raw = await readFile11(metadataPath, "utf-8");
      const metadata = JSON.parse(raw);
      return metadata.fingerprint !== fingerprint;
    } catch {
      return false;
    }
  }
  /** Write metadata.json with fingerprint, inputs, and timestamp. */
  async writeMetadata(poolDir, fingerprint, templatePath, repos) {
    const metadata = {
      fingerprint,
      templatePath,
      repos,
      createdAt: (/* @__PURE__ */ new Date()).toISOString()
    };
    await writeFile7(path36.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
  }
  /**
   * Remove all slot directories and their lock files from a pool directory,
   * then remove metadata.json. Slots whose lock names a live process are
   * skipped with a warning; unreadable or non-numeric locks are treated as stale.
   */
  async removeAllSlots(poolDir) {
    const entries = await readdir4(poolDir);
    for (const entry of entries) {
      if (!entry.startsWith("slot-") || entry.endsWith(".lock")) {
        continue;
      }
      const lockPath = path36.join(poolDir, `${entry}.lock`);
      if (existsSync2(lockPath)) {
        let pid = Number.NaN;
        try {
          const pidStr = await readFile11(lockPath, "utf-8");
          pid = Number.parseInt(pidStr.trim(), 10);
        } catch {
        }
        if (!Number.isNaN(pid) && isLockHolderAlive(pid)) {
          console.warn(`[workspace-pool] Skipping slot ${entry}: locked by PID ${pid}`);
          continue;
        }
      }
      await rm5(path36.join(poolDir, entry), { recursive: true, force: true });
      await rm5(lockPath, { force: true }).catch(() => {
      });
    }
    await rm5(path36.join(poolDir, "metadata.json"), { force: true }).catch(() => {
    });
  }
  /**
   * Reset an existing slot for reuse:
   * 1. Reset repos (git reset --hard {ref} && git clean -fd per repo);
   *    repos whose directory is missing from the slot are skipped.
   * 2. Re-copy template files, skipping each repo's top-level directory.
   */
  async resetSlot(slotPath, templatePath, repos) {
    for (const repo of repos) {
      const repoDir = path36.join(slotPath, repo.path);
      if (!existsSync2(repoDir)) {
        continue;
      }
      const ref = repo.checkout?.ref ?? "HEAD";
      await git(["reset", "--hard", ref], { cwd: repoDir });
      await git(["clean", "-fd"], { cwd: repoDir });
    }
    if (templatePath) {
      const repoDirNames = new Set(
        repos.map((r) => {
          const normalized = r.path.replace(/^\.\//, "");
          return normalized.split("/")[0];
        })
      );
      await copyDirectoryRecursive2(templatePath, slotPath, repoDirNames);
    }
  }
};
46712
+ var execFileAsync2 = promisify6(execFile2);
46305
46713
  var DEFAULT_TIMEOUT_MS2 = 3e5;
46306
46714
  var LOCK_TIMEOUT_MS = 6e4;
46307
- function gitEnv() {
46715
+ function gitEnv2() {
46308
46716
  const env = { ...process.env };
46309
46717
  for (const key of Object.keys(env)) {
46310
46718
  if (key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND") {
@@ -46320,16 +46728,16 @@ function gitEnv() {
46320
46728
  }
46321
46729
  function cacheKey(source) {
46322
46730
  const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
46323
- return createHash("sha256").update(raw).digest("hex");
46731
+ return createHash2("sha256").update(raw).digest("hex");
46324
46732
  }
46325
46733
  function getSourceUrl(source) {
46326
46734
  return source.type === "git" ? source.url : source.path;
46327
46735
  }
46328
- async function git(args, opts) {
46329
- const { stdout } = await execFileAsync("git", args, {
46736
+ async function git2(args, opts) {
46737
+ const { stdout } = await execFileAsync2("git", args, {
46330
46738
  cwd: opts?.cwd,
46331
46739
  timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
46332
- env: gitEnv(),
46740
+ env: gitEnv2(),
46333
46741
  maxBuffer: 50 * 1024 * 1024
46334
46742
  // 50MB
46335
46743
  });
@@ -46339,7 +46747,7 @@ async function acquireLock(lockPath) {
46339
46747
  const start = Date.now();
46340
46748
  while (Date.now() - start < LOCK_TIMEOUT_MS) {
46341
46749
  try {
46342
- await writeFile7(lockPath, String(process.pid), { flag: "wx" });
46750
+ await writeFile8(lockPath, String(process.pid), { flag: "wx" });
46343
46751
  return;
46344
46752
  } catch (err) {
46345
46753
  if (err.code === "EEXIST") {
@@ -46353,7 +46761,7 @@ async function acquireLock(lockPath) {
46353
46761
  }
46354
46762
  async function releaseLock(lockPath) {
46355
46763
  try {
46356
- await unlink(lockPath);
46764
+ await unlink2(lockPath);
46357
46765
  } catch {
46358
46766
  }
46359
46767
  }
@@ -46367,16 +46775,12 @@ var RepoManager = class {
46367
46775
  async runGit(args, opts) {
46368
46776
  const startedAt = Date.now();
46369
46777
  if (this.verbose) {
46370
- console.log(
46371
- `[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`
46372
- );
46778
+ console.log(`[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`);
46373
46779
  }
46374
46780
  try {
46375
- const output = await git(args, opts);
46781
+ const output = await git2(args, opts);
46376
46782
  if (this.verbose) {
46377
- console.log(
46378
- `[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`
46379
- );
46783
+ console.log(`[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`);
46380
46784
  }
46381
46785
  return output;
46382
46786
  } catch (error40) {
@@ -46396,9 +46800,9 @@ var RepoManager = class {
46396
46800
  */
46397
46801
  async ensureCache(source, depth, resolve2) {
46398
46802
  const key = cacheKey(source);
46399
- const cachePath = path36.join(this.cacheDir, key);
46803
+ const cachePath = path37.join(this.cacheDir, key);
46400
46804
  const lockPath = `${cachePath}.lock`;
46401
- const cacheExists = existsSync2(path36.join(cachePath, "HEAD"));
46805
+ const cacheExists = existsSync3(path37.join(cachePath, "HEAD"));
46402
46806
  if (this.verbose) {
46403
46807
  console.log(
46404
46808
  `[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve2 ?? "remote"} cache=${cachePath}`
@@ -46416,13 +46820,11 @@ var RepoManager = class {
46416
46820
  `No cache found for \`${url2}\`. Run \`agentv cache add --url ${url2} --from <local-path>\` to seed it.`
46417
46821
  );
46418
46822
  }
46419
- await mkdir11(this.cacheDir, { recursive: true });
46823
+ await mkdir12(this.cacheDir, { recursive: true });
46420
46824
  const lockStartedAt = Date.now();
46421
46825
  await acquireLock(lockPath);
46422
46826
  if (this.verbose) {
46423
- console.log(
46424
- `[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`
46425
- );
46827
+ console.log(`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`);
46426
46828
  }
46427
46829
  try {
46428
46830
  if (cacheExists) {
@@ -46460,7 +46862,7 @@ var RepoManager = class {
46460
46862
  * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
46461
46863
  */
46462
46864
  async materialize(repo, workspacePath) {
46463
- const targetDir = path36.join(workspacePath, repo.path);
46865
+ const targetDir = path37.join(workspacePath, repo.path);
46464
46866
  const startedAt = Date.now();
46465
46867
  if (this.verbose) {
46466
46868
  console.log(
@@ -46555,14 +46957,14 @@ var RepoManager = class {
46555
46957
  async reset(repos, workspacePath, strategy) {
46556
46958
  if (strategy === "recreate") {
46557
46959
  for (const repo of repos) {
46558
- const targetDir = path36.join(workspacePath, repo.path);
46559
- await rm5(targetDir, { recursive: true, force: true });
46960
+ const targetDir = path37.join(workspacePath, repo.path);
46961
+ await rm6(targetDir, { recursive: true, force: true });
46560
46962
  }
46561
46963
  await this.materializeAll(repos, workspacePath);
46562
46964
  return;
46563
46965
  }
46564
46966
  for (const repo of repos) {
46565
- const targetDir = path36.join(workspacePath, repo.path);
46967
+ const targetDir = path37.join(workspacePath, repo.path);
46566
46968
  await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
46567
46969
  await this.runGit(["clean", "-fd"], { cwd: targetDir });
46568
46970
  }
@@ -46574,21 +46976,21 @@ var RepoManager = class {
46574
46976
  async seedCache(localPath, remoteUrl, opts) {
46575
46977
  const source = { type: "git", url: remoteUrl };
46576
46978
  const key = cacheKey(source);
46577
- const cachePath = path36.join(this.cacheDir, key);
46979
+ const cachePath = path37.join(this.cacheDir, key);
46578
46980
  const lockPath = `${cachePath}.lock`;
46579
- await mkdir11(this.cacheDir, { recursive: true });
46981
+ await mkdir12(this.cacheDir, { recursive: true });
46580
46982
  await acquireLock(lockPath);
46581
46983
  try {
46582
- if (existsSync2(path36.join(cachePath, "HEAD"))) {
46984
+ if (existsSync3(path37.join(cachePath, "HEAD"))) {
46583
46985
  if (!opts?.force) {
46584
46986
  throw new Error(
46585
46987
  `Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
46586
46988
  );
46587
46989
  }
46588
- await rm5(cachePath, { recursive: true, force: true });
46990
+ await rm6(cachePath, { recursive: true, force: true });
46589
46991
  }
46590
- await git(["clone", "--mirror", "--bare", localPath, cachePath]);
46591
- await git(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
46992
+ await git2(["clone", "--mirror", "--bare", localPath, cachePath]);
46993
+ await git2(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
46592
46994
  } finally {
46593
46995
  await releaseLock(lockPath);
46594
46996
  }
@@ -46596,37 +46998,37 @@ var RepoManager = class {
46596
46998
  }
46597
46999
  /** Remove the entire cache directory. */
46598
47000
  async cleanCache() {
46599
- await rm5(this.cacheDir, { recursive: true, force: true });
47001
+ await rm6(this.cacheDir, { recursive: true, force: true });
46600
47002
  }
46601
47003
  };
46602
47004
  async function resolveWorkspaceTemplate(templatePath) {
46603
47005
  if (!templatePath) {
46604
47006
  return void 0;
46605
47007
  }
46606
- const resolved = path37.resolve(templatePath);
47008
+ const resolved = path38.resolve(templatePath);
46607
47009
  const stats = await stat6(resolved);
46608
47010
  if (stats.isFile()) {
46609
47011
  return {
46610
- dir: path37.dirname(resolved),
47012
+ dir: path38.dirname(resolved),
46611
47013
  workspaceFile: resolved
46612
47014
  };
46613
47015
  }
46614
47016
  if (!stats.isDirectory()) {
46615
47017
  throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
46616
47018
  }
46617
- const entries = await readdir4(resolved);
47019
+ const entries = await readdir5(resolved);
46618
47020
  const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
46619
47021
  if (workspaceFiles.length === 1) {
46620
47022
  return {
46621
47023
  dir: resolved,
46622
- workspaceFile: path37.join(resolved, workspaceFiles[0])
47024
+ workspaceFile: path38.join(resolved, workspaceFiles[0])
46623
47025
  };
46624
47026
  }
46625
47027
  if (workspaceFiles.length > 1) {
46626
47028
  const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
46627
47029
  return {
46628
47030
  dir: resolved,
46629
- workspaceFile: conventionFile ? path37.join(resolved, conventionFile) : void 0
47031
+ workspaceFile: conventionFile ? path38.join(resolved, conventionFile) : void 0
46630
47032
  };
46631
47033
  }
46632
47034
  return { dir: resolved };
@@ -46704,7 +47106,10 @@ async function runEvaluation(options) {
46704
47106
  trials,
46705
47107
  streamCallbacks,
46706
47108
  totalBudgetUsd,
46707
- failOnError
47109
+ failOnError,
47110
+ poolWorkspaces,
47111
+ poolMaxSlots: configPoolMaxSlots,
47112
+ workspace: userWorkspacePath
46708
47113
  } = options;
46709
47114
  let useCache = options.useCache;
46710
47115
  if (trials && trials.count > 1 && useCache) {
@@ -46778,7 +47183,7 @@ async function runEvaluation(options) {
46778
47183
  ];
46779
47184
  const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveJudgeProvider);
46780
47185
  const typeRegistry = createBuiltinRegistry();
46781
- const discoveryBaseDir = evalFilePath ? path38.dirname(path38.resolve(evalFilePath)) : process.cwd();
47186
+ const discoveryBaseDir = evalFilePath ? path39.dirname(path39.resolve(evalFilePath)) : process.cwd();
46782
47187
  const evalDir = discoveryBaseDir;
46783
47188
  await discoverAssertions(typeRegistry, discoveryBaseDir);
46784
47189
  const providerRegistry = createBuiltinProviderRegistry();
@@ -46840,13 +47245,19 @@ async function runEvaluation(options) {
46840
47245
  }
46841
47246
  };
46842
47247
  const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
46843
- const hasSharedWorkspace = !!(workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
47248
+ if (userWorkspacePath && isPerTestIsolation) {
47249
+ throw new Error(
47250
+ "--workspace is incompatible with isolation: per_test. Use isolation: shared (default)."
47251
+ );
47252
+ }
47253
+ const hasSharedWorkspace = !!(userWorkspacePath || workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
47254
+ const usePool = poolWorkspaces === true && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !userWorkspacePath;
46844
47255
  const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
46845
- const workers = hasSharedWorkspace ? 1 : requestedWorkers;
47256
+ const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
46846
47257
  setupLog(
46847
- `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
47258
+ `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
46848
47259
  );
46849
- if (hasSharedWorkspace && requestedWorkers > 1) {
47260
+ if (hasSharedWorkspace && !usePool && requestedWorkers > 1) {
46850
47261
  console.warn(
46851
47262
  `Warning: Shared workspace requires sequential execution. Overriding workers from ${requestedWorkers} to 1.`
46852
47263
  );
@@ -46855,7 +47266,37 @@ async function runEvaluation(options) {
46855
47266
  let sharedWorkspacePath;
46856
47267
  let sharedBaselineCommit;
46857
47268
  let beforeAllOutput;
46858
- if (workspaceTemplate) {
47269
+ let poolManager;
47270
+ let poolSlot;
47271
+ const poolSlots = [];
47272
+ const availablePoolSlots = [];
47273
+ const poolSlotBaselines = /* @__PURE__ */ new Map();
47274
+ const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
47275
+ if (userWorkspacePath) {
47276
+ sharedWorkspacePath = userWorkspacePath;
47277
+ setupLog(`using user-provided workspace: ${userWorkspacePath}`);
47278
+ } else if (usePool && suiteWorkspace?.repos) {
47279
+ const slotsNeeded = workers;
47280
+ setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
47281
+ poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
47282
+ const poolRepoManager = new RepoManager(void 0, verbose);
47283
+ for (let i = 0; i < slotsNeeded; i++) {
47284
+ const slot = await poolManager.acquireWorkspace({
47285
+ templatePath: workspaceTemplate,
47286
+ repos: suiteWorkspace.repos,
47287
+ maxSlots: poolMaxSlots,
47288
+ repoManager: poolRepoManager
47289
+ });
47290
+ poolSlots.push(slot);
47291
+ setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
47292
+ }
47293
+ if (slotsNeeded === 1) {
47294
+ poolSlot = poolSlots[0];
47295
+ sharedWorkspacePath = poolSlot.path;
47296
+ } else {
47297
+ availablePoolSlots.push(...poolSlots);
47298
+ }
47299
+ } else if (workspaceTemplate) {
46859
47300
  setupLog(`creating shared workspace from template: ${workspaceTemplate}`);
46860
47301
  try {
46861
47302
  sharedWorkspacePath = await createTempWorkspace(workspaceTemplate, evalRunId, "shared");
@@ -46864,288 +47305,344 @@ async function runEvaluation(options) {
46864
47305
  const message = error40 instanceof Error ? error40.message : String(error40);
46865
47306
  throw new Error(`Failed to create shared workspace: ${message}`);
46866
47307
  }
47308
+ } else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
47309
+ sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
47310
+ await mkdir13(sharedWorkspacePath, { recursive: true });
47311
+ setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
47312
+ }
47313
+ try {
46867
47314
  if (suiteWorkspaceFile && sharedWorkspacePath) {
46868
- const copiedWorkspaceFile = path38.join(sharedWorkspacePath, path38.basename(suiteWorkspaceFile));
47315
+ const copiedWorkspaceFile = path39.join(sharedWorkspacePath, path39.basename(suiteWorkspaceFile));
46869
47316
  try {
46870
47317
  await stat7(copiedWorkspaceFile);
46871
47318
  suiteWorkspaceFile = copiedWorkspaceFile;
46872
47319
  } catch {
46873
47320
  }
46874
47321
  }
46875
- } else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
46876
- sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
46877
- await mkdir12(sharedWorkspacePath, { recursive: true });
46878
- setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
46879
- }
46880
- const repoManager = suiteWorkspace?.repos?.length ? new RepoManager(void 0, verbose) : void 0;
46881
- if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
46882
- setupLog(`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`);
46883
- try {
46884
- await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
46885
- setupLog("shared repo materialization complete");
46886
- } catch (error40) {
46887
- const message = error40 instanceof Error ? error40.message : String(error40);
46888
- if (sharedWorkspacePath) {
46889
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
46890
- });
46891
- }
46892
- throw new Error(`Failed to materialize repos: ${message}`);
46893
- }
46894
- }
46895
- if (sharedWorkspacePath && suiteWorkspace?.before_all) {
46896
- const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
46897
- setupLog(
46898
- `running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
46899
- );
46900
- const scriptContext = {
46901
- workspacePath: sharedWorkspacePath,
46902
- testId: "__before_all__",
46903
- evalRunId,
46904
- evalDir
46905
- };
46906
- try {
46907
- beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
46908
- setupLog("shared before_all completed");
46909
- } catch (error40) {
46910
- const message = error40 instanceof Error ? error40.message : String(error40);
46911
- if (sharedWorkspacePath) {
46912
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
46913
- });
46914
- }
46915
- throw new Error(`before_all script failed: ${message}`);
46916
- }
46917
- }
46918
- if (sharedWorkspacePath) {
46919
- try {
46920
- sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
46921
- setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
46922
- } catch {
46923
- setupLog("shared baseline initialization skipped (non-fatal)");
46924
- }
46925
- }
46926
- let nextWorkerId = 1;
46927
- const workerIdByEvalId = /* @__PURE__ */ new Map();
46928
- let beforeAllOutputAttached = false;
46929
- let cumulativeBudgetCost = 0;
46930
- let budgetExhausted = false;
46931
- let failOnErrorTriggered = false;
46932
- const promises = filteredEvalCases.map(
46933
- (evalCase) => limit(async () => {
46934
- const workerId = nextWorkerId++;
46935
- workerIdByEvalId.set(evalCase.id, workerId);
46936
- if (totalBudgetUsd !== void 0 && budgetExhausted) {
46937
- const budgetResult = {
46938
- timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
46939
- testId: evalCase.id,
46940
- dataset: evalCase.dataset,
46941
- score: 0,
46942
- hits: [],
46943
- misses: [],
46944
- answer: "",
46945
- target: target.name,
46946
- error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
46947
- budgetExceeded: true,
46948
- executionStatus: "execution_error",
46949
- failureStage: "setup",
46950
- failureReasonCode: "budget_exceeded",
46951
- executionError: {
46952
- message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
46953
- stage: "setup"
46954
- }
46955
- };
46956
- if (onProgress) {
46957
- await onProgress({
46958
- workerId,
46959
- testId: evalCase.id,
46960
- status: "failed",
46961
- completedAt: Date.now(),
46962
- error: budgetResult.error
47322
+ const repoManager = suiteWorkspace?.repos?.length && !usePool && !userWorkspacePath ? new RepoManager(void 0, verbose) : void 0;
47323
+ if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
47324
+ setupLog(
47325
+ `materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
47326
+ );
47327
+ try {
47328
+ await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
47329
+ setupLog("shared repo materialization complete");
47330
+ } catch (error40) {
47331
+ const message = error40 instanceof Error ? error40.message : String(error40);
47332
+ if (sharedWorkspacePath && !userWorkspacePath) {
47333
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
46963
47334
  });
46964
47335
  }
46965
- if (onResult) {
46966
- await onResult(budgetResult);
46967
- }
46968
- return budgetResult;
47336
+ throw new Error(`Failed to materialize repos: ${message}`);
46969
47337
  }
46970
- if (failOnError === true && failOnErrorTriggered) {
46971
- const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
46972
- const haltResult = {
46973
- timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
46974
- testId: evalCase.id,
46975
- dataset: evalCase.dataset,
46976
- score: 0,
46977
- hits: [],
46978
- misses: [],
46979
- answer: "",
46980
- target: target.name,
46981
- error: errorMsg,
46982
- executionStatus: "execution_error",
46983
- failureStage: "setup",
46984
- failureReasonCode: "error_threshold_exceeded",
46985
- executionError: { message: errorMsg, stage: "setup" }
46986
- };
46987
- if (onProgress) {
46988
- await onProgress({
46989
- workerId,
46990
- testId: evalCase.id,
46991
- status: "failed",
46992
- completedAt: Date.now(),
46993
- error: haltResult.error
47338
+ }
47339
+ if (sharedWorkspacePath && suiteWorkspace?.before_all) {
47340
+ const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
47341
+ setupLog(
47342
+ `running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
47343
+ );
47344
+ const scriptContext = {
47345
+ workspacePath: sharedWorkspacePath,
47346
+ testId: "__before_all__",
47347
+ evalRunId,
47348
+ evalDir
47349
+ };
47350
+ try {
47351
+ beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
47352
+ setupLog("shared before_all completed");
47353
+ } catch (error40) {
47354
+ const message = error40 instanceof Error ? error40.message : String(error40);
47355
+ if (sharedWorkspacePath && !userWorkspacePath) {
47356
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
46994
47357
  });
46995
47358
  }
46996
- if (onResult) {
46997
- await onResult(haltResult);
46998
- }
46999
- return haltResult;
47000
- }
47001
- if (onProgress) {
47002
- await onProgress({
47003
- workerId,
47004
- testId: evalCase.id,
47005
- status: "running",
47006
- startedAt: Date.now()
47007
- });
47359
+ throw new Error(`before_all script failed: ${message}`);
47008
47360
  }
47009
- try {
47010
- const judgeProvider = await resolveJudgeProvider(target);
47011
- const runCaseOptions = {
47012
- evalCase,
47013
- provider: primaryProvider,
47014
- target,
47015
- evaluators: evaluatorRegistry,
47016
- maxRetries,
47017
- agentTimeoutMs,
47018
- cache,
47019
- useCache,
47020
- now,
47021
- judgeProvider,
47022
- targetResolver,
47023
- availableTargets,
47361
+ }
47362
+ if (availablePoolSlots.length > 0 && suiteWorkspace?.before_all) {
47363
+ for (const slot of availablePoolSlots) {
47364
+ setupLog(`running before_all on pool slot ${slot.index}`);
47365
+ const scriptContext = {
47366
+ workspacePath: slot.path,
47367
+ testId: "__before_all__",
47024
47368
  evalRunId,
47025
- keepWorkspaces,
47026
- cleanupWorkspaces,
47027
- sharedWorkspacePath,
47028
- sharedBaselineCommit,
47029
- suiteWorkspaceFile,
47030
- streamCallbacks,
47031
- typeRegistry,
47032
- repoManager,
47033
47369
  evalDir
47034
47370
  };
47035
- let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
47036
- if (totalBudgetUsd !== void 0) {
47037
- let caseCost;
47038
- if (result.trials && result.trials.length > 0) {
47039
- const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
47040
- if (trialCostSum > 0) {
47041
- caseCost = trialCostSum;
47371
+ try {
47372
+ const output = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
47373
+ if (!beforeAllOutput) beforeAllOutput = output;
47374
+ setupLog(`before_all completed on pool slot ${slot.index}`);
47375
+ } catch (error40) {
47376
+ const message = error40 instanceof Error ? error40.message : String(error40);
47377
+ throw new Error(`before_all script failed on pool slot ${slot.index}: ${message}`);
47378
+ }
47379
+ }
47380
+ }
47381
+ if (sharedWorkspacePath) {
47382
+ try {
47383
+ sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
47384
+ setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
47385
+ } catch {
47386
+ setupLog("shared baseline initialization skipped (non-fatal)");
47387
+ }
47388
+ }
47389
+ if (availablePoolSlots.length > 0) {
47390
+ for (const slot of availablePoolSlots) {
47391
+ try {
47392
+ const baseline = await initializeBaseline(slot.path);
47393
+ poolSlotBaselines.set(slot.path, baseline);
47394
+ setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
47395
+ } catch {
47396
+ setupLog(`pool slot ${slot.index} baseline initialization skipped (non-fatal)`);
47397
+ }
47398
+ }
47399
+ }
47400
+ let nextWorkerId = 1;
47401
+ const workerIdByEvalId = /* @__PURE__ */ new Map();
47402
+ let beforeAllOutputAttached = false;
47403
+ let cumulativeBudgetCost = 0;
47404
+ let budgetExhausted = false;
47405
+ let failOnErrorTriggered = false;
47406
+ const promises = filteredEvalCases.map(
47407
+ (evalCase) => limit(async () => {
47408
+ const workerId = nextWorkerId++;
47409
+ workerIdByEvalId.set(evalCase.id, workerId);
47410
+ if (totalBudgetUsd !== void 0 && budgetExhausted) {
47411
+ const budgetResult = {
47412
+ timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
47413
+ testId: evalCase.id,
47414
+ dataset: evalCase.dataset,
47415
+ score: 0,
47416
+ hits: [],
47417
+ misses: [],
47418
+ answer: "",
47419
+ target: target.name,
47420
+ error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
47421
+ budgetExceeded: true,
47422
+ executionStatus: "execution_error",
47423
+ failureStage: "setup",
47424
+ failureReasonCode: "budget_exceeded",
47425
+ executionError: {
47426
+ message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
47427
+ stage: "setup"
47042
47428
  }
47043
- } else {
47044
- caseCost = result.costUsd;
47429
+ };
47430
+ if (onProgress) {
47431
+ await onProgress({
47432
+ workerId,
47433
+ testId: evalCase.id,
47434
+ status: "failed",
47435
+ completedAt: Date.now(),
47436
+ error: budgetResult.error
47437
+ });
47045
47438
  }
47046
- if (caseCost !== void 0) {
47047
- cumulativeBudgetCost += caseCost;
47048
- if (cumulativeBudgetCost >= totalBudgetUsd) {
47049
- budgetExhausted = true;
47050
- }
47439
+ if (onResult) {
47440
+ await onResult(budgetResult);
47051
47441
  }
47442
+ return budgetResult;
47052
47443
  }
47053
- if (failOnError === true && result.executionStatus === "execution_error") {
47054
- failOnErrorTriggered = true;
47055
- }
47056
- if (beforeAllOutput && !beforeAllOutputAttached) {
47057
- result = { ...result, beforeAllOutput };
47058
- beforeAllOutputAttached = true;
47444
+ if (failOnError === true && failOnErrorTriggered) {
47445
+ const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
47446
+ const haltResult = {
47447
+ timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
47448
+ testId: evalCase.id,
47449
+ dataset: evalCase.dataset,
47450
+ score: 0,
47451
+ hits: [],
47452
+ misses: [],
47453
+ answer: "",
47454
+ target: target.name,
47455
+ error: errorMsg,
47456
+ executionStatus: "execution_error",
47457
+ failureStage: "setup",
47458
+ failureReasonCode: "error_threshold_exceeded",
47459
+ executionError: { message: errorMsg, stage: "setup" }
47460
+ };
47461
+ if (onProgress) {
47462
+ await onProgress({
47463
+ workerId,
47464
+ testId: evalCase.id,
47465
+ status: "failed",
47466
+ completedAt: Date.now(),
47467
+ error: haltResult.error
47468
+ });
47469
+ }
47470
+ if (onResult) {
47471
+ await onResult(haltResult);
47472
+ }
47473
+ return haltResult;
47059
47474
  }
47060
47475
  if (onProgress) {
47061
47476
  await onProgress({
47062
47477
  workerId,
47063
47478
  testId: evalCase.id,
47064
- status: result.error ? "failed" : "completed",
47065
- startedAt: 0,
47066
- // Not used for completed status
47067
- completedAt: Date.now(),
47068
- error: result.error
47479
+ status: "running",
47480
+ startedAt: Date.now()
47069
47481
  });
47070
47482
  }
47071
- if (onResult) {
47072
- await onResult(result);
47483
+ const testPoolSlot = availablePoolSlots.length > 0 ? availablePoolSlots.pop() : void 0;
47484
+ const testWorkspacePath = testPoolSlot?.path ?? sharedWorkspacePath;
47485
+ const testBaselineCommit = testPoolSlot ? poolSlotBaselines.get(testPoolSlot.path) : sharedBaselineCommit;
47486
+ try {
47487
+ const judgeProvider = await resolveJudgeProvider(target);
47488
+ const runCaseOptions = {
47489
+ evalCase,
47490
+ provider: primaryProvider,
47491
+ target,
47492
+ evaluators: evaluatorRegistry,
47493
+ maxRetries,
47494
+ agentTimeoutMs,
47495
+ cache,
47496
+ useCache,
47497
+ now,
47498
+ judgeProvider,
47499
+ targetResolver,
47500
+ availableTargets,
47501
+ evalRunId,
47502
+ keepWorkspaces,
47503
+ cleanupWorkspaces,
47504
+ sharedWorkspacePath: testWorkspacePath,
47505
+ sharedBaselineCommit: testBaselineCommit,
47506
+ suiteWorkspaceFile,
47507
+ streamCallbacks,
47508
+ typeRegistry,
47509
+ repoManager,
47510
+ evalDir
47511
+ };
47512
+ let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
47513
+ if (totalBudgetUsd !== void 0) {
47514
+ let caseCost;
47515
+ if (result.trials && result.trials.length > 0) {
47516
+ const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
47517
+ if (trialCostSum > 0) {
47518
+ caseCost = trialCostSum;
47519
+ }
47520
+ } else {
47521
+ caseCost = result.costUsd;
47522
+ }
47523
+ if (caseCost !== void 0) {
47524
+ cumulativeBudgetCost += caseCost;
47525
+ if (cumulativeBudgetCost >= totalBudgetUsd) {
47526
+ budgetExhausted = true;
47527
+ }
47528
+ }
47529
+ }
47530
+ if (failOnError === true && result.executionStatus === "execution_error") {
47531
+ failOnErrorTriggered = true;
47532
+ }
47533
+ if (beforeAllOutput && !beforeAllOutputAttached) {
47534
+ result = { ...result, beforeAllOutput };
47535
+ beforeAllOutputAttached = true;
47536
+ }
47537
+ if (onProgress) {
47538
+ await onProgress({
47539
+ workerId,
47540
+ testId: evalCase.id,
47541
+ status: result.error ? "failed" : "completed",
47542
+ startedAt: 0,
47543
+ // Not used for completed status
47544
+ completedAt: Date.now(),
47545
+ error: result.error
47546
+ });
47547
+ }
47548
+ if (onResult) {
47549
+ await onResult(result);
47550
+ }
47551
+ return result;
47552
+ } catch (error40) {
47553
+ if (onProgress) {
47554
+ await onProgress({
47555
+ workerId,
47556
+ testId: evalCase.id,
47557
+ status: "failed",
47558
+ completedAt: Date.now(),
47559
+ error: error40 instanceof Error ? error40.message : String(error40)
47560
+ });
47561
+ }
47562
+ throw error40;
47563
+ } finally {
47564
+ if (testPoolSlot) {
47565
+ availablePoolSlots.push(testPoolSlot);
47566
+ }
47073
47567
  }
47074
- return result;
47075
- } catch (error40) {
47076
- if (onProgress) {
47077
- await onProgress({
47078
- workerId,
47079
- testId: evalCase.id,
47080
- status: "failed",
47081
- completedAt: Date.now(),
47082
- error: error40 instanceof Error ? error40.message : String(error40)
47083
- });
47568
+ })
47569
+ );
47570
+ const settled = await Promise.allSettled(promises);
47571
+ const results = [];
47572
+ for (let i = 0; i < settled.length; i++) {
47573
+ const outcome = settled[i];
47574
+ if (outcome.status === "fulfilled") {
47575
+ results.push(outcome.value);
47576
+ } else {
47577
+ const evalCase = filteredEvalCases[i];
47578
+ const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
47579
+ const promptInputs = await buildPromptInputs(evalCase, formattingMode);
47580
+ const errorResult = buildErrorResult(
47581
+ evalCase,
47582
+ target.name,
47583
+ (now ?? (() => /* @__PURE__ */ new Date()))(),
47584
+ outcome.reason,
47585
+ promptInputs,
47586
+ primaryProvider,
47587
+ "agent",
47588
+ "provider_error"
47589
+ );
47590
+ results.push(errorResult);
47591
+ if (onResult) {
47592
+ await onResult(errorResult);
47084
47593
  }
47085
- throw error40;
47086
47594
  }
47087
- })
47088
- );
47089
- const settled = await Promise.allSettled(promises);
47090
- const results = [];
47091
- for (let i = 0; i < settled.length; i++) {
47092
- const outcome = settled[i];
47093
- if (outcome.status === "fulfilled") {
47094
- results.push(outcome.value);
47095
- } else {
47096
- const evalCase = filteredEvalCases[i];
47097
- const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
47098
- const promptInputs = await buildPromptInputs(evalCase, formattingMode);
47099
- const errorResult = buildErrorResult(
47100
- evalCase,
47101
- target.name,
47102
- (now ?? (() => /* @__PURE__ */ new Date()))(),
47103
- outcome.reason,
47104
- promptInputs,
47105
- primaryProvider,
47106
- "agent",
47107
- "provider_error"
47108
- );
47109
- results.push(errorResult);
47110
- if (onResult) {
47111
- await onResult(errorResult);
47595
+ }
47596
+ const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
47597
+ if (afterAllWorkspaces.length > 0 && suiteWorkspace?.after_all) {
47598
+ for (const wsPath of afterAllWorkspaces) {
47599
+ const scriptContext = {
47600
+ workspacePath: wsPath,
47601
+ testId: "__after_all__",
47602
+ evalRunId,
47603
+ evalDir
47604
+ };
47605
+ try {
47606
+ const afterAllOutput = await executeWorkspaceScript(
47607
+ suiteWorkspace.after_all,
47608
+ scriptContext,
47609
+ "warn"
47610
+ );
47611
+ if (afterAllOutput && results.length > 0 && wsPath === afterAllWorkspaces[0]) {
47612
+ results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
47613
+ }
47614
+ } catch {
47615
+ }
47112
47616
  }
47113
47617
  }
47114
- }
47115
- if (sharedWorkspacePath && suiteWorkspace?.after_all) {
47116
- const scriptContext = {
47117
- workspacePath: sharedWorkspacePath,
47118
- testId: "__after_all__",
47119
- evalRunId,
47120
- evalDir
47121
- };
47122
- try {
47123
- const afterAllOutput = await executeWorkspaceScript(
47124
- suiteWorkspace.after_all,
47125
- scriptContext,
47126
- "warn"
47127
- );
47128
- if (afterAllOutput && results.length > 0) {
47129
- results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
47618
+ if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !userWorkspacePath) {
47619
+ const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
47620
+ if (cleanupWorkspaces) {
47621
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
47622
+ });
47623
+ } else if (!hasFailure && !keepWorkspaces) {
47624
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
47625
+ });
47130
47626
  }
47131
- } catch {
47132
47627
  }
47133
- }
47134
- if (sharedWorkspacePath) {
47135
- const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
47136
47628
  if (cleanupWorkspaces) {
47137
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
47138
- });
47139
- } else if (!hasFailure && !keepWorkspaces) {
47140
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
47629
+ await cleanupEvalWorkspaces(evalRunId).catch(() => {
47141
47630
  });
47142
47631
  }
47632
+ return results;
47633
+ } finally {
47634
+ if (poolManager) {
47635
+ if (poolSlot) {
47636
+ await poolManager.releaseSlot(poolSlot);
47637
+ }
47638
+ for (const slot of poolSlots) {
47639
+ if (slot !== poolSlot) {
47640
+ await poolManager.releaseSlot(slot).catch(() => {
47641
+ });
47642
+ }
47643
+ }
47644
+ }
47143
47645
  }
47144
- if (cleanupWorkspaces) {
47145
- await cleanupEvalWorkspaces(evalRunId).catch(() => {
47146
- });
47147
- }
47148
- return results;
47149
47646
  }
47150
47647
  async function runBatchEvaluation(options) {
47151
47648
  const {
@@ -47362,7 +47859,7 @@ async function runEvalCase(options) {
47362
47859
  );
47363
47860
  }
47364
47861
  if (caseWorkspaceFile && workspacePath) {
47365
- const copiedFile = path38.join(workspacePath, path38.basename(caseWorkspaceFile));
47862
+ const copiedFile = path39.join(workspacePath, path39.basename(caseWorkspaceFile));
47366
47863
  try {
47367
47864
  await stat7(copiedFile);
47368
47865
  caseWorkspaceFile = copiedFile;
@@ -47372,7 +47869,7 @@ async function runEvalCase(options) {
47372
47869
  }
47373
47870
  if (!workspacePath && (evalCase.workspace?.before_all || evalCase.workspace?.repos?.length) && evalRunId) {
47374
47871
  workspacePath = getWorkspacePath(evalRunId, evalCase.id);
47375
- await mkdir12(workspacePath, { recursive: true });
47872
+ await mkdir13(workspacePath, { recursive: true });
47376
47873
  }
47377
47874
  if (evalCase.workspace?.repos?.length && workspacePath) {
47378
47875
  const perCaseRepoManager = new RepoManager(void 0, setupDebug);
@@ -47972,7 +48469,7 @@ async function runEvaluatorList(options) {
47972
48469
  fileChanges,
47973
48470
  workspacePath
47974
48471
  };
47975
- const evalFileDir = evalCase.guideline_paths[0] ? path38.dirname(evalCase.guideline_paths[0]) : process.cwd();
48472
+ const evalFileDir = evalCase.guideline_paths[0] ? path39.dirname(evalCase.guideline_paths[0]) : process.cwd();
47976
48473
  const dispatchContext = {
47977
48474
  judgeProvider,
47978
48475
  targetResolver,
@@ -48206,7 +48703,7 @@ function extractProviderError(response) {
48206
48703
  return trimmed.length > 0 ? trimmed : void 0;
48207
48704
  }
48208
48705
  function createCacheKey(provider, target, evalCase, promptInputs) {
48209
- const hash = createHash2("sha256");
48706
+ const hash = createHash3("sha256");
48210
48707
  hash.update(provider.id);
48211
48708
  hash.update(target.name);
48212
48709
  hash.update(evalCase.id);
@@ -48293,13 +48790,13 @@ async function evaluate(config2) {
48293
48790
  let evalCases;
48294
48791
  let testFilePath;
48295
48792
  if (config2.specFile) {
48296
- testFilePath = path39.resolve(config2.specFile);
48793
+ testFilePath = path40.resolve(config2.specFile);
48297
48794
  evalCases = await loadTests(testFilePath, repoRoot, {
48298
48795
  verbose: config2.verbose,
48299
48796
  filter: config2.filter
48300
48797
  });
48301
48798
  } else {
48302
- testFilePath = path39.join(process.cwd(), "__programmatic__.yaml");
48799
+ testFilePath = path40.join(process.cwd(), "__programmatic__.yaml");
48303
48800
  evalCases = (config2.tests ?? []).map((test) => {
48304
48801
  const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
48305
48802
  const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
@@ -48385,11 +48882,11 @@ function computeSummary(results, durationMs) {
48385
48882
  var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
48386
48883
  async function discoverDefaultTarget(repoRoot) {
48387
48884
  const cwd = process.cwd();
48388
- const chain = buildDirectoryChain(path39.join(cwd, "_placeholder"), repoRoot);
48885
+ const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
48389
48886
  for (const dir of chain) {
48390
48887
  for (const candidate of TARGET_FILE_CANDIDATES) {
48391
- const targetsPath = path39.join(dir, candidate);
48392
- if (!existsSync3(targetsPath)) continue;
48888
+ const targetsPath = path40.join(dir, candidate);
48889
+ if (!existsSync4(targetsPath)) continue;
48393
48890
  try {
48394
48891
  const definitions = await readTargetDefinitions(targetsPath);
48395
48892
  const defaultTarget = definitions.find((d) => d.name === "default");
@@ -48403,11 +48900,11 @@ async function discoverDefaultTarget(repoRoot) {
48403
48900
  async function loadEnvHierarchy(repoRoot) {
48404
48901
  const { readFileSync: readFileSync2 } = await import("node:fs");
48405
48902
  const cwd = process.cwd();
48406
- const chain = buildDirectoryChain(path39.join(cwd, "_placeholder"), repoRoot);
48903
+ const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
48407
48904
  const envFiles = [];
48408
48905
  for (const dir of chain) {
48409
- const envPath = path39.join(dir, ".env");
48410
- if (existsSync3(envPath)) envFiles.push(envPath);
48906
+ const envPath = path40.join(dir, ".env");
48907
+ if (existsSync4(envPath)) envFiles.push(envPath);
48411
48908
  }
48412
48909
  for (let i = envFiles.length - 1; i >= 0; i--) {
48413
48910
  try {
@@ -48482,12 +48979,12 @@ var CONFIG_FILE_NAMES = [
48482
48979
  ".agentv/config.js"
48483
48980
  ];
48484
48981
  async function loadTsConfig(projectRoot) {
48485
- const { existsSync: existsSync4 } = await import("node:fs");
48982
+ const { existsSync: existsSync5 } = await import("node:fs");
48486
48983
  const { pathToFileURL } = await import("node:url");
48487
48984
  const { join: join2 } = await import("node:path");
48488
48985
  for (const fileName of CONFIG_FILE_NAMES) {
48489
48986
  const filePath = join2(projectRoot, fileName);
48490
- if (!existsSync4(filePath)) {
48987
+ if (!existsSync5(filePath)) {
48491
48988
  continue;
48492
48989
  }
48493
48990
  try {
@@ -48587,7 +49084,7 @@ var ResponseCache = class {
48587
49084
  async get(key) {
48588
49085
  const filePath = this.keyToPath(key);
48589
49086
  try {
48590
- const data = await readFile11(filePath, "utf8");
49087
+ const data = await readFile12(filePath, "utf8");
48591
49088
  return JSON.parse(data);
48592
49089
  } catch {
48593
49090
  return void 0;
@@ -48595,13 +49092,13 @@ var ResponseCache = class {
48595
49092
  }
48596
49093
  async set(key, value) {
48597
49094
  const filePath = this.keyToPath(key);
48598
- const dir = path40.dirname(filePath);
48599
- await mkdir13(dir, { recursive: true });
48600
- await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
49095
+ const dir = path41.dirname(filePath);
49096
+ await mkdir14(dir, { recursive: true });
49097
+ await writeFile9(filePath, JSON.stringify(value, null, 2), "utf8");
48601
49098
  }
48602
49099
  keyToPath(key) {
48603
49100
  const prefix = key.slice(0, 2);
48604
- return path40.join(this.cachePath, prefix, `${key}.json`);
49101
+ return path41.join(this.cachePath, prefix, `${key}.json`);
48605
49102
  }
48606
49103
  };
48607
49104
  function shouldEnableCache(params) {
@@ -49110,6 +49607,7 @@ export {
49110
49607
  getGitCacheRoot,
49111
49608
  getSubagentsRoot,
49112
49609
  getTraceStateRoot,
49610
+ getWorkspacePoolRoot,
49113
49611
  ensureVSCodeSubagents,
49114
49612
  readTargetDefinitions,
49115
49613
  listTargetNames,
@@ -49171,6 +49669,8 @@ export {
49171
49669
  createTempWorkspace,
49172
49670
  cleanupWorkspace,
49173
49671
  cleanupEvalWorkspaces,
49672
+ computeWorkspaceFingerprint,
49673
+ WorkspacePoolManager,
49174
49674
  RepoManager,
49175
49675
  resolveWorkspaceTemplate,
49176
49676
  executeWorkspaceScript,
@@ -49189,4 +49689,4 @@ export {
49189
49689
  OtelStreamingObserver,
49190
49690
  createAgentKernel
49191
49691
  };
49192
- //# sourceMappingURL=chunk-TK4PB62M.js.map
49692
+ //# sourceMappingURL=chunk-VBK7BJLE.js.map