@agentv/core 2.14.2 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -234,7 +234,7 @@ async function resolveFileReference2(ref, evalFileDir) {
234
234
  const rawPath = extractFilePath(ref);
235
235
  const absolutePattern = path.resolve(evalFileDir, rawPath);
236
236
  if (isGlobPattern(rawPath)) {
237
- const matches = await fg(absolutePattern, {
237
+ const matches = await fg(absolutePattern.replaceAll("\\", "/"), {
238
238
  onlyFiles: true,
239
239
  absolute: true
240
240
  });
@@ -612,6 +612,17 @@ function parseExecutionDefaults(raw, configPath) {
612
612
  } else if (otelFile !== void 0) {
613
613
  logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
614
614
  }
615
+ if (typeof obj.pool_workspaces === "boolean") {
616
+ result.pool_workspaces = obj.pool_workspaces;
617
+ } else if (obj.pool_workspaces !== void 0) {
618
+ logWarning(`Invalid execution.pool_workspaces in ${configPath}, expected boolean`);
619
+ }
620
+ const poolSlots = obj.pool_slots;
621
+ if (typeof poolSlots === "number" && Number.isInteger(poolSlots) && poolSlots >= 1 && poolSlots <= 50) {
622
+ result.pool_slots = poolSlots;
623
+ } else if (poolSlots !== void 0) {
624
+ logWarning(`Invalid execution.pool_slots in ${configPath}, expected integer 1-50`);
625
+ }
615
626
  return Object.keys(result).length > 0 ? result : void 0;
616
627
  }
617
628
  function logWarning(message) {
@@ -2053,6 +2064,7 @@ async function processMessages(options) {
2053
2064
  repoRootPath,
2054
2065
  guidelinePatterns,
2055
2066
  guidelinePaths,
2067
+ treatFileSegmentsAsGuidelines,
2056
2068
  textParts,
2057
2069
  messageType,
2058
2070
  verbose
@@ -2100,16 +2112,20 @@ async function processMessages(options) {
2100
2112
  }
2101
2113
  try {
2102
2114
  const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
2103
- if (messageType === "input" && guidelinePatterns && guidelinePaths) {
2104
- const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
2105
- if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
2106
- guidelinePaths.push(path5.resolve(resolvedPath));
2107
- if (verbose) {
2108
- console.log(` [Guideline] Found: ${displayPath}`);
2109
- console.log(` Resolved to: ${resolvedPath}`);
2110
- }
2111
- continue;
2115
+ const classifyAsGuideline = shouldTreatAsGuideline({
2116
+ messageType,
2117
+ resolvedPath,
2118
+ repoRootPath,
2119
+ guidelinePatterns,
2120
+ treatFileSegmentsAsGuidelines
2121
+ });
2122
+ if (classifyAsGuideline && guidelinePaths) {
2123
+ guidelinePaths.push(path5.resolve(resolvedPath));
2124
+ if (verbose) {
2125
+ console.log(` [Guideline] Found: ${displayPath}`);
2126
+ console.log(` Resolved to: ${resolvedPath}`);
2112
2127
  }
2128
+ continue;
2113
2129
  }
2114
2130
  segments.push({
2115
2131
  type: "file",
@@ -2138,6 +2154,26 @@ async function processMessages(options) {
2138
2154
  }
2139
2155
  return segments;
2140
2156
  }
2157
/**
 * Decides whether a file referenced from a message should be recorded as a
 * guideline instead of being emitted as a regular file segment.
 *
 * Only "input" messages can contribute guidelines. When
 * `treatFileSegmentsAsGuidelines` is truthy every file in an input message is
 * a guideline; otherwise the file's path relative to the repo root is matched
 * against `guidelinePatterns` via `isGuidelineFile`.
 *
 * @param {object} options
 * @param {string} options.messageType - Kind of message being processed.
 * @param {string} options.resolvedPath - Resolved path of the referenced file.
 * @param {string} options.repoRootPath - Repository root used to relativize the path.
 * @param {string[]} [options.guidelinePatterns] - Patterns identifying guideline files.
 * @param {boolean} [options.treatFileSegmentsAsGuidelines] - Force guideline classification.
 * @returns {boolean} Result of the classification.
 */
function shouldTreatAsGuideline(options) {
  const isInputMessage = options.messageType === "input";
  if (!isInputMessage) {
    return false;
  }
  if (options.treatFileSegmentsAsGuidelines) {
    return true;
  }
  const patterns = options.guidelinePatterns;
  // Without configured patterns nothing can match, so skip the path work.
  const patternsConfigured = Boolean(patterns) && patterns.length !== 0;
  if (!patternsConfigured) {
    return false;
  }
  const repoRelativePath = path5.relative(options.repoRootPath, options.resolvedPath);
  return isGuidelineFile(repoRelativePath, patterns);
}
2141
2177
  function asString3(value) {
2142
2178
  return typeof value === "string" ? value : void 0;
2143
2179
  }
@@ -2476,6 +2512,8 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
2476
2512
  for (const guidelinePath of testCase.guideline_paths) {
2477
2513
  console.log(` - ${guidelinePath}`);
2478
2514
  }
2515
+ } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
2516
+ console.log(" No guidelines found (guideline_patterns not configured)");
2479
2517
  } else {
2480
2518
  console.log(" No guidelines found");
2481
2519
  }
@@ -2845,7 +2883,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2845
2883
  } else {
2846
2884
  throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
2847
2885
  }
2848
- const suiteWorkspace = parseWorkspaceConfig(suite.workspace, evalFileDir);
2886
+ const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
2849
2887
  const suiteInputMessages = expandInputShorthand(suite.input);
2850
2888
  const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
2851
2889
  const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
@@ -2881,12 +2919,24 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2881
2919
  }
2882
2920
  const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
2883
2921
  const skipDefaults = caseExecution?.skip_defaults === true;
2884
- const inputMessages = suiteInputMessages && !skipDefaults ? [...suiteInputMessages, ...testInputMessages] : testInputMessages;
2922
+ const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
2923
+ const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
2885
2924
  const hasExpectedMessages = expectedMessages.length > 0;
2886
2925
  const guidelinePaths = [];
2887
2926
  const inputTextParts = [];
2888
- const inputSegments = await processMessages({
2889
- messages: inputMessages,
2927
+ const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
2928
+ messages: effectiveSuiteInputMessages,
2929
+ searchRoots,
2930
+ repoRootPath,
2931
+ guidelinePatterns,
2932
+ guidelinePaths,
2933
+ treatFileSegmentsAsGuidelines: true,
2934
+ textParts: inputTextParts,
2935
+ messageType: "input",
2936
+ verbose
2937
+ }) : [];
2938
+ const testInputSegments = await processMessages({
2939
+ messages: testInputMessages,
2890
2940
  searchRoots,
2891
2941
  repoRootPath,
2892
2942
  guidelinePatterns,
@@ -2895,6 +2945,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2895
2945
  messageType: "input",
2896
2946
  verbose
2897
2947
  });
2948
+ const inputSegments = [...suiteInputSegments, ...testInputSegments];
2898
2949
  const outputSegments = hasExpectedMessages ? await processExpectedMessages({
2899
2950
  messages: expectedMessages,
2900
2951
  searchRoots,
@@ -2942,7 +2993,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2942
2993
  ...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
2943
2994
  ...userFilePaths
2944
2995
  ];
2945
- const caseWorkspace = parseWorkspaceConfig(evalcase.workspace, evalFileDir);
2996
+ const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
2946
2997
  const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
2947
2998
  const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
2948
2999
  const caseTargets = extractTargetsFromTestCase(evalcase);
@@ -2973,6 +3024,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2973
3024
  for (const guidelinePath of testCase.guideline_paths) {
2974
3025
  console.log(` - ${guidelinePath}`);
2975
3026
  }
3027
+ } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
3028
+ console.log(" No guidelines found (guideline_patterns not configured)");
2976
3029
  } else {
2977
3030
  console.log(" No guidelines found");
2978
3031
  }
@@ -3072,6 +3125,26 @@ function parseResetConfig(raw) {
3072
3125
  ...afterEach !== void 0 && { after_each: afterEach }
3073
3126
  };
3074
3127
  }
3128
/**
 * Resolves a `workspace` value into a parsed workspace config.
 *
 * A string is treated as a path, relative to `evalFileDir`, to an external
 * workspace file. That file is read, parsed with `parse2`, required to be an
 * object, and then handed to `parseWorkspaceConfig` with the workspace file's
 * own directory as the base directory. Any non-string value is passed to
 * `parseWorkspaceConfig` together with `evalFileDir`.
 *
 * @param {unknown} raw - Inline workspace object, or a path to a workspace file.
 * @param {string} evalFileDir - Directory of the eval file that referenced the workspace.
 * @returns {Promise<unknown>} Whatever `parseWorkspaceConfig` returns for the resolved input.
 * @throws {Error} When the workspace file is missing, cannot be read, or does
 *   not contain an object. Read failures carry the underlying error as `cause`.
 */
async function resolveWorkspaceConfig(raw, evalFileDir) {
  if (typeof raw !== "string") {
    return parseWorkspaceConfig(raw, evalFileDir);
  }
  const workspaceFilePath = path8.resolve(evalFileDir, raw);
  let content;
  try {
    content = await readFile7(workspaceFilePath, "utf8");
  } catch (error) {
    // Only a missing file is reported as "not found"; other failures (path is
    // a directory, permission denied, ...) get an accurate message. The
    // original error is always preserved as `cause` for debugging.
    if (error?.code === "ENOENT") {
      throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`, {
        cause: error
      });
    }
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(
      `Failed to read workspace file: ${raw} (resolved to ${workspaceFilePath}): ${reason}`,
      { cause: error }
    );
  }
  const parsed = parse2(content);
  if (!isJsonObject(parsed)) {
    throw new Error(
      `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
    );
  }
  const workspaceFileDir = path8.dirname(workspaceFilePath);
  return parseWorkspaceConfig(parsed, workspaceFileDir);
}
3075
3148
  function parseWorkspaceConfig(raw, evalFileDir) {
3076
3149
  if (!isJsonObject(raw)) return void 0;
3077
3150
  const obj = raw;
@@ -5049,7 +5122,7 @@ import { arch, platform } from "node:os";
5049
5122
  import path13 from "node:path";
5050
5123
  import { fileURLToPath as fileURLToPath2 } from "node:url";
5051
5124
  function resolvePlatformCliPath() {
5052
- const os5 = platform();
5125
+ const os3 = platform();
5053
5126
  const cpu = arch();
5054
5127
  const platformMap = {
5055
5128
  linux: "linux",
@@ -5060,13 +5133,13 @@ function resolvePlatformCliPath() {
5060
5133
  x64: "x64",
5061
5134
  arm64: "arm64"
5062
5135
  };
5063
- const osPart = platformMap[os5];
5136
+ const osPart = platformMap[os3];
5064
5137
  const archPart = archMap[cpu];
5065
5138
  if (!osPart || !archPart) {
5066
5139
  return void 0;
5067
5140
  }
5068
5141
  const packageName = `@github/copilot-${osPart}-${archPart}`;
5069
- const binaryName = os5 === "win32" ? "copilot.exe" : "copilot";
5142
+ const binaryName = os3 === "win32" ? "copilot.exe" : "copilot";
5070
5143
  try {
5071
5144
  const resolved = import.meta.resolve(`${packageName}/package.json`);
5072
5145
  const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath2(resolved) : resolved;
@@ -6868,12 +6941,12 @@ var ProviderRegistry = class {
6868
6941
  // src/evaluation/providers/vscode-provider.ts
6869
6942
  import { exec as exec2 } from "node:child_process";
6870
6943
  import { constants as constants3, access as access3, stat as stat4 } from "node:fs/promises";
6871
- import path27 from "node:path";
6944
+ import path28 from "node:path";
6872
6945
  import { promisify as promisify3 } from "node:util";
6873
6946
 
6874
6947
  // src/evaluation/providers/vscode/dispatch/agentDispatch.ts
6875
6948
  import { stat as stat3, writeFile as writeFile4 } from "node:fs/promises";
6876
- import path25 from "node:path";
6949
+ import path26 from "node:path";
6877
6950
 
6878
6951
  // src/evaluation/providers/vscode/utils/fs.ts
6879
6952
  import { constants as constants2 } from "node:fs";
@@ -7141,17 +7214,49 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
7141
7214
  // src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
7142
7215
  import { exec, spawn as spawn3 } from "node:child_process";
7143
7216
  import { mkdir as mkdir7, writeFile as writeFile2 } from "node:fs/promises";
7144
- import path22 from "node:path";
7217
+ import path23 from "node:path";
7145
7218
  import { promisify as promisify2 } from "node:util";
7146
7219
 
7147
7220
  // src/evaluation/providers/vscode/dispatch/constants.ts
7221
+ import path22 from "node:path";
7222
+
7223
+ // src/paths.ts
7148
7224
  import os2 from "node:os";
7149
7225
  import path21 from "node:path";
7226
// Module-level flag so the AGENTV_HOME notice is printed at most once.
var logged = false;
/**
 * Returns the agentv home directory.
 *
 * A non-empty `AGENTV_HOME` environment variable is returned as-is (a warning
 * naming it is printed the first time this happens); otherwise the result is
 * `.agentv` inside the current user's home directory.
 *
 * @returns {string} The agentv home directory.
 */
function getAgentvHome() {
  const override = process.env.AGENTV_HOME;
  if (!override) {
    return path21.join(os2.homedir(), ".agentv");
  }
  if (logged === false) {
    logged = true;
    console.warn(`Using AGENTV_HOME: ${override}`);
  }
  return override;
}
/** @returns {string} The `workspaces` directory under the agentv home. */
function getWorkspacesRoot() {
  const home = getAgentvHome();
  return path21.join(home, "workspaces");
}
/** @returns {string} The `git-cache` directory under the agentv home. */
function getGitCacheRoot() {
  const home = getAgentvHome();
  return path21.join(home, "git-cache");
}
/** @returns {string} The `subagents` directory under the agentv home. */
function getSubagentsRoot() {
  const home = getAgentvHome();
  return path21.join(home, "subagents");
}
/** @returns {string} The `trace-state` directory under the agentv home. */
function getTraceStateRoot() {
  const home = getAgentvHome();
  return path21.join(home, "trace-state");
}
/** @returns {string} The `workspace-pool` directory under the agentv home. */
function getWorkspacePoolRoot() {
  const home = getAgentvHome();
  return path21.join(home, "workspace-pool");
}
7253
+
7254
+ // src/evaluation/providers/vscode/dispatch/constants.ts
7150
7255
  var DEFAULT_LOCK_NAME = "subagent.lock";
7151
7256
  var DEFAULT_ALIVE_FILENAME = ".alive";
7152
7257
  function getDefaultSubagentRoot(vscodeCmd = "code") {
7153
7258
  const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
7154
- return path21.join(os2.homedir(), ".agentv", "subagents", folder);
7259
+ return path22.join(getSubagentsRoot(), folder);
7155
7260
  }
7156
7261
  var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
7157
7262
 
@@ -7165,12 +7270,19 @@ description: 'Wake-up Signal'
7165
7270
  model: Grok Code Fast 1 (copilot)
7166
7271
  ---`;
7167
7272
  function spawnVsCode(vscodeCmd, args, options) {
7168
- const child = spawn3(vscodeCmd, args, {
7273
+ const useShell = options?.shell ?? true;
7274
+ const command = useShell ? shellQuote(vscodeCmd) : vscodeCmd;
7275
+ const child = spawn3(command, args, {
7169
7276
  windowsHide: true,
7170
- shell: options?.shell ?? true,
7277
+ shell: useShell,
7171
7278
  detached: false
7172
7279
  });
7173
- child.on("error", () => {
7280
+ child.on("error", (error) => {
7281
+ const label = options?.label ?? "spawn";
7282
+ const renderedArgs = args.map((value) => JSON.stringify(value)).join(" ");
7283
+ console.error(
7284
+ `[vscode] ${label} failed: command=${JSON.stringify(vscodeCmd)} args=${renderedArgs} error=${error.message}`
7285
+ );
7174
7286
  });
7175
7287
  return child;
7176
7288
  }
@@ -7207,16 +7319,20 @@ async function checkWorkspaceOpened(workspaceName, vscodeCmd) {
7207
7319
  async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir, vscodeCmd, pollInterval = 1, timeout = 60) {
7208
7320
  const alreadyOpen = await checkWorkspaceOpened(workspaceName, vscodeCmd);
7209
7321
  if (alreadyOpen) {
7210
- spawnVsCode(shellQuote(vscodeCmd), [workspacePath]);
7322
+ const child = spawnVsCode(vscodeCmd, [workspacePath], { label: "focus-existing-workspace" });
7323
+ await raceSpawnError(child);
7211
7324
  return true;
7212
7325
  }
7213
- const aliveFile = path22.join(subagentDir, DEFAULT_ALIVE_FILENAME);
7326
+ const aliveFile = path23.join(subagentDir, DEFAULT_ALIVE_FILENAME);
7214
7327
  await removeIfExists(aliveFile);
7215
- const githubAgentsDir = path22.join(subagentDir, ".github", "agents");
7328
+ const githubAgentsDir = path23.join(subagentDir, ".github", "agents");
7216
7329
  await mkdir7(githubAgentsDir, { recursive: true });
7217
- const wakeupDst = path22.join(githubAgentsDir, "wakeup.md");
7330
+ const wakeupDst = path23.join(githubAgentsDir, "wakeup.md");
7218
7331
  await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
7219
- spawnVsCode(shellQuote(vscodeCmd), [workspacePath]);
7332
+ const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
7333
+ label: "open-workspace"
7334
+ });
7335
+ await raceSpawnError(workspaceChild);
7220
7336
  await sleep2(100);
7221
7337
  const wakeupChatId = "wakeup";
7222
7338
  const chatArgs = [
@@ -7224,9 +7340,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
7224
7340
  "chat",
7225
7341
  "-m",
7226
7342
  wakeupChatId,
7227
- `create a file named .alive in the ${path22.basename(subagentDir)} folder`
7343
+ `create a file named .alive in the ${path23.basename(subagentDir)} folder`
7228
7344
  ];
7229
- spawnVsCode(shellQuote(vscodeCmd), chatArgs);
7345
+ const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
7346
+ await raceSpawnError(wakeupChild);
7230
7347
  const start = Date.now();
7231
7348
  while (!await pathExists(aliveFile)) {
7232
7349
  if (Date.now() - start > timeout * 1e3) {
@@ -7238,10 +7355,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
7238
7355
  return true;
7239
7356
  }
7240
7357
  async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
7241
- const workspacePath = path22.join(subagentDir, `${path22.basename(subagentDir)}.code-workspace`);
7242
- const messagesDir = path22.join(subagentDir, "messages");
7358
+ const workspacePath = path23.join(subagentDir, `${path23.basename(subagentDir)}.code-workspace`);
7359
+ const messagesDir = path23.join(subagentDir, "messages");
7243
7360
  await mkdir7(messagesDir, { recursive: true });
7244
- const reqFile = path22.join(messagesDir, `${timestamp}_req.md`);
7361
+ const reqFile = path23.join(messagesDir, `${timestamp}_req.md`);
7245
7362
  await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
7246
7363
  const reqUri = pathToFileUri2(reqFile);
7247
7364
  const chatArgs = ["-r", "chat", "-m", chatId];
@@ -7249,25 +7366,25 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
7249
7366
  chatArgs.push("-a", attachment);
7250
7367
  }
7251
7368
  chatArgs.push("-a", reqFile);
7252
- chatArgs.push(`Follow instructions in [${path22.basename(reqFile)}](${reqUri})`);
7369
+ chatArgs.push(`Follow instructions in [${path23.basename(reqFile)}](${reqUri})`);
7253
7370
  const workspaceReady = await ensureWorkspaceFocused(
7254
7371
  workspacePath,
7255
- path22.basename(subagentDir),
7372
+ path23.basename(subagentDir),
7256
7373
  subagentDir,
7257
7374
  vscodeCmd
7258
7375
  );
7259
7376
  if (!workspaceReady) {
7260
7377
  throw new Error(
7261
- `VS Code workspace '${path22.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
7378
+ `VS Code workspace '${path23.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
7262
7379
  );
7263
7380
  }
7264
7381
  await sleep2(500);
7265
- const child = spawnVsCode(shellQuote(vscodeCmd), chatArgs);
7382
+ const child = spawnVsCode(vscodeCmd, chatArgs, { label: "send-chat" });
7266
7383
  await raceSpawnError(child);
7267
7384
  }
7268
7385
  async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
7269
- const workspacePath = path22.join(subagentDir, `${path22.basename(subagentDir)}.code-workspace`);
7270
- const messagesDir = path22.join(subagentDir, "messages");
7386
+ const workspacePath = path23.join(subagentDir, `${path23.basename(subagentDir)}.code-workspace`);
7387
+ const messagesDir = path23.join(subagentDir, "messages");
7271
7388
  await mkdir7(messagesDir, { recursive: true });
7272
7389
  const chatArgs = ["-r", "chat", "-m", chatId];
7273
7390
  for (const attachment of attachmentPaths) {
@@ -7276,26 +7393,26 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
7276
7393
  chatArgs.push(chatInstruction);
7277
7394
  const workspaceReady = await ensureWorkspaceFocused(
7278
7395
  workspacePath,
7279
- path22.basename(subagentDir),
7396
+ path23.basename(subagentDir),
7280
7397
  subagentDir,
7281
7398
  vscodeCmd
7282
7399
  );
7283
7400
  if (!workspaceReady) {
7284
7401
  throw new Error(
7285
- `VS Code workspace '${path22.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
7402
+ `VS Code workspace '${path23.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
7286
7403
  );
7287
7404
  }
7288
7405
  await sleep2(500);
7289
- const child = spawnVsCode(shellQuote(vscodeCmd), chatArgs);
7406
+ const child = spawnVsCode(vscodeCmd, chatArgs, { label: "send-batch-chat" });
7290
7407
  await raceSpawnError(child);
7291
7408
  }
7292
7409
 
7293
7410
  // src/evaluation/providers/vscode/dispatch/workspaceManager.ts
7294
7411
  import { copyFile, mkdir as mkdir8, readFile as readFile9, readdir as readdir2, stat as stat2, writeFile as writeFile3 } from "node:fs/promises";
7295
- import path24 from "node:path";
7412
+ import path25 from "node:path";
7296
7413
 
7297
7414
  // src/evaluation/providers/vscode/utils/workspace.ts
7298
- import path23 from "node:path";
7415
+ import path24 from "node:path";
7299
7416
  import JSON5 from "json5";
7300
7417
  function transformWorkspacePaths(workspaceContent, templateDir) {
7301
7418
  let workspace;
@@ -7312,10 +7429,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
7312
7429
  }
7313
7430
  const transformedFolders = workspace.folders.map((folder) => {
7314
7431
  const folderPath = folder.path;
7315
- if (path23.isAbsolute(folderPath)) {
7432
+ if (path24.isAbsolute(folderPath)) {
7316
7433
  return folder;
7317
7434
  }
7318
- const absolutePath = path23.resolve(templateDir, folderPath);
7435
+ const absolutePath = path24.resolve(templateDir, folderPath);
7319
7436
  return {
7320
7437
  ...folder,
7321
7438
  path: absolutePath
@@ -7337,19 +7454,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
7337
7454
  if (locationMap && typeof locationMap === "object") {
7338
7455
  const transformedMap = {};
7339
7456
  for (const [locationPath, value] of Object.entries(locationMap)) {
7340
- const isAbsolute = path23.isAbsolute(locationPath);
7457
+ const isAbsolute = path24.isAbsolute(locationPath);
7341
7458
  if (isAbsolute) {
7342
7459
  transformedMap[locationPath] = value;
7343
7460
  } else {
7344
7461
  const firstGlobIndex = locationPath.search(/[*]/);
7345
7462
  if (firstGlobIndex === -1) {
7346
- const resolvedPath = path23.resolve(templateDir, locationPath).replace(/\\/g, "/");
7463
+ const resolvedPath = path24.resolve(templateDir, locationPath).replace(/\\/g, "/");
7347
7464
  transformedMap[resolvedPath] = value;
7348
7465
  } else {
7349
7466
  const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
7350
7467
  const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
7351
7468
  const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
7352
- const resolvedPath = (path23.resolve(templateDir, basePath) + patternPath).replace(
7469
+ const resolvedPath = (path24.resolve(templateDir, basePath) + patternPath).replace(
7353
7470
  /\\/g,
7354
7471
  "/"
7355
7472
  );
@@ -7390,7 +7507,7 @@ async function findUnlockedSubagent(subagentRoot) {
7390
7507
  number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
7391
7508
  })).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
7392
7509
  for (const subagent of subagents) {
7393
- const lockFile = path24.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
7510
+ const lockFile = path25.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
7394
7511
  if (!await pathExists(lockFile)) {
7395
7512
  return subagent.absolutePath;
7396
7513
  }
@@ -7400,7 +7517,7 @@ async function findUnlockedSubagent(subagentRoot) {
7400
7517
  async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
7401
7518
  let workspaceContent;
7402
7519
  if (workspaceTemplate) {
7403
- const workspaceSrc = path24.resolve(workspaceTemplate);
7520
+ const workspaceSrc = path25.resolve(workspaceTemplate);
7404
7521
  if (!await pathExists(workspaceSrc)) {
7405
7522
  throw new Error(`workspace template not found: ${workspaceSrc}`);
7406
7523
  }
@@ -7413,13 +7530,13 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
7413
7530
  } else {
7414
7531
  workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
7415
7532
  }
7416
- const workspaceName = `${path24.basename(subagentDir)}.code-workspace`;
7417
- const workspaceDst = path24.join(subagentDir, workspaceName);
7418
- const templateDir = workspaceTemplate ? path24.dirname(path24.resolve(workspaceTemplate)) : subagentDir;
7533
+ const workspaceName = `${path25.basename(subagentDir)}.code-workspace`;
7534
+ const workspaceDst = path25.join(subagentDir, workspaceName);
7535
+ const templateDir = workspaceTemplate ? path25.dirname(path25.resolve(workspaceTemplate)) : subagentDir;
7419
7536
  const workspaceJson = JSON.stringify(workspaceContent, null, 2);
7420
7537
  let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
7421
7538
  if (cwd) {
7422
- const absCwd = path24.resolve(cwd);
7539
+ const absCwd = path25.resolve(cwd);
7423
7540
  const parsed = JSON.parse(transformedContent);
7424
7541
  const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
7425
7542
  if (!alreadyPresent) {
@@ -7428,35 +7545,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
7428
7545
  }
7429
7546
  }
7430
7547
  await writeFile3(workspaceDst, transformedContent, "utf8");
7431
- const messagesDir = path24.join(subagentDir, "messages");
7548
+ const messagesDir = path25.join(subagentDir, "messages");
7432
7549
  await mkdir8(messagesDir, { recursive: true });
7433
7550
  return { workspace: workspaceDst, messagesDir };
7434
7551
  }
7435
7552
  async function createSubagentLock(subagentDir) {
7436
- const messagesDir = path24.join(subagentDir, "messages");
7553
+ const messagesDir = path25.join(subagentDir, "messages");
7437
7554
  if (await pathExists(messagesDir)) {
7438
7555
  const files = await readdir2(messagesDir);
7439
7556
  await Promise.all(
7440
7557
  files.map(async (file) => {
7441
- const target = path24.join(messagesDir, file);
7558
+ const target = path25.join(messagesDir, file);
7442
7559
  await removeIfExists(target);
7443
7560
  })
7444
7561
  );
7445
7562
  }
7446
- const githubAgentsDir = path24.join(subagentDir, ".github", "agents");
7563
+ const githubAgentsDir = path25.join(subagentDir, ".github", "agents");
7447
7564
  if (await pathExists(githubAgentsDir)) {
7448
7565
  const agentFiles = await readdir2(githubAgentsDir);
7449
7566
  const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
7450
7567
  await Promise.all(
7451
- agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path24.join(githubAgentsDir, file)))
7568
+ agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path25.join(githubAgentsDir, file)))
7452
7569
  );
7453
7570
  }
7454
- const lockFile = path24.join(subagentDir, DEFAULT_LOCK_NAME);
7571
+ const lockFile = path25.join(subagentDir, DEFAULT_LOCK_NAME);
7455
7572
  await writeFile3(lockFile, "", { encoding: "utf8" });
7456
7573
  return lockFile;
7457
7574
  }
7458
7575
  async function removeSubagentLock(subagentDir) {
7459
- const lockFile = path24.join(subagentDir, DEFAULT_LOCK_NAME);
7576
+ const lockFile = path25.join(subagentDir, DEFAULT_LOCK_NAME);
7460
7577
  await removeIfExists(lockFile);
7461
7578
  }
7462
7579
  async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
@@ -7476,9 +7593,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
7476
7593
  return 1;
7477
7594
  }
7478
7595
  if (promptFile) {
7479
- const githubAgentsDir = path24.join(subagentDir, ".github", "agents");
7596
+ const githubAgentsDir = path25.join(subagentDir, ".github", "agents");
7480
7597
  await mkdir8(githubAgentsDir, { recursive: true });
7481
- const agentFile = path24.join(githubAgentsDir, `${chatId}.md`);
7598
+ const agentFile = path25.join(githubAgentsDir, `${chatId}.md`);
7482
7599
  try {
7483
7600
  await copyFile(promptFile, agentFile);
7484
7601
  } catch (error) {
@@ -7497,7 +7614,7 @@ async function resolvePromptFile(promptFile) {
7497
7614
  if (!promptFile) {
7498
7615
  return void 0;
7499
7616
  }
7500
- const resolvedPrompt = path25.resolve(promptFile);
7617
+ const resolvedPrompt = path26.resolve(promptFile);
7501
7618
  if (!await pathExists(resolvedPrompt)) {
7502
7619
  throw new Error(`Prompt file not found: ${resolvedPrompt}`);
7503
7620
  }
@@ -7513,7 +7630,7 @@ async function resolveAttachments(extraAttachments) {
7513
7630
  }
7514
7631
  const resolved = [];
7515
7632
  for (const attachment of extraAttachments) {
7516
- const resolvedPath = path25.resolve(attachment);
7633
+ const resolvedPath = path26.resolve(attachment);
7517
7634
  if (!await pathExists(resolvedPath)) {
7518
7635
  throw new Error(`Attachment not found: ${resolvedPath}`);
7519
7636
  }
@@ -7555,7 +7672,7 @@ async function dispatchAgentSession(options) {
7555
7672
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
7556
7673
  };
7557
7674
  }
7558
- const subagentName = path25.basename(subagentDir);
7675
+ const subagentName = path26.basename(subagentDir);
7559
7676
  const chatId = Math.random().toString(16).slice(2, 10);
7560
7677
  const preparationResult = await prepareSubagentDirectory(
7561
7678
  subagentDir,
@@ -7583,9 +7700,9 @@ async function dispatchAgentSession(options) {
7583
7700
  };
7584
7701
  }
7585
7702
  const timestamp = generateTimestamp();
7586
- const messagesDir = path25.join(subagentDir, "messages");
7587
- const responseFileTmp = path25.join(messagesDir, `${timestamp}_res.tmp.md`);
7588
- const responseFileFinal = path25.join(messagesDir, `${timestamp}_res.md`);
7703
+ const messagesDir = path26.join(subagentDir, "messages");
7704
+ const responseFileTmp = path26.join(messagesDir, `${timestamp}_res.tmp.md`);
7705
+ const responseFileFinal = path26.join(messagesDir, `${timestamp}_res.md`);
7589
7706
  const requestInstructions = createRequestPrompt(
7590
7707
  userQuery,
7591
7708
  responseFileTmp,
@@ -7690,7 +7807,7 @@ async function dispatchBatchAgent(options) {
7690
7807
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
7691
7808
  };
7692
7809
  }
7693
- subagentName = path25.basename(subagentDir);
7810
+ subagentName = path26.basename(subagentDir);
7694
7811
  const chatId = Math.random().toString(16).slice(2, 10);
7695
7812
  const preparationResult = await prepareSubagentDirectory(
7696
7813
  subagentDir,
@@ -7721,17 +7838,17 @@ async function dispatchBatchAgent(options) {
7721
7838
  };
7722
7839
  }
7723
7840
  const timestamp = generateTimestamp();
7724
- const messagesDir = path25.join(subagentDir, "messages");
7841
+ const messagesDir = path26.join(subagentDir, "messages");
7725
7842
  requestFiles = userQueries.map(
7726
- (_, index) => path25.join(messagesDir, `${timestamp}_${index}_req.md`)
7843
+ (_, index) => path26.join(messagesDir, `${timestamp}_${index}_req.md`)
7727
7844
  );
7728
7845
  const responseTmpFiles = userQueries.map(
7729
- (_, index) => path25.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
7846
+ (_, index) => path26.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
7730
7847
  );
7731
7848
  responseFilesFinal = userQueries.map(
7732
- (_, index) => path25.join(messagesDir, `${timestamp}_${index}_res.md`)
7849
+ (_, index) => path26.join(messagesDir, `${timestamp}_${index}_res.md`)
7733
7850
  );
7734
- const orchestratorFile = path25.join(messagesDir, `${timestamp}_orchestrator.md`);
7851
+ const orchestratorFile = path26.join(messagesDir, `${timestamp}_orchestrator.md`);
7735
7852
  if (!dryRun) {
7736
7853
  await Promise.all(
7737
7854
  userQueries.map((query, index) => {
@@ -7817,7 +7934,7 @@ async function dispatchBatchAgent(options) {
7817
7934
 
7818
7935
  // src/evaluation/providers/vscode/dispatch/provision.ts
7819
7936
  import { writeFile as writeFile5 } from "node:fs/promises";
7820
- import path26 from "node:path";
7937
+ import path27 from "node:path";
7821
7938
  var DEFAULT_WORKSPACE_TEMPLATE2 = {
7822
7939
  folders: [
7823
7940
  {
@@ -7848,7 +7965,7 @@ async function provisionSubagents(options) {
7848
7965
  if (!Number.isInteger(subagents) || subagents < 1) {
7849
7966
  throw new Error("subagents must be a positive integer");
7850
7967
  }
7851
- const targetPath = path26.resolve(targetRoot);
7968
+ const targetPath = path27.resolve(targetRoot);
7852
7969
  if (!dryRun) {
7853
7970
  await ensureDir(targetPath);
7854
7971
  }
@@ -7868,7 +7985,7 @@ async function provisionSubagents(options) {
7868
7985
  continue;
7869
7986
  }
7870
7987
  highestNumber = Math.max(highestNumber, parsed);
7871
- const lockFile = path26.join(entry.absolutePath, lockName);
7988
+ const lockFile = path27.join(entry.absolutePath, lockName);
7872
7989
  const locked = await pathExists(lockFile);
7873
7990
  if (locked) {
7874
7991
  lockedSubagents.add(entry.absolutePath);
@@ -7885,10 +8002,10 @@ async function provisionSubagents(options) {
7885
8002
  break;
7886
8003
  }
7887
8004
  const subagentDir = subagent.absolutePath;
7888
- const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
7889
- const lockFile = path26.join(subagentDir, lockName);
7890
- const workspaceDst = path26.join(subagentDir, `${path26.basename(subagentDir)}.code-workspace`);
7891
- const wakeupDst = path26.join(githubAgentsDir, "wakeup.md");
8005
+ const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
8006
+ const lockFile = path27.join(subagentDir, lockName);
8007
+ const workspaceDst = path27.join(subagentDir, `${path27.basename(subagentDir)}.code-workspace`);
8008
+ const wakeupDst = path27.join(githubAgentsDir, "wakeup.md");
7892
8009
  const isLocked = await pathExists(lockFile);
7893
8010
  if (isLocked && !force) {
7894
8011
  continue;
@@ -7926,10 +8043,10 @@ async function provisionSubagents(options) {
7926
8043
  let nextIndex = highestNumber;
7927
8044
  while (subagentsProvisioned < subagents) {
7928
8045
  nextIndex += 1;
7929
- const subagentDir = path26.join(targetPath, `subagent-${nextIndex}`);
7930
- const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
7931
- const workspaceDst = path26.join(subagentDir, `${path26.basename(subagentDir)}.code-workspace`);
7932
- const wakeupDst = path26.join(githubAgentsDir, "wakeup.md");
8046
+ const subagentDir = path27.join(targetPath, `subagent-${nextIndex}`);
8047
+ const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
8048
+ const workspaceDst = path27.join(subagentDir, `${path27.basename(subagentDir)}.code-workspace`);
8049
+ const wakeupDst = path27.join(githubAgentsDir, "wakeup.md");
7933
8050
  if (!dryRun) {
7934
8051
  await ensureDir(subagentDir);
7935
8052
  await ensureDir(githubAgentsDir);
@@ -7955,8 +8072,6 @@ var AGENTV_REQUEST_TEMPLATE = `[[ ## task ## ]]
7955
8072
 
7956
8073
  **IMPORTANT**: Follow these exact steps:
7957
8074
  1. Create and write your complete response to: {{responseFileTmp}}
7958
- - All intended file outputs/changes MUST be written in your response file.
7959
- - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
7960
8075
  2. When completely finished, run these PowerShell commands to signal completion:
7961
8076
  \`\`\`
7962
8077
  Move-Item -LiteralPath '{{responseFileTmp}}' -Destination '{{responseFileFinal}}'
@@ -7973,8 +8088,6 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
7973
8088
 
7974
8089
  **IMPORTANT**: Follow these exact steps:
7975
8090
  1. Create and write your complete response to: {{responseFileTmp}}
7976
- - All intended file outputs/changes MUST be written in your response file.
7977
- - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
7978
8091
  2. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
7979
8092
  3. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
7980
8093
  `;
@@ -8123,7 +8236,7 @@ var VSCodeProvider = class {
8123
8236
  async function locateVSCodeExecutable(candidate) {
8124
8237
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
8125
8238
  if (includesPathSeparator) {
8126
- const resolved = path27.isAbsolute(candidate) ? candidate : path27.resolve(candidate);
8239
+ const resolved = path28.isAbsolute(candidate) ? candidate : path28.resolve(candidate);
8127
8240
  try {
8128
8241
  await access3(resolved, constants3.F_OK);
8129
8242
  return resolved;
@@ -8152,7 +8265,7 @@ async function resolveWorkspaceTemplateFile(template) {
8152
8265
  return void 0;
8153
8266
  }
8154
8267
  try {
8155
- const stats = await stat4(path27.resolve(template));
8268
+ const stats = await stat4(path28.resolve(template));
8156
8269
  return stats.isFile() ? template : void 0;
8157
8270
  } catch {
8158
8271
  return template;
@@ -8178,7 +8291,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
8178
8291
  return "";
8179
8292
  }
8180
8293
  const buildList = (files) => files.map((absolutePath) => {
8181
- const fileName = path27.basename(absolutePath);
8294
+ const fileName = path28.basename(absolutePath);
8182
8295
  const fileUri = pathToFileUri3(absolutePath);
8183
8296
  return `* [${fileName}](${fileUri})`;
8184
8297
  });
@@ -8203,8 +8316,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
8203
8316
  }
8204
8317
  const unique = /* @__PURE__ */ new Map();
8205
8318
  for (const attachment of attachments) {
8206
- const absolutePath = path27.resolve(attachment);
8207
- const normalized = absolutePath.split(path27.sep).join("/");
8319
+ const absolutePath = path28.resolve(attachment);
8320
+ const normalized = absolutePath.split(path28.sep).join("/");
8208
8321
  if (isGuidelineFile(normalized, guidelinePatterns)) {
8209
8322
  if (!unique.has(absolutePath)) {
8210
8323
  unique.set(absolutePath, absolutePath);
@@ -8219,7 +8332,7 @@ function collectAttachmentFiles(attachments) {
8219
8332
  }
8220
8333
  const unique = /* @__PURE__ */ new Map();
8221
8334
  for (const attachment of attachments) {
8222
- const absolutePath = path27.resolve(attachment);
8335
+ const absolutePath = path28.resolve(attachment);
8223
8336
  if (!unique.has(absolutePath)) {
8224
8337
  unique.set(absolutePath, absolutePath);
8225
8338
  }
@@ -8227,7 +8340,7 @@ function collectAttachmentFiles(attachments) {
8227
8340
  return Array.from(unique.values());
8228
8341
  }
8229
8342
  function pathToFileUri3(filePath) {
8230
- const absolutePath = path27.isAbsolute(filePath) ? filePath : path27.resolve(filePath);
8343
+ const absolutePath = path28.isAbsolute(filePath) ? filePath : path28.resolve(filePath);
8231
8344
  const normalizedPath = absolutePath.replace(/\\/g, "/");
8232
8345
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
8233
8346
  return `file:///${normalizedPath}`;
@@ -8240,7 +8353,7 @@ function normalizeAttachments(attachments) {
8240
8353
  }
8241
8354
  const deduped = /* @__PURE__ */ new Set();
8242
8355
  for (const attachment of attachments) {
8243
- deduped.add(path27.resolve(attachment));
8356
+ deduped.add(path28.resolve(attachment));
8244
8357
  }
8245
8358
  return Array.from(deduped);
8246
8359
  }
@@ -8249,7 +8362,7 @@ function mergeAttachments(all) {
8249
8362
  for (const list of all) {
8250
8363
  if (!list) continue;
8251
8364
  for (const inputFile of list) {
8252
- deduped.add(path27.resolve(inputFile));
8365
+ deduped.add(path28.resolve(inputFile));
8253
8366
  }
8254
8367
  }
8255
8368
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -8298,7 +8411,7 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
8298
8411
  // src/evaluation/providers/targets-file.ts
8299
8412
  import { constants as constants4 } from "node:fs";
8300
8413
  import { access as access4, readFile as readFile10 } from "node:fs/promises";
8301
- import path28 from "node:path";
8414
+ import path29 from "node:path";
8302
8415
  import { parse as parse3 } from "yaml";
8303
8416
  function isRecord(value) {
8304
8417
  return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -8335,7 +8448,7 @@ async function fileExists3(filePath) {
8335
8448
  }
8336
8449
  }
8337
8450
  async function readTargetDefinitions(filePath) {
8338
- const absolutePath = path28.resolve(filePath);
8451
+ const absolutePath = path29.resolve(filePath);
8339
8452
  if (!await fileExists3(absolutePath)) {
8340
8453
  throw new Error(`targets.yaml not found at ${absolutePath}`);
8341
8454
  }
@@ -8355,16 +8468,16 @@ function listTargetNames(definitions) {
8355
8468
  }
8356
8469
 
8357
8470
  // src/evaluation/providers/provider-discovery.ts
8358
- import path29 from "node:path";
8471
+ import path30 from "node:path";
8359
8472
  import fg2 from "fast-glob";
8360
8473
  async function discoverProviders(registry, baseDir) {
8361
8474
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
8362
8475
  const candidateDirs = [];
8363
- let dir = path29.resolve(baseDir);
8364
- const root = path29.parse(dir).root;
8476
+ let dir = path30.resolve(baseDir);
8477
+ const root = path30.parse(dir).root;
8365
8478
  while (dir !== root) {
8366
- candidateDirs.push(path29.join(dir, ".agentv", "providers"));
8367
- dir = path29.dirname(dir);
8479
+ candidateDirs.push(path30.join(dir, ".agentv", "providers"));
8480
+ dir = path30.dirname(dir);
8368
8481
  }
8369
8482
  let files = [];
8370
8483
  for (const providersDir of candidateDirs) {
@@ -8380,7 +8493,7 @@ async function discoverProviders(registry, baseDir) {
8380
8493
  }
8381
8494
  const discoveredKinds = [];
8382
8495
  for (const filePath of files) {
8383
- const basename = path29.basename(filePath);
8496
+ const basename = path30.basename(filePath);
8384
8497
  const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
8385
8498
  if (registry.has(kindName)) {
8386
8499
  continue;
@@ -8587,16 +8700,16 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
8587
8700
  });
8588
8701
  }
8589
8702
  async function execShellWithStdin(command, stdinPayload, options = {}) {
8590
- const { mkdir: mkdir14, readFile: readFile12, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
8703
+ const { mkdir: mkdir15, readFile: readFile13, rm: rm7, writeFile: writeFile10 } = await import("node:fs/promises");
8591
8704
  const { tmpdir: tmpdir3 } = await import("node:os");
8592
- const path40 = await import("node:path");
8705
+ const path42 = await import("node:path");
8593
8706
  const { randomUUID: randomUUID8 } = await import("node:crypto");
8594
- const dir = path40.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
8595
- await mkdir14(dir, { recursive: true });
8596
- const stdinPath = path40.join(dir, "stdin.txt");
8597
- const stdoutPath = path40.join(dir, "stdout.txt");
8598
- const stderrPath = path40.join(dir, "stderr.txt");
8599
- await writeFile9(stdinPath, stdinPayload, "utf8");
8707
+ const dir = path42.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
8708
+ await mkdir15(dir, { recursive: true });
8709
+ const stdinPath = path42.join(dir, "stdin.txt");
8710
+ const stdoutPath = path42.join(dir, "stdout.txt");
8711
+ const stderrPath = path42.join(dir, "stderr.txt");
8712
+ await writeFile10(stdinPath, stdinPayload, "utf8");
8600
8713
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
8601
8714
  const { spawn: spawn4 } = await import("node:child_process");
8602
8715
  try {
@@ -8625,11 +8738,11 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
8625
8738
  resolve(code ?? 0);
8626
8739
  });
8627
8740
  });
8628
- const stdout = (await readFile12(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
8629
- const stderr = (await readFile12(stderrPath, "utf8")).replace(/\r\n/g, "\n");
8741
+ const stdout = (await readFile13(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
8742
+ const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
8630
8743
  return { stdout, stderr, exitCode };
8631
8744
  } finally {
8632
- await rm6(dir, { recursive: true, force: true });
8745
+ await rm7(dir, { recursive: true, force: true });
8633
8746
  }
8634
8747
  }
8635
8748
 
@@ -8947,7 +9060,7 @@ var CodeEvaluator = class {
8947
9060
  outputPath,
8948
9061
  guidelineFiles: context.evalCase.guideline_paths,
8949
9062
  inputFiles: context.evalCase.file_paths.filter(
8950
- (path40) => !context.evalCase.guideline_paths.includes(path40)
9063
+ (path42) => !context.evalCase.guideline_paths.includes(path42)
8951
9064
  ),
8952
9065
  input: context.evalCase.input,
8953
9066
  trace: context.trace ?? null,
@@ -9197,6 +9310,8 @@ ${context.fileChanges}`;
9197
9310
  };
9198
9311
  } catch (e) {
9199
9312
  const message = e instanceof Error ? e.message : String(e);
9313
+ const evalName = context.evaluator?.name ?? "llm-judge";
9314
+ console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
9200
9315
  return {
9201
9316
  score: 0,
9202
9317
  verdict: "skip",
@@ -9225,24 +9340,39 @@ ${context.fileChanges}`;
9225
9340
  systemPrompt,
9226
9341
  target: judgeProvider.targetName
9227
9342
  };
9228
- const { data, tokenUsage } = await this.runWithRetry({
9229
- context,
9230
- judgeProvider,
9231
- systemPrompt,
9232
- userPrompt: prompt,
9233
- schema: rubricEvaluationSchema
9234
- });
9235
- const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
9236
- return {
9237
- score,
9238
- verdict,
9239
- hits,
9240
- misses,
9241
- expectedAspectCount: rubrics.length,
9242
- reasoning: data.overall_reasoning,
9243
- evaluatorRawRequest,
9244
- tokenUsage
9245
- };
9343
+ try {
9344
+ const { data, tokenUsage } = await this.runWithRetry({
9345
+ context,
9346
+ judgeProvider,
9347
+ systemPrompt,
9348
+ userPrompt: prompt,
9349
+ schema: rubricEvaluationSchema
9350
+ });
9351
+ const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
9352
+ return {
9353
+ score,
9354
+ verdict,
9355
+ hits,
9356
+ misses,
9357
+ expectedAspectCount: rubrics.length,
9358
+ reasoning: data.overall_reasoning,
9359
+ evaluatorRawRequest,
9360
+ tokenUsage
9361
+ };
9362
+ } catch (e) {
9363
+ const message = e instanceof Error ? e.message : String(e);
9364
+ const evalName = context.evaluator?.name ?? "llm-judge";
9365
+ console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
9366
+ return {
9367
+ score: 0,
9368
+ verdict: "skip",
9369
+ hits: [],
9370
+ misses: [`Judge parse failure after 3 attempts: ${message}`],
9371
+ expectedAspectCount: rubrics.length,
9372
+ reasoning: `Judge parse failure after 3 attempts: ${message}`,
9373
+ evaluatorRawRequest
9374
+ };
9375
+ }
9246
9376
  }
9247
9377
  /**
9248
9378
  * Evaluate using score-range rubrics (analytic rubric scoring).
@@ -9256,25 +9386,40 @@ ${context.fileChanges}`;
9256
9386
  systemPrompt,
9257
9387
  target: judgeProvider.targetName
9258
9388
  };
9259
- const { data, tokenUsage } = await this.runWithRetry({
9260
- context,
9261
- judgeProvider,
9262
- systemPrompt,
9263
- userPrompt: prompt,
9264
- schema: scoreRangeEvaluationSchema
9265
- });
9266
- const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
9267
- return {
9268
- score,
9269
- verdict,
9270
- hits,
9271
- misses,
9272
- expectedAspectCount: rubrics.length,
9273
- reasoning: data.overall_reasoning,
9274
- evaluatorRawRequest,
9275
- details,
9276
- tokenUsage
9277
- };
9389
+ try {
9390
+ const { data, tokenUsage } = await this.runWithRetry({
9391
+ context,
9392
+ judgeProvider,
9393
+ systemPrompt,
9394
+ userPrompt: prompt,
9395
+ schema: scoreRangeEvaluationSchema
9396
+ });
9397
+ const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
9398
+ return {
9399
+ score,
9400
+ verdict,
9401
+ hits,
9402
+ misses,
9403
+ expectedAspectCount: rubrics.length,
9404
+ reasoning: data.overall_reasoning,
9405
+ evaluatorRawRequest,
9406
+ details,
9407
+ tokenUsage
9408
+ };
9409
+ } catch (e) {
9410
+ const message = e instanceof Error ? e.message : String(e);
9411
+ const evalName = context.evaluator?.name ?? "llm-judge";
9412
+ console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
9413
+ return {
9414
+ score: 0,
9415
+ verdict: "skip",
9416
+ hits: [],
9417
+ misses: [`Judge parse failure after 3 attempts: ${message}`],
9418
+ expectedAspectCount: rubrics.length,
9419
+ reasoning: `Judge parse failure after 3 attempts: ${message}`,
9420
+ evaluatorRawRequest
9421
+ };
9422
+ }
9278
9423
  }
9279
9424
  /**
9280
9425
  * Build prompt for score-range rubric evaluation.
@@ -9560,19 +9705,13 @@ var CompositeEvaluator = class {
9560
9705
  runWeightedAverage(results, weights) {
9561
9706
  let totalWeight = 0;
9562
9707
  let weightedSum = 0;
9708
+ let evaluatedCount = 0;
9563
9709
  const allHits = [];
9564
9710
  const allMisses = [];
9565
9711
  const reasoningParts = [];
9566
9712
  const scores = [];
9567
9713
  for (const member of results) {
9568
9714
  const weight = weights?.[member.id] ?? 1;
9569
- totalWeight += weight;
9570
- weightedSum += member.result.score * weight;
9571
- allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
9572
- allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
9573
- if (member.result.reasoning) {
9574
- reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
9575
- }
9576
9715
  scores.push({
9577
9716
  name: member.id,
9578
9717
  type: member.type,
@@ -9587,6 +9726,32 @@ var CompositeEvaluator = class {
9587
9726
  details: member.result.details,
9588
9727
  tokenUsage: member.result.tokenUsage
9589
9728
  });
9729
+ if (member.result.verdict === "skip") {
9730
+ continue;
9731
+ }
9732
+ evaluatedCount++;
9733
+ totalWeight += weight;
9734
+ weightedSum += member.result.score * weight;
9735
+ allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
9736
+ allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
9737
+ if (member.result.reasoning) {
9738
+ reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
9739
+ }
9740
+ }
9741
+ if (evaluatedCount === 0 && results.length > 0) {
9742
+ return {
9743
+ score: 0,
9744
+ verdict: "skip",
9745
+ hits: [],
9746
+ misses: [],
9747
+ expectedAspectCount: 1,
9748
+ reasoning: "All evaluators skipped (infrastructure failure)",
9749
+ evaluatorRawRequest: {
9750
+ aggregator: "weighted_average",
9751
+ ...weights ? { weights } : {}
9752
+ },
9753
+ scores
9754
+ };
9590
9755
  }
9591
9756
  const finalScore = totalWeight > 0 ? weightedSum / totalWeight : 0;
9592
9757
  return {
@@ -9610,19 +9775,8 @@ var CompositeEvaluator = class {
9610
9775
  const reasoningParts = [];
9611
9776
  let passingCount = 0;
9612
9777
  let borderlineCount = 0;
9778
+ let evaluatedCount = 0;
9613
9779
  for (const member of results) {
9614
- const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
9615
- if (isPassing) {
9616
- passingCount++;
9617
- if (member.result.verdict === "borderline") {
9618
- borderlineCount++;
9619
- }
9620
- }
9621
- allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
9622
- allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
9623
- if (member.result.reasoning) {
9624
- reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
9625
- }
9626
9780
  scores.push({
9627
9781
  name: member.id,
9628
9782
  type: member.type,
@@ -9636,8 +9790,39 @@ var CompositeEvaluator = class {
9636
9790
  details: member.result.details,
9637
9791
  tokenUsage: member.result.tokenUsage
9638
9792
  });
9793
+ if (member.result.verdict === "skip") {
9794
+ continue;
9795
+ }
9796
+ evaluatedCount++;
9797
+ const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
9798
+ if (isPassing) {
9799
+ passingCount++;
9800
+ if (member.result.verdict === "borderline") {
9801
+ borderlineCount++;
9802
+ }
9803
+ }
9804
+ allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
9805
+ allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
9806
+ if (member.result.reasoning) {
9807
+ reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
9808
+ }
9809
+ }
9810
+ if (evaluatedCount === 0 && results.length > 0) {
9811
+ return {
9812
+ score: 0,
9813
+ verdict: "skip",
9814
+ hits: [],
9815
+ misses: [],
9816
+ expectedAspectCount: 1,
9817
+ reasoning: "All evaluators skipped (infrastructure failure)",
9818
+ evaluatorRawRequest: {
9819
+ aggregator: "threshold",
9820
+ threshold
9821
+ },
9822
+ scores
9823
+ };
9639
9824
  }
9640
- const totalCount = results.length;
9825
+ const totalCount = evaluatedCount;
9641
9826
  const score = totalCount > 0 ? passingCount / totalCount : 0;
9642
9827
  const pass = score >= threshold;
9643
9828
  if (pass && borderlineCount > 0) {
@@ -10145,115 +10330,115 @@ var FieldAccuracyEvaluator = class {
10145
10330
  * Evaluate a single field against the expected value.
10146
10331
  */
10147
10332
  evaluateField(fieldConfig, candidateData, expectedData) {
10148
- const { path: path40, match, required = true, weight = 1 } = fieldConfig;
10149
- const candidateValue = resolvePath(candidateData, path40);
10150
- const expectedValue = resolvePath(expectedData, path40);
10333
+ const { path: path42, match, required = true, weight = 1 } = fieldConfig;
10334
+ const candidateValue = resolvePath(candidateData, path42);
10335
+ const expectedValue = resolvePath(expectedData, path42);
10151
10336
  if (expectedValue === void 0) {
10152
10337
  return {
10153
- path: path40,
10338
+ path: path42,
10154
10339
  score: 1,
10155
10340
  // No expected value means no comparison needed
10156
10341
  weight,
10157
10342
  hit: true,
10158
- message: `${path40}: no expected value`
10343
+ message: `${path42}: no expected value`
10159
10344
  };
10160
10345
  }
10161
10346
  if (candidateValue === void 0) {
10162
10347
  if (required) {
10163
10348
  return {
10164
- path: path40,
10349
+ path: path42,
10165
10350
  score: 0,
10166
10351
  weight,
10167
10352
  hit: false,
10168
- message: `${path40} (required, missing)`
10353
+ message: `${path42} (required, missing)`
10169
10354
  };
10170
10355
  }
10171
10356
  return {
10172
- path: path40,
10357
+ path: path42,
10173
10358
  score: 1,
10174
10359
  // Don't penalize missing optional fields
10175
10360
  weight: 0,
10176
10361
  // Zero weight means it won't affect the score
10177
10362
  hit: true,
10178
- message: `${path40}: optional field missing`
10363
+ message: `${path42}: optional field missing`
10179
10364
  };
10180
10365
  }
10181
10366
  switch (match) {
10182
10367
  case "exact":
10183
- return this.compareExact(path40, candidateValue, expectedValue, weight);
10368
+ return this.compareExact(path42, candidateValue, expectedValue, weight);
10184
10369
  case "numeric_tolerance":
10185
10370
  return this.compareNumericTolerance(
10186
- path40,
10371
+ path42,
10187
10372
  candidateValue,
10188
10373
  expectedValue,
10189
10374
  fieldConfig,
10190
10375
  weight
10191
10376
  );
10192
10377
  case "date":
10193
- return this.compareDate(path40, candidateValue, expectedValue, fieldConfig, weight);
10378
+ return this.compareDate(path42, candidateValue, expectedValue, fieldConfig, weight);
10194
10379
  default:
10195
10380
  return {
10196
- path: path40,
10381
+ path: path42,
10197
10382
  score: 0,
10198
10383
  weight,
10199
10384
  hit: false,
10200
- message: `${path40}: unknown match type "${match}"`
10385
+ message: `${path42}: unknown match type "${match}"`
10201
10386
  };
10202
10387
  }
10203
10388
  }
10204
10389
  /**
10205
10390
  * Exact equality comparison.
10206
10391
  */
10207
- compareExact(path40, candidateValue, expectedValue, weight) {
10392
+ compareExact(path42, candidateValue, expectedValue, weight) {
10208
10393
  if (deepEqual(candidateValue, expectedValue)) {
10209
10394
  return {
10210
- path: path40,
10395
+ path: path42,
10211
10396
  score: 1,
10212
10397
  weight,
10213
10398
  hit: true,
10214
- message: path40
10399
+ message: path42
10215
10400
  };
10216
10401
  }
10217
10402
  if (typeof candidateValue !== typeof expectedValue) {
10218
10403
  return {
10219
- path: path40,
10404
+ path: path42,
10220
10405
  score: 0,
10221
10406
  weight,
10222
10407
  hit: false,
10223
- message: `${path40} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
10408
+ message: `${path42} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
10224
10409
  };
10225
10410
  }
10226
10411
  return {
10227
- path: path40,
10412
+ path: path42,
10228
10413
  score: 0,
10229
10414
  weight,
10230
10415
  hit: false,
10231
- message: `${path40} (value mismatch)`
10416
+ message: `${path42} (value mismatch)`
10232
10417
  };
10233
10418
  }
10234
10419
  /**
10235
10420
  * Numeric comparison with absolute or relative tolerance.
10236
10421
  */
10237
- compareNumericTolerance(path40, candidateValue, expectedValue, fieldConfig, weight) {
10422
+ compareNumericTolerance(path42, candidateValue, expectedValue, fieldConfig, weight) {
10238
10423
  const { tolerance = 0, relative = false } = fieldConfig;
10239
10424
  const candidateNum = toNumber2(candidateValue);
10240
10425
  const expectedNum = toNumber2(expectedValue);
10241
10426
  if (candidateNum === null || expectedNum === null) {
10242
10427
  return {
10243
- path: path40,
10428
+ path: path42,
10244
10429
  score: 0,
10245
10430
  weight,
10246
10431
  hit: false,
10247
- message: `${path40} (non-numeric value)`
10432
+ message: `${path42} (non-numeric value)`
10248
10433
  };
10249
10434
  }
10250
10435
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
10251
10436
  return {
10252
- path: path40,
10437
+ path: path42,
10253
10438
  score: 0,
10254
10439
  weight,
10255
10440
  hit: false,
10256
- message: `${path40} (invalid numeric value)`
10441
+ message: `${path42} (invalid numeric value)`
10257
10442
  };
10258
10443
  }
10259
10444
  const diff = Math.abs(candidateNum - expectedNum);
@@ -10266,61 +10451,61 @@ var FieldAccuracyEvaluator = class {
10266
10451
  }
10267
10452
  if (withinTolerance) {
10268
10453
  return {
10269
- path: path40,
10454
+ path: path42,
10270
10455
  score: 1,
10271
10456
  weight,
10272
10457
  hit: true,
10273
- message: `${path40} (within tolerance: diff=${diff.toFixed(2)})`
10458
+ message: `${path42} (within tolerance: diff=${diff.toFixed(2)})`
10274
10459
  };
10275
10460
  }
10276
10461
  return {
10277
- path: path40,
10462
+ path: path42,
10278
10463
  score: 0,
10279
10464
  weight,
10280
10465
  hit: false,
10281
- message: `${path40} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
10466
+ message: `${path42} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
10282
10467
  };
10283
10468
  }
10284
10469
  /**
10285
10470
  * Date comparison with format normalization.
10286
10471
  */
10287
- compareDate(path40, candidateValue, expectedValue, fieldConfig, weight) {
10472
+ compareDate(path42, candidateValue, expectedValue, fieldConfig, weight) {
10288
10473
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
10289
10474
  const candidateDate = parseDate(String(candidateValue), formats);
10290
10475
  const expectedDate = parseDate(String(expectedValue), formats);
10291
10476
  if (candidateDate === null) {
10292
10477
  return {
10293
- path: path40,
10478
+ path: path42,
10294
10479
  score: 0,
10295
10480
  weight,
10296
10481
  hit: false,
10297
- message: `${path40} (unparseable candidate date)`
10482
+ message: `${path42} (unparseable candidate date)`
10298
10483
  };
10299
10484
  }
10300
10485
  if (expectedDate === null) {
10301
10486
  return {
10302
- path: path40,
10487
+ path: path42,
10303
10488
  score: 0,
10304
10489
  weight,
10305
10490
  hit: false,
10306
- message: `${path40} (unparseable expected date)`
10491
+ message: `${path42} (unparseable expected date)`
10307
10492
  };
10308
10493
  }
10309
10494
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
10310
10495
  return {
10311
- path: path40,
10496
+ path: path42,
10312
10497
  score: 1,
10313
10498
  weight,
10314
10499
  hit: true,
10315
- message: path40
10500
+ message: path42
10316
10501
  };
10317
10502
  }
10318
10503
  return {
10319
- path: path40,
10504
+ path: path42,
10320
10505
  score: 0,
10321
10506
  weight,
10322
10507
  hit: false,
10323
- message: `${path40} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
10508
+ message: `${path42} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
10324
10509
  };
10325
10510
  }
10326
10511
  /**
@@ -10361,11 +10546,11 @@ var FieldAccuracyEvaluator = class {
10361
10546
  };
10362
10547
  }
10363
10548
  };
10364
- function resolvePath(obj, path40) {
10365
- if (!path40 || !obj) {
10549
+ function resolvePath(obj, path42) {
10550
+ if (!path42 || !obj) {
10366
10551
  return void 0;
10367
10552
  }
10368
- const parts = path40.split(/\.|\[|\]/).filter((p) => p.length > 0);
10553
+ const parts = path42.split(/\.|\[|\]/).filter((p) => p.length > 0);
10369
10554
  let current = obj;
10370
10555
  for (const part of parts) {
10371
10556
  if (current === null || current === void 0) {
@@ -10497,7 +10682,7 @@ var LatencyEvaluator = class {
10497
10682
 
10498
10683
  // src/evaluation/evaluators/agent-judge.ts
10499
10684
  import fs2 from "node:fs/promises";
10500
- import path30 from "node:path";
10685
+ import path31 from "node:path";
10501
10686
  import { generateText as generateText4, stepCountIs, tool } from "ai";
10502
10687
  import { z as z4 } from "zod";
10503
10688
  var DEFAULT_MAX_STEPS = 10;
@@ -10846,8 +11031,8 @@ ${outputSchema}`;
10846
11031
  }
10847
11032
  };
10848
11033
  function resolveSandboxed(basePath, relativePath) {
10849
- const resolved = path30.resolve(basePath, relativePath);
10850
- if (!resolved.startsWith(basePath + path30.sep) && resolved !== basePath) {
11034
+ const resolved = path31.resolve(basePath, relativePath);
11035
+ if (!resolved.startsWith(basePath + path31.sep) && resolved !== basePath) {
10851
11036
  throw new Error(`Path '${relativePath}' is outside the workspace`);
10852
11037
  }
10853
11038
  return resolved;
@@ -10930,11 +11115,11 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
10930
11115
  for (const entry of entries) {
10931
11116
  if (matches.length >= MAX_SEARCH_MATCHES) return;
10932
11117
  if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
10933
- const fullPath = path30.join(dirPath, entry.name);
11118
+ const fullPath = path31.join(dirPath, entry.name);
10934
11119
  if (entry.isDirectory()) {
10935
11120
  await searchDirectory(fullPath, workspacePath, regex, matches);
10936
11121
  } else if (entry.isFile()) {
10937
- const ext = path30.extname(entry.name).toLowerCase();
11122
+ const ext = path31.extname(entry.name).toLowerCase();
10938
11123
  if (BINARY_EXTENSIONS.has(ext)) continue;
10939
11124
  try {
10940
11125
  const stat8 = await fs2.stat(fullPath);
@@ -10946,7 +11131,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
10946
11131
  regex.lastIndex = 0;
10947
11132
  if (regex.test(lines[i])) {
10948
11133
  matches.push({
10949
- file: path30.relative(workspacePath, fullPath),
11134
+ file: path31.relative(workspacePath, fullPath),
10950
11135
  line: i + 1,
10951
11136
  text: lines[i].substring(0, 200)
10952
11137
  });
@@ -11183,8 +11368,8 @@ var TokenUsageEvaluator = class {
11183
11368
  };
11184
11369
 
11185
11370
  // src/evaluation/evaluators/tool-trajectory.ts
11186
- function getNestedValue(obj, path40) {
11187
- const parts = path40.split(".");
11371
+ function getNestedValue(obj, path42) {
11372
+ const parts = path42.split(".");
11188
11373
  let current = obj;
11189
11374
  for (const part of parts) {
11190
11375
  if (current === null || current === void 0 || typeof current !== "object") {
@@ -11745,9 +11930,9 @@ function runEqualsAssertion(output, value) {
11745
11930
  }
11746
11931
 
11747
11932
  // src/evaluation/orchestrator.ts
11748
- import { createHash as createHash2, randomUUID as randomUUID7 } from "node:crypto";
11749
- import { mkdir as mkdir12, stat as stat7 } from "node:fs/promises";
11750
- import path37 from "node:path";
11933
+ import { createHash as createHash3, randomUUID as randomUUID7 } from "node:crypto";
11934
+ import { mkdir as mkdir13, stat as stat7 } from "node:fs/promises";
11935
+ import path39 from "node:path";
11751
11936
  import micromatch4 from "micromatch";
11752
11937
 
11753
11938
  // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
@@ -11938,7 +12123,7 @@ var DeterministicAssertionEvaluator = class {
11938
12123
  import { readFileSync } from "node:fs";
11939
12124
 
11940
12125
  // src/evaluation/evaluators/prompt-resolution.ts
11941
- import path31 from "node:path";
12126
+ import path32 from "node:path";
11942
12127
  async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
11943
12128
  if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
11944
12129
  if (!context) {
@@ -11987,7 +12172,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
11987
12172
  };
11988
12173
  const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
11989
12174
  const scriptPath = script[script.length - 1];
11990
- const cwd = path31.dirname(scriptPath);
12175
+ const cwd = path32.dirname(scriptPath);
11991
12176
  try {
11992
12177
  const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
11993
12178
  const prompt = stdout.trim();
@@ -12280,16 +12465,16 @@ function createBuiltinRegistry() {
12280
12465
  }
12281
12466
 
12282
12467
  // src/evaluation/registry/assertion-discovery.ts
12283
- import path32 from "node:path";
12468
+ import path33 from "node:path";
12284
12469
  import fg3 from "fast-glob";
12285
12470
  async function discoverAssertions(registry, baseDir) {
12286
12471
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
12287
12472
  const candidateDirs = [];
12288
- let dir = path32.resolve(baseDir);
12289
- const root = path32.parse(dir).root;
12473
+ let dir = path33.resolve(baseDir);
12474
+ const root = path33.parse(dir).root;
12290
12475
  while (dir !== root) {
12291
- candidateDirs.push(path32.join(dir, ".agentv", "assertions"));
12292
- dir = path32.dirname(dir);
12476
+ candidateDirs.push(path33.join(dir, ".agentv", "assertions"));
12477
+ dir = path33.dirname(dir);
12293
12478
  }
12294
12479
  let files = [];
12295
12480
  for (const assertionsDir of candidateDirs) {
@@ -12305,7 +12490,7 @@ async function discoverAssertions(registry, baseDir) {
12305
12490
  }
12306
12491
  const discoveredTypes = [];
12307
12492
  for (const filePath of files) {
12308
- const basename = path32.basename(filePath);
12493
+ const basename = path33.basename(filePath);
12309
12494
  const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
12310
12495
  if (registry.has(typeName)) {
12311
12496
  continue;
@@ -12465,7 +12650,7 @@ function getTCritical(df) {
12465
12650
  // src/evaluation/workspace/file-changes.ts
12466
12651
  import { exec as execCallback } from "node:child_process";
12467
12652
  import { readdirSync as readdirSync2, statSync } from "node:fs";
12468
- import path33 from "node:path";
12653
+ import path34 from "node:path";
12469
12654
  import { promisify as promisify4 } from "node:util";
12470
12655
  var execAsync4 = promisify4(execCallback);
12471
12656
  function gitExecOpts(workspacePath) {
@@ -12499,10 +12684,10 @@ async function stageNestedRepoChanges(workspacePath) {
12499
12684
  }
12500
12685
  for (const entry of entries) {
12501
12686
  if (entry === ".git" || entry === "node_modules") continue;
12502
- const childPath = path33.join(workspacePath, entry);
12687
+ const childPath = path34.join(workspacePath, entry);
12503
12688
  try {
12504
12689
  if (!statSync(childPath).isDirectory()) continue;
12505
- if (!statSync(path33.join(childPath, ".git")).isDirectory()) continue;
12690
+ if (!statSync(path34.join(childPath, ".git")).isDirectory()) continue;
12506
12691
  } catch {
12507
12692
  continue;
12508
12693
  }
@@ -12513,9 +12698,7 @@ async function stageNestedRepoChanges(workspacePath) {
12513
12698
 
12514
12699
  // src/evaluation/workspace/manager.ts
12515
12700
  import { cp, mkdir as mkdir10, readdir as readdir3, rm as rm4, stat as stat5 } from "node:fs/promises";
12516
- import os3 from "node:os";
12517
- import path34 from "node:path";
12518
- var DEFAULT_WORKSPACE_ROOT = path34.join(os3.homedir(), ".agentv", "workspaces");
12701
+ import path35 from "node:path";
12519
12702
  var TemplateNotFoundError = class extends Error {
12520
12703
  constructor(templatePath) {
12521
12704
  super(`Workspace template not found: ${templatePath}`);
@@ -12544,15 +12727,15 @@ async function isDirectory(filePath) {
12544
12727
  }
12545
12728
  }
12546
12729
  function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
12547
- const root = workspaceRoot ?? DEFAULT_WORKSPACE_ROOT;
12548
- return path34.join(root, evalRunId, caseId);
12730
+ const root = workspaceRoot ?? getWorkspacesRoot();
12731
+ return path35.join(root, evalRunId, caseId);
12549
12732
  }
12550
12733
  async function copyDirectoryRecursive(src, dest) {
12551
12734
  await mkdir10(dest, { recursive: true });
12552
12735
  const entries = await readdir3(src, { withFileTypes: true });
12553
12736
  for (const entry of entries) {
12554
- const srcPath = path34.join(src, entry.name);
12555
- const destPath = path34.join(dest, entry.name);
12737
+ const srcPath = path35.join(src, entry.name);
12738
+ const destPath = path35.join(dest, entry.name);
12556
12739
  if (entry.name === ".git") {
12557
12740
  continue;
12558
12741
  }
@@ -12564,7 +12747,7 @@ async function copyDirectoryRecursive(src, dest) {
12564
12747
  }
12565
12748
  }
12566
12749
  async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
12567
- const resolvedTemplatePath = path34.resolve(templatePath);
12750
+ const resolvedTemplatePath = path35.resolve(templatePath);
12568
12751
  if (!await fileExists(resolvedTemplatePath)) {
12569
12752
  throw new TemplateNotFoundError(resolvedTemplatePath);
12570
12753
  }
@@ -12612,25 +12795,21 @@ async function cleanupWorkspace(workspacePath) {
12612
12795
  }
12613
12796
  }
12614
12797
  async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
12615
- const root = workspaceRoot ?? DEFAULT_WORKSPACE_ROOT;
12616
- const evalDir = path34.join(root, evalRunId);
12798
+ const root = workspaceRoot ?? getWorkspacesRoot();
12799
+ const evalDir = path35.join(root, evalRunId);
12617
12800
  if (await fileExists(evalDir)) {
12618
12801
  await rm4(evalDir, { recursive: true, force: true });
12619
12802
  }
12620
12803
  }
12621
12804
 
12622
- // src/evaluation/workspace/repo-manager.ts
12805
+ // src/evaluation/workspace/pool-manager.ts
12623
12806
  import { execFile } from "node:child_process";
12624
12807
  import { createHash } from "node:crypto";
12625
12808
  import { existsSync as existsSync2 } from "node:fs";
12626
- import { mkdir as mkdir11, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
12627
- import os4 from "node:os";
12628
- import path35 from "node:path";
12809
+ import { cp as cp2, mkdir as mkdir11, readFile as readFile11, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
12810
+ import path36 from "node:path";
12629
12811
  import { promisify as promisify5 } from "node:util";
12630
12812
  var execFileAsync = promisify5(execFile);
12631
- var DEFAULT_CACHE_DIR = path35.join(os4.homedir(), ".agentv", "git-cache");
12632
- var DEFAULT_TIMEOUT_MS2 = 3e5;
12633
- var LOCK_TIMEOUT_MS = 6e4;
12634
12813
  function gitEnv() {
12635
12814
  const env = { ...process.env };
12636
12815
  for (const key of Object.keys(env)) {
@@ -12645,49 +12824,340 @@ function gitEnv() {
12645
12824
  GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
12646
12825
  };
12647
12826
  }
12648
- function cacheKey(source) {
12649
- const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
12650
- return createHash("sha256").update(raw).digest("hex");
12651
- }
12652
- function getSourceUrl(source) {
12653
- return source.type === "git" ? source.url : source.path;
12654
- }
12655
12827
  async function git(args, opts) {
12656
12828
  const { stdout } = await execFileAsync("git", args, {
12657
12829
  cwd: opts?.cwd,
12658
- timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
12830
+ timeout: opts?.timeout ?? 3e5,
12659
12831
  env: gitEnv(),
12660
12832
  maxBuffer: 50 * 1024 * 1024
12661
- // 50MB
12662
12833
  });
12663
12834
  return stdout.trim();
12664
12835
  }
12665
- async function acquireLock(lockPath) {
12666
- const start = Date.now();
12667
- while (Date.now() - start < LOCK_TIMEOUT_MS) {
12668
- try {
12669
- await writeFile7(lockPath, String(process.pid), { flag: "wx" });
12670
- return;
12671
- } catch (err) {
12672
- if (err.code === "EEXIST") {
12673
- await new Promise((r) => setTimeout(r, 200));
12836
/**
 * Reduce a repo configuration to the canonical fields that feed the workspace fingerprint.
 *
 * Git URLs are lower-cased and stripped of a trailing `.git`; local sources keep their
 * path verbatim. The checkout ref defaults to "HEAD". `depth`, `filter` and `sparse` are
 * only present when configured, and sparse paths are sorted on a copy so the caller's
 * array is left untouched.
 *
 * Key insertion order is part of the contract: the result is serialized with
 * JSON.stringify by the fingerprint computation.
 *
 * @param {object} repo Repo entry with `path`, `source`, and optional `checkout`/`clone`.
 * @returns {object} Canonical `{ path, source, ref, depth?, filter?, sparse? }` record.
 */
function normalizeRepoForFingerprint(repo) {
  const { source: rawSource, checkout, clone } = repo;
  let source;
  if (rawSource.type === "git") {
    source = { type: "git", url: rawSource.url.toLowerCase().replace(/\.git$/, "") };
  } else {
    source = { type: "local", path: rawSource.path };
  }
  const normalized = {
    path: repo.path,
    source,
    ref: checkout?.ref ?? "HEAD"
  };
  const depth = clone?.depth;
  if (depth !== void 0) {
    normalized.depth = depth;
  }
  const filter = clone?.filter;
  if (filter !== void 0) {
    normalized.filter = filter;
  }
  const sparse = clone?.sparse;
  if (sparse?.length) {
    normalized.sparse = [...sparse].sort();
  }
  return normalized;
}
12854
/**
 * Compute a stable SHA-256 fingerprint for a workspace definition.
 *
 * The repos are copied, ordered by `path` (via localeCompare) and normalized with
 * normalizeRepoForFingerprint before hashing, so the order in which the caller lists
 * them does not affect the result. A missing template path is encoded as `null`.
 *
 * @param {string|null|undefined} templatePath Template directory, if any.
 * @param {Iterable<object>} repos Repo configurations that make up the workspace.
 * @returns {string} Hex-encoded SHA-256 digest of the canonical JSON form.
 */
function computeWorkspaceFingerprint(templatePath, repos) {
  const orderedRepos = [...repos];
  orderedRepos.sort((left, right) => left.path.localeCompare(right.path));
  const payload = JSON.stringify({
    templatePath: templatePath ?? null,
    repos: orderedRepos.map(normalizeRepoForFingerprint)
  });
  const hasher = createHash("sha256");
  hasher.update(payload);
  return hasher.digest("hex");
}
12861
/**
 * Recursively copy the contents of `src` into `dest`, creating `dest` when needed.
 *
 * - Entries named `.git` are skipped at every level.
 * - Directories whose name is in `skipDirs` are skipped, but only among the direct
 *   children of the `src` passed by the caller. The set is not forwarded into the
 *   recursion, so a nested directory that merely shares a name with a skipped
 *   top-level directory is still copied.
 * - Every non-directory entry is copied with its timestamps preserved, overwriting
 *   an existing destination file.
 *
 * @param {string} src Directory to copy from.
 * @param {string} dest Directory to copy into.
 * @param {Set<string>} [skipDirs] Names of top-level directories to leave out.
 * @returns {Promise<void>}
 */
async function copyDirectoryRecursive2(src, dest, skipDirs) {
  await mkdir11(dest, { recursive: true });
  const entries = await readdir4(src, { withFileTypes: true });
  for (const entry of entries) {
    if (entry.name === ".git") {
      continue;
    }
    const srcPath = path36.join(src, entry.name);
    const destPath = path36.join(dest, entry.name);
    if (entry.isDirectory()) {
      if (skipDirs?.has(entry.name)) {
        continue;
      }
      // Deliberately omit skipDirs: the names identify children of the root only.
      await copyDirectoryRecursive2(srcPath, destPath);
    } else {
      await cp2(srcPath, destPath, { preserveTimestamps: true, force: true });
    }
  }
}
12681
- async function releaseLock(lockPath) {
12682
- try {
12683
- await unlink(lockPath);
12880
var WorkspacePoolManager = class {
  poolRoot;
  /**
   * @param {string} [poolRoot] Directory that contains one sub-directory per workspace
   *   fingerprint. When null/undefined, getWorkspacePoolRoot() is used.
   */
  constructor(poolRoot) {
    this.poolRoot = poolRoot ?? getWorkspacePoolRoot();
  }
  /**
   * Acquire a workspace slot from the pool.
   *
   * 1. Compute fingerprint from template + repos
   * 2. Check drift (compare stored metadata.json fingerprint vs computed)
   * 3. If drift: warn, remove all slots, rematerialize
   * 4. Acquire a slot (try-lock slot-0, slot-1, ..., up to maxSlots)
   * 5. If slot exists: reset repos, re-copy template files (skip repo directories)
   * 6. If new slot: copy template, materialize all repos, write metadata.json
   * 7. Return the slot (with path, index, isExisting)
   *
   * If preparing the slot fails, the lock is released before the error is rethrown,
   * and a slot directory created by this call is removed so a half-built slot is
   * never mistaken for a reusable one.
   *
   * @param {{ templatePath?: string, repos: object[], maxSlots: number, repoManager: object }} options
   * @returns {Promise<{ index: number, path: string, isExisting: boolean, lockPath: string, fingerprint: string, poolDir: string }>}
   * @throws {Error} When every slot up to `maxSlots` is locked, or when preparing the slot fails.
   */
  async acquireWorkspace(options) {
    const { templatePath, repos, maxSlots, repoManager } = options;
    const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
    const poolDir = path36.join(this.poolRoot, fingerprint);
    await mkdir11(poolDir, { recursive: true });
    const drifted = await this.checkDrift(poolDir, fingerprint);
    if (drifted) {
      console.warn(
        `[workspace-pool] Drift detected for fingerprint ${fingerprint.slice(0, 12)}... Removing stale slots.`
      );
      await this.removeAllSlots(poolDir);
    }
    for (let i = 0; i < maxSlots; i++) {
      const slotPath = path36.join(poolDir, `slot-${i}`);
      const lockPath = `${slotPath}.lock`;
      const locked = await this.tryLock(lockPath);
      if (!locked) {
        continue;
      }
      const slotExists = existsSync2(slotPath);
      try {
        if (slotExists) {
          await this.resetSlot(slotPath, templatePath, repos);
        } else {
          await mkdir11(slotPath, { recursive: true });
          if (templatePath) {
            await copyDirectoryRecursive2(templatePath, slotPath);
          }
          if (repos.length > 0) {
            await repoManager.materializeAll(repos, slotPath);
          }
          await this.writeMetadata(poolDir, fingerprint, templatePath ?? null, repos);
        }
      } catch (err) {
        if (!slotExists) {
          // Best-effort removal of the partially built slot; the original error wins.
          await rm5(slotPath, { recursive: true, force: true }).catch(() => {
          });
        }
        // This process is still alive, so a leaked lock would block the slot for good.
        await this.releaseSlot({ lockPath });
        throw err;
      }
      return {
        index: i,
        path: slotPath,
        isExisting: slotExists,
        lockPath,
        fingerprint,
        poolDir
      };
    }
    throw new Error(
      `All ${maxSlots} pool slots are locked for fingerprint ${fingerprint.slice(0, 12)}...`
    );
  }
  /**
   * Remove lock file to release a slot. Failures (e.g. lock already gone) are ignored.
   *
   * @param {{ lockPath: string }} slot Slot object returned by acquireWorkspace.
   */
  async releaseSlot(slot) {
    try {
      await unlink(slot.lockPath);
    } catch {
    }
  }
  /**
   * Report whether a process with the given PID currently exists.
   *
   * Signal 0 performs the existence/permission check without delivering a signal.
   * EPERM means the process exists but belongs to someone else, so it counts as
   * alive; any other failure (ESRCH in practice) is treated as "no such process".
   *
   * @param {number} pid Process id read from a lock file.
   * @returns {boolean} true when the process exists.
   */
  isProcessAlive(pid) {
    try {
      process.kill(pid, 0);
      return true;
    } catch (err) {
      return err?.code === "EPERM";
    }
  }
  /**
   * Try to acquire a PID-based lock file.
   * On EEXIST, read PID and check if process is alive. If dead, stale lock — remove and retry.
   * If the lock file disappears between the EEXIST and the read, the holder has just
   * released it, so the attempt is retried as well.
   * Returns true if lock acquired, false if slot is actively locked.
   * Uses a bounded loop (max 3 attempts) to avoid unbounded recursion.
   *
   * @param {string} lockPath Path of the lock file to create.
   * @returns {Promise<boolean>}
   * @throws Any error from creating the lock file other than EEXIST.
   */
  async tryLock(lockPath) {
    for (let attempt = 0; attempt < 3; attempt++) {
      try {
        await writeFile7(lockPath, String(process.pid), { flag: "wx" });
        return true;
      } catch (err) {
        if (err.code !== "EEXIST") {
          throw err;
        }
      }
      let pidStr;
      try {
        pidStr = await readFile11(lockPath, "utf-8");
      } catch (err) {
        if (err?.code === "ENOENT") {
          continue;
        }
        return false;
      }
      const pid = Number.parseInt(pidStr.trim(), 10);
      // An unparsable PID (e.g. a lock file still being written) is treated as held.
      if (Number.isNaN(pid) || this.isProcessAlive(pid)) {
        return false;
      }
      await unlink(lockPath).catch(() => {
      });
    }
    return false;
  }
  /**
   * Check if the stored fingerprint in metadata.json differs from the computed one.
   * Returns true if drifted, false otherwise.
   * Returns false (no drift) if metadata.json cannot be read or parsed (e.g. first use).
   *
   * @param {string} poolDir Pool directory for this fingerprint.
   * @param {string} fingerprint Freshly computed fingerprint.
   * @returns {Promise<boolean>}
   */
  async checkDrift(poolDir, fingerprint) {
    const metadataPath = path36.join(poolDir, "metadata.json");
    try {
      const raw = await readFile11(metadataPath, "utf-8");
      const metadata = JSON.parse(raw);
      return metadata.fingerprint !== fingerprint;
    } catch {
      return false;
    }
  }
  /**
   * Write metadata.json with fingerprint, inputs, and timestamp.
   *
   * @param {string} poolDir Pool directory that receives metadata.json.
   * @param {string} fingerprint Fingerprint the pool was built for.
   * @param {string|null} templatePath Template path recorded for reference.
   * @param {object[]} repos Repo configurations recorded for reference.
   */
  async writeMetadata(poolDir, fingerprint, templatePath, repos) {
    const metadata = {
      fingerprint,
      templatePath,
      repos,
      createdAt: (/* @__PURE__ */ new Date()).toISOString()
    };
    await writeFile7(path36.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
  }
  /**
   * Remove all slot directories and their lock files from a pool directory.
   * Slots whose lock file names a live process are skipped with a warning; a lock
   * that cannot be read or parsed is treated as stale. metadata.json is removed
   * at the end regardless of skipped slots.
   *
   * @param {string} poolDir Pool directory to clean.
   */
  async removeAllSlots(poolDir) {
    const entries = await readdir4(poolDir);
    for (const entry of entries) {
      if (!entry.startsWith("slot-") || entry.endsWith(".lock")) {
        continue;
      }
      const lockPath = path36.join(poolDir, `${entry}.lock`);
      let holderPid;
      if (existsSync2(lockPath)) {
        try {
          const pidStr = await readFile11(lockPath, "utf-8");
          const pid = Number.parseInt(pidStr.trim(), 10);
          if (!Number.isNaN(pid) && this.isProcessAlive(pid)) {
            holderPid = pid;
          }
        } catch {
          // Unreadable lock: fall through and remove the slot.
        }
      }
      if (holderPid !== void 0) {
        console.warn(`[workspace-pool] Skipping slot ${entry}: locked by PID ${holderPid}`);
        continue;
      }
      await rm5(path36.join(poolDir, entry), { recursive: true, force: true });
      await rm5(lockPath, { force: true }).catch(() => {
      });
    }
    await rm5(path36.join(poolDir, "metadata.json"), { force: true }).catch(() => {
    });
  }
  /**
   * Reset an existing slot for reuse:
   * 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
   * 2. Re-copy template files (skip repo directories)
   *
   * Repos whose directory is missing from the slot are skipped.
   * NOTE(review): the reset target is `checkout.ref` (default "HEAD");
   * `checkout.ancestor` is not consulted here — confirm that is intended for pooled slots.
   *
   * @param {string} slotPath Slot directory to reset.
   * @param {string|undefined} templatePath Template directory to re-copy, if any.
   * @param {object[]} repos Repo configurations materialized in the slot.
   */
  async resetSlot(slotPath, templatePath, repos) {
    for (const repo of repos) {
      const repoDir = path36.join(slotPath, repo.path);
      if (!existsSync2(repoDir)) {
        continue;
      }
      const ref = repo.checkout?.ref ?? "HEAD";
      await git(["reset", "--hard", ref], { cwd: repoDir });
      await git(["clean", "-fd"], { cwd: repoDir });
    }
    if (templatePath) {
      // First path segment of each repo path = top-level directory owned by a repo.
      const repoDirNames = new Set(
        repos.map((r) => {
          const normalized = r.path.replace(/^\.\//, "");
          return normalized.split("/")[0];
        })
      );
      await copyDirectoryRecursive2(templatePath, slotPath, repoDirNames);
    }
  }
};
13070
+
13071
+ // src/evaluation/workspace/repo-manager.ts
13072
+ import { execFile as execFile2 } from "node:child_process";
13073
+ import { createHash as createHash2 } from "node:crypto";
13074
+ import { existsSync as existsSync3 } from "node:fs";
13075
+ import { mkdir as mkdir12, rm as rm6, unlink as unlink2, writeFile as writeFile8 } from "node:fs/promises";
13076
+ import path37 from "node:path";
13077
+ import { promisify as promisify6 } from "node:util";
13078
+ var execFileAsync2 = promisify6(execFile2);
13079
+ var DEFAULT_TIMEOUT_MS2 = 3e5;
13080
+ var LOCK_TIMEOUT_MS = 6e4;
13081
/**
 * Build the environment used for spawned git processes.
 *
 * Starts from a copy of process.env with every `GIT_*` variable except
 * `GIT_SSH_COMMAND` dropped, then forces non-interactive settings:
 * `GIT_TERMINAL_PROMPT=0`, an empty `GIT_ASKPASS`, and
 * `GIT_SSH_COMMAND="ssh -o BatchMode=yes"` (which replaces any inherited value).
 * process.env itself is not modified.
 *
 * @returns {Record<string, string>} Environment object for child_process calls.
 */
function gitEnv2() {
  const inherited = Object.fromEntries(
    Object.entries(process.env).filter(
      ([name]) => !name.startsWith("GIT_") || name === "GIT_SSH_COMMAND"
    )
  );
  return Object.assign(inherited, {
    GIT_TERMINAL_PROMPT: "0",
    GIT_ASKPASS: "",
    GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
  });
}
13095
/**
 * Derive the cache directory key for a repo source.
 *
 * Git sources are keyed by their URL, lower-cased and without a trailing `.git`;
 * any other source is keyed by its `path` as given.
 *
 * @param {{ type: string, url?: string, path?: string }} source Repo source descriptor.
 * @returns {string} Hex-encoded SHA-256 digest of the identifying string.
 */
function cacheKey(source) {
  let identity;
  if (source.type === "git") {
    identity = source.url.toLowerCase().replace(/\.git$/, "");
  } else {
    identity = source.path;
  }
  const hasher = createHash2("sha256");
  hasher.update(identity);
  return hasher.digest("hex");
}
13099
/**
 * Return the location string of a repo source: `url` for git sources,
 * `path` for everything else.
 *
 * @param {{ type: string, url?: string, path?: string }} source Repo source descriptor.
 * @returns {string|undefined} The source's URL or path.
 */
function getSourceUrl(source) {
  if (source.type === "git") {
    return source.url;
  }
  return source.path;
}
13102
/**
 * Run `git` with the given arguments and return its trimmed standard output.
 *
 * @param {string[]} args Arguments passed to the `git` executable.
 * @param {{ cwd?: string, timeout?: number }} [opts] Working directory and timeout in
 *   milliseconds; the timeout falls back to DEFAULT_TIMEOUT_MS2 when not provided.
 * @returns {Promise<string>} stdout with surrounding whitespace removed.
 */
async function git2(args, opts) {
  const execOptions = {
    cwd: opts?.cwd,
    timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
    env: gitEnv2(),
    // 50 MiB output cap
    maxBuffer: 50 * 1024 * 1024
  };
  const result = await execFileAsync2("git", args, execOptions);
  return result.stdout.trim();
}
13112
/**
 * Acquire an exclusive lock by creating `lockPath` with the current PID as content.
 *
 * The file is opened with the "wx" flag, so creation fails with EEXIST while another
 * holder's file is present; in that case the attempt is repeated every 200 ms until
 * LOCK_TIMEOUT_MS has elapsed since the first attempt.
 *
 * @param {string} lockPath Path of the lock file to create.
 * @returns {Promise<void>} Resolves once the lock file has been created.
 * @throws {Error} "Timed out waiting for lock" when the deadline passes, or the
 *   underlying error when creation fails for a reason other than EEXIST.
 */
async function acquireLock(lockPath) {
  const startedAt = Date.now();
  const expired = () => Date.now() - startedAt >= LOCK_TIMEOUT_MS;
  while (!expired()) {
    let failure;
    try {
      await writeFile8(lockPath, String(process.pid), { flag: "wx" });
      return;
    } catch (err) {
      failure = err;
    }
    if (failure.code !== "EEXIST") {
      throw failure;
    }
    // Lock is held by someone else: back off briefly before the next attempt.
    await new Promise((wake) => setTimeout(wake, 200));
  }
  throw new Error(`Timed out waiting for lock: ${lockPath}`);
}
13128
/**
 * Release a lock by deleting its lock file.
 *
 * Best effort: any failure to delete (for example the file is already gone)
 * is ignored, so this never rejects.
 *
 * @param {string} lockPath Path of the lock file to remove.
 * @returns {Promise<void>}
 */
async function releaseLock(lockPath) {
  const ignore = () => {
  };
  await Promise.resolve(lockPath).then(unlink2).then(ignore, ignore);
}
12687
13134
  var RepoManager = class {
12688
13135
  cacheDir;
12689
- constructor(cacheDir) {
12690
- this.cacheDir = cacheDir ?? DEFAULT_CACHE_DIR;
13136
+ verbose;
13137
+ constructor(cacheDir, verbose = false) {
13138
+ this.cacheDir = cacheDir ?? getGitCacheRoot();
13139
+ this.verbose = verbose;
13140
+ }
13141
+ async runGit(args, opts) {
13142
+ const startedAt = Date.now();
13143
+ if (this.verbose) {
13144
+ console.log(`[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`);
13145
+ }
13146
+ try {
13147
+ const output = await git2(args, opts);
13148
+ if (this.verbose) {
13149
+ console.log(`[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`);
13150
+ }
13151
+ return output;
13152
+ } catch (error) {
13153
+ if (this.verbose) {
13154
+ const message = error instanceof Error ? error.message : String(error);
13155
+ console.log(
13156
+ `[repo] git fail durationMs=${Date.now() - startedAt} args=${args.join(" ")} error=${message}`
13157
+ );
13158
+ }
13159
+ throw error;
13160
+ }
12691
13161
  }
12692
13162
  /**
12693
13163
  * Ensure a bare mirror cache exists for the given source.
@@ -12696,11 +13166,19 @@ var RepoManager = class {
12696
13166
  */
12697
13167
  async ensureCache(source, depth, resolve) {
12698
13168
  const key = cacheKey(source);
12699
- const cachePath = path35.join(this.cacheDir, key);
13169
+ const cachePath = path37.join(this.cacheDir, key);
12700
13170
  const lockPath = `${cachePath}.lock`;
12701
- const cacheExists = existsSync2(path35.join(cachePath, "HEAD"));
13171
+ const cacheExists = existsSync3(path37.join(cachePath, "HEAD"));
13172
+ if (this.verbose) {
13173
+ console.log(
13174
+ `[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve ?? "remote"} cache=${cachePath}`
13175
+ );
13176
+ }
12702
13177
  if (resolve === "local") {
12703
13178
  if (cacheExists) {
13179
+ if (this.verbose) {
13180
+ console.log(`[repo] using existing local cache ${cachePath}`);
13181
+ }
12704
13182
  return cachePath;
12705
13183
  }
12706
13184
  const url = getSourceUrl(source);
@@ -12708,16 +13186,26 @@ var RepoManager = class {
12708
13186
  `No cache found for \`${url}\`. Run \`agentv cache add --url ${url} --from <local-path>\` to seed it.`
12709
13187
  );
12710
13188
  }
12711
- await mkdir11(this.cacheDir, { recursive: true });
13189
+ await mkdir12(this.cacheDir, { recursive: true });
13190
+ const lockStartedAt = Date.now();
12712
13191
  await acquireLock(lockPath);
13192
+ if (this.verbose) {
13193
+ console.log(`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`);
13194
+ }
12713
13195
  try {
12714
13196
  if (cacheExists) {
13197
+ if (this.verbose) {
13198
+ console.log(`[repo] refreshing existing cache ${cachePath}`);
13199
+ }
12715
13200
  const fetchArgs = ["fetch", "--prune"];
12716
13201
  if (depth) {
12717
13202
  fetchArgs.push("--depth", String(depth));
12718
13203
  }
12719
- await git(fetchArgs, { cwd: cachePath });
13204
+ await this.runGit(fetchArgs, { cwd: cachePath });
12720
13205
  } else {
13206
+ if (this.verbose) {
13207
+ console.log(`[repo] creating new cache ${cachePath}`);
13208
+ }
12721
13209
  const cloneArgs = ["clone", "--mirror", "--bare"];
12722
13210
  if (depth) {
12723
13211
  cloneArgs.push("--depth", String(depth));
@@ -12725,10 +13213,13 @@ var RepoManager = class {
12725
13213
  const sourceUrl = getSourceUrl(source);
12726
13214
  const cloneUrl = depth && source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
12727
13215
  cloneArgs.push(cloneUrl, cachePath);
12728
- await git(cloneArgs);
13216
+ await this.runGit(cloneArgs);
12729
13217
  }
12730
13218
  } finally {
12731
13219
  await releaseLock(lockPath);
13220
+ if (this.verbose) {
13221
+ console.log(`[repo] lock released path=${lockPath}`);
13222
+ }
12732
13223
  }
12733
13224
  return cachePath;
12734
13225
  }
@@ -12737,7 +13228,13 @@ var RepoManager = class {
12737
13228
  * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
12738
13229
  */
12739
13230
  async materialize(repo, workspacePath) {
12740
- const targetDir = path35.join(workspacePath, repo.path);
13231
+ const targetDir = path37.join(workspacePath, repo.path);
13232
+ const startedAt = Date.now();
13233
+ if (this.verbose) {
13234
+ console.log(
13235
+ `[repo] materialize start path=${repo.path} source=${getSourceUrl(repo.source)} workspace=${workspacePath}`
13236
+ );
13237
+ }
12741
13238
  const cachePath = await this.ensureCache(
12742
13239
  repo.source,
12743
13240
  repo.clone?.depth,
@@ -12753,10 +13250,10 @@ var RepoManager = class {
12753
13250
  cloneArgs.push("--no-checkout");
12754
13251
  const cloneUrl = repo.clone?.depth || repo.clone?.filter ? `file://${cachePath}` : cachePath;
12755
13252
  cloneArgs.push(cloneUrl, targetDir);
12756
- await git(cloneArgs);
13253
+ await this.runGit(cloneArgs);
12757
13254
  if (repo.clone?.sparse?.length) {
12758
- await git(["sparse-checkout", "init", "--cone"], { cwd: targetDir });
12759
- await git(["sparse-checkout", "set", ...repo.clone.sparse], { cwd: targetDir });
13255
+ await this.runGit(["sparse-checkout", "init", "--cone"], { cwd: targetDir });
13256
+ await this.runGit(["sparse-checkout", "set", ...repo.clone.sparse], { cwd: targetDir });
12760
13257
  }
12761
13258
  const ref = repo.checkout?.ref ?? "HEAD";
12762
13259
  const resolve = repo.checkout?.resolve ?? "remote";
@@ -12764,7 +13261,7 @@ var RepoManager = class {
12764
13261
  if (resolve === "remote" && repo.source.type === "git") {
12765
13262
  const url = getSourceUrl(repo.source);
12766
13263
  try {
12767
- const lsOutput = await git(["ls-remote", url, ref]);
13264
+ const lsOutput = await this.runGit(["ls-remote", url, ref]);
12768
13265
  const match = lsOutput.split(" ")[0];
12769
13266
  if (!match) {
12770
13267
  throw new Error(`Ref '${ref}' not found on remote ${url}`);
@@ -12777,17 +13274,26 @@ var RepoManager = class {
12777
13274
  } else {
12778
13275
  resolvedSha = ref;
12779
13276
  }
12780
- await git(["checkout", resolvedSha], { cwd: targetDir });
13277
+ if (this.verbose) {
13278
+ console.log(
13279
+ `[repo] checkout path=${repo.path} ref=${ref} resolved=${resolvedSha} resolve=${resolve}`
13280
+ );
13281
+ }
13282
+ await this.runGit(["checkout", resolvedSha], { cwd: targetDir });
12781
13283
  const ancestor = repo.checkout?.ancestor ?? 0;
12782
13284
  if (ancestor > 0) {
12783
13285
  try {
12784
- const ancestorSha = await git(["rev-parse", `HEAD~${ancestor}`], { cwd: targetDir });
12785
- await git(["checkout", ancestorSha], { cwd: targetDir });
13286
+ const ancestorSha = await this.runGit(["rev-parse", `HEAD~${ancestor}`], {
13287
+ cwd: targetDir
13288
+ });
13289
+ await this.runGit(["checkout", ancestorSha], { cwd: targetDir });
12786
13290
  } catch {
12787
13291
  if (repo.clone?.depth) {
12788
- await git(["fetch", "--deepen", String(ancestor)], { cwd: targetDir });
12789
- const ancestorSha = await git(["rev-parse", `HEAD~${ancestor}`], { cwd: targetDir });
12790
- await git(["checkout", ancestorSha], { cwd: targetDir });
13292
+ await this.runGit(["fetch", "--deepen", String(ancestor)], { cwd: targetDir });
13293
+ const ancestorSha = await this.runGit(["rev-parse", `HEAD~${ancestor}`], {
13294
+ cwd: targetDir
13295
+ });
13296
+ await this.runGit(["checkout", ancestorSha], { cwd: targetDir });
12791
13297
  } else {
12792
13298
  throw new Error(
12793
13299
  `Cannot resolve ancestor ${ancestor} of ref '${ref}'. If using shallow clone, increase clone.depth to at least ${ancestor + 1}.`
@@ -12795,27 +13301,38 @@ var RepoManager = class {
12795
13301
  }
12796
13302
  }
12797
13303
  }
13304
+ if (this.verbose) {
13305
+ console.log(
13306
+ `[repo] materialize done path=${repo.path} target=${targetDir} durationMs=${Date.now() - startedAt}`
13307
+ );
13308
+ }
12798
13309
  }
12799
13310
  /** Materialize all repos into the workspace. */
12800
13311
  async materializeAll(repos, workspacePath) {
13312
+ if (this.verbose) {
13313
+ console.log(`[repo] materializeAll count=${repos.length} workspace=${workspacePath}`);
13314
+ }
12801
13315
  for (const repo of repos) {
12802
13316
  await this.materialize(repo, workspacePath);
12803
13317
  }
13318
+ if (this.verbose) {
13319
+ console.log("[repo] materializeAll complete");
13320
+ }
12804
13321
  }
12805
13322
  /** Reset repos in workspace to their checkout state. */
12806
13323
  async reset(repos, workspacePath, strategy) {
12807
13324
  if (strategy === "recreate") {
12808
13325
  for (const repo of repos) {
12809
- const targetDir = path35.join(workspacePath, repo.path);
12810
- await rm5(targetDir, { recursive: true, force: true });
13326
+ const targetDir = path37.join(workspacePath, repo.path);
13327
+ await rm6(targetDir, { recursive: true, force: true });
12811
13328
  }
12812
13329
  await this.materializeAll(repos, workspacePath);
12813
13330
  return;
12814
13331
  }
12815
13332
  for (const repo of repos) {
12816
- const targetDir = path35.join(workspacePath, repo.path);
12817
- await git(["reset", "--hard", "HEAD"], { cwd: targetDir });
12818
- await git(["clean", "-fd"], { cwd: targetDir });
13333
+ const targetDir = path37.join(workspacePath, repo.path);
13334
+ await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
13335
+ await this.runGit(["clean", "-fd"], { cwd: targetDir });
12819
13336
  }
12820
13337
  }
12821
13338
  /**
@@ -12825,21 +13342,21 @@ var RepoManager = class {
12825
13342
  async seedCache(localPath, remoteUrl, opts) {
12826
13343
  const source = { type: "git", url: remoteUrl };
12827
13344
  const key = cacheKey(source);
12828
- const cachePath = path35.join(this.cacheDir, key);
13345
+ const cachePath = path37.join(this.cacheDir, key);
12829
13346
  const lockPath = `${cachePath}.lock`;
12830
- await mkdir11(this.cacheDir, { recursive: true });
13347
+ await mkdir12(this.cacheDir, { recursive: true });
12831
13348
  await acquireLock(lockPath);
12832
13349
  try {
12833
- if (existsSync2(path35.join(cachePath, "HEAD"))) {
13350
+ if (existsSync3(path37.join(cachePath, "HEAD"))) {
12834
13351
  if (!opts?.force) {
12835
13352
  throw new Error(
12836
13353
  `Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
12837
13354
  );
12838
13355
  }
12839
- await rm5(cachePath, { recursive: true, force: true });
13356
+ await rm6(cachePath, { recursive: true, force: true });
12840
13357
  }
12841
- await git(["clone", "--mirror", "--bare", localPath, cachePath]);
12842
- await git(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
13358
+ await git2(["clone", "--mirror", "--bare", localPath, cachePath]);
13359
+ await git2(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
12843
13360
  } finally {
12844
13361
  await releaseLock(lockPath);
12845
13362
  }
@@ -12847,41 +13364,41 @@ var RepoManager = class {
12847
13364
  }
12848
13365
  /** Remove the entire cache directory. */
12849
13366
  async cleanCache() {
12850
- await rm5(this.cacheDir, { recursive: true, force: true });
13367
+ await rm6(this.cacheDir, { recursive: true, force: true });
12851
13368
  }
12852
13369
  };
12853
13370
 
12854
13371
  // src/evaluation/workspace/resolve.ts
12855
- import { readdir as readdir4, stat as stat6 } from "node:fs/promises";
12856
- import path36 from "node:path";
13372
+ import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
13373
+ import path38 from "node:path";
12857
13374
  async function resolveWorkspaceTemplate(templatePath) {
12858
13375
  if (!templatePath) {
12859
13376
  return void 0;
12860
13377
  }
12861
- const resolved = path36.resolve(templatePath);
13378
+ const resolved = path38.resolve(templatePath);
12862
13379
  const stats = await stat6(resolved);
12863
13380
  if (stats.isFile()) {
12864
13381
  return {
12865
- dir: path36.dirname(resolved),
13382
+ dir: path38.dirname(resolved),
12866
13383
  workspaceFile: resolved
12867
13384
  };
12868
13385
  }
12869
13386
  if (!stats.isDirectory()) {
12870
13387
  throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
12871
13388
  }
12872
- const entries = await readdir4(resolved);
13389
+ const entries = await readdir5(resolved);
12873
13390
  const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
12874
13391
  if (workspaceFiles.length === 1) {
12875
13392
  return {
12876
13393
  dir: resolved,
12877
- workspaceFile: path36.join(resolved, workspaceFiles[0])
13394
+ workspaceFile: path38.join(resolved, workspaceFiles[0])
12878
13395
  };
12879
13396
  }
12880
13397
  if (workspaceFiles.length > 1) {
12881
13398
  const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
12882
13399
  return {
12883
13400
  dir: resolved,
12884
- workspaceFile: conventionFile ? path36.join(resolved, conventionFile) : void 0
13401
+ workspaceFile: conventionFile ? path38.join(resolved, conventionFile) : void 0
12885
13402
  };
12886
13403
  }
12887
13404
  return { dir: resolved };
@@ -12963,7 +13480,10 @@ async function runEvaluation(options) {
12963
13480
  trials,
12964
13481
  streamCallbacks,
12965
13482
  totalBudgetUsd,
12966
- failOnError
13483
+ failOnError,
13484
+ poolWorkspaces,
13485
+ poolMaxSlots: configPoolMaxSlots,
13486
+ workspace: userWorkspacePath
12967
13487
  } = options;
12968
13488
  let useCache = options.useCache;
12969
13489
  if (trials && trials.count > 1 && useCache) {
@@ -13037,7 +13557,7 @@ async function runEvaluation(options) {
13037
13557
  ];
13038
13558
  const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveJudgeProvider);
13039
13559
  const typeRegistry = createBuiltinRegistry();
13040
- const discoveryBaseDir = evalFilePath ? path37.dirname(path37.resolve(evalFilePath)) : process.cwd();
13560
+ const discoveryBaseDir = evalFilePath ? path39.dirname(path39.resolve(evalFilePath)) : process.cwd();
13041
13561
  const evalDir = discoveryBaseDir;
13042
13562
  await discoverAssertions(typeRegistry, discoveryBaseDir);
13043
13563
  const providerRegistry = createBuiltinProviderRegistry();
@@ -13093,11 +13613,25 @@ async function runEvaluation(options) {
13093
13613
  const resolvedTemplate = await resolveWorkspaceTemplate(rawTemplate);
13094
13614
  const workspaceTemplate = resolvedTemplate?.dir;
13095
13615
  let suiteWorkspaceFile = resolvedTemplate?.workspaceFile;
13616
+ const setupLog = (message) => {
13617
+ if (verbose) {
13618
+ console.log(`[setup] ${message}`);
13619
+ }
13620
+ };
13096
13621
  const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
13097
- const hasSharedWorkspace = !!(workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
13622
+ if (userWorkspacePath && isPerTestIsolation) {
13623
+ throw new Error(
13624
+ "--workspace is incompatible with isolation: per_test. Use isolation: shared (default)."
13625
+ );
13626
+ }
13627
+ const hasSharedWorkspace = !!(userWorkspacePath || workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
13628
+ const usePool = poolWorkspaces === true && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !userWorkspacePath;
13098
13629
  const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
13099
- const workers = hasSharedWorkspace ? 1 : requestedWorkers;
13100
- if (hasSharedWorkspace && requestedWorkers > 1) {
13630
+ const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
13631
+ setupLog(
13632
+ `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
13633
+ );
13634
+ if (hasSharedWorkspace && !usePool && requestedWorkers > 1) {
13101
13635
  console.warn(
13102
13636
  `Warning: Shared workspace requires sequential execution. Overriding workers from ${requestedWorkers} to 1.`
13103
13637
  );
@@ -13106,285 +13640,383 @@ async function runEvaluation(options) {
13106
13640
  let sharedWorkspacePath;
13107
13641
  let sharedBaselineCommit;
13108
13642
  let beforeAllOutput;
13109
- if (workspaceTemplate) {
13643
+ let poolManager;
13644
+ let poolSlot;
13645
+ const poolSlots = [];
13646
+ const availablePoolSlots = [];
13647
+ const poolSlotBaselines = /* @__PURE__ */ new Map();
13648
+ const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
13649
+ if (userWorkspacePath) {
13650
+ sharedWorkspacePath = userWorkspacePath;
13651
+ setupLog(`using user-provided workspace: ${userWorkspacePath}`);
13652
+ } else if (usePool && suiteWorkspace?.repos) {
13653
+ const slotsNeeded = workers;
13654
+ setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
13655
+ poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
13656
+ const poolRepoManager = new RepoManager(void 0, verbose);
13657
+ for (let i = 0; i < slotsNeeded; i++) {
13658
+ const slot = await poolManager.acquireWorkspace({
13659
+ templatePath: workspaceTemplate,
13660
+ repos: suiteWorkspace.repos,
13661
+ maxSlots: poolMaxSlots,
13662
+ repoManager: poolRepoManager
13663
+ });
13664
+ poolSlots.push(slot);
13665
+ setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
13666
+ }
13667
+ if (slotsNeeded === 1) {
13668
+ poolSlot = poolSlots[0];
13669
+ sharedWorkspacePath = poolSlot.path;
13670
+ } else {
13671
+ availablePoolSlots.push(...poolSlots);
13672
+ }
13673
+ } else if (workspaceTemplate) {
13674
+ setupLog(`creating shared workspace from template: ${workspaceTemplate}`);
13110
13675
  try {
13111
13676
  sharedWorkspacePath = await createTempWorkspace(workspaceTemplate, evalRunId, "shared");
13677
+ setupLog(`shared workspace created at: ${sharedWorkspacePath}`);
13112
13678
  } catch (error) {
13113
13679
  const message = error instanceof Error ? error.message : String(error);
13114
13680
  throw new Error(`Failed to create shared workspace: ${message}`);
13115
13681
  }
13682
+ } else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
13683
+ sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
13684
+ await mkdir13(sharedWorkspacePath, { recursive: true });
13685
+ setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
13686
+ }
13687
+ try {
13116
13688
  if (suiteWorkspaceFile && sharedWorkspacePath) {
13117
- const copiedWorkspaceFile = path37.join(sharedWorkspacePath, path37.basename(suiteWorkspaceFile));
13689
+ const copiedWorkspaceFile = path39.join(sharedWorkspacePath, path39.basename(suiteWorkspaceFile));
13118
13690
  try {
13119
13691
  await stat7(copiedWorkspaceFile);
13120
13692
  suiteWorkspaceFile = copiedWorkspaceFile;
13121
13693
  } catch {
13122
13694
  }
13123
13695
  }
13124
- } else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
13125
- sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
13126
- await mkdir12(sharedWorkspacePath, { recursive: true });
13127
- }
13128
- const repoManager = suiteWorkspace?.repos?.length ? new RepoManager() : void 0;
13129
- if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
13130
- try {
13131
- await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
13132
- } catch (error) {
13133
- const message = error instanceof Error ? error.message : String(error);
13134
- if (sharedWorkspacePath) {
13135
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13136
- });
13137
- }
13138
- throw new Error(`Failed to materialize repos: ${message}`);
13139
- }
13140
- }
13141
- if (sharedWorkspacePath && suiteWorkspace?.before_all) {
13142
- const scriptContext = {
13143
- workspacePath: sharedWorkspacePath,
13144
- testId: "__before_all__",
13145
- evalRunId,
13146
- evalDir
13147
- };
13148
- try {
13149
- beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
13150
- } catch (error) {
13151
- const message = error instanceof Error ? error.message : String(error);
13152
- if (sharedWorkspacePath) {
13153
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13154
- });
13155
- }
13156
- throw new Error(`before_all script failed: ${message}`);
13157
- }
13158
- }
13159
- if (sharedWorkspacePath) {
13160
- try {
13161
- sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
13162
- } catch {
13163
- }
13164
- }
13165
- let nextWorkerId = 1;
13166
- const workerIdByEvalId = /* @__PURE__ */ new Map();
13167
- let beforeAllOutputAttached = false;
13168
- let cumulativeBudgetCost = 0;
13169
- let budgetExhausted = false;
13170
- let failOnErrorTriggered = false;
13171
- const promises = filteredEvalCases.map(
13172
- (evalCase) => limit(async () => {
13173
- const workerId = nextWorkerId++;
13174
- workerIdByEvalId.set(evalCase.id, workerId);
13175
- if (totalBudgetUsd !== void 0 && budgetExhausted) {
13176
- const budgetResult = {
13177
- timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
13178
- testId: evalCase.id,
13179
- dataset: evalCase.dataset,
13180
- score: 0,
13181
- hits: [],
13182
- misses: [],
13183
- answer: "",
13184
- target: target.name,
13185
- error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
13186
- budgetExceeded: true,
13187
- executionStatus: "execution_error",
13188
- failureStage: "setup",
13189
- failureReasonCode: "budget_exceeded",
13190
- executionError: {
13191
- message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
13192
- stage: "setup"
13193
- }
13194
- };
13195
- if (onProgress) {
13196
- await onProgress({
13197
- workerId,
13198
- testId: evalCase.id,
13199
- status: "failed",
13200
- completedAt: Date.now(),
13201
- error: budgetResult.error
13696
+ const repoManager = suiteWorkspace?.repos?.length && !usePool && !userWorkspacePath ? new RepoManager(void 0, verbose) : void 0;
13697
+ if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
13698
+ setupLog(
13699
+ `materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
13700
+ );
13701
+ try {
13702
+ await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
13703
+ setupLog("shared repo materialization complete");
13704
+ } catch (error) {
13705
+ const message = error instanceof Error ? error.message : String(error);
13706
+ if (sharedWorkspacePath && !userWorkspacePath) {
13707
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13202
13708
  });
13203
13709
  }
13204
- if (onResult) {
13205
- await onResult(budgetResult);
13206
- }
13207
- return budgetResult;
13710
+ throw new Error(`Failed to materialize repos: ${message}`);
13208
13711
  }
13209
- if (failOnError === true && failOnErrorTriggered) {
13210
- const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
13211
- const haltResult = {
13212
- timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
13213
- testId: evalCase.id,
13214
- dataset: evalCase.dataset,
13215
- score: 0,
13216
- hits: [],
13217
- misses: [],
13218
- answer: "",
13219
- target: target.name,
13220
- error: errorMsg,
13221
- executionStatus: "execution_error",
13222
- failureStage: "setup",
13223
- failureReasonCode: "error_threshold_exceeded",
13224
- executionError: { message: errorMsg, stage: "setup" }
13225
- };
13226
- if (onProgress) {
13227
- await onProgress({
13228
- workerId,
13229
- testId: evalCase.id,
13230
- status: "failed",
13231
- completedAt: Date.now(),
13232
- error: haltResult.error
13712
+ }
13713
+ if (sharedWorkspacePath && suiteWorkspace?.before_all) {
13714
+ const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
13715
+ setupLog(
13716
+ `running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
13717
+ );
13718
+ const scriptContext = {
13719
+ workspacePath: sharedWorkspacePath,
13720
+ testId: "__before_all__",
13721
+ evalRunId,
13722
+ evalDir
13723
+ };
13724
+ try {
13725
+ beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
13726
+ setupLog("shared before_all completed");
13727
+ } catch (error) {
13728
+ const message = error instanceof Error ? error.message : String(error);
13729
+ if (sharedWorkspacePath && !userWorkspacePath) {
13730
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13233
13731
  });
13234
13732
  }
13235
- if (onResult) {
13236
- await onResult(haltResult);
13237
- }
13238
- return haltResult;
13733
+ throw new Error(`before_all script failed: ${message}`);
13239
13734
  }
13240
- if (onProgress) {
13241
- await onProgress({
13242
- workerId,
13243
- testId: evalCase.id,
13244
- status: "running",
13245
- startedAt: Date.now()
13246
- });
13247
- }
13248
- try {
13249
- const judgeProvider = await resolveJudgeProvider(target);
13250
- const runCaseOptions = {
13251
- evalCase,
13252
- provider: primaryProvider,
13253
- target,
13254
- evaluators: evaluatorRegistry,
13255
- maxRetries,
13256
- agentTimeoutMs,
13257
- cache,
13258
- useCache,
13259
- now,
13260
- judgeProvider,
13261
- targetResolver,
13262
- availableTargets,
13735
+ }
13736
+ if (availablePoolSlots.length > 0 && suiteWorkspace?.before_all) {
13737
+ for (const slot of availablePoolSlots) {
13738
+ setupLog(`running before_all on pool slot ${slot.index}`);
13739
+ const scriptContext = {
13740
+ workspacePath: slot.path,
13741
+ testId: "__before_all__",
13263
13742
  evalRunId,
13264
- keepWorkspaces,
13265
- cleanupWorkspaces,
13266
- sharedWorkspacePath,
13267
- sharedBaselineCommit,
13268
- suiteWorkspaceFile,
13269
- streamCallbacks,
13270
- typeRegistry,
13271
- repoManager,
13272
13743
  evalDir
13273
13744
  };
13274
- let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
13275
- if (totalBudgetUsd !== void 0) {
13276
- let caseCost;
13277
- if (result.trials && result.trials.length > 0) {
13278
- const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
13279
- if (trialCostSum > 0) {
13280
- caseCost = trialCostSum;
13745
+ try {
13746
+ const output = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
13747
+ if (!beforeAllOutput) beforeAllOutput = output;
13748
+ setupLog(`before_all completed on pool slot ${slot.index}`);
13749
+ } catch (error) {
13750
+ const message = error instanceof Error ? error.message : String(error);
13751
+ throw new Error(`before_all script failed on pool slot ${slot.index}: ${message}`);
13752
+ }
13753
+ }
13754
+ }
13755
+ if (sharedWorkspacePath) {
13756
+ try {
13757
+ sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
13758
+ setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
13759
+ } catch {
13760
+ setupLog("shared baseline initialization skipped (non-fatal)");
13761
+ }
13762
+ }
13763
+ if (availablePoolSlots.length > 0) {
13764
+ for (const slot of availablePoolSlots) {
13765
+ try {
13766
+ const baseline = await initializeBaseline(slot.path);
13767
+ poolSlotBaselines.set(slot.path, baseline);
13768
+ setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
13769
+ } catch {
13770
+ setupLog(`pool slot ${slot.index} baseline initialization skipped (non-fatal)`);
13771
+ }
13772
+ }
13773
+ }
13774
+ let nextWorkerId = 1;
13775
+ const workerIdByEvalId = /* @__PURE__ */ new Map();
13776
+ let beforeAllOutputAttached = false;
13777
+ let cumulativeBudgetCost = 0;
13778
+ let budgetExhausted = false;
13779
+ let failOnErrorTriggered = false;
13780
+ const promises = filteredEvalCases.map(
13781
+ (evalCase) => limit(async () => {
13782
+ const workerId = nextWorkerId++;
13783
+ workerIdByEvalId.set(evalCase.id, workerId);
13784
+ if (totalBudgetUsd !== void 0 && budgetExhausted) {
13785
+ const budgetResult = {
13786
+ timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
13787
+ testId: evalCase.id,
13788
+ dataset: evalCase.dataset,
13789
+ score: 0,
13790
+ hits: [],
13791
+ misses: [],
13792
+ answer: "",
13793
+ target: target.name,
13794
+ error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
13795
+ budgetExceeded: true,
13796
+ executionStatus: "execution_error",
13797
+ failureStage: "setup",
13798
+ failureReasonCode: "budget_exceeded",
13799
+ executionError: {
13800
+ message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
13801
+ stage: "setup"
13281
13802
  }
13282
- } else {
13283
- caseCost = result.costUsd;
13803
+ };
13804
+ if (onProgress) {
13805
+ await onProgress({
13806
+ workerId,
13807
+ testId: evalCase.id,
13808
+ status: "failed",
13809
+ completedAt: Date.now(),
13810
+ error: budgetResult.error
13811
+ });
13284
13812
  }
13285
- if (caseCost !== void 0) {
13286
- cumulativeBudgetCost += caseCost;
13287
- if (cumulativeBudgetCost >= totalBudgetUsd) {
13288
- budgetExhausted = true;
13289
- }
13813
+ if (onResult) {
13814
+ await onResult(budgetResult);
13290
13815
  }
13816
+ return budgetResult;
13291
13817
  }
13292
- if (failOnError === true && result.executionStatus === "execution_error") {
13293
- failOnErrorTriggered = true;
13294
- }
13295
- if (beforeAllOutput && !beforeAllOutputAttached) {
13296
- result = { ...result, beforeAllOutput };
13297
- beforeAllOutputAttached = true;
13818
+ if (failOnError === true && failOnErrorTriggered) {
13819
+ const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
13820
+ const haltResult = {
13821
+ timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
13822
+ testId: evalCase.id,
13823
+ dataset: evalCase.dataset,
13824
+ score: 0,
13825
+ hits: [],
13826
+ misses: [],
13827
+ answer: "",
13828
+ target: target.name,
13829
+ error: errorMsg,
13830
+ executionStatus: "execution_error",
13831
+ failureStage: "setup",
13832
+ failureReasonCode: "error_threshold_exceeded",
13833
+ executionError: { message: errorMsg, stage: "setup" }
13834
+ };
13835
+ if (onProgress) {
13836
+ await onProgress({
13837
+ workerId,
13838
+ testId: evalCase.id,
13839
+ status: "failed",
13840
+ completedAt: Date.now(),
13841
+ error: haltResult.error
13842
+ });
13843
+ }
13844
+ if (onResult) {
13845
+ await onResult(haltResult);
13846
+ }
13847
+ return haltResult;
13298
13848
  }
13299
13849
  if (onProgress) {
13300
13850
  await onProgress({
13301
13851
  workerId,
13302
13852
  testId: evalCase.id,
13303
- status: result.error ? "failed" : "completed",
13304
- startedAt: 0,
13305
- // Not used for completed status
13306
- completedAt: Date.now(),
13307
- error: result.error
13853
+ status: "running",
13854
+ startedAt: Date.now()
13308
13855
  });
13309
13856
  }
13310
- if (onResult) {
13311
- await onResult(result);
13857
+ const testPoolSlot = availablePoolSlots.length > 0 ? availablePoolSlots.pop() : void 0;
13858
+ const testWorkspacePath = testPoolSlot?.path ?? sharedWorkspacePath;
13859
+ const testBaselineCommit = testPoolSlot ? poolSlotBaselines.get(testPoolSlot.path) : sharedBaselineCommit;
13860
+ try {
13861
+ const judgeProvider = await resolveJudgeProvider(target);
13862
+ const runCaseOptions = {
13863
+ evalCase,
13864
+ provider: primaryProvider,
13865
+ target,
13866
+ evaluators: evaluatorRegistry,
13867
+ maxRetries,
13868
+ agentTimeoutMs,
13869
+ cache,
13870
+ useCache,
13871
+ now,
13872
+ judgeProvider,
13873
+ targetResolver,
13874
+ availableTargets,
13875
+ evalRunId,
13876
+ keepWorkspaces,
13877
+ cleanupWorkspaces,
13878
+ sharedWorkspacePath: testWorkspacePath,
13879
+ sharedBaselineCommit: testBaselineCommit,
13880
+ suiteWorkspaceFile,
13881
+ streamCallbacks,
13882
+ typeRegistry,
13883
+ repoManager,
13884
+ evalDir
13885
+ };
13886
+ let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
13887
+ if (totalBudgetUsd !== void 0) {
13888
+ let caseCost;
13889
+ if (result.trials && result.trials.length > 0) {
13890
+ const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
13891
+ if (trialCostSum > 0) {
13892
+ caseCost = trialCostSum;
13893
+ }
13894
+ } else {
13895
+ caseCost = result.costUsd;
13896
+ }
13897
+ if (caseCost !== void 0) {
13898
+ cumulativeBudgetCost += caseCost;
13899
+ if (cumulativeBudgetCost >= totalBudgetUsd) {
13900
+ budgetExhausted = true;
13901
+ }
13902
+ }
13903
+ }
13904
+ if (failOnError === true && result.executionStatus === "execution_error") {
13905
+ failOnErrorTriggered = true;
13906
+ }
13907
+ if (beforeAllOutput && !beforeAllOutputAttached) {
13908
+ result = { ...result, beforeAllOutput };
13909
+ beforeAllOutputAttached = true;
13910
+ }
13911
+ if (onProgress) {
13912
+ await onProgress({
13913
+ workerId,
13914
+ testId: evalCase.id,
13915
+ status: result.error ? "failed" : "completed",
13916
+ startedAt: 0,
13917
+ // Not used for completed status
13918
+ completedAt: Date.now(),
13919
+ error: result.error
13920
+ });
13921
+ }
13922
+ if (onResult) {
13923
+ await onResult(result);
13924
+ }
13925
+ return result;
13926
+ } catch (error) {
13927
+ if (onProgress) {
13928
+ await onProgress({
13929
+ workerId,
13930
+ testId: evalCase.id,
13931
+ status: "failed",
13932
+ completedAt: Date.now(),
13933
+ error: error instanceof Error ? error.message : String(error)
13934
+ });
13935
+ }
13936
+ throw error;
13937
+ } finally {
13938
+ if (testPoolSlot) {
13939
+ availablePoolSlots.push(testPoolSlot);
13940
+ }
13312
13941
  }
13313
- return result;
13314
- } catch (error) {
13315
- if (onProgress) {
13316
- await onProgress({
13317
- workerId,
13318
- testId: evalCase.id,
13319
- status: "failed",
13320
- completedAt: Date.now(),
13321
- error: error instanceof Error ? error.message : String(error)
13322
- });
13942
+ })
13943
+ );
13944
+ const settled = await Promise.allSettled(promises);
13945
+ const results = [];
13946
+ for (let i = 0; i < settled.length; i++) {
13947
+ const outcome = settled[i];
13948
+ if (outcome.status === "fulfilled") {
13949
+ results.push(outcome.value);
13950
+ } else {
13951
+ const evalCase = filteredEvalCases[i];
13952
+ const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
13953
+ const promptInputs = await buildPromptInputs(evalCase, formattingMode);
13954
+ const errorResult = buildErrorResult(
13955
+ evalCase,
13956
+ target.name,
13957
+ (now ?? (() => /* @__PURE__ */ new Date()))(),
13958
+ outcome.reason,
13959
+ promptInputs,
13960
+ primaryProvider,
13961
+ "agent",
13962
+ "provider_error"
13963
+ );
13964
+ results.push(errorResult);
13965
+ if (onResult) {
13966
+ await onResult(errorResult);
13323
13967
  }
13324
- throw error;
13325
13968
  }
13326
- })
13327
- );
13328
- const settled = await Promise.allSettled(promises);
13329
- const results = [];
13330
- for (let i = 0; i < settled.length; i++) {
13331
- const outcome = settled[i];
13332
- if (outcome.status === "fulfilled") {
13333
- results.push(outcome.value);
13334
- } else {
13335
- const evalCase = filteredEvalCases[i];
13336
- const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
13337
- const promptInputs = await buildPromptInputs(evalCase, formattingMode);
13338
- const errorResult = buildErrorResult(
13339
- evalCase,
13340
- target.name,
13341
- (now ?? (() => /* @__PURE__ */ new Date()))(),
13342
- outcome.reason,
13343
- promptInputs,
13344
- primaryProvider,
13345
- "agent",
13346
- "provider_error"
13347
- );
13348
- results.push(errorResult);
13349
- if (onResult) {
13350
- await onResult(errorResult);
13969
+ }
13970
+ const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
13971
+ if (afterAllWorkspaces.length > 0 && suiteWorkspace?.after_all) {
13972
+ for (const wsPath of afterAllWorkspaces) {
13973
+ const scriptContext = {
13974
+ workspacePath: wsPath,
13975
+ testId: "__after_all__",
13976
+ evalRunId,
13977
+ evalDir
13978
+ };
13979
+ try {
13980
+ const afterAllOutput = await executeWorkspaceScript(
13981
+ suiteWorkspace.after_all,
13982
+ scriptContext,
13983
+ "warn"
13984
+ );
13985
+ if (afterAllOutput && results.length > 0 && wsPath === afterAllWorkspaces[0]) {
13986
+ results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
13987
+ }
13988
+ } catch {
13989
+ }
13351
13990
  }
13352
13991
  }
13353
- }
13354
- if (sharedWorkspacePath && suiteWorkspace?.after_all) {
13355
- const scriptContext = {
13356
- workspacePath: sharedWorkspacePath,
13357
- testId: "__after_all__",
13358
- evalRunId,
13359
- evalDir
13360
- };
13361
- try {
13362
- const afterAllOutput = await executeWorkspaceScript(
13363
- suiteWorkspace.after_all,
13364
- scriptContext,
13365
- "warn"
13366
- );
13367
- if (afterAllOutput && results.length > 0) {
13368
- results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
13992
+ if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !userWorkspacePath) {
13993
+ const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
13994
+ if (cleanupWorkspaces) {
13995
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13996
+ });
13997
+ } else if (!hasFailure && !keepWorkspaces) {
13998
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13999
+ });
13369
14000
  }
13370
- } catch {
13371
14001
  }
13372
- }
13373
- if (sharedWorkspacePath) {
13374
- const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
13375
14002
  if (cleanupWorkspaces) {
13376
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
13377
- });
13378
- } else if (!hasFailure && !keepWorkspaces) {
13379
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
14003
+ await cleanupEvalWorkspaces(evalRunId).catch(() => {
13380
14004
  });
13381
14005
  }
14006
+ return results;
14007
+ } finally {
14008
+ if (poolManager) {
14009
+ if (poolSlot) {
14010
+ await poolManager.releaseSlot(poolSlot);
14011
+ }
14012
+ for (const slot of poolSlots) {
14013
+ if (slot !== poolSlot) {
14014
+ await poolManager.releaseSlot(slot).catch(() => {
14015
+ });
14016
+ }
14017
+ }
14018
+ }
13382
14019
  }
13383
- if (cleanupWorkspaces) {
13384
- await cleanupEvalWorkspaces(evalRunId).catch(() => {
13385
- });
13386
- }
13387
- return results;
13388
14020
  }
13389
14021
  async function runBatchEvaluation(options) {
13390
14022
  const {
@@ -13563,6 +14195,7 @@ async function runEvalCase(options) {
13563
14195
  repoManager,
13564
14196
  evalDir
13565
14197
  } = options;
14198
+ const setupDebug = process.env.AGENTV_SETUP_DEBUG === "1";
13566
14199
  const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
13567
14200
  const promptInputs = await buildPromptInputs(evalCase, formattingMode);
13568
14201
  const typeRegistry = providedTypeRegistry ?? createBuiltinRegistry();
@@ -13600,7 +14233,7 @@ async function runEvalCase(options) {
13600
14233
  );
13601
14234
  }
13602
14235
  if (caseWorkspaceFile && workspacePath) {
13603
- const copiedFile = path37.join(workspacePath, path37.basename(caseWorkspaceFile));
14236
+ const copiedFile = path39.join(workspacePath, path39.basename(caseWorkspaceFile));
13604
14237
  try {
13605
14238
  await stat7(copiedFile);
13606
14239
  caseWorkspaceFile = copiedFile;
@@ -13610,12 +14243,20 @@ async function runEvalCase(options) {
13610
14243
  }
13611
14244
  if (!workspacePath && (evalCase.workspace?.before_all || evalCase.workspace?.repos?.length) && evalRunId) {
13612
14245
  workspacePath = getWorkspacePath(evalRunId, evalCase.id);
13613
- await mkdir12(workspacePath, { recursive: true });
14246
+ await mkdir13(workspacePath, { recursive: true });
13614
14247
  }
13615
14248
  if (evalCase.workspace?.repos?.length && workspacePath) {
13616
- const perCaseRepoManager = new RepoManager();
14249
+ const perCaseRepoManager = new RepoManager(void 0, setupDebug);
13617
14250
  try {
14251
+ if (setupDebug) {
14252
+ console.log(
14253
+ `[setup] test=${evalCase.id} materializing ${evalCase.workspace.repos.length} per-test repo(s) into ${workspacePath}`
14254
+ );
14255
+ }
13618
14256
  await perCaseRepoManager.materializeAll(evalCase.workspace.repos, workspacePath);
14257
+ if (setupDebug) {
14258
+ console.log(`[setup] test=${evalCase.id} per-test repo materialization complete`);
14259
+ }
13619
14260
  } catch (error) {
13620
14261
  const message = error instanceof Error ? error.message : String(error);
13621
14262
  return buildErrorResult(
@@ -13631,6 +14272,12 @@ async function runEvalCase(options) {
13631
14272
  }
13632
14273
  }
13633
14274
  if (workspacePath && evalCase.workspace?.before_all) {
14275
+ const beforeAllCommand = (evalCase.workspace.before_all.command ?? evalCase.workspace.before_all.script ?? []).join(" ");
14276
+ if (setupDebug) {
14277
+ console.log(
14278
+ `[setup] test=${evalCase.id} running before_all in cwd=${evalCase.workspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
14279
+ );
14280
+ }
13634
14281
  const scriptContext = {
13635
14282
  workspacePath,
13636
14283
  testId: evalCase.id,
@@ -13644,6 +14291,9 @@ async function runEvalCase(options) {
13644
14291
  evalCase.workspace.before_all,
13645
14292
  scriptContext
13646
14293
  );
14294
+ if (setupDebug) {
14295
+ console.log(`[setup] test=${evalCase.id} before_all completed`);
14296
+ }
13647
14297
  } catch (error) {
13648
14298
  const message = error instanceof Error ? error.message : String(error);
13649
14299
  if (forceCleanup && workspacePath) {
@@ -14193,7 +14843,7 @@ async function runEvaluatorList(options) {
14193
14843
  fileChanges,
14194
14844
  workspacePath
14195
14845
  };
14196
- const evalFileDir = evalCase.guideline_paths[0] ? path37.dirname(evalCase.guideline_paths[0]) : process.cwd();
14846
+ const evalFileDir = evalCase.guideline_paths[0] ? path39.dirname(evalCase.guideline_paths[0]) : process.cwd();
14197
14847
  const dispatchContext = {
14198
14848
  judgeProvider,
14199
14849
  targetResolver,
@@ -14427,7 +15077,7 @@ function extractProviderError(response) {
14427
15077
  return trimmed.length > 0 ? trimmed : void 0;
14428
15078
  }
14429
15079
  function createCacheKey(provider, target, evalCase, promptInputs) {
14430
- const hash = createHash2("sha256");
15080
+ const hash = createHash3("sha256");
14431
15081
  hash.update(provider.id);
14432
15082
  hash.update(target.name);
14433
15083
  hash.update(evalCase.id);
@@ -14495,8 +15145,8 @@ function computeWeightedMean(entries) {
14495
15145
  }
14496
15146
 
14497
15147
  // src/evaluation/evaluate.ts
14498
- import { existsSync as existsSync3 } from "node:fs";
14499
- import path38 from "node:path";
15148
+ import { existsSync as existsSync4 } from "node:fs";
15149
+ import path40 from "node:path";
14500
15150
  async function evaluate(config) {
14501
15151
  const startTime = Date.now();
14502
15152
  if (config.tests && config.specFile) {
@@ -14518,13 +15168,13 @@ async function evaluate(config) {
14518
15168
  let evalCases;
14519
15169
  let testFilePath;
14520
15170
  if (config.specFile) {
14521
- testFilePath = path38.resolve(config.specFile);
15171
+ testFilePath = path40.resolve(config.specFile);
14522
15172
  evalCases = await loadTests(testFilePath, repoRoot, {
14523
15173
  verbose: config.verbose,
14524
15174
  filter: config.filter
14525
15175
  });
14526
15176
  } else {
14527
- testFilePath = path38.join(process.cwd(), "__programmatic__.yaml");
15177
+ testFilePath = path40.join(process.cwd(), "__programmatic__.yaml");
14528
15178
  evalCases = (config.tests ?? []).map((test) => {
14529
15179
  const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
14530
15180
  const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
@@ -14610,11 +15260,11 @@ function computeSummary(results, durationMs) {
14610
15260
  var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
14611
15261
  async function discoverDefaultTarget(repoRoot) {
14612
15262
  const cwd = process.cwd();
14613
- const chain = buildDirectoryChain(path38.join(cwd, "_placeholder"), repoRoot);
15263
+ const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
14614
15264
  for (const dir of chain) {
14615
15265
  for (const candidate of TARGET_FILE_CANDIDATES) {
14616
- const targetsPath = path38.join(dir, candidate);
14617
- if (!existsSync3(targetsPath)) continue;
15266
+ const targetsPath = path40.join(dir, candidate);
15267
+ if (!existsSync4(targetsPath)) continue;
14618
15268
  try {
14619
15269
  const definitions = await readTargetDefinitions(targetsPath);
14620
15270
  const defaultTarget = definitions.find((d) => d.name === "default");
@@ -14628,11 +15278,11 @@ async function discoverDefaultTarget(repoRoot) {
14628
15278
  async function loadEnvHierarchy(repoRoot) {
14629
15279
  const { readFileSync: readFileSync2 } = await import("node:fs");
14630
15280
  const cwd = process.cwd();
14631
- const chain = buildDirectoryChain(path38.join(cwd, "_placeholder"), repoRoot);
15281
+ const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
14632
15282
  const envFiles = [];
14633
15283
  for (const dir of chain) {
14634
- const envPath = path38.join(dir, ".env");
14635
- if (existsSync3(envPath)) envFiles.push(envPath);
15284
+ const envPath = path40.join(dir, ".env");
15285
+ if (existsSync4(envPath)) envFiles.push(envPath);
14636
15286
  }
14637
15287
  for (let i = envFiles.length - 1; i >= 0; i--) {
14638
15288
  try {
@@ -14710,12 +15360,12 @@ var CONFIG_FILE_NAMES = [
14710
15360
  ".agentv/config.js"
14711
15361
  ];
14712
15362
  async function loadTsConfig(projectRoot) {
14713
- const { existsSync: existsSync4 } = await import("node:fs");
15363
+ const { existsSync: existsSync5 } = await import("node:fs");
14714
15364
  const { pathToFileURL } = await import("node:url");
14715
15365
  const { join: join2 } = await import("node:path");
14716
15366
  for (const fileName of CONFIG_FILE_NAMES) {
14717
15367
  const filePath = join2(projectRoot, fileName);
14718
- if (!existsSync4(filePath)) {
15368
+ if (!existsSync5(filePath)) {
14719
15369
  continue;
14720
15370
  }
14721
15371
  try {
@@ -14812,8 +15462,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
14812
15462
  }
14813
15463
 
14814
15464
  // src/evaluation/cache/response-cache.ts
14815
- import { mkdir as mkdir13, readFile as readFile11, writeFile as writeFile8 } from "node:fs/promises";
14816
- import path39 from "node:path";
15465
+ import { mkdir as mkdir14, readFile as readFile12, writeFile as writeFile9 } from "node:fs/promises";
15466
+ import path41 from "node:path";
14817
15467
  var DEFAULT_CACHE_PATH = ".agentv/cache";
14818
15468
  var ResponseCache = class {
14819
15469
  cachePath;
@@ -14823,7 +15473,7 @@ var ResponseCache = class {
14823
15473
  async get(key) {
14824
15474
  const filePath = this.keyToPath(key);
14825
15475
  try {
14826
- const data = await readFile11(filePath, "utf8");
15476
+ const data = await readFile12(filePath, "utf8");
14827
15477
  return JSON.parse(data);
14828
15478
  } catch {
14829
15479
  return void 0;
@@ -14831,13 +15481,13 @@ var ResponseCache = class {
14831
15481
  }
14832
15482
  async set(key, value) {
14833
15483
  const filePath = this.keyToPath(key);
14834
- const dir = path39.dirname(filePath);
14835
- await mkdir13(dir, { recursive: true });
14836
- await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
15484
+ const dir = path41.dirname(filePath);
15485
+ await mkdir14(dir, { recursive: true });
15486
+ await writeFile9(filePath, JSON.stringify(value, null, 2), "utf8");
14837
15487
  }
14838
15488
  keyToPath(key) {
14839
15489
  const prefix = key.slice(0, 2);
14840
- return path39.join(this.cachePath, prefix, `${key}.json`);
15490
+ return path41.join(this.cachePath, prefix, `${key}.json`);
14841
15491
  }
14842
15492
  };
14843
15493
  function shouldEnableCache(params) {
@@ -15319,6 +15969,7 @@ export {
15319
15969
  TokenUsageEvaluator,
15320
15970
  ToolTrajectoryEvaluator,
15321
15971
  WorkspaceCreationError,
15972
+ WorkspacePoolManager,
15322
15973
  assembleLlmJudgePrompt,
15323
15974
  avgToolDurationMs,
15324
15975
  buildDirectoryChain,
@@ -15333,6 +15984,7 @@ export {
15333
15984
  cleanupEvalWorkspaces,
15334
15985
  cleanupWorkspace,
15335
15986
  computeTraceSummary,
15987
+ computeWorkspaceFingerprint,
15336
15988
  consumeClaudeLogEntries,
15337
15989
  consumeCodexLogEntries,
15338
15990
  consumeCopilotCliLogEntries,
@@ -15364,8 +16016,14 @@ export {
15364
16016
  findGitRoot,
15365
16017
  freeformEvaluationSchema,
15366
16018
  generateRubrics,
16019
+ getAgentvHome,
16020
+ getGitCacheRoot,
15367
16021
  getHitCount,
16022
+ getSubagentsRoot,
16023
+ getTraceStateRoot,
15368
16024
  getWorkspacePath,
16025
+ getWorkspacePoolRoot,
16026
+ getWorkspacesRoot,
15369
16027
  initializeBaseline,
15370
16028
  isEvaluatorKind,
15371
16029
  isGuidelineFile,