agentv 2.14.3 → 2.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -148,7 +148,7 @@ var require_dist = __commonJS({
148
148
  }
149
149
  });
150
150
 
151
- // ../../packages/core/dist/chunk-N55K52OO.js
151
+ // ../../packages/core/dist/chunk-E6AJPAXM.js
152
152
  import { constants } from "node:fs";
153
153
  import { access, readFile } from "node:fs/promises";
154
154
  import path from "node:path";
@@ -632,8 +632,8 @@ function getErrorMap() {
632
632
 
633
633
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
634
634
  var makeIssue = (params) => {
635
- const { data, path: path41, errorMaps, issueData } = params;
636
- const fullPath = [...path41, ...issueData.path || []];
635
+ const { data, path: path42, errorMaps, issueData } = params;
636
+ const fullPath = [...path42, ...issueData.path || []];
637
637
  const fullIssue = {
638
638
  ...issueData,
639
639
  path: fullPath
@@ -749,11 +749,11 @@ var errorUtil;
749
749
 
750
750
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
751
751
  var ParseInputLazyPath = class {
752
- constructor(parent, value, path41, key) {
752
+ constructor(parent, value, path42, key) {
753
753
  this._cachedPath = [];
754
754
  this.parent = parent;
755
755
  this.data = value;
756
- this._path = path41;
756
+ this._path = path42;
757
757
  this._key = key;
758
758
  }
759
759
  get path() {
@@ -4195,7 +4195,7 @@ var coerce = {
4195
4195
  };
4196
4196
  var NEVER = INVALID;
4197
4197
 
4198
- // ../../packages/core/dist/chunk-N55K52OO.js
4198
+ // ../../packages/core/dist/chunk-E6AJPAXM.js
4199
4199
  var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
4200
4200
  var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
4201
4201
  var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
@@ -6661,10 +6661,10 @@ function assignProp(target, prop, value) {
6661
6661
  configurable: true
6662
6662
  });
6663
6663
  }
6664
- function getElementAtPath(obj, path41) {
6665
- if (!path41)
6664
+ function getElementAtPath(obj, path42) {
6665
+ if (!path42)
6666
6666
  return obj;
6667
- return path41.reduce((acc, key) => acc?.[key], obj);
6667
+ return path42.reduce((acc, key) => acc?.[key], obj);
6668
6668
  }
6669
6669
  function promiseAllObject(promisesObj) {
6670
6670
  const keys = Object.keys(promisesObj);
@@ -6984,11 +6984,11 @@ function aborted(x, startIndex = 0) {
6984
6984
  }
6985
6985
  return false;
6986
6986
  }
6987
- function prefixIssues(path41, issues) {
6987
+ function prefixIssues(path42, issues) {
6988
6988
  return issues.map((iss) => {
6989
6989
  var _a17;
6990
6990
  (_a17 = iss).path ?? (_a17.path = []);
6991
- iss.path.unshift(path41);
6991
+ iss.path.unshift(path42);
6992
6992
  return iss;
6993
6993
  });
6994
6994
  }
@@ -7125,7 +7125,7 @@ function treeifyError(error40, _mapper) {
7125
7125
  return issue2.message;
7126
7126
  };
7127
7127
  const result = { errors: [] };
7128
- const processError = (error41, path41 = []) => {
7128
+ const processError = (error41, path42 = []) => {
7129
7129
  var _a17, _b8;
7130
7130
  for (const issue2 of error41.issues) {
7131
7131
  if (issue2.code === "invalid_union" && issue2.errors.length) {
@@ -7135,7 +7135,7 @@ function treeifyError(error40, _mapper) {
7135
7135
  } else if (issue2.code === "invalid_element") {
7136
7136
  processError({ issues: issue2.issues }, issue2.path);
7137
7137
  } else {
7138
- const fullpath = [...path41, ...issue2.path];
7138
+ const fullpath = [...path42, ...issue2.path];
7139
7139
  if (fullpath.length === 0) {
7140
7140
  result.errors.push(mapper(issue2));
7141
7141
  continue;
@@ -7165,9 +7165,9 @@ function treeifyError(error40, _mapper) {
7165
7165
  processError(error40);
7166
7166
  return result;
7167
7167
  }
7168
- function toDotPath(path41) {
7168
+ function toDotPath(path42) {
7169
7169
  const segs = [];
7170
- for (const seg of path41) {
7170
+ for (const seg of path42) {
7171
7171
  if (typeof seg === "number")
7172
7172
  segs.push(`[${seg}]`);
7173
7173
  else if (typeof seg === "symbol")
@@ -26720,14 +26720,14 @@ function createAzure(options = {}) {
26720
26720
  description: "Azure OpenAI resource name"
26721
26721
  });
26722
26722
  const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
26723
- const url2 = ({ path: path41, modelId }) => {
26723
+ const url2 = ({ path: path42, modelId }) => {
26724
26724
  var _a24;
26725
26725
  const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
26726
26726
  let fullUrl;
26727
26727
  if (options.useDeploymentBasedUrls) {
26728
- fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path41}`);
26728
+ fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path42}`);
26729
26729
  } else {
26730
- fullUrl = new URL(`${baseUrlPrefix}/v1${path41}`);
26730
+ fullUrl = new URL(`${baseUrlPrefix}/v1${path42}`);
26731
26731
  }
26732
26732
  fullUrl.searchParams.set("api-version", apiVersion);
26733
26733
  return fullUrl.toString();
@@ -33962,7 +33962,7 @@ import fs2 from "node:fs/promises";
33962
33962
  import path31 from "node:path";
33963
33963
  import { createHash as createHash2, randomUUID as randomUUID7 } from "node:crypto";
33964
33964
  import { mkdir as mkdir12, stat as stat7 } from "node:fs/promises";
33965
- import path38 from "node:path";
33965
+ import path39 from "node:path";
33966
33966
  import micromatch4 from "micromatch";
33967
33967
  import { readFileSync } from "node:fs";
33968
33968
  import path322 from "node:path";
@@ -33977,15 +33977,18 @@ import path35 from "node:path";
33977
33977
  import { execFile } from "node:child_process";
33978
33978
  import { createHash } from "node:crypto";
33979
33979
  import { existsSync as existsSync2 } from "node:fs";
33980
- import { mkdir as mkdir11, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
33980
+ import { cp as cp2, mkdir as mkdir11, readFile as readFile11, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
33981
33981
  import path36 from "node:path";
33982
33982
  import { promisify as promisify5 } from "node:util";
33983
- import { readdir as readdir4, stat as stat6 } from "node:fs/promises";
33983
+ import { execFile as execFile2 } from "node:child_process";
33984
33984
  import path37 from "node:path";
33985
+ import { promisify as promisify6 } from "node:util";
33986
+ import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
33987
+ import path38 from "node:path";
33985
33988
  import { existsSync as existsSync3 } from "node:fs";
33986
- import path39 from "node:path";
33987
- import { mkdir as mkdir13, readFile as readFile11, writeFile as writeFile8 } from "node:fs/promises";
33988
33989
  import path40 from "node:path";
33990
+ import { mkdir as mkdir13, readFile as readFile12, writeFile as writeFile8 } from "node:fs/promises";
33991
+ import path41 from "node:path";
33989
33992
  function computeTraceSummary(messages) {
33990
33993
  const toolCallCounts = {};
33991
33994
  const toolDurations = {};
@@ -34546,6 +34549,17 @@ function parseExecutionDefaults(raw, configPath) {
34546
34549
  } else if (otelFile !== void 0) {
34547
34550
  logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
34548
34551
  }
34552
+ if (typeof obj.pool_workspaces === "boolean") {
34553
+ result.pool_workspaces = obj.pool_workspaces;
34554
+ } else if (obj.pool_workspaces !== void 0) {
34555
+ logWarning(`Invalid execution.pool_workspaces in ${configPath}, expected boolean`);
34556
+ }
34557
+ const poolSlots = obj.pool_slots;
34558
+ if (typeof poolSlots === "number" && Number.isInteger(poolSlots) && poolSlots >= 1 && poolSlots <= 50) {
34559
+ result.pool_slots = poolSlots;
34560
+ } else if (poolSlots !== void 0) {
34561
+ logWarning(`Invalid execution.pool_slots in ${configPath}, expected integer 1-50`);
34562
+ }
34549
34563
  return Object.keys(result).length > 0 ? result : void 0;
34550
34564
  }
34551
34565
  function logWarning(message) {
@@ -35961,6 +35975,7 @@ async function processMessages(options) {
35961
35975
  repoRootPath,
35962
35976
  guidelinePatterns,
35963
35977
  guidelinePaths,
35978
+ treatFileSegmentsAsGuidelines,
35964
35979
  textParts,
35965
35980
  messageType,
35966
35981
  verbose
@@ -36008,16 +36023,20 @@ async function processMessages(options) {
36008
36023
  }
36009
36024
  try {
36010
36025
  const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
36011
- if (messageType === "input" && guidelinePatterns && guidelinePaths) {
36012
- const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
36013
- if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
36014
- guidelinePaths.push(path5.resolve(resolvedPath));
36015
- if (verbose) {
36016
- console.log(` [Guideline] Found: ${displayPath}`);
36017
- console.log(` Resolved to: ${resolvedPath}`);
36018
- }
36019
- continue;
36026
+ const classifyAsGuideline = shouldTreatAsGuideline({
36027
+ messageType,
36028
+ resolvedPath,
36029
+ repoRootPath,
36030
+ guidelinePatterns,
36031
+ treatFileSegmentsAsGuidelines
36032
+ });
36033
+ if (classifyAsGuideline && guidelinePaths) {
36034
+ guidelinePaths.push(path5.resolve(resolvedPath));
36035
+ if (verbose) {
36036
+ console.log(` [Guideline] Found: ${displayPath}`);
36037
+ console.log(` Resolved to: ${resolvedPath}`);
36020
36038
  }
36039
+ continue;
36021
36040
  }
36022
36041
  segments.push({
36023
36042
  type: "file",
@@ -36046,6 +36065,26 @@ async function processMessages(options) {
36046
36065
  }
36047
36066
  return segments;
36048
36067
  }
36068
+ function shouldTreatAsGuideline(options) {
36069
+ const {
36070
+ messageType,
36071
+ resolvedPath,
36072
+ repoRootPath,
36073
+ guidelinePatterns,
36074
+ treatFileSegmentsAsGuidelines
36075
+ } = options;
36076
+ if (messageType !== "input") {
36077
+ return false;
36078
+ }
36079
+ if (treatFileSegmentsAsGuidelines) {
36080
+ return true;
36081
+ }
36082
+ if (!guidelinePatterns || guidelinePatterns.length === 0) {
36083
+ return false;
36084
+ }
36085
+ const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
36086
+ return isGuidelineFile(relativeToRepo, guidelinePatterns);
36087
+ }
36049
36088
  function asString3(value) {
36050
36089
  return typeof value === "string" ? value : void 0;
36051
36090
  }
@@ -36380,6 +36419,8 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
36380
36419
  for (const guidelinePath of testCase.guideline_paths) {
36381
36420
  console.log(` - ${guidelinePath}`);
36382
36421
  }
36422
+ } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
36423
+ console.log(" No guidelines found (guideline_patterns not configured)");
36383
36424
  } else {
36384
36425
  console.log(" No guidelines found");
36385
36426
  }
@@ -36740,7 +36781,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
36740
36781
  } else {
36741
36782
  throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
36742
36783
  }
36743
- const suiteWorkspace = parseWorkspaceConfig(suite.workspace, evalFileDir);
36784
+ const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
36744
36785
  const suiteInputMessages = expandInputShorthand(suite.input);
36745
36786
  const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
36746
36787
  const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
@@ -36776,12 +36817,24 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
36776
36817
  }
36777
36818
  const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
36778
36819
  const skipDefaults = caseExecution?.skip_defaults === true;
36779
- const inputMessages = suiteInputMessages && !skipDefaults ? [...suiteInputMessages, ...testInputMessages] : testInputMessages;
36820
+ const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
36821
+ const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
36780
36822
  const hasExpectedMessages = expectedMessages.length > 0;
36781
36823
  const guidelinePaths = [];
36782
36824
  const inputTextParts = [];
36783
- const inputSegments = await processMessages({
36784
- messages: inputMessages,
36825
+ const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
36826
+ messages: effectiveSuiteInputMessages,
36827
+ searchRoots,
36828
+ repoRootPath,
36829
+ guidelinePatterns,
36830
+ guidelinePaths,
36831
+ treatFileSegmentsAsGuidelines: true,
36832
+ textParts: inputTextParts,
36833
+ messageType: "input",
36834
+ verbose
36835
+ }) : [];
36836
+ const testInputSegments = await processMessages({
36837
+ messages: testInputMessages,
36785
36838
  searchRoots,
36786
36839
  repoRootPath,
36787
36840
  guidelinePatterns,
@@ -36790,6 +36843,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
36790
36843
  messageType: "input",
36791
36844
  verbose
36792
36845
  });
36846
+ const inputSegments = [...suiteInputSegments, ...testInputSegments];
36793
36847
  const outputSegments = hasExpectedMessages ? await processExpectedMessages({
36794
36848
  messages: expectedMessages,
36795
36849
  searchRoots,
@@ -36837,7 +36891,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
36837
36891
  ...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
36838
36892
  ...userFilePaths
36839
36893
  ];
36840
- const caseWorkspace = parseWorkspaceConfig(evalcase.workspace, evalFileDir);
36894
+ const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
36841
36895
  const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
36842
36896
  const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
36843
36897
  const caseTargets = extractTargetsFromTestCase(evalcase);
@@ -36868,6 +36922,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
36868
36922
  for (const guidelinePath of testCase.guideline_paths) {
36869
36923
  console.log(` - ${guidelinePath}`);
36870
36924
  }
36925
+ } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
36926
+ console.log(" No guidelines found (guideline_patterns not configured)");
36871
36927
  } else {
36872
36928
  console.log(" No guidelines found");
36873
36929
  }
@@ -36956,17 +37012,58 @@ function parseRepoConfig(raw) {
36956
37012
  ...clone2 !== void 0 && { clone: clone2 }
36957
37013
  };
36958
37014
  }
36959
- function parseResetConfig(raw) {
37015
+ function parseWorkspaceHookConfig(raw, evalFileDir) {
36960
37016
  if (!isJsonObject(raw)) return void 0;
37017
+ const script = parseWorkspaceScriptConfig(raw, evalFileDir);
36961
37018
  const obj = raw;
36962
- const strategy = obj.strategy === "none" || obj.strategy === "hard" || obj.strategy === "recreate" ? obj.strategy : void 0;
36963
- const afterEach = typeof obj.after_each === "boolean" ? obj.after_each : void 0;
36964
- if (!strategy && afterEach === void 0) return void 0;
37019
+ const reset = obj.reset === "none" || obj.reset === "fast" || obj.reset === "strict" ? obj.reset : void 0;
37020
+ const clean = obj.clean === "always" || obj.clean === "on_success" || obj.clean === "on_failure" || obj.clean === "never" ? obj.clean : void 0;
37021
+ if (!script && !reset && !clean) return void 0;
36965
37022
  return {
36966
- ...strategy !== void 0 && { strategy },
36967
- ...afterEach !== void 0 && { after_each: afterEach }
37023
+ ...script ?? {},
37024
+ ...reset !== void 0 && { reset },
37025
+ ...clean !== void 0 && { clean }
36968
37026
  };
36969
37027
  }
37028
+ function parseWorkspaceHooksConfig(raw, evalFileDir) {
37029
+ if (!isJsonObject(raw)) return void 0;
37030
+ const obj = raw;
37031
+ const beforeAllTests = parseWorkspaceHookConfig(obj.before_all_tests, evalFileDir);
37032
+ const beforeEachTest = parseWorkspaceHookConfig(obj.before_each_test, evalFileDir);
37033
+ const afterEachTest = parseWorkspaceHookConfig(obj.after_each_test, evalFileDir);
37034
+ const afterAllTests = parseWorkspaceHookConfig(obj.after_all_tests, evalFileDir);
37035
+ const onReuse = parseWorkspaceHookConfig(obj.on_reuse, evalFileDir);
37036
+ const onFinish = parseWorkspaceHookConfig(obj.on_finish, evalFileDir);
37037
+ const hooks = {
37038
+ ...beforeAllTests !== void 0 && { before_all_tests: beforeAllTests },
37039
+ ...beforeEachTest !== void 0 && { before_each_test: beforeEachTest },
37040
+ ...afterEachTest !== void 0 && { after_each_test: afterEachTest },
37041
+ ...afterAllTests !== void 0 && { after_all_tests: afterAllTests },
37042
+ ...onReuse !== void 0 && { on_reuse: onReuse },
37043
+ ...onFinish !== void 0 && { on_finish: onFinish }
37044
+ };
37045
+ return Object.keys(hooks).length > 0 ? hooks : void 0;
37046
+ }
37047
+ async function resolveWorkspaceConfig(raw, evalFileDir) {
37048
+ if (typeof raw === "string") {
37049
+ const workspaceFilePath = path8.resolve(evalFileDir, raw);
37050
+ let content;
37051
+ try {
37052
+ content = await readFile7(workspaceFilePath, "utf8");
37053
+ } catch {
37054
+ throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
37055
+ }
37056
+ const parsed = parse22(content);
37057
+ if (!isJsonObject(parsed)) {
37058
+ throw new Error(
37059
+ `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
37060
+ );
37061
+ }
37062
+ const workspaceFileDir = path8.dirname(workspaceFilePath);
37063
+ return parseWorkspaceConfig(parsed, workspaceFileDir);
37064
+ }
37065
+ return parseWorkspaceConfig(raw, evalFileDir);
37066
+ }
36970
37067
  function parseWorkspaceConfig(raw, evalFileDir) {
36971
37068
  if (!isJsonObject(raw)) return void 0;
36972
37069
  const obj = raw;
@@ -36976,37 +37073,56 @@ function parseWorkspaceConfig(raw, evalFileDir) {
36976
37073
  }
36977
37074
  const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
36978
37075
  const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
36979
- const reset = parseResetConfig(obj.reset);
36980
- const beforeAll = parseWorkspaceScriptConfig(obj.before_all, evalFileDir);
36981
- const afterAll = parseWorkspaceScriptConfig(obj.after_all, evalFileDir);
36982
- const beforeEach = parseWorkspaceScriptConfig(obj.before_each, evalFileDir);
36983
- const afterEach = parseWorkspaceScriptConfig(obj.after_each, evalFileDir);
36984
- if (!template && !isolation && !repos && !reset && !beforeAll && !afterAll && !beforeEach && !afterEach)
37076
+ const hooks = parseWorkspaceHooksConfig(obj.hooks, evalFileDir);
37077
+ const mode = obj.mode === "pooled" || obj.mode === "ephemeral" || obj.mode === "static" ? obj.mode : void 0;
37078
+ const staticPath = typeof obj.static_path === "string" ? obj.static_path : void 0;
37079
+ const pool = typeof obj.pool === "boolean" ? obj.pool : void 0;
37080
+ if (!template && !isolation && !repos && !hooks && !mode && !staticPath && pool === void 0)
36985
37081
  return void 0;
36986
37082
  return {
36987
37083
  ...template !== void 0 && { template },
36988
37084
  ...isolation !== void 0 && { isolation },
36989
37085
  ...repos !== void 0 && { repos },
36990
- ...reset !== void 0 && { reset },
36991
- ...beforeAll !== void 0 && { before_all: beforeAll },
36992
- ...afterAll !== void 0 && { after_all: afterAll },
36993
- ...beforeEach !== void 0 && { before_each: beforeEach },
36994
- ...afterEach !== void 0 && { after_each: afterEach }
37086
+ ...hooks !== void 0 && { hooks },
37087
+ ...mode !== void 0 && { mode },
37088
+ ...staticPath !== void 0 && { static_path: staticPath },
37089
+ ...pool !== void 0 && { pool }
36995
37090
  };
36996
37091
  }
36997
37092
  function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
36998
37093
  if (!suiteLevel && !caseLevel) return void 0;
36999
37094
  if (!suiteLevel) return caseLevel;
37000
37095
  if (!caseLevel) return suiteLevel;
37096
+ const mergeHook = (suiteHook, caseHook) => {
37097
+ if (!suiteHook && !caseHook) return void 0;
37098
+ return {
37099
+ ...suiteHook ?? {},
37100
+ ...caseHook ?? {}
37101
+ };
37102
+ };
37103
+ const mergedHooks = {
37104
+ before_all_tests: mergeHook(
37105
+ suiteLevel.hooks?.before_all_tests,
37106
+ caseLevel.hooks?.before_all_tests
37107
+ ),
37108
+ before_each_test: mergeHook(
37109
+ suiteLevel.hooks?.before_each_test,
37110
+ caseLevel.hooks?.before_each_test
37111
+ ),
37112
+ after_each_test: mergeHook(suiteLevel.hooks?.after_each_test, caseLevel.hooks?.after_each_test),
37113
+ after_all_tests: mergeHook(suiteLevel.hooks?.after_all_tests, caseLevel.hooks?.after_all_tests),
37114
+ on_reuse: mergeHook(suiteLevel.hooks?.on_reuse, caseLevel.hooks?.on_reuse),
37115
+ on_finish: mergeHook(suiteLevel.hooks?.on_finish, caseLevel.hooks?.on_finish)
37116
+ };
37117
+ const hasHooks = Object.values(mergedHooks).some((hook) => hook !== void 0);
37001
37118
  return {
37002
37119
  template: caseLevel.template ?? suiteLevel.template,
37003
37120
  isolation: caseLevel.isolation ?? suiteLevel.isolation,
37004
37121
  repos: caseLevel.repos ?? suiteLevel.repos,
37005
- reset: caseLevel.reset ?? suiteLevel.reset,
37006
- before_all: caseLevel.before_all ?? suiteLevel.before_all,
37007
- after_all: caseLevel.after_all ?? suiteLevel.after_all,
37008
- before_each: caseLevel.before_each ?? suiteLevel.before_each,
37009
- after_each: caseLevel.after_each ?? suiteLevel.after_each
37122
+ ...hasHooks && { hooks: mergedHooks },
37123
+ mode: caseLevel.mode ?? suiteLevel.mode,
37124
+ static_path: caseLevel.static_path ?? suiteLevel.static_path,
37125
+ pool: caseLevel.pool ?? suiteLevel.pool
37010
37126
  };
37011
37127
  }
37012
37128
  function asString6(value) {
@@ -40929,15 +41045,15 @@ function getAgentvHome() {
40929
41045
  function getWorkspacesRoot() {
40930
41046
  return path21.join(getAgentvHome(), "workspaces");
40931
41047
  }
40932
- function getGitCacheRoot() {
40933
- return path21.join(getAgentvHome(), "git-cache");
40934
- }
40935
41048
  function getSubagentsRoot() {
40936
41049
  return path21.join(getAgentvHome(), "subagents");
40937
41050
  }
40938
41051
  function getTraceStateRoot() {
40939
41052
  return path21.join(getAgentvHome(), "trace-state");
40940
41053
  }
41054
+ function getWorkspacePoolRoot() {
41055
+ return path21.join(getAgentvHome(), "workspace-pool");
41056
+ }
40941
41057
  var DEFAULT_LOCK_NAME = "subagent.lock";
40942
41058
  var DEFAULT_ALIVE_FILENAME = ".alive";
40943
41059
  function getDefaultSubagentRoot(vscodeCmd = "code") {
@@ -41738,8 +41854,6 @@ var AGENTV_REQUEST_TEMPLATE = `[[ ## task ## ]]
41738
41854
 
41739
41855
  **IMPORTANT**: Follow these exact steps:
41740
41856
  1. Create and write your complete response to: {{responseFileTmp}}
41741
- - All intended file outputs/changes MUST be written in your response file.
41742
- - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
41743
41857
  2. When completely finished, run these PowerShell commands to signal completion:
41744
41858
  \`\`\`
41745
41859
  Move-Item -LiteralPath '{{responseFileTmp}}' -Destination '{{responseFileFinal}}'
@@ -41756,8 +41870,6 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
41756
41870
 
41757
41871
  **IMPORTANT**: Follow these exact steps:
41758
41872
  1. Create and write your complete response to: {{responseFileTmp}}
41759
- - All intended file outputs/changes MUST be written in your response file.
41760
- - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
41761
41873
  2. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
41762
41874
  3. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
41763
41875
  `;
@@ -42347,15 +42459,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
42347
42459
  });
42348
42460
  }
42349
42461
  async function execShellWithStdin(command, stdinPayload, options = {}) {
42350
- const { mkdir: mkdir14, readFile: readFile12, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
42462
+ const { mkdir: mkdir14, readFile: readFile13, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
42351
42463
  const { tmpdir: tmpdir3 } = await import("node:os");
42352
- const path41 = await import("node:path");
42464
+ const path42 = await import("node:path");
42353
42465
  const { randomUUID: randomUUID8 } = await import("node:crypto");
42354
- const dir = path41.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
42466
+ const dir = path42.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
42355
42467
  await mkdir14(dir, { recursive: true });
42356
- const stdinPath = path41.join(dir, "stdin.txt");
42357
- const stdoutPath = path41.join(dir, "stdout.txt");
42358
- const stderrPath = path41.join(dir, "stderr.txt");
42468
+ const stdinPath = path42.join(dir, "stdin.txt");
42469
+ const stdoutPath = path42.join(dir, "stdout.txt");
42470
+ const stderrPath = path42.join(dir, "stderr.txt");
42359
42471
  await writeFile9(stdinPath, stdinPayload, "utf8");
42360
42472
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
42361
42473
  const { spawn: spawn4 } = await import("node:child_process");
@@ -42385,8 +42497,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
42385
42497
  resolve2(code ?? 0);
42386
42498
  });
42387
42499
  });
42388
- const stdout = (await readFile12(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
42389
- const stderr = (await readFile12(stderrPath, "utf8")).replace(/\r\n/g, "\n");
42500
+ const stdout = (await readFile13(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
42501
+ const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
42390
42502
  return { stdout, stderr, exitCode };
42391
42503
  } finally {
42392
42504
  await rm6(dir, { recursive: true, force: true });
@@ -42699,7 +42811,7 @@ var CodeEvaluator = class {
42699
42811
  outputPath,
42700
42812
  guidelineFiles: context.evalCase.guideline_paths,
42701
42813
  inputFiles: context.evalCase.file_paths.filter(
42702
- (path41) => !context.evalCase.guideline_paths.includes(path41)
42814
+ (path42) => !context.evalCase.guideline_paths.includes(path42)
42703
42815
  ),
42704
42816
  input: context.evalCase.input,
42705
42817
  trace: context.trace ?? null,
@@ -42942,6 +43054,8 @@ ${context.fileChanges}`;
42942
43054
  };
42943
43055
  } catch (e) {
42944
43056
  const message = e instanceof Error ? e.message : String(e);
43057
+ const evalName = context.evaluator?.name ?? "llm-judge";
43058
+ console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
42945
43059
  return {
42946
43060
  score: 0,
42947
43061
  verdict: "skip",
@@ -42970,24 +43084,39 @@ ${context.fileChanges}`;
42970
43084
  systemPrompt,
42971
43085
  target: judgeProvider.targetName
42972
43086
  };
42973
- const { data, tokenUsage } = await this.runWithRetry({
42974
- context,
42975
- judgeProvider,
42976
- systemPrompt,
42977
- userPrompt: prompt,
42978
- schema: rubricEvaluationSchema
42979
- });
42980
- const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
42981
- return {
42982
- score,
42983
- verdict,
42984
- hits,
42985
- misses,
42986
- expectedAspectCount: rubrics.length,
42987
- reasoning: data.overall_reasoning,
42988
- evaluatorRawRequest,
42989
- tokenUsage
42990
- };
43087
+ try {
43088
+ const { data, tokenUsage } = await this.runWithRetry({
43089
+ context,
43090
+ judgeProvider,
43091
+ systemPrompt,
43092
+ userPrompt: prompt,
43093
+ schema: rubricEvaluationSchema
43094
+ });
43095
+ const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
43096
+ return {
43097
+ score,
43098
+ verdict,
43099
+ hits,
43100
+ misses,
43101
+ expectedAspectCount: rubrics.length,
43102
+ reasoning: data.overall_reasoning,
43103
+ evaluatorRawRequest,
43104
+ tokenUsage
43105
+ };
43106
+ } catch (e) {
43107
+ const message = e instanceof Error ? e.message : String(e);
43108
+ const evalName = context.evaluator?.name ?? "llm-judge";
43109
+ console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
43110
+ return {
43111
+ score: 0,
43112
+ verdict: "skip",
43113
+ hits: [],
43114
+ misses: [`Judge parse failure after 3 attempts: ${message}`],
43115
+ expectedAspectCount: rubrics.length,
43116
+ reasoning: `Judge parse failure after 3 attempts: ${message}`,
43117
+ evaluatorRawRequest
43118
+ };
43119
+ }
42991
43120
  }
42992
43121
  /**
42993
43122
  * Evaluate using score-range rubrics (analytic rubric scoring).
@@ -43001,25 +43130,40 @@ ${context.fileChanges}`;
43001
43130
  systemPrompt,
43002
43131
  target: judgeProvider.targetName
43003
43132
  };
43004
- const { data, tokenUsage } = await this.runWithRetry({
43005
- context,
43006
- judgeProvider,
43007
- systemPrompt,
43008
- userPrompt: prompt,
43009
- schema: scoreRangeEvaluationSchema
43010
- });
43011
- const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
43012
- return {
43013
- score,
43014
- verdict,
43015
- hits,
43016
- misses,
43017
- expectedAspectCount: rubrics.length,
43018
- reasoning: data.overall_reasoning,
43019
- evaluatorRawRequest,
43020
- details,
43021
- tokenUsage
43022
- };
43133
+ try {
43134
+ const { data, tokenUsage } = await this.runWithRetry({
43135
+ context,
43136
+ judgeProvider,
43137
+ systemPrompt,
43138
+ userPrompt: prompt,
43139
+ schema: scoreRangeEvaluationSchema
43140
+ });
43141
+ const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
43142
+ return {
43143
+ score,
43144
+ verdict,
43145
+ hits,
43146
+ misses,
43147
+ expectedAspectCount: rubrics.length,
43148
+ reasoning: data.overall_reasoning,
43149
+ evaluatorRawRequest,
43150
+ details,
43151
+ tokenUsage
43152
+ };
43153
+ } catch (e) {
43154
+ const message = e instanceof Error ? e.message : String(e);
43155
+ const evalName = context.evaluator?.name ?? "llm-judge";
43156
+ console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
43157
+ return {
43158
+ score: 0,
43159
+ verdict: "skip",
43160
+ hits: [],
43161
+ misses: [`Judge parse failure after 3 attempts: ${message}`],
43162
+ expectedAspectCount: rubrics.length,
43163
+ reasoning: `Judge parse failure after 3 attempts: ${message}`,
43164
+ evaluatorRawRequest
43165
+ };
43166
+ }
43023
43167
  }
43024
43168
  /**
43025
43169
  * Build prompt for score-range rubric evaluation.
@@ -43303,19 +43447,13 @@ var CompositeEvaluator = class {
43303
43447
  runWeightedAverage(results, weights) {
43304
43448
  let totalWeight = 0;
43305
43449
  let weightedSum = 0;
43450
+ let evaluatedCount = 0;
43306
43451
  const allHits = [];
43307
43452
  const allMisses = [];
43308
43453
  const reasoningParts = [];
43309
43454
  const scores = [];
43310
43455
  for (const member of results) {
43311
43456
  const weight = weights?.[member.id] ?? 1;
43312
- totalWeight += weight;
43313
- weightedSum += member.result.score * weight;
43314
- allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
43315
- allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
43316
- if (member.result.reasoning) {
43317
- reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
43318
- }
43319
43457
  scores.push({
43320
43458
  name: member.id,
43321
43459
  type: member.type,
@@ -43330,6 +43468,32 @@ var CompositeEvaluator = class {
43330
43468
  details: member.result.details,
43331
43469
  tokenUsage: member.result.tokenUsage
43332
43470
  });
43471
+ if (member.result.verdict === "skip") {
43472
+ continue;
43473
+ }
43474
+ evaluatedCount++;
43475
+ totalWeight += weight;
43476
+ weightedSum += member.result.score * weight;
43477
+ allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
43478
+ allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
43479
+ if (member.result.reasoning) {
43480
+ reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
43481
+ }
43482
+ }
43483
+ if (evaluatedCount === 0 && results.length > 0) {
43484
+ return {
43485
+ score: 0,
43486
+ verdict: "skip",
43487
+ hits: [],
43488
+ misses: [],
43489
+ expectedAspectCount: 1,
43490
+ reasoning: "All evaluators skipped (infrastructure failure)",
43491
+ evaluatorRawRequest: {
43492
+ aggregator: "weighted_average",
43493
+ ...weights ? { weights } : {}
43494
+ },
43495
+ scores
43496
+ };
43333
43497
  }
43334
43498
  const finalScore = totalWeight > 0 ? weightedSum / totalWeight : 0;
43335
43499
  return {
@@ -43353,19 +43517,8 @@ var CompositeEvaluator = class {
43353
43517
  const reasoningParts = [];
43354
43518
  let passingCount = 0;
43355
43519
  let borderlineCount = 0;
43520
+ let evaluatedCount = 0;
43356
43521
  for (const member of results) {
43357
- const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
43358
- if (isPassing) {
43359
- passingCount++;
43360
- if (member.result.verdict === "borderline") {
43361
- borderlineCount++;
43362
- }
43363
- }
43364
- allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
43365
- allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
43366
- if (member.result.reasoning) {
43367
- reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
43368
- }
43369
43522
  scores.push({
43370
43523
  name: member.id,
43371
43524
  type: member.type,
@@ -43379,8 +43532,39 @@ var CompositeEvaluator = class {
43379
43532
  details: member.result.details,
43380
43533
  tokenUsage: member.result.tokenUsage
43381
43534
  });
43535
+ if (member.result.verdict === "skip") {
43536
+ continue;
43537
+ }
43538
+ evaluatedCount++;
43539
+ const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
43540
+ if (isPassing) {
43541
+ passingCount++;
43542
+ if (member.result.verdict === "borderline") {
43543
+ borderlineCount++;
43544
+ }
43545
+ }
43546
+ allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
43547
+ allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
43548
+ if (member.result.reasoning) {
43549
+ reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
43550
+ }
43382
43551
  }
43383
- const totalCount = results.length;
43552
+ if (evaluatedCount === 0 && results.length > 0) {
43553
+ return {
43554
+ score: 0,
43555
+ verdict: "skip",
43556
+ hits: [],
43557
+ misses: [],
43558
+ expectedAspectCount: 1,
43559
+ reasoning: "All evaluators skipped (infrastructure failure)",
43560
+ evaluatorRawRequest: {
43561
+ aggregator: "threshold",
43562
+ threshold
43563
+ },
43564
+ scores
43565
+ };
43566
+ }
43567
+ const totalCount = evaluatedCount;
43384
43568
  const score = totalCount > 0 ? passingCount / totalCount : 0;
43385
43569
  const pass = score >= threshold;
43386
43570
  if (pass && borderlineCount > 0) {
@@ -43882,115 +44066,115 @@ var FieldAccuracyEvaluator = class {
43882
44066
  * Evaluate a single field against the expected value.
43883
44067
  */
43884
44068
  evaluateField(fieldConfig, candidateData, expectedData) {
43885
- const { path: path41, match, required: required2 = true, weight = 1 } = fieldConfig;
43886
- const candidateValue = resolvePath(candidateData, path41);
43887
- const expectedValue = resolvePath(expectedData, path41);
44069
+ const { path: path42, match, required: required2 = true, weight = 1 } = fieldConfig;
44070
+ const candidateValue = resolvePath(candidateData, path42);
44071
+ const expectedValue = resolvePath(expectedData, path42);
43888
44072
  if (expectedValue === void 0) {
43889
44073
  return {
43890
- path: path41,
44074
+ path: path42,
43891
44075
  score: 1,
43892
44076
  // No expected value means no comparison needed
43893
44077
  weight,
43894
44078
  hit: true,
43895
- message: `${path41}: no expected value`
44079
+ message: `${path42}: no expected value`
43896
44080
  };
43897
44081
  }
43898
44082
  if (candidateValue === void 0) {
43899
44083
  if (required2) {
43900
44084
  return {
43901
- path: path41,
44085
+ path: path42,
43902
44086
  score: 0,
43903
44087
  weight,
43904
44088
  hit: false,
43905
- message: `${path41} (required, missing)`
44089
+ message: `${path42} (required, missing)`
43906
44090
  };
43907
44091
  }
43908
44092
  return {
43909
- path: path41,
44093
+ path: path42,
43910
44094
  score: 1,
43911
44095
  // Don't penalize missing optional fields
43912
44096
  weight: 0,
43913
44097
  // Zero weight means it won't affect the score
43914
44098
  hit: true,
43915
- message: `${path41}: optional field missing`
44099
+ message: `${path42}: optional field missing`
43916
44100
  };
43917
44101
  }
43918
44102
  switch (match) {
43919
44103
  case "exact":
43920
- return this.compareExact(path41, candidateValue, expectedValue, weight);
44104
+ return this.compareExact(path42, candidateValue, expectedValue, weight);
43921
44105
  case "numeric_tolerance":
43922
44106
  return this.compareNumericTolerance(
43923
- path41,
44107
+ path42,
43924
44108
  candidateValue,
43925
44109
  expectedValue,
43926
44110
  fieldConfig,
43927
44111
  weight
43928
44112
  );
43929
44113
  case "date":
43930
- return this.compareDate(path41, candidateValue, expectedValue, fieldConfig, weight);
44114
+ return this.compareDate(path42, candidateValue, expectedValue, fieldConfig, weight);
43931
44115
  default:
43932
44116
  return {
43933
- path: path41,
44117
+ path: path42,
43934
44118
  score: 0,
43935
44119
  weight,
43936
44120
  hit: false,
43937
- message: `${path41}: unknown match type "${match}"`
44121
+ message: `${path42}: unknown match type "${match}"`
43938
44122
  };
43939
44123
  }
43940
44124
  }
43941
44125
  /**
43942
44126
  * Exact equality comparison.
43943
44127
  */
43944
- compareExact(path41, candidateValue, expectedValue, weight) {
44128
+ compareExact(path42, candidateValue, expectedValue, weight) {
43945
44129
  if (deepEqual(candidateValue, expectedValue)) {
43946
44130
  return {
43947
- path: path41,
44131
+ path: path42,
43948
44132
  score: 1,
43949
44133
  weight,
43950
44134
  hit: true,
43951
- message: path41
44135
+ message: path42
43952
44136
  };
43953
44137
  }
43954
44138
  if (typeof candidateValue !== typeof expectedValue) {
43955
44139
  return {
43956
- path: path41,
44140
+ path: path42,
43957
44141
  score: 0,
43958
44142
  weight,
43959
44143
  hit: false,
43960
- message: `${path41} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
44144
+ message: `${path42} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
43961
44145
  };
43962
44146
  }
43963
44147
  return {
43964
- path: path41,
44148
+ path: path42,
43965
44149
  score: 0,
43966
44150
  weight,
43967
44151
  hit: false,
43968
- message: `${path41} (value mismatch)`
44152
+ message: `${path42} (value mismatch)`
43969
44153
  };
43970
44154
  }
43971
44155
  /**
43972
44156
  * Numeric comparison with absolute or relative tolerance.
43973
44157
  */
43974
- compareNumericTolerance(path41, candidateValue, expectedValue, fieldConfig, weight) {
44158
+ compareNumericTolerance(path42, candidateValue, expectedValue, fieldConfig, weight) {
43975
44159
  const { tolerance = 0, relative = false } = fieldConfig;
43976
44160
  const candidateNum = toNumber2(candidateValue);
43977
44161
  const expectedNum = toNumber2(expectedValue);
43978
44162
  if (candidateNum === null || expectedNum === null) {
43979
44163
  return {
43980
- path: path41,
44164
+ path: path42,
43981
44165
  score: 0,
43982
44166
  weight,
43983
44167
  hit: false,
43984
- message: `${path41} (non-numeric value)`
44168
+ message: `${path42} (non-numeric value)`
43985
44169
  };
43986
44170
  }
43987
44171
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
43988
44172
  return {
43989
- path: path41,
44173
+ path: path42,
43990
44174
  score: 0,
43991
44175
  weight,
43992
44176
  hit: false,
43993
- message: `${path41} (invalid numeric value)`
44177
+ message: `${path42} (invalid numeric value)`
43994
44178
  };
43995
44179
  }
43996
44180
  const diff = Math.abs(candidateNum - expectedNum);
@@ -44003,61 +44187,61 @@ var FieldAccuracyEvaluator = class {
44003
44187
  }
44004
44188
  if (withinTolerance) {
44005
44189
  return {
44006
- path: path41,
44190
+ path: path42,
44007
44191
  score: 1,
44008
44192
  weight,
44009
44193
  hit: true,
44010
- message: `${path41} (within tolerance: diff=${diff.toFixed(2)})`
44194
+ message: `${path42} (within tolerance: diff=${diff.toFixed(2)})`
44011
44195
  };
44012
44196
  }
44013
44197
  return {
44014
- path: path41,
44198
+ path: path42,
44015
44199
  score: 0,
44016
44200
  weight,
44017
44201
  hit: false,
44018
- message: `${path41} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
44202
+ message: `${path42} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
44019
44203
  };
44020
44204
  }
44021
44205
  /**
44022
44206
  * Date comparison with format normalization.
44023
44207
  */
44024
- compareDate(path41, candidateValue, expectedValue, fieldConfig, weight) {
44208
+ compareDate(path42, candidateValue, expectedValue, fieldConfig, weight) {
44025
44209
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
44026
44210
  const candidateDate = parseDate(String(candidateValue), formats);
44027
44211
  const expectedDate = parseDate(String(expectedValue), formats);
44028
44212
  if (candidateDate === null) {
44029
44213
  return {
44030
- path: path41,
44214
+ path: path42,
44031
44215
  score: 0,
44032
44216
  weight,
44033
44217
  hit: false,
44034
- message: `${path41} (unparseable candidate date)`
44218
+ message: `${path42} (unparseable candidate date)`
44035
44219
  };
44036
44220
  }
44037
44221
  if (expectedDate === null) {
44038
44222
  return {
44039
- path: path41,
44223
+ path: path42,
44040
44224
  score: 0,
44041
44225
  weight,
44042
44226
  hit: false,
44043
- message: `${path41} (unparseable expected date)`
44227
+ message: `${path42} (unparseable expected date)`
44044
44228
  };
44045
44229
  }
44046
44230
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
44047
44231
  return {
44048
- path: path41,
44232
+ path: path42,
44049
44233
  score: 1,
44050
44234
  weight,
44051
44235
  hit: true,
44052
- message: path41
44236
+ message: path42
44053
44237
  };
44054
44238
  }
44055
44239
  return {
44056
- path: path41,
44240
+ path: path42,
44057
44241
  score: 0,
44058
44242
  weight,
44059
44243
  hit: false,
44060
- message: `${path41} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
44244
+ message: `${path42} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
44061
44245
  };
44062
44246
  }
44063
44247
  /**
@@ -44098,11 +44282,11 @@ var FieldAccuracyEvaluator = class {
44098
44282
  };
44099
44283
  }
44100
44284
  };
44101
- function resolvePath(obj, path41) {
44102
- if (!path41 || !obj) {
44285
+ function resolvePath(obj, path42) {
44286
+ if (!path42 || !obj) {
44103
44287
  return void 0;
44104
44288
  }
44105
- const parts = path41.split(/\.|\[|\]/).filter((p) => p.length > 0);
44289
+ const parts = path42.split(/\.|\[|\]/).filter((p) => p.length > 0);
44106
44290
  let current = obj;
44107
44291
  for (const part of parts) {
44108
44292
  if (current === null || current === void 0) {
@@ -44906,8 +45090,8 @@ var TokenUsageEvaluator = class {
44906
45090
  };
44907
45091
  }
44908
45092
  };
44909
- function getNestedValue(obj, path41) {
44910
- const parts = path41.split(".");
45093
+ function getNestedValue(obj, path42) {
45094
+ const parts = path42.split(".");
44911
45095
  let current = obj;
44912
45096
  for (const part of parts) {
44913
45097
  if (current === null || current === void 0 || typeof current !== "object") {
@@ -46302,8 +46486,6 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
46302
46486
  }
46303
46487
  }
46304
46488
  var execFileAsync = promisify5(execFile);
46305
- var DEFAULT_TIMEOUT_MS2 = 3e5;
46306
- var LOCK_TIMEOUT_MS = 6e4;
46307
46489
  function gitEnv() {
46308
46490
  const env = { ...process.env };
46309
46491
  for (const key of Object.keys(env)) {
@@ -46318,65 +46500,296 @@ function gitEnv() {
46318
46500
  GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
46319
46501
  };
46320
46502
  }
46321
- function cacheKey(source) {
46322
- const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
46323
- return createHash("sha256").update(raw).digest("hex");
46324
- }
46325
- function getSourceUrl(source) {
46326
- return source.type === "git" ? source.url : source.path;
46327
- }
46328
46503
  async function git(args, opts) {
46329
46504
  const { stdout } = await execFileAsync("git", args, {
46330
46505
  cwd: opts?.cwd,
46331
- timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
46506
+ timeout: opts?.timeout ?? 3e5,
46332
46507
  env: gitEnv(),
46333
46508
  maxBuffer: 50 * 1024 * 1024
46334
- // 50MB
46335
46509
  });
46336
46510
  return stdout.trim();
46337
46511
  }
46338
- async function acquireLock(lockPath) {
46339
- const start = Date.now();
46340
- while (Date.now() - start < LOCK_TIMEOUT_MS) {
46341
- try {
46342
- await writeFile7(lockPath, String(process.pid), { flag: "wx" });
46343
- return;
46344
- } catch (err) {
46345
- if (err.code === "EEXIST") {
46346
- await new Promise((r) => setTimeout(r, 200));
46512
+ function normalizeRepoForFingerprint(repo) {
46513
+ const source = repo.source.type === "git" ? { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") } : { type: "local", path: repo.source.path };
46514
+ const result = {
46515
+ path: repo.path,
46516
+ source,
46517
+ ref: repo.checkout?.ref ?? "HEAD"
46518
+ };
46519
+ if (repo.clone?.depth !== void 0) {
46520
+ result.depth = repo.clone.depth;
46521
+ }
46522
+ if (repo.clone?.filter !== void 0) {
46523
+ result.filter = repo.clone.filter;
46524
+ }
46525
+ if (repo.clone?.sparse?.length) {
46526
+ result.sparse = [...repo.clone.sparse].sort();
46527
+ }
46528
+ return result;
46529
+ }
46530
+ function computeWorkspaceFingerprint(templatePath, repos) {
46531
+ const canonical = {
46532
+ templatePath: templatePath ?? null,
46533
+ repos: [...repos].sort((a, b) => a.path.localeCompare(b.path)).map(normalizeRepoForFingerprint)
46534
+ };
46535
+ return createHash("sha256").update(JSON.stringify(canonical)).digest("hex");
46536
+ }
46537
+ async function copyDirectoryRecursive2(src, dest, skipDirs) {
46538
+ await mkdir11(dest, { recursive: true });
46539
+ const entries = await readdir4(src, { withFileTypes: true });
46540
+ for (const entry of entries) {
46541
+ const srcPath = path36.join(src, entry.name);
46542
+ const destPath = path36.join(dest, entry.name);
46543
+ if (entry.name === ".git") {
46544
+ continue;
46545
+ }
46546
+ if (entry.isDirectory()) {
46547
+ if (skipDirs?.has(entry.name)) {
46347
46548
  continue;
46348
46549
  }
46349
- throw err;
46550
+ await copyDirectoryRecursive2(srcPath, destPath, skipDirs);
46551
+ } else {
46552
+ await cp2(srcPath, destPath, { preserveTimestamps: true, force: true });
46350
46553
  }
46351
46554
  }
46352
- throw new Error(`Timed out waiting for lock: ${lockPath}`);
46353
46555
  }
46354
- async function releaseLock(lockPath) {
46355
- try {
46356
- await unlink(lockPath);
46357
- } catch {
46556
+ var WorkspacePoolManager = class {
46557
+ poolRoot;
46558
+ constructor(poolRoot) {
46559
+ this.poolRoot = poolRoot ?? getWorkspacePoolRoot();
46560
+ }
46561
+ /**
46562
+ * Acquire a workspace slot from the pool.
46563
+ *
46564
+ * 1. Compute fingerprint from template + repos
46565
+ * 2. Check drift (compare stored metadata.json fingerprint vs computed)
46566
+ * 3. If drift: warn, remove all slots, rematerialize
46567
+ * 4. Acquire a slot (try-lock slot-0, slot-1, ..., up to maxSlots)
46568
+ * 5. If slot exists: reset repos, re-copy template files (skip repo directories)
46569
+ * 6. If new slot: copy template, materialize all repos, write metadata.json
46570
+ * 7. Return the slot (with path, index, isExisting)
46571
+ */
46572
+ async acquireWorkspace(options) {
46573
+ const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
46574
+ const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
46575
+ const poolDir = path36.join(this.poolRoot, fingerprint);
46576
+ await mkdir11(poolDir, { recursive: true });
46577
+ const drifted = await this.checkDrift(poolDir, fingerprint);
46578
+ if (drifted) {
46579
+ console.warn(
46580
+ `[workspace-pool] Drift detected for fingerprint ${fingerprint.slice(0, 12)}... Removing stale slots.`
46581
+ );
46582
+ await this.removeAllSlots(poolDir);
46583
+ }
46584
+ for (let i = 0; i < maxSlots; i++) {
46585
+ const slotPath = path36.join(poolDir, `slot-${i}`);
46586
+ const lockPath = `${slotPath}.lock`;
46587
+ const locked = await this.tryLock(lockPath);
46588
+ if (!locked) {
46589
+ continue;
46590
+ }
46591
+ const slotExists = existsSync2(slotPath);
46592
+ if (slotExists) {
46593
+ await this.resetSlot(slotPath, templatePath, repos, poolReset);
46594
+ return {
46595
+ index: i,
46596
+ path: slotPath,
46597
+ isExisting: true,
46598
+ lockPath,
46599
+ fingerprint,
46600
+ poolDir
46601
+ };
46602
+ }
46603
+ await mkdir11(slotPath, { recursive: true });
46604
+ if (templatePath) {
46605
+ await copyDirectoryRecursive2(templatePath, slotPath);
46606
+ }
46607
+ if (repos.length > 0) {
46608
+ await repoManager.materializeAll(repos, slotPath);
46609
+ }
46610
+ await this.writeMetadata(poolDir, fingerprint, templatePath ?? null, repos);
46611
+ return {
46612
+ index: i,
46613
+ path: slotPath,
46614
+ isExisting: false,
46615
+ lockPath,
46616
+ fingerprint,
46617
+ poolDir
46618
+ };
46619
+ }
46620
+ throw new Error(
46621
+ `All ${maxSlots} pool slots are locked for fingerprint ${fingerprint.slice(0, 12)}...`
46622
+ );
46623
+ }
46624
+ /** Remove lock file to release a slot. */
46625
+ async releaseSlot(slot) {
46626
+ try {
46627
+ await unlink(slot.lockPath);
46628
+ } catch {
46629
+ }
46630
+ }
46631
+ /**
46632
+ * Try to acquire a PID-based lock file.
46633
+ * On EEXIST, read PID and check if process is alive. If dead, stale lock — remove and retry.
46634
+ * Returns true if lock acquired, false if slot is actively locked.
46635
+ * Uses a bounded loop (max 3 attempts) to avoid unbounded recursion.
46636
+ */
46637
+ async tryLock(lockPath) {
46638
+ for (let attempt = 0; attempt < 3; attempt++) {
46639
+ try {
46640
+ await writeFile7(lockPath, String(process.pid), { flag: "wx" });
46641
+ return true;
46642
+ } catch (err) {
46643
+ if (err.code !== "EEXIST") {
46644
+ throw err;
46645
+ }
46646
+ try {
46647
+ const pidStr = await readFile11(lockPath, "utf-8");
46648
+ const pid = Number.parseInt(pidStr.trim(), 10);
46649
+ if (!Number.isNaN(pid)) {
46650
+ try {
46651
+ process.kill(pid, 0);
46652
+ return false;
46653
+ } catch {
46654
+ await unlink(lockPath).catch(() => {
46655
+ });
46656
+ continue;
46657
+ }
46658
+ }
46659
+ } catch {
46660
+ }
46661
+ return false;
46662
+ }
46663
+ }
46664
+ return false;
46665
+ }
46666
+ /**
46667
+ * Check if the stored fingerprint in metadata.json differs from the computed one.
46668
+ * Returns true if drifted, false otherwise.
46669
+ * Returns false (no drift) if metadata.json doesn't exist (first use).
46670
+ */
46671
+ async checkDrift(poolDir, fingerprint) {
46672
+ const metadataPath = path36.join(poolDir, "metadata.json");
46673
+ try {
46674
+ const raw = await readFile11(metadataPath, "utf-8");
46675
+ const metadata = JSON.parse(raw);
46676
+ return metadata.fingerprint !== fingerprint;
46677
+ } catch {
46678
+ return false;
46679
+ }
46680
+ }
46681
+ /** Write metadata.json with fingerprint, inputs, and timestamp. */
46682
+ async writeMetadata(poolDir, fingerprint, templatePath, repos) {
46683
+ const metadata = {
46684
+ fingerprint,
46685
+ templatePath,
46686
+ repos,
46687
+ createdAt: (/* @__PURE__ */ new Date()).toISOString()
46688
+ };
46689
+ await writeFile7(path36.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
46690
+ }
46691
+ /** Remove all slot directories and their lock files from a pool directory. */
46692
+ async removeAllSlots(poolDir) {
46693
+ const entries = await readdir4(poolDir);
46694
+ for (const entry of entries) {
46695
+ if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
46696
+ const lockPath = path36.join(poolDir, `${entry}.lock`);
46697
+ if (existsSync2(lockPath)) {
46698
+ try {
46699
+ const pidStr = await readFile11(lockPath, "utf-8");
46700
+ const pid = Number.parseInt(pidStr.trim(), 10);
46701
+ if (!Number.isNaN(pid)) {
46702
+ try {
46703
+ process.kill(pid, 0);
46704
+ console.warn(`[workspace-pool] Skipping slot ${entry}: locked by PID ${pid}`);
46705
+ continue;
46706
+ } catch {
46707
+ }
46708
+ }
46709
+ } catch {
46710
+ }
46711
+ }
46712
+ await rm5(path36.join(poolDir, entry), { recursive: true, force: true });
46713
+ await rm5(lockPath, { force: true }).catch(() => {
46714
+ });
46715
+ }
46716
+ }
46717
+ await rm5(path36.join(poolDir, "metadata.json"), { force: true }).catch(() => {
46718
+ });
46719
+ }
46720
+ /**
46721
+ * Reset an existing slot for reuse:
46722
+ * 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
46723
+ * 2. Re-copy template files (skip repo directories)
46724
+ */
46725
+ async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
46726
+ for (const repo of repos) {
46727
+ const repoDir = path36.join(slotPath, repo.path);
46728
+ if (!existsSync2(repoDir)) {
46729
+ continue;
46730
+ }
46731
+ if (poolReset === "none") {
46732
+ continue;
46733
+ }
46734
+ const ref = repo.checkout?.ref ?? "HEAD";
46735
+ await git(["reset", "--hard", ref], { cwd: repoDir });
46736
+ const cleanFlag = poolReset === "strict" ? "-fdx" : "-fd";
46737
+ await git(["clean", cleanFlag], { cwd: repoDir });
46738
+ }
46739
+ if (templatePath) {
46740
+ const repoDirNames = new Set(
46741
+ repos.map((r) => {
46742
+ const normalized = r.path.replace(/^\.\//, "");
46743
+ return normalized.split("/")[0];
46744
+ })
46745
+ );
46746
+ await copyDirectoryRecursive2(templatePath, slotPath, repoDirNames);
46747
+ }
46358
46748
  }
46749
+ };
46750
+ var execFileAsync2 = promisify6(execFile2);
46751
+ var DEFAULT_TIMEOUT_MS2 = 3e5;
46752
+ function gitEnv2() {
46753
+ const env = { ...process.env };
46754
+ for (const key of Object.keys(env)) {
46755
+ if (key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND") {
46756
+ delete env[key];
46757
+ }
46758
+ }
46759
+ return {
46760
+ ...env,
46761
+ GIT_TERMINAL_PROMPT: "0",
46762
+ GIT_ASKPASS: "",
46763
+ GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
46764
+ };
46765
+ }
46766
+ function getSourceUrl(source) {
46767
+ return source.type === "git" ? source.url : source.path;
46768
+ }
46769
+ async function git2(args, opts) {
46770
+ const { stdout } = await execFileAsync2("git", args, {
46771
+ cwd: opts?.cwd,
46772
+ timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
46773
+ env: gitEnv2(),
46774
+ maxBuffer: 50 * 1024 * 1024
46775
+ // 50MB
46776
+ });
46777
+ return stdout.trim();
46359
46778
  }
46360
46779
  var RepoManager = class {
46361
- cacheDir;
46362
46780
  verbose;
46363
- constructor(cacheDir, verbose = false) {
46364
- this.cacheDir = cacheDir ?? getGitCacheRoot();
46781
+ constructor(verbose = false) {
46365
46782
  this.verbose = verbose;
46366
46783
  }
46367
46784
  async runGit(args, opts) {
46368
46785
  const startedAt = Date.now();
46369
46786
  if (this.verbose) {
46370
- console.log(
46371
- `[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`
46372
- );
46787
+ console.log(`[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`);
46373
46788
  }
46374
46789
  try {
46375
- const output = await git(args, opts);
46790
+ const output = await git2(args, opts);
46376
46791
  if (this.verbose) {
46377
- console.log(
46378
- `[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`
46379
- );
46792
+ console.log(`[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`);
46380
46793
  }
46381
46794
  return output;
46382
46795
  } catch (error40) {
@@ -46390,88 +46803,18 @@ var RepoManager = class {
46390
46803
  }
46391
46804
  }
46392
46805
  /**
46393
- * Ensure a bare mirror cache exists for the given source.
46394
- * Creates on first access, fetches updates on subsequent calls.
46395
- * Returns the absolute path to the cache directory.
46396
- */
46397
- async ensureCache(source, depth, resolve2) {
46398
- const key = cacheKey(source);
46399
- const cachePath = path36.join(this.cacheDir, key);
46400
- const lockPath = `${cachePath}.lock`;
46401
- const cacheExists = existsSync2(path36.join(cachePath, "HEAD"));
46402
- if (this.verbose) {
46403
- console.log(
46404
- `[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve2 ?? "remote"} cache=${cachePath}`
46405
- );
46406
- }
46407
- if (resolve2 === "local") {
46408
- if (cacheExists) {
46409
- if (this.verbose) {
46410
- console.log(`[repo] using existing local cache ${cachePath}`);
46411
- }
46412
- return cachePath;
46413
- }
46414
- const url2 = getSourceUrl(source);
46415
- throw new Error(
46416
- `No cache found for \`${url2}\`. Run \`agentv cache add --url ${url2} --from <local-path>\` to seed it.`
46417
- );
46418
- }
46419
- await mkdir11(this.cacheDir, { recursive: true });
46420
- const lockStartedAt = Date.now();
46421
- await acquireLock(lockPath);
46422
- if (this.verbose) {
46423
- console.log(
46424
- `[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`
46425
- );
46426
- }
46427
- try {
46428
- if (cacheExists) {
46429
- if (this.verbose) {
46430
- console.log(`[repo] refreshing existing cache ${cachePath}`);
46431
- }
46432
- const fetchArgs = ["fetch", "--prune"];
46433
- if (depth) {
46434
- fetchArgs.push("--depth", String(depth));
46435
- }
46436
- await this.runGit(fetchArgs, { cwd: cachePath });
46437
- } else {
46438
- if (this.verbose) {
46439
- console.log(`[repo] creating new cache ${cachePath}`);
46440
- }
46441
- const cloneArgs = ["clone", "--mirror", "--bare"];
46442
- if (depth) {
46443
- cloneArgs.push("--depth", String(depth));
46444
- }
46445
- const sourceUrl = getSourceUrl(source);
46446
- const cloneUrl = depth && source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
46447
- cloneArgs.push(cloneUrl, cachePath);
46448
- await this.runGit(cloneArgs);
46449
- }
46450
- } finally {
46451
- await releaseLock(lockPath);
46452
- if (this.verbose) {
46453
- console.log(`[repo] lock released path=${lockPath}`);
46454
- }
46455
- }
46456
- return cachePath;
46457
- }
46458
- /**
46459
- * Clone a repo from cache into the workspace at the configured path.
46806
+ * Clone a repo directly from source into the workspace at the configured path.
46460
46807
  * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
46461
46808
  */
46462
46809
  async materialize(repo, workspacePath) {
46463
- const targetDir = path36.join(workspacePath, repo.path);
46810
+ const targetDir = path37.join(workspacePath, repo.path);
46811
+ const sourceUrl = getSourceUrl(repo.source);
46464
46812
  const startedAt = Date.now();
46465
46813
  if (this.verbose) {
46466
46814
  console.log(
46467
- `[repo] materialize start path=${repo.path} source=${getSourceUrl(repo.source)} workspace=${workspacePath}`
46815
+ `[repo] materialize start path=${repo.path} source=${sourceUrl} workspace=${workspacePath}`
46468
46816
  );
46469
46817
  }
46470
- const cachePath = await this.ensureCache(
46471
- repo.source,
46472
- repo.clone?.depth,
46473
- repo.checkout?.resolve
46474
- );
46475
46818
  const cloneArgs = ["clone"];
46476
46819
  if (repo.clone?.depth) {
46477
46820
  cloneArgs.push("--depth", String(repo.clone.depth));
@@ -46480,7 +46823,7 @@ var RepoManager = class {
46480
46823
  cloneArgs.push("--filter", repo.clone.filter);
46481
46824
  }
46482
46825
  cloneArgs.push("--no-checkout");
46483
- const cloneUrl = repo.clone?.depth || repo.clone?.filter ? `file://${cachePath}` : cachePath;
46826
+ const cloneUrl = (repo.clone?.depth || repo.clone?.filter) && repo.source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
46484
46827
  cloneArgs.push(cloneUrl, targetDir);
46485
46828
  await this.runGit(cloneArgs);
46486
46829
  if (repo.clone?.sparse?.length) {
@@ -46552,81 +46895,43 @@ var RepoManager = class {
46552
46895
  }
46553
46896
  }
46554
46897
  /** Reset repos in workspace to their checkout state. */
46555
- async reset(repos, workspacePath, strategy) {
46556
- if (strategy === "recreate") {
46557
- for (const repo of repos) {
46558
- const targetDir = path36.join(workspacePath, repo.path);
46559
- await rm5(targetDir, { recursive: true, force: true });
46560
- }
46561
- await this.materializeAll(repos, workspacePath);
46562
- return;
46563
- }
46898
+ async reset(repos, workspacePath, reset) {
46899
+ const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
46564
46900
  for (const repo of repos) {
46565
- const targetDir = path36.join(workspacePath, repo.path);
46901
+ const targetDir = path37.join(workspacePath, repo.path);
46566
46902
  await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
46567
- await this.runGit(["clean", "-fd"], { cwd: targetDir });
46903
+ await this.runGit(["clean", cleanFlag], { cwd: targetDir });
46568
46904
  }
46569
46905
  }
46570
- /**
46571
- * Seed the cache from a local repository, setting the remote to a given URL.
46572
- * Useful for avoiding slow network clones when a local clone already exists.
46573
- */
46574
- async seedCache(localPath, remoteUrl, opts) {
46575
- const source = { type: "git", url: remoteUrl };
46576
- const key = cacheKey(source);
46577
- const cachePath = path36.join(this.cacheDir, key);
46578
- const lockPath = `${cachePath}.lock`;
46579
- await mkdir11(this.cacheDir, { recursive: true });
46580
- await acquireLock(lockPath);
46581
- try {
46582
- if (existsSync2(path36.join(cachePath, "HEAD"))) {
46583
- if (!opts?.force) {
46584
- throw new Error(
46585
- `Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
46586
- );
46587
- }
46588
- await rm5(cachePath, { recursive: true, force: true });
46589
- }
46590
- await git(["clone", "--mirror", "--bare", localPath, cachePath]);
46591
- await git(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
46592
- } finally {
46593
- await releaseLock(lockPath);
46594
- }
46595
- return cachePath;
46596
- }
46597
- /** Remove the entire cache directory. */
46598
- async cleanCache() {
46599
- await rm5(this.cacheDir, { recursive: true, force: true });
46600
- }
46601
46906
  };
46602
46907
  async function resolveWorkspaceTemplate(templatePath) {
46603
46908
  if (!templatePath) {
46604
46909
  return void 0;
46605
46910
  }
46606
- const resolved = path37.resolve(templatePath);
46911
+ const resolved = path38.resolve(templatePath);
46607
46912
  const stats = await stat6(resolved);
46608
46913
  if (stats.isFile()) {
46609
46914
  return {
46610
- dir: path37.dirname(resolved),
46915
+ dir: path38.dirname(resolved),
46611
46916
  workspaceFile: resolved
46612
46917
  };
46613
46918
  }
46614
46919
  if (!stats.isDirectory()) {
46615
46920
  throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
46616
46921
  }
46617
- const entries = await readdir4(resolved);
46922
+ const entries = await readdir5(resolved);
46618
46923
  const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
46619
46924
  if (workspaceFiles.length === 1) {
46620
46925
  return {
46621
46926
  dir: resolved,
46622
- workspaceFile: path37.join(resolved, workspaceFiles[0])
46927
+ workspaceFile: path38.join(resolved, workspaceFiles[0])
46623
46928
  };
46624
46929
  }
46625
46930
  if (workspaceFiles.length > 1) {
46626
46931
  const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
46627
46932
  return {
46628
46933
  dir: resolved,
46629
- workspaceFile: conventionFile ? path37.join(resolved, conventionFile) : void 0
46934
+ workspaceFile: conventionFile ? path38.join(resolved, conventionFile) : void 0
46630
46935
  };
46631
46936
  }
46632
46937
  return { dir: resolved };
@@ -46674,6 +46979,22 @@ function classifyQualityStatus(score) {
46674
46979
  function usesFileReferencePrompt(provider) {
46675
46980
  return isAgentProvider(provider) || provider.kind === "cli";
46676
46981
  }
46982
/**
 * Builds the script configuration passed to `executeWorkspaceScript` from a
 * workspace hook definition.
 *
 * The command is taken from `hook.command`, falling back to `hook.script` when
 * `command` is missing or empty. This matches `hasHookCommand`, which treats a
 * hook with an empty `command` but a non-empty `script` as runnable; without
 * the empty-value fallback such a hook would pass the guard and then throw here.
 *
 * @param hook - Hook definition; reads `command`, `script`, `timeout_ms`,
 *   `timeoutMs` and `cwd`.
 * @param hookName - Hook name used in the error message (e.g. "before_all_tests").
 * @param context - Description of where the hook was declared, used in the
 *   error message (e.g. "suite workspace").
 * @returns An object with the resolved `command` plus whichever of
 *   `timeout_ms`, `timeoutMs`, `cwd` and `script` are defined on the hook.
 * @throws {Error} When neither `command` nor `script` provides a non-empty value.
 */
function toScriptConfig(hook, hookName, context) {
  let command = hook.command ?? hook.script;
  // `??` only covers null/undefined; also fall back when `command` is an
  // empty array/string and a `script` value is available.
  if (command != null && command.length === 0 && hook.script != null) {
    command = hook.script;
  }
  if (!command || command.length === 0) {
    throw new Error(`${hookName} hook in ${context} requires command or script`);
  }

  const config = { command };
  // Copy optional settings only when present so absent keys stay absent.
  for (const key of ["timeout_ms", "timeoutMs", "cwd", "script"]) {
    if (hook[key] !== undefined) {
      config[key] = hook[key];
    }
  }
  return config;
}
46995
/**
 * Tells whether a hook definition carries something to run.
 *
 * @param hook - Hook definition, or null/undefined when no hook is configured.
 * @returns `true` when `hook.command` or `hook.script` is truthy with a
 *   `length` greater than zero; `false` otherwise.
 */
function hasHookCommand(hook) {
  const isNonEmpty = (value) => Boolean(value) && value.length > 0;
  return isNonEmpty(hook?.command) || isNonEmpty(hook?.script);
}
46677
46998
  function getWorkspaceTemplate(target) {
46678
46999
  const config2 = target.config;
46679
47000
  if ("workspaceTemplate" in config2 && typeof config2.workspaceTemplate === "string") {
@@ -46704,7 +47025,15 @@ async function runEvaluation(options) {
46704
47025
  trials,
46705
47026
  streamCallbacks,
46706
47027
  totalBudgetUsd,
46707
- failOnError
47028
+ failOnError,
47029
+ poolWorkspaces,
47030
+ poolMaxSlots: configPoolMaxSlots,
47031
+ workspace: legacyWorkspacePath,
47032
+ workspaceMode,
47033
+ workspacePath,
47034
+ workspaceClean,
47035
+ retainOnSuccess,
47036
+ retainOnFailure
46708
47037
  } = options;
46709
47038
  let useCache = options.useCache;
46710
47039
  if (trials && trials.count > 1 && useCache) {
@@ -46778,7 +47107,7 @@ async function runEvaluation(options) {
46778
47107
  ];
46779
47108
  const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveJudgeProvider);
46780
47109
  const typeRegistry = createBuiltinRegistry();
46781
- const discoveryBaseDir = evalFilePath ? path38.dirname(path38.resolve(evalFilePath)) : process.cwd();
47110
+ const discoveryBaseDir = evalFilePath ? path39.dirname(path39.resolve(evalFilePath)) : process.cwd();
46782
47111
  const evalDir = discoveryBaseDir;
46783
47112
  await discoverAssertions(typeRegistry, discoveryBaseDir);
46784
47113
  const providerRegistry = createBuiltinProviderRegistry();
@@ -46840,13 +47169,29 @@ async function runEvaluation(options) {
46840
47169
  }
46841
47170
  };
46842
47171
  const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
46843
- const hasSharedWorkspace = !!(workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
47172
+ const configuredMode = suiteWorkspace?.mode ?? workspaceMode;
47173
+ const configuredStaticPath = suiteWorkspace?.static_path ?? workspacePath ?? legacyWorkspacePath;
47174
+ const useStaticWorkspace = configuredMode === "static" || !!configuredStaticPath && !configuredMode;
47175
+ if (useStaticWorkspace && isPerTestIsolation) {
47176
+ throw new Error(
47177
+ "static workspace mode is incompatible with isolation: per_test. Use isolation: shared (default)."
47178
+ );
47179
+ }
47180
+ if (configuredMode === "static" && !configuredStaticPath) {
47181
+ throw new Error("workspace.mode=static requires workspace.static_path or --workspace-path");
47182
+ }
47183
+ const hasSharedWorkspace = !!(useStaticWorkspace || workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation);
47184
+ const poolEnabled = configuredMode === "pooled" ? true : configuredMode === "ephemeral" || useStaticWorkspace ? false : suiteWorkspace?.pool ?? poolWorkspaces ?? true;
47185
+ const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
47186
+ const finishCleanPolicy = suiteWorkspace?.hooks?.on_finish?.clean;
47187
+ const resolvedRetainOnSuccess = (finishCleanPolicy === "always" || finishCleanPolicy === "on_success" ? "cleanup" : finishCleanPolicy === "on_failure" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
47188
+ const resolvedRetainOnFailure = (finishCleanPolicy === "always" || finishCleanPolicy === "on_failure" ? "cleanup" : finishCleanPolicy === "on_success" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
46844
47189
  const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
46845
- const workers = hasSharedWorkspace ? 1 : requestedWorkers;
47190
+ const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
46846
47191
  setupLog(
46847
- `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
47192
+ `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
46848
47193
  );
46849
- if (hasSharedWorkspace && requestedWorkers > 1) {
47194
+ if (hasSharedWorkspace && !usePool && requestedWorkers > 1) {
46850
47195
  console.warn(
46851
47196
  `Warning: Shared workspace requires sequential execution. Overriding workers from ${requestedWorkers} to 1.`
46852
47197
  );
@@ -46855,7 +47200,38 @@ async function runEvaluation(options) {
46855
47200
  let sharedWorkspacePath;
46856
47201
  let sharedBaselineCommit;
46857
47202
  let beforeAllOutput;
46858
- if (workspaceTemplate) {
47203
+ let poolManager;
47204
+ let poolSlot;
47205
+ const poolSlots = [];
47206
+ const availablePoolSlots = [];
47207
+ const poolSlotBaselines = /* @__PURE__ */ new Map();
47208
+ const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
47209
+ if (useStaticWorkspace && configuredStaticPath) {
47210
+ sharedWorkspacePath = configuredStaticPath;
47211
+ setupLog(`using static workspace: ${configuredStaticPath}`);
47212
+ } else if (usePool && suiteWorkspace?.repos) {
47213
+ const slotsNeeded = workers;
47214
+ setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
47215
+ poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
47216
+ const poolRepoManager = new RepoManager(verbose);
47217
+ for (let i = 0; i < slotsNeeded; i++) {
47218
+ const slot = await poolManager.acquireWorkspace({
47219
+ templatePath: workspaceTemplate,
47220
+ repos: suiteWorkspace.repos,
47221
+ maxSlots: poolMaxSlots,
47222
+ repoManager: poolRepoManager,
47223
+ poolReset: (workspaceClean === "full" ? "strict" : workspaceClean === "standard" ? "fast" : null) ?? suiteWorkspace.hooks?.on_reuse?.reset ?? "fast"
47224
+ });
47225
+ poolSlots.push(slot);
47226
+ setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
47227
+ }
47228
+ if (slotsNeeded === 1) {
47229
+ poolSlot = poolSlots[0];
47230
+ sharedWorkspacePath = poolSlot.path;
47231
+ } else {
47232
+ availablePoolSlots.push(...poolSlots);
47233
+ }
47234
+ } else if (workspaceTemplate) {
46859
47235
  setupLog(`creating shared workspace from template: ${workspaceTemplate}`);
46860
47236
  try {
46861
47237
  sharedWorkspacePath = await createTempWorkspace(workspaceTemplate, evalRunId, "shared");
@@ -46864,288 +47240,359 @@ async function runEvaluation(options) {
46864
47240
  const message = error40 instanceof Error ? error40.message : String(error40);
46865
47241
  throw new Error(`Failed to create shared workspace: ${message}`);
46866
47242
  }
47243
+ } else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
47244
+ sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
47245
+ await mkdir12(sharedWorkspacePath, { recursive: true });
47246
+ setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
47247
+ }
47248
+ try {
46867
47249
  if (suiteWorkspaceFile && sharedWorkspacePath) {
46868
- const copiedWorkspaceFile = path38.join(sharedWorkspacePath, path38.basename(suiteWorkspaceFile));
47250
+ const copiedWorkspaceFile = path39.join(sharedWorkspacePath, path39.basename(suiteWorkspaceFile));
46869
47251
  try {
46870
47252
  await stat7(copiedWorkspaceFile);
46871
47253
  suiteWorkspaceFile = copiedWorkspaceFile;
46872
47254
  } catch {
46873
47255
  }
46874
47256
  }
46875
- } else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
46876
- sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
46877
- await mkdir12(sharedWorkspacePath, { recursive: true });
46878
- setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
46879
- }
46880
- const repoManager = suiteWorkspace?.repos?.length ? new RepoManager(void 0, verbose) : void 0;
46881
- if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
46882
- setupLog(`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`);
46883
- try {
46884
- await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
46885
- setupLog("shared repo materialization complete");
46886
- } catch (error40) {
46887
- const message = error40 instanceof Error ? error40.message : String(error40);
46888
- if (sharedWorkspacePath) {
46889
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
46890
- });
46891
- }
46892
- throw new Error(`Failed to materialize repos: ${message}`);
46893
- }
46894
- }
46895
- if (sharedWorkspacePath && suiteWorkspace?.before_all) {
46896
- const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
46897
- setupLog(
46898
- `running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
46899
- );
46900
- const scriptContext = {
46901
- workspacePath: sharedWorkspacePath,
46902
- testId: "__before_all__",
46903
- evalRunId,
46904
- evalDir
46905
- };
46906
- try {
46907
- beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
46908
- setupLog("shared before_all completed");
46909
- } catch (error40) {
46910
- const message = error40 instanceof Error ? error40.message : String(error40);
46911
- if (sharedWorkspacePath) {
46912
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
46913
- });
46914
- }
46915
- throw new Error(`before_all script failed: ${message}`);
46916
- }
46917
- }
46918
- if (sharedWorkspacePath) {
46919
- try {
46920
- sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
46921
- setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
46922
- } catch {
46923
- setupLog("shared baseline initialization skipped (non-fatal)");
46924
- }
46925
- }
46926
- let nextWorkerId = 1;
46927
- const workerIdByEvalId = /* @__PURE__ */ new Map();
46928
- let beforeAllOutputAttached = false;
46929
- let cumulativeBudgetCost = 0;
46930
- let budgetExhausted = false;
46931
- let failOnErrorTriggered = false;
46932
- const promises = filteredEvalCases.map(
46933
- (evalCase) => limit(async () => {
46934
- const workerId = nextWorkerId++;
46935
- workerIdByEvalId.set(evalCase.id, workerId);
46936
- if (totalBudgetUsd !== void 0 && budgetExhausted) {
46937
- const budgetResult = {
46938
- timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
46939
- testId: evalCase.id,
46940
- dataset: evalCase.dataset,
46941
- score: 0,
46942
- hits: [],
46943
- misses: [],
46944
- answer: "",
46945
- target: target.name,
46946
- error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
46947
- budgetExceeded: true,
46948
- executionStatus: "execution_error",
46949
- failureStage: "setup",
46950
- failureReasonCode: "budget_exceeded",
46951
- executionError: {
46952
- message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
46953
- stage: "setup"
46954
- }
46955
- };
46956
- if (onProgress) {
46957
- await onProgress({
46958
- workerId,
46959
- testId: evalCase.id,
46960
- status: "failed",
46961
- completedAt: Date.now(),
46962
- error: budgetResult.error
47257
+ const repoManager = suiteWorkspace?.repos?.length && !usePool && !useStaticWorkspace ? new RepoManager(verbose) : void 0;
47258
+ if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
47259
+ setupLog(
47260
+ `materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
47261
+ );
47262
+ try {
47263
+ await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
47264
+ setupLog("shared repo materialization complete");
47265
+ } catch (error40) {
47266
+ const message = error40 instanceof Error ? error40.message : String(error40);
47267
+ if (sharedWorkspacePath && !useStaticWorkspace) {
47268
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
46963
47269
  });
46964
47270
  }
46965
- if (onResult) {
46966
- await onResult(budgetResult);
46967
- }
46968
- return budgetResult;
47271
+ throw new Error(`Failed to materialize repos: ${message}`);
46969
47272
  }
46970
- if (failOnError === true && failOnErrorTriggered) {
46971
- const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
46972
- const haltResult = {
46973
- timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
46974
- testId: evalCase.id,
46975
- dataset: evalCase.dataset,
46976
- score: 0,
46977
- hits: [],
46978
- misses: [],
46979
- answer: "",
46980
- target: target.name,
46981
- error: errorMsg,
46982
- executionStatus: "execution_error",
46983
- failureStage: "setup",
46984
- failureReasonCode: "error_threshold_exceeded",
46985
- executionError: { message: errorMsg, stage: "setup" }
46986
- };
46987
- if (onProgress) {
46988
- await onProgress({
46989
- workerId,
46990
- testId: evalCase.id,
46991
- status: "failed",
46992
- completedAt: Date.now(),
46993
- error: haltResult.error
47273
+ }
47274
+ const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all_tests;
47275
+ if (sharedWorkspacePath && hasHookCommand(suiteBeforeAllHook)) {
47276
+ const beforeAllHook = suiteBeforeAllHook;
47277
+ const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
47278
+ setupLog(
47279
+ `running shared before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
47280
+ );
47281
+ const scriptContext = {
47282
+ workspacePath: sharedWorkspacePath,
47283
+ testId: "__before_all__",
47284
+ evalRunId,
47285
+ evalDir
47286
+ };
47287
+ try {
47288
+ beforeAllOutput = await executeWorkspaceScript(
47289
+ toScriptConfig(beforeAllHook, "before_all_tests", "suite workspace"),
47290
+ scriptContext
47291
+ );
47292
+ setupLog("shared before_all completed");
47293
+ } catch (error40) {
47294
+ const message = error40 instanceof Error ? error40.message : String(error40);
47295
+ if (sharedWorkspacePath && !useStaticWorkspace) {
47296
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
46994
47297
  });
46995
47298
  }
46996
- if (onResult) {
46997
- await onResult(haltResult);
46998
- }
46999
- return haltResult;
47299
+ throw new Error(`before_all script failed: ${message}`);
47000
47300
  }
47001
- if (onProgress) {
47002
- await onProgress({
47003
- workerId,
47004
- testId: evalCase.id,
47005
- status: "running",
47006
- startedAt: Date.now()
47007
- });
47008
- }
47009
- try {
47010
- const judgeProvider = await resolveJudgeProvider(target);
47011
- const runCaseOptions = {
47012
- evalCase,
47013
- provider: primaryProvider,
47014
- target,
47015
- evaluators: evaluatorRegistry,
47016
- maxRetries,
47017
- agentTimeoutMs,
47018
- cache,
47019
- useCache,
47020
- now,
47021
- judgeProvider,
47022
- targetResolver,
47023
- availableTargets,
47301
+ }
47302
+ if (availablePoolSlots.length > 0 && hasHookCommand(suiteBeforeAllHook)) {
47303
+ const beforeAllHook = suiteBeforeAllHook;
47304
+ for (const slot of availablePoolSlots) {
47305
+ setupLog(`running before_all on pool slot ${slot.index}`);
47306
+ const scriptContext = {
47307
+ workspacePath: slot.path,
47308
+ testId: "__before_all__",
47024
47309
  evalRunId,
47025
- keepWorkspaces,
47026
- cleanupWorkspaces,
47027
- sharedWorkspacePath,
47028
- sharedBaselineCommit,
47029
- suiteWorkspaceFile,
47030
- streamCallbacks,
47031
- typeRegistry,
47032
- repoManager,
47033
47310
  evalDir
47034
47311
  };
47035
- let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
47036
- if (totalBudgetUsd !== void 0) {
47037
- let caseCost;
47038
- if (result.trials && result.trials.length > 0) {
47039
- const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
47040
- if (trialCostSum > 0) {
47041
- caseCost = trialCostSum;
47312
+ try {
47313
+ const output = await executeWorkspaceScript(
47314
+ toScriptConfig(beforeAllHook, "before_all_tests", "suite workspace"),
47315
+ scriptContext
47316
+ );
47317
+ if (!beforeAllOutput) beforeAllOutput = output;
47318
+ setupLog(`before_all completed on pool slot ${slot.index}`);
47319
+ } catch (error40) {
47320
+ const message = error40 instanceof Error ? error40.message : String(error40);
47321
+ throw new Error(`before_all script failed on pool slot ${slot.index}: ${message}`);
47322
+ }
47323
+ }
47324
+ }
47325
+ if (sharedWorkspacePath) {
47326
+ try {
47327
+ sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
47328
+ setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
47329
+ } catch {
47330
+ setupLog("shared baseline initialization skipped (non-fatal)");
47331
+ }
47332
+ }
47333
+ if (availablePoolSlots.length > 0) {
47334
+ for (const slot of availablePoolSlots) {
47335
+ try {
47336
+ const baseline = await initializeBaseline(slot.path);
47337
+ poolSlotBaselines.set(slot.path, baseline);
47338
+ setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
47339
+ } catch {
47340
+ setupLog(`pool slot ${slot.index} baseline initialization skipped (non-fatal)`);
47341
+ }
47342
+ }
47343
+ }
47344
+ let nextWorkerId = 1;
47345
+ const workerIdByEvalId = /* @__PURE__ */ new Map();
47346
+ let beforeAllOutputAttached = false;
47347
+ let cumulativeBudgetCost = 0;
47348
+ let budgetExhausted = false;
47349
+ let failOnErrorTriggered = false;
47350
+ const promises = filteredEvalCases.map(
47351
+ (evalCase) => limit(async () => {
47352
+ const workerId = nextWorkerId++;
47353
+ workerIdByEvalId.set(evalCase.id, workerId);
47354
+ if (totalBudgetUsd !== void 0 && budgetExhausted) {
47355
+ const budgetResult = {
47356
+ timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
47357
+ testId: evalCase.id,
47358
+ dataset: evalCase.dataset,
47359
+ score: 0,
47360
+ hits: [],
47361
+ misses: [],
47362
+ answer: "",
47363
+ target: target.name,
47364
+ error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
47365
+ budgetExceeded: true,
47366
+ executionStatus: "execution_error",
47367
+ failureStage: "setup",
47368
+ failureReasonCode: "budget_exceeded",
47369
+ executionError: {
47370
+ message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
47371
+ stage: "setup"
47042
47372
  }
47043
- } else {
47044
- caseCost = result.costUsd;
47373
+ };
47374
+ if (onProgress) {
47375
+ await onProgress({
47376
+ workerId,
47377
+ testId: evalCase.id,
47378
+ status: "failed",
47379
+ completedAt: Date.now(),
47380
+ error: budgetResult.error
47381
+ });
47045
47382
  }
47046
- if (caseCost !== void 0) {
47047
- cumulativeBudgetCost += caseCost;
47048
- if (cumulativeBudgetCost >= totalBudgetUsd) {
47049
- budgetExhausted = true;
47050
- }
47383
+ if (onResult) {
47384
+ await onResult(budgetResult);
47051
47385
  }
47386
+ return budgetResult;
47052
47387
  }
47053
- if (failOnError === true && result.executionStatus === "execution_error") {
47054
- failOnErrorTriggered = true;
47055
- }
47056
- if (beforeAllOutput && !beforeAllOutputAttached) {
47057
- result = { ...result, beforeAllOutput };
47058
- beforeAllOutputAttached = true;
47388
+ if (failOnError === true && failOnErrorTriggered) {
47389
+ const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
47390
+ const haltResult = {
47391
+ timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
47392
+ testId: evalCase.id,
47393
+ dataset: evalCase.dataset,
47394
+ score: 0,
47395
+ hits: [],
47396
+ misses: [],
47397
+ answer: "",
47398
+ target: target.name,
47399
+ error: errorMsg,
47400
+ executionStatus: "execution_error",
47401
+ failureStage: "setup",
47402
+ failureReasonCode: "error_threshold_exceeded",
47403
+ executionError: { message: errorMsg, stage: "setup" }
47404
+ };
47405
+ if (onProgress) {
47406
+ await onProgress({
47407
+ workerId,
47408
+ testId: evalCase.id,
47409
+ status: "failed",
47410
+ completedAt: Date.now(),
47411
+ error: haltResult.error
47412
+ });
47413
+ }
47414
+ if (onResult) {
47415
+ await onResult(haltResult);
47416
+ }
47417
+ return haltResult;
47059
47418
  }
47060
47419
  if (onProgress) {
47061
47420
  await onProgress({
47062
47421
  workerId,
47063
47422
  testId: evalCase.id,
47064
- status: result.error ? "failed" : "completed",
47065
- startedAt: 0,
47066
- // Not used for completed status
47067
- completedAt: Date.now(),
47068
- error: result.error
47423
+ status: "running",
47424
+ startedAt: Date.now()
47069
47425
  });
47070
47426
  }
47071
- if (onResult) {
47072
- await onResult(result);
47427
+ const testPoolSlot = availablePoolSlots.length > 0 ? availablePoolSlots.pop() : void 0;
47428
+ const testWorkspacePath = testPoolSlot?.path ?? sharedWorkspacePath;
47429
+ const testBaselineCommit = testPoolSlot ? poolSlotBaselines.get(testPoolSlot.path) : sharedBaselineCommit;
47430
+ try {
47431
+ const judgeProvider = await resolveJudgeProvider(target);
47432
+ const runCaseOptions = {
47433
+ evalCase,
47434
+ provider: primaryProvider,
47435
+ target,
47436
+ evaluators: evaluatorRegistry,
47437
+ maxRetries,
47438
+ agentTimeoutMs,
47439
+ cache,
47440
+ useCache,
47441
+ now,
47442
+ judgeProvider,
47443
+ targetResolver,
47444
+ availableTargets,
47445
+ evalRunId,
47446
+ keepWorkspaces,
47447
+ cleanupWorkspaces,
47448
+ retainOnSuccess: resolvedRetainOnSuccess,
47449
+ retainOnFailure: resolvedRetainOnFailure,
47450
+ sharedWorkspacePath: testWorkspacePath,
47451
+ sharedBaselineCommit: testBaselineCommit,
47452
+ suiteWorkspaceFile,
47453
+ streamCallbacks,
47454
+ typeRegistry,
47455
+ repoManager,
47456
+ evalDir
47457
+ };
47458
+ let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
47459
+ if (totalBudgetUsd !== void 0) {
47460
+ let caseCost;
47461
+ if (result.trials && result.trials.length > 0) {
47462
+ const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
47463
+ if (trialCostSum > 0) {
47464
+ caseCost = trialCostSum;
47465
+ }
47466
+ } else {
47467
+ caseCost = result.costUsd;
47468
+ }
47469
+ if (caseCost !== void 0) {
47470
+ cumulativeBudgetCost += caseCost;
47471
+ if (cumulativeBudgetCost >= totalBudgetUsd) {
47472
+ budgetExhausted = true;
47473
+ }
47474
+ }
47475
+ }
47476
+ if (failOnError === true && result.executionStatus === "execution_error") {
47477
+ failOnErrorTriggered = true;
47478
+ }
47479
+ if (beforeAllOutput && !beforeAllOutputAttached) {
47480
+ result = { ...result, beforeAllOutput };
47481
+ beforeAllOutputAttached = true;
47482
+ }
47483
+ if (onProgress) {
47484
+ await onProgress({
47485
+ workerId,
47486
+ testId: evalCase.id,
47487
+ status: result.error ? "failed" : "completed",
47488
+ startedAt: 0,
47489
+ // Not used for completed status
47490
+ completedAt: Date.now(),
47491
+ error: result.error
47492
+ });
47493
+ }
47494
+ if (onResult) {
47495
+ await onResult(result);
47496
+ }
47497
+ return result;
47498
+ } catch (error40) {
47499
+ if (onProgress) {
47500
+ await onProgress({
47501
+ workerId,
47502
+ testId: evalCase.id,
47503
+ status: "failed",
47504
+ completedAt: Date.now(),
47505
+ error: error40 instanceof Error ? error40.message : String(error40)
47506
+ });
47507
+ }
47508
+ throw error40;
47509
+ } finally {
47510
+ if (testPoolSlot) {
47511
+ availablePoolSlots.push(testPoolSlot);
47512
+ }
47073
47513
  }
47074
- return result;
47075
- } catch (error40) {
47076
- if (onProgress) {
47077
- await onProgress({
47078
- workerId,
47079
- testId: evalCase.id,
47080
- status: "failed",
47081
- completedAt: Date.now(),
47082
- error: error40 instanceof Error ? error40.message : String(error40)
47083
- });
47514
+ })
47515
+ );
47516
+ const settled = await Promise.allSettled(promises);
47517
+ const results = [];
47518
+ for (let i = 0; i < settled.length; i++) {
47519
+ const outcome = settled[i];
47520
+ if (outcome.status === "fulfilled") {
47521
+ results.push(outcome.value);
47522
+ } else {
47523
+ const evalCase = filteredEvalCases[i];
47524
+ const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
47525
+ const promptInputs = await buildPromptInputs(evalCase, formattingMode);
47526
+ const errorResult = buildErrorResult(
47527
+ evalCase,
47528
+ target.name,
47529
+ (now ?? (() => /* @__PURE__ */ new Date()))(),
47530
+ outcome.reason,
47531
+ promptInputs,
47532
+ primaryProvider,
47533
+ "agent",
47534
+ "provider_error"
47535
+ );
47536
+ results.push(errorResult);
47537
+ if (onResult) {
47538
+ await onResult(errorResult);
47084
47539
  }
47085
- throw error40;
47086
47540
  }
47087
- })
47088
- );
47089
- const settled = await Promise.allSettled(promises);
47090
- const results = [];
47091
- for (let i = 0; i < settled.length; i++) {
47092
- const outcome = settled[i];
47093
- if (outcome.status === "fulfilled") {
47094
- results.push(outcome.value);
47095
- } else {
47096
- const evalCase = filteredEvalCases[i];
47097
- const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
47098
- const promptInputs = await buildPromptInputs(evalCase, formattingMode);
47099
- const errorResult = buildErrorResult(
47100
- evalCase,
47101
- target.name,
47102
- (now ?? (() => /* @__PURE__ */ new Date()))(),
47103
- outcome.reason,
47104
- promptInputs,
47105
- primaryProvider,
47106
- "agent",
47107
- "provider_error"
47108
- );
47109
- results.push(errorResult);
47110
- if (onResult) {
47111
- await onResult(errorResult);
47541
+ }
47542
+ const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
47543
+ const suiteAfterAllHook = suiteWorkspace?.hooks?.after_all_tests;
47544
+ if (afterAllWorkspaces.length > 0 && hasHookCommand(suiteAfterAllHook)) {
47545
+ const afterAllHook = suiteAfterAllHook;
47546
+ for (const wsPath of afterAllWorkspaces) {
47547
+ const scriptContext = {
47548
+ workspacePath: wsPath,
47549
+ testId: "__after_all__",
47550
+ evalRunId,
47551
+ evalDir
47552
+ };
47553
+ try {
47554
+ const afterAllOutput = await executeWorkspaceScript(
47555
+ toScriptConfig(afterAllHook, "after_all_tests", "suite workspace"),
47556
+ scriptContext,
47557
+ "warn"
47558
+ );
47559
+ if (afterAllOutput && results.length > 0 && wsPath === afterAllWorkspaces[0]) {
47560
+ results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
47561
+ }
47562
+ } catch {
47563
+ }
47112
47564
  }
47113
47565
  }
47114
- }
47115
- if (sharedWorkspacePath && suiteWorkspace?.after_all) {
47116
- const scriptContext = {
47117
- workspacePath: sharedWorkspacePath,
47118
- testId: "__after_all__",
47119
- evalRunId,
47120
- evalDir
47121
- };
47122
- try {
47123
- const afterAllOutput = await executeWorkspaceScript(
47124
- suiteWorkspace.after_all,
47125
- scriptContext,
47126
- "warn"
47127
- );
47128
- if (afterAllOutput && results.length > 0) {
47129
- results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
47566
+ if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !useStaticWorkspace) {
47567
+ const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
47568
+ if (hasFailure) {
47569
+ if (resolvedRetainOnFailure === "cleanup") {
47570
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
47571
+ });
47572
+ }
47573
+ } else if (resolvedRetainOnSuccess === "cleanup") {
47574
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
47575
+ });
47130
47576
  }
47131
- } catch {
47132
47577
  }
47133
- }
47134
- if (sharedWorkspacePath) {
47135
- const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
47136
47578
  if (cleanupWorkspaces) {
47137
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
47138
- });
47139
- } else if (!hasFailure && !keepWorkspaces) {
47140
- await cleanupWorkspace(sharedWorkspacePath).catch(() => {
47579
+ await cleanupEvalWorkspaces(evalRunId).catch(() => {
47141
47580
  });
47142
47581
  }
47582
+ return results;
47583
+ } finally {
47584
+ if (poolManager) {
47585
+ if (poolSlot) {
47586
+ await poolManager.releaseSlot(poolSlot);
47587
+ }
47588
+ for (const slot of poolSlots) {
47589
+ if (slot !== poolSlot) {
47590
+ await poolManager.releaseSlot(slot).catch(() => {
47591
+ });
47592
+ }
47593
+ }
47594
+ }
47143
47595
  }
47144
- if (cleanupWorkspaces) {
47145
- await cleanupEvalWorkspaces(evalRunId).catch(() => {
47146
- });
47147
- }
47148
- return results;
47149
47596
  }
47150
47597
  async function runBatchEvaluation(options) {
47151
47598
  const {
@@ -47317,6 +47764,8 @@ async function runEvalCase(options) {
47317
47764
  evalRunId,
47318
47765
  keepWorkspaces,
47319
47766
  cleanupWorkspaces: forceCleanup,
47767
+ retainOnSuccess,
47768
+ retainOnFailure,
47320
47769
  sharedWorkspacePath,
47321
47770
  sharedBaselineCommit,
47322
47771
  suiteWorkspaceFile,
@@ -47328,10 +47777,10 @@ async function runEvalCase(options) {
47328
47777
  const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
47329
47778
  const promptInputs = await buildPromptInputs(evalCase, formattingMode);
47330
47779
  const typeRegistry = providedTypeRegistry ?? createBuiltinRegistry();
47331
- const cacheKey2 = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
47780
+ const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
47332
47781
  let cachedResponse;
47333
- if (cacheKey2 && cache) {
47334
- cachedResponse = await cache.get(cacheKey2);
47782
+ if (cacheKey && cache) {
47783
+ cachedResponse = await cache.get(cacheKey);
47335
47784
  }
47336
47785
  const nowFn = now ?? (() => /* @__PURE__ */ new Date());
47337
47786
  let workspacePath = sharedWorkspacePath;
@@ -47362,7 +47811,7 @@ async function runEvalCase(options) {
47362
47811
  );
47363
47812
  }
47364
47813
  if (caseWorkspaceFile && workspacePath) {
47365
- const copiedFile = path38.join(workspacePath, path38.basename(caseWorkspaceFile));
47814
+ const copiedFile = path39.join(workspacePath, path39.basename(caseWorkspaceFile));
47366
47815
  try {
47367
47816
  await stat7(copiedFile);
47368
47817
  caseWorkspaceFile = copiedFile;
@@ -47370,12 +47819,12 @@ async function runEvalCase(options) {
47370
47819
  }
47371
47820
  }
47372
47821
  }
47373
- if (!workspacePath && (evalCase.workspace?.before_all || evalCase.workspace?.repos?.length) && evalRunId) {
47822
+ if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
47374
47823
  workspacePath = getWorkspacePath(evalRunId, evalCase.id);
47375
47824
  await mkdir12(workspacePath, { recursive: true });
47376
47825
  }
47377
47826
  if (evalCase.workspace?.repos?.length && workspacePath) {
47378
- const perCaseRepoManager = new RepoManager(void 0, setupDebug);
47827
+ const perCaseRepoManager = new RepoManager(setupDebug);
47379
47828
  try {
47380
47829
  if (setupDebug) {
47381
47830
  console.log(
@@ -47400,11 +47849,13 @@ async function runEvalCase(options) {
47400
47849
  );
47401
47850
  }
47402
47851
  }
47403
- if (workspacePath && evalCase.workspace?.before_all) {
47404
- const beforeAllCommand = (evalCase.workspace.before_all.command ?? evalCase.workspace.before_all.script ?? []).join(" ");
47852
+ const caseBeforeAllHook = evalCase.workspace?.hooks?.before_all_tests;
47853
+ if (workspacePath && hasHookCommand(caseBeforeAllHook)) {
47854
+ const beforeAllHook = caseBeforeAllHook;
47855
+ const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
47405
47856
  if (setupDebug) {
47406
47857
  console.log(
47407
- `[setup] test=${evalCase.id} running before_all in cwd=${evalCase.workspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
47858
+ `[setup] test=${evalCase.id} running before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
47408
47859
  );
47409
47860
  }
47410
47861
  const scriptContext = {
@@ -47417,7 +47868,7 @@ async function runEvalCase(options) {
47417
47868
  };
47418
47869
  try {
47419
47870
  beforeAllOutput = await executeWorkspaceScript(
47420
- evalCase.workspace.before_all,
47871
+ toScriptConfig(beforeAllHook, "before_all_tests", `test '${evalCase.id}'`),
47421
47872
  scriptContext
47422
47873
  );
47423
47874
  if (setupDebug) {
@@ -47442,7 +47893,9 @@ async function runEvalCase(options) {
47442
47893
  }
47443
47894
  }
47444
47895
  }
47445
- if (workspacePath && evalCase.workspace?.before_each) {
47896
+ const caseBeforeEachHook = evalCase.workspace?.hooks?.before_each_test;
47897
+ if (workspacePath && hasHookCommand(caseBeforeEachHook)) {
47898
+ const beforeEachHook = caseBeforeEachHook;
47446
47899
  const scriptContext = {
47447
47900
  workspacePath,
47448
47901
  testId: evalCase.id,
@@ -47453,7 +47906,7 @@ async function runEvalCase(options) {
47453
47906
  };
47454
47907
  try {
47455
47908
  beforeEachOutput = await executeWorkspaceScript(
47456
- evalCase.workspace.before_each,
47909
+ toScriptConfig(beforeEachHook, "before_each_test", `test '${evalCase.id}'`),
47457
47910
  scriptContext
47458
47911
  );
47459
47912
  } catch (error40) {
@@ -47541,8 +47994,8 @@ async function runEvalCase(options) {
47541
47994
  }
47542
47995
  return errorResult;
47543
47996
  }
47544
- if (cacheKey2 && cache && !cachedResponse) {
47545
- await cache.set(cacheKey2, providerResponse);
47997
+ if (cacheKey && cache && !cachedResponse) {
47998
+ await cache.set(cacheKey, providerResponse);
47546
47999
  }
47547
48000
  const output = providerResponse.output;
47548
48001
  const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
@@ -47570,17 +48023,19 @@ async function runEvalCase(options) {
47570
48023
  }
47571
48024
  }
47572
48025
  const providerError = extractProviderError(providerResponse);
47573
- if (repoManager && workspacePath && evalCase.workspace?.reset?.after_each && evalCase.workspace.reset.strategy && evalCase.workspace.reset.strategy !== "none" && evalCase.workspace.repos) {
48026
+ if (repoManager && workspacePath && evalCase.workspace?.hooks?.after_each_test?.reset && evalCase.workspace.hooks.after_each_test.reset !== "none" && evalCase.workspace.repos) {
47574
48027
  try {
47575
48028
  await repoManager.reset(
47576
48029
  evalCase.workspace.repos,
47577
48030
  workspacePath,
47578
- evalCase.workspace.reset.strategy
48031
+ evalCase.workspace.hooks.after_each_test.reset
47579
48032
  );
47580
48033
  } catch {
47581
48034
  }
47582
48035
  }
47583
- if (workspacePath && evalCase.workspace?.after_each) {
48036
+ const caseAfterEachHook = evalCase.workspace?.hooks?.after_each_test;
48037
+ if (workspacePath && hasHookCommand(caseAfterEachHook)) {
48038
+ const afterEachHook = caseAfterEachHook;
47584
48039
  const scriptContext = {
47585
48040
  workspacePath,
47586
48041
  testId: evalCase.id,
@@ -47591,7 +48046,7 @@ async function runEvalCase(options) {
47591
48046
  };
47592
48047
  try {
47593
48048
  afterEachOutput = await executeWorkspaceScript(
47594
- evalCase.workspace.after_each,
48049
+ toScriptConfig(afterEachHook, "after_each_test", `test '${evalCase.id}'`),
47595
48050
  scriptContext,
47596
48051
  "warn"
47597
48052
  );
@@ -47641,8 +48096,13 @@ async function runEvalCase(options) {
47641
48096
  await cleanupWorkspace(workspacePath).catch(() => {
47642
48097
  });
47643
48098
  } else if (isFailure) {
47644
- return { ...finalResult, workspacePath };
47645
- } else if (!keepWorkspaces) {
48099
+ if ((retainOnFailure ?? "keep") === "cleanup") {
48100
+ await cleanupWorkspace(workspacePath).catch(() => {
48101
+ });
48102
+ } else {
48103
+ return { ...finalResult, workspacePath };
48104
+ }
48105
+ } else if ((retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup")) !== "keep") {
47646
48106
  await cleanupWorkspace(workspacePath).catch(() => {
47647
48107
  });
47648
48108
  }
@@ -47660,11 +48120,12 @@ async function runEvalCase(options) {
47660
48120
  "evaluator_error"
47661
48121
  );
47662
48122
  if (workspacePath && !isSharedWorkspace) {
47663
- if (forceCleanup) {
48123
+ if (forceCleanup || (retainOnFailure ?? "keep") === "cleanup") {
47664
48124
  await cleanupWorkspace(workspacePath).catch(() => {
47665
48125
  });
48126
+ } else {
48127
+ return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
47666
48128
  }
47667
- return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
47668
48129
  }
47669
48130
  return { ...errorResult, beforeEachOutput, afterEachOutput };
47670
48131
  }
@@ -47683,7 +48144,9 @@ async function runEvalCaseWithTrials(options, trialsConfig) {
47683
48144
  useCache: false,
47684
48145
  // Force cleanup for intermediate trials
47685
48146
  cleanupWorkspaces: isLastDeclaredTrial ? options.cleanupWorkspaces : true,
47686
- keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false
48147
+ keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false,
48148
+ retainOnSuccess: isLastDeclaredTrial ? options.retainOnSuccess : "cleanup",
48149
+ retainOnFailure: isLastDeclaredTrial ? options.retainOnFailure : "cleanup"
47687
48150
  };
47688
48151
  const result = await runEvalCase(trialOptions);
47689
48152
  allResults.push(result);
@@ -47972,7 +48435,7 @@ async function runEvaluatorList(options) {
47972
48435
  fileChanges,
47973
48436
  workspacePath
47974
48437
  };
47975
- const evalFileDir = evalCase.guideline_paths[0] ? path38.dirname(evalCase.guideline_paths[0]) : process.cwd();
48438
+ const evalFileDir = evalCase.guideline_paths[0] ? path39.dirname(evalCase.guideline_paths[0]) : process.cwd();
47976
48439
  const dispatchContext = {
47977
48440
  judgeProvider,
47978
48441
  targetResolver,
@@ -48293,13 +48756,13 @@ async function evaluate(config2) {
48293
48756
  let evalCases;
48294
48757
  let testFilePath;
48295
48758
  if (config2.specFile) {
48296
- testFilePath = path39.resolve(config2.specFile);
48759
+ testFilePath = path40.resolve(config2.specFile);
48297
48760
  evalCases = await loadTests(testFilePath, repoRoot, {
48298
48761
  verbose: config2.verbose,
48299
48762
  filter: config2.filter
48300
48763
  });
48301
48764
  } else {
48302
- testFilePath = path39.join(process.cwd(), "__programmatic__.yaml");
48765
+ testFilePath = path40.join(process.cwd(), "__programmatic__.yaml");
48303
48766
  evalCases = (config2.tests ?? []).map((test) => {
48304
48767
  const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
48305
48768
  const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
@@ -48385,10 +48848,10 @@ function computeSummary(results, durationMs) {
48385
48848
  var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
48386
48849
  async function discoverDefaultTarget(repoRoot) {
48387
48850
  const cwd = process.cwd();
48388
- const chain = buildDirectoryChain(path39.join(cwd, "_placeholder"), repoRoot);
48851
+ const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
48389
48852
  for (const dir of chain) {
48390
48853
  for (const candidate of TARGET_FILE_CANDIDATES) {
48391
- const targetsPath = path39.join(dir, candidate);
48854
+ const targetsPath = path40.join(dir, candidate);
48392
48855
  if (!existsSync3(targetsPath)) continue;
48393
48856
  try {
48394
48857
  const definitions = await readTargetDefinitions(targetsPath);
@@ -48403,10 +48866,10 @@ async function discoverDefaultTarget(repoRoot) {
48403
48866
  async function loadEnvHierarchy(repoRoot) {
48404
48867
  const { readFileSync: readFileSync2 } = await import("node:fs");
48405
48868
  const cwd = process.cwd();
48406
- const chain = buildDirectoryChain(path39.join(cwd, "_placeholder"), repoRoot);
48869
+ const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
48407
48870
  const envFiles = [];
48408
48871
  for (const dir of chain) {
48409
- const envPath = path39.join(dir, ".env");
48872
+ const envPath = path40.join(dir, ".env");
48410
48873
  if (existsSync3(envPath)) envFiles.push(envPath);
48411
48874
  }
48412
48875
  for (let i = envFiles.length - 1; i >= 0; i--) {
@@ -48587,7 +49050,7 @@ var ResponseCache = class {
48587
49050
  async get(key) {
48588
49051
  const filePath = this.keyToPath(key);
48589
49052
  try {
48590
- const data = await readFile11(filePath, "utf8");
49053
+ const data = await readFile12(filePath, "utf8");
48591
49054
  return JSON.parse(data);
48592
49055
  } catch {
48593
49056
  return void 0;
@@ -48595,13 +49058,13 @@ var ResponseCache = class {
48595
49058
  }
48596
49059
  async set(key, value) {
48597
49060
  const filePath = this.keyToPath(key);
48598
- const dir = path40.dirname(filePath);
49061
+ const dir = path41.dirname(filePath);
48599
49062
  await mkdir13(dir, { recursive: true });
48600
49063
  await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
48601
49064
  }
48602
49065
  keyToPath(key) {
48603
49066
  const prefix = key.slice(0, 2);
48604
- return path40.join(this.cachePath, prefix, `${key}.json`);
49067
+ return path41.join(this.cachePath, prefix, `${key}.json`);
48605
49068
  }
48606
49069
  };
48607
49070
  function shouldEnableCache(params) {
@@ -49107,9 +49570,9 @@ export {
49107
49570
  ProviderRegistry,
49108
49571
  getAgentvHome,
49109
49572
  getWorkspacesRoot,
49110
- getGitCacheRoot,
49111
49573
  getSubagentsRoot,
49112
49574
  getTraceStateRoot,
49575
+ getWorkspacePoolRoot,
49113
49576
  ensureVSCodeSubagents,
49114
49577
  readTargetDefinitions,
49115
49578
  listTargetNames,
@@ -49171,6 +49634,8 @@ export {
49171
49634
  createTempWorkspace,
49172
49635
  cleanupWorkspace,
49173
49636
  cleanupEvalWorkspaces,
49637
+ computeWorkspaceFingerprint,
49638
+ WorkspacePoolManager,
49174
49639
  RepoManager,
49175
49640
  resolveWorkspaceTemplate,
49176
49641
  executeWorkspaceScript,
@@ -49189,4 +49654,4 @@ export {
49189
49654
  OtelStreamingObserver,
49190
49655
  createAgentKernel
49191
49656
  };
49192
- //# sourceMappingURL=chunk-TK4PB62M.js.map
49657
+ //# sourceMappingURL=chunk-LZ5MPQFM.js.map