agentv 2.14.3 → 2.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-D3WKMO6D.js → chunk-JZ62HLUC.js} +254 -119
- package/dist/chunk-JZ62HLUC.js.map +1 -0
- package/dist/{chunk-TK4PB62M.js → chunk-LZ5MPQFM.js} +1085 -620
- package/dist/chunk-LZ5MPQFM.js.map +1 -0
- package/dist/{chunk-MDAFSKDI.js → chunk-QLCVA3ZS.js} +56 -12
- package/dist/chunk-QLCVA3ZS.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-F62E44UQ.js → dist-BMNEH377.js} +8 -4
- package/dist/index.js +3 -3
- package/dist/{interactive-SQSP4PL3.js → interactive-ZXYNPRCT.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-D3WKMO6D.js.map +0 -1
- package/dist/chunk-MDAFSKDI.js.map +0 -1
- package/dist/chunk-TK4PB62M.js.map +0 -1
- /package/dist/{dist-F62E44UQ.js.map → dist-BMNEH377.js.map} +0 -0
- /package/dist/{interactive-SQSP4PL3.js.map → interactive-ZXYNPRCT.js.map} +0 -0
|
@@ -148,7 +148,7 @@ var require_dist = __commonJS({
|
|
|
148
148
|
}
|
|
149
149
|
});
|
|
150
150
|
|
|
151
|
-
// ../../packages/core/dist/chunk-
|
|
151
|
+
// ../../packages/core/dist/chunk-E6AJPAXM.js
|
|
152
152
|
import { constants } from "node:fs";
|
|
153
153
|
import { access, readFile } from "node:fs/promises";
|
|
154
154
|
import path from "node:path";
|
|
@@ -632,8 +632,8 @@ function getErrorMap() {
|
|
|
632
632
|
|
|
633
633
|
// ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
|
|
634
634
|
var makeIssue = (params) => {
|
|
635
|
-
const { data, path:
|
|
636
|
-
const fullPath = [...
|
|
635
|
+
const { data, path: path42, errorMaps, issueData } = params;
|
|
636
|
+
const fullPath = [...path42, ...issueData.path || []];
|
|
637
637
|
const fullIssue = {
|
|
638
638
|
...issueData,
|
|
639
639
|
path: fullPath
|
|
@@ -749,11 +749,11 @@ var errorUtil;
|
|
|
749
749
|
|
|
750
750
|
// ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
|
|
751
751
|
var ParseInputLazyPath = class {
|
|
752
|
-
constructor(parent, value,
|
|
752
|
+
constructor(parent, value, path42, key) {
|
|
753
753
|
this._cachedPath = [];
|
|
754
754
|
this.parent = parent;
|
|
755
755
|
this.data = value;
|
|
756
|
-
this._path =
|
|
756
|
+
this._path = path42;
|
|
757
757
|
this._key = key;
|
|
758
758
|
}
|
|
759
759
|
get path() {
|
|
@@ -4195,7 +4195,7 @@ var coerce = {
|
|
|
4195
4195
|
};
|
|
4196
4196
|
var NEVER = INVALID;
|
|
4197
4197
|
|
|
4198
|
-
// ../../packages/core/dist/chunk-
|
|
4198
|
+
// ../../packages/core/dist/chunk-E6AJPAXM.js
|
|
4199
4199
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
4200
4200
|
var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
|
|
4201
4201
|
var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
|
|
@@ -6661,10 +6661,10 @@ function assignProp(target, prop, value) {
|
|
|
6661
6661
|
configurable: true
|
|
6662
6662
|
});
|
|
6663
6663
|
}
|
|
6664
|
-
function getElementAtPath(obj,
|
|
6665
|
-
if (!
|
|
6664
|
+
function getElementAtPath(obj, path42) {
|
|
6665
|
+
if (!path42)
|
|
6666
6666
|
return obj;
|
|
6667
|
-
return
|
|
6667
|
+
return path42.reduce((acc, key) => acc?.[key], obj);
|
|
6668
6668
|
}
|
|
6669
6669
|
function promiseAllObject(promisesObj) {
|
|
6670
6670
|
const keys = Object.keys(promisesObj);
|
|
@@ -6984,11 +6984,11 @@ function aborted(x, startIndex = 0) {
|
|
|
6984
6984
|
}
|
|
6985
6985
|
return false;
|
|
6986
6986
|
}
|
|
6987
|
-
function prefixIssues(
|
|
6987
|
+
function prefixIssues(path42, issues) {
|
|
6988
6988
|
return issues.map((iss) => {
|
|
6989
6989
|
var _a17;
|
|
6990
6990
|
(_a17 = iss).path ?? (_a17.path = []);
|
|
6991
|
-
iss.path.unshift(
|
|
6991
|
+
iss.path.unshift(path42);
|
|
6992
6992
|
return iss;
|
|
6993
6993
|
});
|
|
6994
6994
|
}
|
|
@@ -7125,7 +7125,7 @@ function treeifyError(error40, _mapper) {
|
|
|
7125
7125
|
return issue2.message;
|
|
7126
7126
|
};
|
|
7127
7127
|
const result = { errors: [] };
|
|
7128
|
-
const processError = (error41,
|
|
7128
|
+
const processError = (error41, path42 = []) => {
|
|
7129
7129
|
var _a17, _b8;
|
|
7130
7130
|
for (const issue2 of error41.issues) {
|
|
7131
7131
|
if (issue2.code === "invalid_union" && issue2.errors.length) {
|
|
@@ -7135,7 +7135,7 @@ function treeifyError(error40, _mapper) {
|
|
|
7135
7135
|
} else if (issue2.code === "invalid_element") {
|
|
7136
7136
|
processError({ issues: issue2.issues }, issue2.path);
|
|
7137
7137
|
} else {
|
|
7138
|
-
const fullpath = [...
|
|
7138
|
+
const fullpath = [...path42, ...issue2.path];
|
|
7139
7139
|
if (fullpath.length === 0) {
|
|
7140
7140
|
result.errors.push(mapper(issue2));
|
|
7141
7141
|
continue;
|
|
@@ -7165,9 +7165,9 @@ function treeifyError(error40, _mapper) {
|
|
|
7165
7165
|
processError(error40);
|
|
7166
7166
|
return result;
|
|
7167
7167
|
}
|
|
7168
|
-
function toDotPath(
|
|
7168
|
+
function toDotPath(path42) {
|
|
7169
7169
|
const segs = [];
|
|
7170
|
-
for (const seg of
|
|
7170
|
+
for (const seg of path42) {
|
|
7171
7171
|
if (typeof seg === "number")
|
|
7172
7172
|
segs.push(`[${seg}]`);
|
|
7173
7173
|
else if (typeof seg === "symbol")
|
|
@@ -26720,14 +26720,14 @@ function createAzure(options = {}) {
|
|
|
26720
26720
|
description: "Azure OpenAI resource name"
|
|
26721
26721
|
});
|
|
26722
26722
|
const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
|
|
26723
|
-
const url2 = ({ path:
|
|
26723
|
+
const url2 = ({ path: path42, modelId }) => {
|
|
26724
26724
|
var _a24;
|
|
26725
26725
|
const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
|
|
26726
26726
|
let fullUrl;
|
|
26727
26727
|
if (options.useDeploymentBasedUrls) {
|
|
26728
|
-
fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${
|
|
26728
|
+
fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path42}`);
|
|
26729
26729
|
} else {
|
|
26730
|
-
fullUrl = new URL(`${baseUrlPrefix}/v1${
|
|
26730
|
+
fullUrl = new URL(`${baseUrlPrefix}/v1${path42}`);
|
|
26731
26731
|
}
|
|
26732
26732
|
fullUrl.searchParams.set("api-version", apiVersion);
|
|
26733
26733
|
return fullUrl.toString();
|
|
@@ -33962,7 +33962,7 @@ import fs2 from "node:fs/promises";
|
|
|
33962
33962
|
import path31 from "node:path";
|
|
33963
33963
|
import { createHash as createHash2, randomUUID as randomUUID7 } from "node:crypto";
|
|
33964
33964
|
import { mkdir as mkdir12, stat as stat7 } from "node:fs/promises";
|
|
33965
|
-
import
|
|
33965
|
+
import path39 from "node:path";
|
|
33966
33966
|
import micromatch4 from "micromatch";
|
|
33967
33967
|
import { readFileSync } from "node:fs";
|
|
33968
33968
|
import path322 from "node:path";
|
|
@@ -33977,15 +33977,18 @@ import path35 from "node:path";
|
|
|
33977
33977
|
import { execFile } from "node:child_process";
|
|
33978
33978
|
import { createHash } from "node:crypto";
|
|
33979
33979
|
import { existsSync as existsSync2 } from "node:fs";
|
|
33980
|
-
import { mkdir as mkdir11, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
33980
|
+
import { cp as cp2, mkdir as mkdir11, readFile as readFile11, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
33981
33981
|
import path36 from "node:path";
|
|
33982
33982
|
import { promisify as promisify5 } from "node:util";
|
|
33983
|
-
import {
|
|
33983
|
+
import { execFile as execFile2 } from "node:child_process";
|
|
33984
33984
|
import path37 from "node:path";
|
|
33985
|
+
import { promisify as promisify6 } from "node:util";
|
|
33986
|
+
import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
|
|
33987
|
+
import path38 from "node:path";
|
|
33985
33988
|
import { existsSync as existsSync3 } from "node:fs";
|
|
33986
|
-
import path39 from "node:path";
|
|
33987
|
-
import { mkdir as mkdir13, readFile as readFile11, writeFile as writeFile8 } from "node:fs/promises";
|
|
33988
33989
|
import path40 from "node:path";
|
|
33990
|
+
import { mkdir as mkdir13, readFile as readFile12, writeFile as writeFile8 } from "node:fs/promises";
|
|
33991
|
+
import path41 from "node:path";
|
|
33989
33992
|
function computeTraceSummary(messages) {
|
|
33990
33993
|
const toolCallCounts = {};
|
|
33991
33994
|
const toolDurations = {};
|
|
@@ -34546,6 +34549,17 @@ function parseExecutionDefaults(raw, configPath) {
|
|
|
34546
34549
|
} else if (otelFile !== void 0) {
|
|
34547
34550
|
logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
|
|
34548
34551
|
}
|
|
34552
|
+
if (typeof obj.pool_workspaces === "boolean") {
|
|
34553
|
+
result.pool_workspaces = obj.pool_workspaces;
|
|
34554
|
+
} else if (obj.pool_workspaces !== void 0) {
|
|
34555
|
+
logWarning(`Invalid execution.pool_workspaces in ${configPath}, expected boolean`);
|
|
34556
|
+
}
|
|
34557
|
+
const poolSlots = obj.pool_slots;
|
|
34558
|
+
if (typeof poolSlots === "number" && Number.isInteger(poolSlots) && poolSlots >= 1 && poolSlots <= 50) {
|
|
34559
|
+
result.pool_slots = poolSlots;
|
|
34560
|
+
} else if (poolSlots !== void 0) {
|
|
34561
|
+
logWarning(`Invalid execution.pool_slots in ${configPath}, expected integer 1-50`);
|
|
34562
|
+
}
|
|
34549
34563
|
return Object.keys(result).length > 0 ? result : void 0;
|
|
34550
34564
|
}
|
|
34551
34565
|
function logWarning(message) {
|
|
@@ -35961,6 +35975,7 @@ async function processMessages(options) {
|
|
|
35961
35975
|
repoRootPath,
|
|
35962
35976
|
guidelinePatterns,
|
|
35963
35977
|
guidelinePaths,
|
|
35978
|
+
treatFileSegmentsAsGuidelines,
|
|
35964
35979
|
textParts,
|
|
35965
35980
|
messageType,
|
|
35966
35981
|
verbose
|
|
@@ -36008,16 +36023,20 @@ async function processMessages(options) {
|
|
|
36008
36023
|
}
|
|
36009
36024
|
try {
|
|
36010
36025
|
const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
36011
|
-
|
|
36012
|
-
|
|
36013
|
-
|
|
36014
|
-
|
|
36015
|
-
|
|
36016
|
-
|
|
36017
|
-
|
|
36018
|
-
|
|
36019
|
-
|
|
36026
|
+
const classifyAsGuideline = shouldTreatAsGuideline({
|
|
36027
|
+
messageType,
|
|
36028
|
+
resolvedPath,
|
|
36029
|
+
repoRootPath,
|
|
36030
|
+
guidelinePatterns,
|
|
36031
|
+
treatFileSegmentsAsGuidelines
|
|
36032
|
+
});
|
|
36033
|
+
if (classifyAsGuideline && guidelinePaths) {
|
|
36034
|
+
guidelinePaths.push(path5.resolve(resolvedPath));
|
|
36035
|
+
if (verbose) {
|
|
36036
|
+
console.log(` [Guideline] Found: ${displayPath}`);
|
|
36037
|
+
console.log(` Resolved to: ${resolvedPath}`);
|
|
36020
36038
|
}
|
|
36039
|
+
continue;
|
|
36021
36040
|
}
|
|
36022
36041
|
segments.push({
|
|
36023
36042
|
type: "file",
|
|
@@ -36046,6 +36065,26 @@ async function processMessages(options) {
|
|
|
36046
36065
|
}
|
|
36047
36066
|
return segments;
|
|
36048
36067
|
}
|
|
36068
|
+
function shouldTreatAsGuideline(options) {
|
|
36069
|
+
const {
|
|
36070
|
+
messageType,
|
|
36071
|
+
resolvedPath,
|
|
36072
|
+
repoRootPath,
|
|
36073
|
+
guidelinePatterns,
|
|
36074
|
+
treatFileSegmentsAsGuidelines
|
|
36075
|
+
} = options;
|
|
36076
|
+
if (messageType !== "input") {
|
|
36077
|
+
return false;
|
|
36078
|
+
}
|
|
36079
|
+
if (treatFileSegmentsAsGuidelines) {
|
|
36080
|
+
return true;
|
|
36081
|
+
}
|
|
36082
|
+
if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
36083
|
+
return false;
|
|
36084
|
+
}
|
|
36085
|
+
const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
|
|
36086
|
+
return isGuidelineFile(relativeToRepo, guidelinePatterns);
|
|
36087
|
+
}
|
|
36049
36088
|
function asString3(value) {
|
|
36050
36089
|
return typeof value === "string" ? value : void 0;
|
|
36051
36090
|
}
|
|
@@ -36380,6 +36419,8 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
36380
36419
|
for (const guidelinePath of testCase.guideline_paths) {
|
|
36381
36420
|
console.log(` - ${guidelinePath}`);
|
|
36382
36421
|
}
|
|
36422
|
+
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
36423
|
+
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
36383
36424
|
} else {
|
|
36384
36425
|
console.log(" No guidelines found");
|
|
36385
36426
|
}
|
|
@@ -36740,7 +36781,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36740
36781
|
} else {
|
|
36741
36782
|
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
|
|
36742
36783
|
}
|
|
36743
|
-
const suiteWorkspace =
|
|
36784
|
+
const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
|
|
36744
36785
|
const suiteInputMessages = expandInputShorthand(suite.input);
|
|
36745
36786
|
const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
36746
36787
|
const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
|
|
@@ -36776,12 +36817,24 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36776
36817
|
}
|
|
36777
36818
|
const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
|
|
36778
36819
|
const skipDefaults = caseExecution?.skip_defaults === true;
|
|
36779
|
-
const
|
|
36820
|
+
const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
|
|
36821
|
+
const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
|
|
36780
36822
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
36781
36823
|
const guidelinePaths = [];
|
|
36782
36824
|
const inputTextParts = [];
|
|
36783
|
-
const
|
|
36784
|
-
messages:
|
|
36825
|
+
const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
|
|
36826
|
+
messages: effectiveSuiteInputMessages,
|
|
36827
|
+
searchRoots,
|
|
36828
|
+
repoRootPath,
|
|
36829
|
+
guidelinePatterns,
|
|
36830
|
+
guidelinePaths,
|
|
36831
|
+
treatFileSegmentsAsGuidelines: true,
|
|
36832
|
+
textParts: inputTextParts,
|
|
36833
|
+
messageType: "input",
|
|
36834
|
+
verbose
|
|
36835
|
+
}) : [];
|
|
36836
|
+
const testInputSegments = await processMessages({
|
|
36837
|
+
messages: testInputMessages,
|
|
36785
36838
|
searchRoots,
|
|
36786
36839
|
repoRootPath,
|
|
36787
36840
|
guidelinePatterns,
|
|
@@ -36790,6 +36843,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36790
36843
|
messageType: "input",
|
|
36791
36844
|
verbose
|
|
36792
36845
|
});
|
|
36846
|
+
const inputSegments = [...suiteInputSegments, ...testInputSegments];
|
|
36793
36847
|
const outputSegments = hasExpectedMessages ? await processExpectedMessages({
|
|
36794
36848
|
messages: expectedMessages,
|
|
36795
36849
|
searchRoots,
|
|
@@ -36837,7 +36891,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36837
36891
|
...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
|
|
36838
36892
|
...userFilePaths
|
|
36839
36893
|
];
|
|
36840
|
-
const caseWorkspace =
|
|
36894
|
+
const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
|
|
36841
36895
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
36842
36896
|
const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
|
|
36843
36897
|
const caseTargets = extractTargetsFromTestCase(evalcase);
|
|
@@ -36868,6 +36922,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36868
36922
|
for (const guidelinePath of testCase.guideline_paths) {
|
|
36869
36923
|
console.log(` - ${guidelinePath}`);
|
|
36870
36924
|
}
|
|
36925
|
+
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
36926
|
+
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
36871
36927
|
} else {
|
|
36872
36928
|
console.log(" No guidelines found");
|
|
36873
36929
|
}
|
|
@@ -36956,17 +37012,58 @@ function parseRepoConfig(raw) {
|
|
|
36956
37012
|
...clone2 !== void 0 && { clone: clone2 }
|
|
36957
37013
|
};
|
|
36958
37014
|
}
|
|
36959
|
-
function
|
|
37015
|
+
function parseWorkspaceHookConfig(raw, evalFileDir) {
|
|
36960
37016
|
if (!isJsonObject(raw)) return void 0;
|
|
37017
|
+
const script = parseWorkspaceScriptConfig(raw, evalFileDir);
|
|
36961
37018
|
const obj = raw;
|
|
36962
|
-
const
|
|
36963
|
-
const
|
|
36964
|
-
if (!
|
|
37019
|
+
const reset = obj.reset === "none" || obj.reset === "fast" || obj.reset === "strict" ? obj.reset : void 0;
|
|
37020
|
+
const clean = obj.clean === "always" || obj.clean === "on_success" || obj.clean === "on_failure" || obj.clean === "never" ? obj.clean : void 0;
|
|
37021
|
+
if (!script && !reset && !clean) return void 0;
|
|
36965
37022
|
return {
|
|
36966
|
-
...
|
|
36967
|
-
...
|
|
37023
|
+
...script ?? {},
|
|
37024
|
+
...reset !== void 0 && { reset },
|
|
37025
|
+
...clean !== void 0 && { clean }
|
|
36968
37026
|
};
|
|
36969
37027
|
}
|
|
37028
|
+
function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
37029
|
+
if (!isJsonObject(raw)) return void 0;
|
|
37030
|
+
const obj = raw;
|
|
37031
|
+
const beforeAllTests = parseWorkspaceHookConfig(obj.before_all_tests, evalFileDir);
|
|
37032
|
+
const beforeEachTest = parseWorkspaceHookConfig(obj.before_each_test, evalFileDir);
|
|
37033
|
+
const afterEachTest = parseWorkspaceHookConfig(obj.after_each_test, evalFileDir);
|
|
37034
|
+
const afterAllTests = parseWorkspaceHookConfig(obj.after_all_tests, evalFileDir);
|
|
37035
|
+
const onReuse = parseWorkspaceHookConfig(obj.on_reuse, evalFileDir);
|
|
37036
|
+
const onFinish = parseWorkspaceHookConfig(obj.on_finish, evalFileDir);
|
|
37037
|
+
const hooks = {
|
|
37038
|
+
...beforeAllTests !== void 0 && { before_all_tests: beforeAllTests },
|
|
37039
|
+
...beforeEachTest !== void 0 && { before_each_test: beforeEachTest },
|
|
37040
|
+
...afterEachTest !== void 0 && { after_each_test: afterEachTest },
|
|
37041
|
+
...afterAllTests !== void 0 && { after_all_tests: afterAllTests },
|
|
37042
|
+
...onReuse !== void 0 && { on_reuse: onReuse },
|
|
37043
|
+
...onFinish !== void 0 && { on_finish: onFinish }
|
|
37044
|
+
};
|
|
37045
|
+
return Object.keys(hooks).length > 0 ? hooks : void 0;
|
|
37046
|
+
}
|
|
37047
|
+
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
37048
|
+
if (typeof raw === "string") {
|
|
37049
|
+
const workspaceFilePath = path8.resolve(evalFileDir, raw);
|
|
37050
|
+
let content;
|
|
37051
|
+
try {
|
|
37052
|
+
content = await readFile7(workspaceFilePath, "utf8");
|
|
37053
|
+
} catch {
|
|
37054
|
+
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
37055
|
+
}
|
|
37056
|
+
const parsed = parse22(content);
|
|
37057
|
+
if (!isJsonObject(parsed)) {
|
|
37058
|
+
throw new Error(
|
|
37059
|
+
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
37060
|
+
);
|
|
37061
|
+
}
|
|
37062
|
+
const workspaceFileDir = path8.dirname(workspaceFilePath);
|
|
37063
|
+
return parseWorkspaceConfig(parsed, workspaceFileDir);
|
|
37064
|
+
}
|
|
37065
|
+
return parseWorkspaceConfig(raw, evalFileDir);
|
|
37066
|
+
}
|
|
36970
37067
|
function parseWorkspaceConfig(raw, evalFileDir) {
|
|
36971
37068
|
if (!isJsonObject(raw)) return void 0;
|
|
36972
37069
|
const obj = raw;
|
|
@@ -36976,37 +37073,56 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
36976
37073
|
}
|
|
36977
37074
|
const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
|
|
36978
37075
|
const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
|
|
36979
|
-
const
|
|
36980
|
-
const
|
|
36981
|
-
const
|
|
36982
|
-
const
|
|
36983
|
-
|
|
36984
|
-
if (!template && !isolation && !repos && !reset && !beforeAll && !afterAll && !beforeEach && !afterEach)
|
|
37076
|
+
const hooks = parseWorkspaceHooksConfig(obj.hooks, evalFileDir);
|
|
37077
|
+
const mode = obj.mode === "pooled" || obj.mode === "ephemeral" || obj.mode === "static" ? obj.mode : void 0;
|
|
37078
|
+
const staticPath = typeof obj.static_path === "string" ? obj.static_path : void 0;
|
|
37079
|
+
const pool = typeof obj.pool === "boolean" ? obj.pool : void 0;
|
|
37080
|
+
if (!template && !isolation && !repos && !hooks && !mode && !staticPath && pool === void 0)
|
|
36985
37081
|
return void 0;
|
|
36986
37082
|
return {
|
|
36987
37083
|
...template !== void 0 && { template },
|
|
36988
37084
|
...isolation !== void 0 && { isolation },
|
|
36989
37085
|
...repos !== void 0 && { repos },
|
|
36990
|
-
...
|
|
36991
|
-
...
|
|
36992
|
-
...
|
|
36993
|
-
...
|
|
36994
|
-
...afterEach !== void 0 && { after_each: afterEach }
|
|
37086
|
+
...hooks !== void 0 && { hooks },
|
|
37087
|
+
...mode !== void 0 && { mode },
|
|
37088
|
+
...staticPath !== void 0 && { static_path: staticPath },
|
|
37089
|
+
...pool !== void 0 && { pool }
|
|
36995
37090
|
};
|
|
36996
37091
|
}
|
|
36997
37092
|
function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
36998
37093
|
if (!suiteLevel && !caseLevel) return void 0;
|
|
36999
37094
|
if (!suiteLevel) return caseLevel;
|
|
37000
37095
|
if (!caseLevel) return suiteLevel;
|
|
37096
|
+
const mergeHook = (suiteHook, caseHook) => {
|
|
37097
|
+
if (!suiteHook && !caseHook) return void 0;
|
|
37098
|
+
return {
|
|
37099
|
+
...suiteHook ?? {},
|
|
37100
|
+
...caseHook ?? {}
|
|
37101
|
+
};
|
|
37102
|
+
};
|
|
37103
|
+
const mergedHooks = {
|
|
37104
|
+
before_all_tests: mergeHook(
|
|
37105
|
+
suiteLevel.hooks?.before_all_tests,
|
|
37106
|
+
caseLevel.hooks?.before_all_tests
|
|
37107
|
+
),
|
|
37108
|
+
before_each_test: mergeHook(
|
|
37109
|
+
suiteLevel.hooks?.before_each_test,
|
|
37110
|
+
caseLevel.hooks?.before_each_test
|
|
37111
|
+
),
|
|
37112
|
+
after_each_test: mergeHook(suiteLevel.hooks?.after_each_test, caseLevel.hooks?.after_each_test),
|
|
37113
|
+
after_all_tests: mergeHook(suiteLevel.hooks?.after_all_tests, caseLevel.hooks?.after_all_tests),
|
|
37114
|
+
on_reuse: mergeHook(suiteLevel.hooks?.on_reuse, caseLevel.hooks?.on_reuse),
|
|
37115
|
+
on_finish: mergeHook(suiteLevel.hooks?.on_finish, caseLevel.hooks?.on_finish)
|
|
37116
|
+
};
|
|
37117
|
+
const hasHooks = Object.values(mergedHooks).some((hook) => hook !== void 0);
|
|
37001
37118
|
return {
|
|
37002
37119
|
template: caseLevel.template ?? suiteLevel.template,
|
|
37003
37120
|
isolation: caseLevel.isolation ?? suiteLevel.isolation,
|
|
37004
37121
|
repos: caseLevel.repos ?? suiteLevel.repos,
|
|
37005
|
-
|
|
37006
|
-
|
|
37007
|
-
|
|
37008
|
-
|
|
37009
|
-
after_each: caseLevel.after_each ?? suiteLevel.after_each
|
|
37122
|
+
...hasHooks && { hooks: mergedHooks },
|
|
37123
|
+
mode: caseLevel.mode ?? suiteLevel.mode,
|
|
37124
|
+
static_path: caseLevel.static_path ?? suiteLevel.static_path,
|
|
37125
|
+
pool: caseLevel.pool ?? suiteLevel.pool
|
|
37010
37126
|
};
|
|
37011
37127
|
}
|
|
37012
37128
|
function asString6(value) {
|
|
@@ -40929,15 +41045,15 @@ function getAgentvHome() {
|
|
|
40929
41045
|
function getWorkspacesRoot() {
|
|
40930
41046
|
return path21.join(getAgentvHome(), "workspaces");
|
|
40931
41047
|
}
|
|
40932
|
-
function getGitCacheRoot() {
|
|
40933
|
-
return path21.join(getAgentvHome(), "git-cache");
|
|
40934
|
-
}
|
|
40935
41048
|
function getSubagentsRoot() {
|
|
40936
41049
|
return path21.join(getAgentvHome(), "subagents");
|
|
40937
41050
|
}
|
|
40938
41051
|
function getTraceStateRoot() {
|
|
40939
41052
|
return path21.join(getAgentvHome(), "trace-state");
|
|
40940
41053
|
}
|
|
41054
|
+
function getWorkspacePoolRoot() {
|
|
41055
|
+
return path21.join(getAgentvHome(), "workspace-pool");
|
|
41056
|
+
}
|
|
40941
41057
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
40942
41058
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
40943
41059
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
@@ -41738,8 +41854,6 @@ var AGENTV_REQUEST_TEMPLATE = `[[ ## task ## ]]
|
|
|
41738
41854
|
|
|
41739
41855
|
**IMPORTANT**: Follow these exact steps:
|
|
41740
41856
|
1. Create and write your complete response to: {{responseFileTmp}}
|
|
41741
|
-
- All intended file outputs/changes MUST be written in your response file.
|
|
41742
|
-
- For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
|
|
41743
41857
|
2. When completely finished, run these PowerShell commands to signal completion:
|
|
41744
41858
|
\`\`\`
|
|
41745
41859
|
Move-Item -LiteralPath '{{responseFileTmp}}' -Destination '{{responseFileFinal}}'
|
|
@@ -41756,8 +41870,6 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
|
|
|
41756
41870
|
|
|
41757
41871
|
**IMPORTANT**: Follow these exact steps:
|
|
41758
41872
|
1. Create and write your complete response to: {{responseFileTmp}}
|
|
41759
|
-
- All intended file outputs/changes MUST be written in your response file.
|
|
41760
|
-
- For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
|
|
41761
41873
|
2. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
|
|
41762
41874
|
3. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
|
|
41763
41875
|
`;
|
|
@@ -42347,15 +42459,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
42347
42459
|
});
|
|
42348
42460
|
}
|
|
42349
42461
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
42350
|
-
const { mkdir: mkdir14, readFile:
|
|
42462
|
+
const { mkdir: mkdir14, readFile: readFile13, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
42351
42463
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
42352
|
-
const
|
|
42464
|
+
const path42 = await import("node:path");
|
|
42353
42465
|
const { randomUUID: randomUUID8 } = await import("node:crypto");
|
|
42354
|
-
const dir =
|
|
42466
|
+
const dir = path42.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
|
|
42355
42467
|
await mkdir14(dir, { recursive: true });
|
|
42356
|
-
const stdinPath =
|
|
42357
|
-
const stdoutPath =
|
|
42358
|
-
const stderrPath =
|
|
42468
|
+
const stdinPath = path42.join(dir, "stdin.txt");
|
|
42469
|
+
const stdoutPath = path42.join(dir, "stdout.txt");
|
|
42470
|
+
const stderrPath = path42.join(dir, "stderr.txt");
|
|
42359
42471
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
42360
42472
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
42361
42473
|
const { spawn: spawn4 } = await import("node:child_process");
|
|
@@ -42385,8 +42497,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
42385
42497
|
resolve2(code ?? 0);
|
|
42386
42498
|
});
|
|
42387
42499
|
});
|
|
42388
|
-
const stdout = (await
|
|
42389
|
-
const stderr = (await
|
|
42500
|
+
const stdout = (await readFile13(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
42501
|
+
const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
42390
42502
|
return { stdout, stderr, exitCode };
|
|
42391
42503
|
} finally {
|
|
42392
42504
|
await rm6(dir, { recursive: true, force: true });
|
|
@@ -42699,7 +42811,7 @@ var CodeEvaluator = class {
|
|
|
42699
42811
|
outputPath,
|
|
42700
42812
|
guidelineFiles: context.evalCase.guideline_paths,
|
|
42701
42813
|
inputFiles: context.evalCase.file_paths.filter(
|
|
42702
|
-
(
|
|
42814
|
+
(path42) => !context.evalCase.guideline_paths.includes(path42)
|
|
42703
42815
|
),
|
|
42704
42816
|
input: context.evalCase.input,
|
|
42705
42817
|
trace: context.trace ?? null,
|
|
@@ -42942,6 +43054,8 @@ ${context.fileChanges}`;
|
|
|
42942
43054
|
};
|
|
42943
43055
|
} catch (e) {
|
|
42944
43056
|
const message = e instanceof Error ? e.message : String(e);
|
|
43057
|
+
const evalName = context.evaluator?.name ?? "llm-judge";
|
|
43058
|
+
console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
|
|
42945
43059
|
return {
|
|
42946
43060
|
score: 0,
|
|
42947
43061
|
verdict: "skip",
|
|
@@ -42970,24 +43084,39 @@ ${context.fileChanges}`;
|
|
|
42970
43084
|
systemPrompt,
|
|
42971
43085
|
target: judgeProvider.targetName
|
|
42972
43086
|
};
|
|
42973
|
-
|
|
42974
|
-
|
|
42975
|
-
|
|
42976
|
-
|
|
42977
|
-
|
|
42978
|
-
|
|
42979
|
-
|
|
42980
|
-
|
|
42981
|
-
|
|
42982
|
-
|
|
42983
|
-
|
|
42984
|
-
|
|
42985
|
-
|
|
42986
|
-
|
|
42987
|
-
|
|
42988
|
-
|
|
42989
|
-
|
|
42990
|
-
|
|
43087
|
+
try {
|
|
43088
|
+
const { data, tokenUsage } = await this.runWithRetry({
|
|
43089
|
+
context,
|
|
43090
|
+
judgeProvider,
|
|
43091
|
+
systemPrompt,
|
|
43092
|
+
userPrompt: prompt,
|
|
43093
|
+
schema: rubricEvaluationSchema
|
|
43094
|
+
});
|
|
43095
|
+
const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
|
|
43096
|
+
return {
|
|
43097
|
+
score,
|
|
43098
|
+
verdict,
|
|
43099
|
+
hits,
|
|
43100
|
+
misses,
|
|
43101
|
+
expectedAspectCount: rubrics.length,
|
|
43102
|
+
reasoning: data.overall_reasoning,
|
|
43103
|
+
evaluatorRawRequest,
|
|
43104
|
+
tokenUsage
|
|
43105
|
+
};
|
|
43106
|
+
} catch (e) {
|
|
43107
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
43108
|
+
const evalName = context.evaluator?.name ?? "llm-judge";
|
|
43109
|
+
console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
|
|
43110
|
+
return {
|
|
43111
|
+
score: 0,
|
|
43112
|
+
verdict: "skip",
|
|
43113
|
+
hits: [],
|
|
43114
|
+
misses: [`Judge parse failure after 3 attempts: ${message}`],
|
|
43115
|
+
expectedAspectCount: rubrics.length,
|
|
43116
|
+
reasoning: `Judge parse failure after 3 attempts: ${message}`,
|
|
43117
|
+
evaluatorRawRequest
|
|
43118
|
+
};
|
|
43119
|
+
}
|
|
42991
43120
|
}
|
|
42992
43121
|
/**
|
|
42993
43122
|
* Evaluate using score-range rubrics (analytic rubric scoring).
|
|
@@ -43001,25 +43130,40 @@ ${context.fileChanges}`;
|
|
|
43001
43130
|
systemPrompt,
|
|
43002
43131
|
target: judgeProvider.targetName
|
|
43003
43132
|
};
|
|
43004
|
-
|
|
43005
|
-
|
|
43006
|
-
|
|
43007
|
-
|
|
43008
|
-
|
|
43009
|
-
|
|
43010
|
-
|
|
43011
|
-
|
|
43012
|
-
|
|
43013
|
-
|
|
43014
|
-
|
|
43015
|
-
|
|
43016
|
-
|
|
43017
|
-
|
|
43018
|
-
|
|
43019
|
-
|
|
43020
|
-
|
|
43021
|
-
|
|
43022
|
-
|
|
43133
|
+
try {
|
|
43134
|
+
const { data, tokenUsage } = await this.runWithRetry({
|
|
43135
|
+
context,
|
|
43136
|
+
judgeProvider,
|
|
43137
|
+
systemPrompt,
|
|
43138
|
+
userPrompt: prompt,
|
|
43139
|
+
schema: scoreRangeEvaluationSchema
|
|
43140
|
+
});
|
|
43141
|
+
const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
|
|
43142
|
+
return {
|
|
43143
|
+
score,
|
|
43144
|
+
verdict,
|
|
43145
|
+
hits,
|
|
43146
|
+
misses,
|
|
43147
|
+
expectedAspectCount: rubrics.length,
|
|
43148
|
+
reasoning: data.overall_reasoning,
|
|
43149
|
+
evaluatorRawRequest,
|
|
43150
|
+
details,
|
|
43151
|
+
tokenUsage
|
|
43152
|
+
};
|
|
43153
|
+
} catch (e) {
|
|
43154
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
43155
|
+
const evalName = context.evaluator?.name ?? "llm-judge";
|
|
43156
|
+
console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
|
|
43157
|
+
return {
|
|
43158
|
+
score: 0,
|
|
43159
|
+
verdict: "skip",
|
|
43160
|
+
hits: [],
|
|
43161
|
+
misses: [`Judge parse failure after 3 attempts: ${message}`],
|
|
43162
|
+
expectedAspectCount: rubrics.length,
|
|
43163
|
+
reasoning: `Judge parse failure after 3 attempts: ${message}`,
|
|
43164
|
+
evaluatorRawRequest
|
|
43165
|
+
};
|
|
43166
|
+
}
|
|
43023
43167
|
}
|
|
43024
43168
|
/**
|
|
43025
43169
|
* Build prompt for score-range rubric evaluation.
|
|
@@ -43303,19 +43447,13 @@ var CompositeEvaluator = class {
|
|
|
43303
43447
|
runWeightedAverage(results, weights) {
|
|
43304
43448
|
let totalWeight = 0;
|
|
43305
43449
|
let weightedSum = 0;
|
|
43450
|
+
let evaluatedCount = 0;
|
|
43306
43451
|
const allHits = [];
|
|
43307
43452
|
const allMisses = [];
|
|
43308
43453
|
const reasoningParts = [];
|
|
43309
43454
|
const scores = [];
|
|
43310
43455
|
for (const member of results) {
|
|
43311
43456
|
const weight = weights?.[member.id] ?? 1;
|
|
43312
|
-
totalWeight += weight;
|
|
43313
|
-
weightedSum += member.result.score * weight;
|
|
43314
|
-
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
43315
|
-
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
43316
|
-
if (member.result.reasoning) {
|
|
43317
|
-
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
43318
|
-
}
|
|
43319
43457
|
scores.push({
|
|
43320
43458
|
name: member.id,
|
|
43321
43459
|
type: member.type,
|
|
@@ -43330,6 +43468,32 @@ var CompositeEvaluator = class {
|
|
|
43330
43468
|
details: member.result.details,
|
|
43331
43469
|
tokenUsage: member.result.tokenUsage
|
|
43332
43470
|
});
|
|
43471
|
+
if (member.result.verdict === "skip") {
|
|
43472
|
+
continue;
|
|
43473
|
+
}
|
|
43474
|
+
evaluatedCount++;
|
|
43475
|
+
totalWeight += weight;
|
|
43476
|
+
weightedSum += member.result.score * weight;
|
|
43477
|
+
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
43478
|
+
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
43479
|
+
if (member.result.reasoning) {
|
|
43480
|
+
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
43481
|
+
}
|
|
43482
|
+
}
|
|
43483
|
+
if (evaluatedCount === 0 && results.length > 0) {
|
|
43484
|
+
return {
|
|
43485
|
+
score: 0,
|
|
43486
|
+
verdict: "skip",
|
|
43487
|
+
hits: [],
|
|
43488
|
+
misses: [],
|
|
43489
|
+
expectedAspectCount: 1,
|
|
43490
|
+
reasoning: "All evaluators skipped (infrastructure failure)",
|
|
43491
|
+
evaluatorRawRequest: {
|
|
43492
|
+
aggregator: "weighted_average",
|
|
43493
|
+
...weights ? { weights } : {}
|
|
43494
|
+
},
|
|
43495
|
+
scores
|
|
43496
|
+
};
|
|
43333
43497
|
}
|
|
43334
43498
|
const finalScore = totalWeight > 0 ? weightedSum / totalWeight : 0;
|
|
43335
43499
|
return {
|
|
@@ -43353,19 +43517,8 @@ var CompositeEvaluator = class {
|
|
|
43353
43517
|
const reasoningParts = [];
|
|
43354
43518
|
let passingCount = 0;
|
|
43355
43519
|
let borderlineCount = 0;
|
|
43520
|
+
let evaluatedCount = 0;
|
|
43356
43521
|
for (const member of results) {
|
|
43357
|
-
const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
|
|
43358
|
-
if (isPassing) {
|
|
43359
|
-
passingCount++;
|
|
43360
|
-
if (member.result.verdict === "borderline") {
|
|
43361
|
-
borderlineCount++;
|
|
43362
|
-
}
|
|
43363
|
-
}
|
|
43364
|
-
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
43365
|
-
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
43366
|
-
if (member.result.reasoning) {
|
|
43367
|
-
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
43368
|
-
}
|
|
43369
43522
|
scores.push({
|
|
43370
43523
|
name: member.id,
|
|
43371
43524
|
type: member.type,
|
|
@@ -43379,8 +43532,39 @@ var CompositeEvaluator = class {
|
|
|
43379
43532
|
details: member.result.details,
|
|
43380
43533
|
tokenUsage: member.result.tokenUsage
|
|
43381
43534
|
});
|
|
43535
|
+
if (member.result.verdict === "skip") {
|
|
43536
|
+
continue;
|
|
43537
|
+
}
|
|
43538
|
+
evaluatedCount++;
|
|
43539
|
+
const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
|
|
43540
|
+
if (isPassing) {
|
|
43541
|
+
passingCount++;
|
|
43542
|
+
if (member.result.verdict === "borderline") {
|
|
43543
|
+
borderlineCount++;
|
|
43544
|
+
}
|
|
43545
|
+
}
|
|
43546
|
+
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
43547
|
+
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
43548
|
+
if (member.result.reasoning) {
|
|
43549
|
+
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
43550
|
+
}
|
|
43382
43551
|
}
|
|
43383
|
-
|
|
43552
|
+
if (evaluatedCount === 0 && results.length > 0) {
|
|
43553
|
+
return {
|
|
43554
|
+
score: 0,
|
|
43555
|
+
verdict: "skip",
|
|
43556
|
+
hits: [],
|
|
43557
|
+
misses: [],
|
|
43558
|
+
expectedAspectCount: 1,
|
|
43559
|
+
reasoning: "All evaluators skipped (infrastructure failure)",
|
|
43560
|
+
evaluatorRawRequest: {
|
|
43561
|
+
aggregator: "threshold",
|
|
43562
|
+
threshold
|
|
43563
|
+
},
|
|
43564
|
+
scores
|
|
43565
|
+
};
|
|
43566
|
+
}
|
|
43567
|
+
const totalCount = evaluatedCount;
|
|
43384
43568
|
const score = totalCount > 0 ? passingCount / totalCount : 0;
|
|
43385
43569
|
const pass = score >= threshold;
|
|
43386
43570
|
if (pass && borderlineCount > 0) {
|
|
@@ -43882,115 +44066,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
43882
44066
|
* Evaluate a single field against the expected value.
|
|
43883
44067
|
*/
|
|
43884
44068
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
43885
|
-
const { path:
|
|
43886
|
-
const candidateValue = resolvePath(candidateData,
|
|
43887
|
-
const expectedValue = resolvePath(expectedData,
|
|
44069
|
+
const { path: path42, match, required: required2 = true, weight = 1 } = fieldConfig;
|
|
44070
|
+
const candidateValue = resolvePath(candidateData, path42);
|
|
44071
|
+
const expectedValue = resolvePath(expectedData, path42);
|
|
43888
44072
|
if (expectedValue === void 0) {
|
|
43889
44073
|
return {
|
|
43890
|
-
path:
|
|
44074
|
+
path: path42,
|
|
43891
44075
|
score: 1,
|
|
43892
44076
|
// No expected value means no comparison needed
|
|
43893
44077
|
weight,
|
|
43894
44078
|
hit: true,
|
|
43895
|
-
message: `${
|
|
44079
|
+
message: `${path42}: no expected value`
|
|
43896
44080
|
};
|
|
43897
44081
|
}
|
|
43898
44082
|
if (candidateValue === void 0) {
|
|
43899
44083
|
if (required2) {
|
|
43900
44084
|
return {
|
|
43901
|
-
path:
|
|
44085
|
+
path: path42,
|
|
43902
44086
|
score: 0,
|
|
43903
44087
|
weight,
|
|
43904
44088
|
hit: false,
|
|
43905
|
-
message: `${
|
|
44089
|
+
message: `${path42} (required, missing)`
|
|
43906
44090
|
};
|
|
43907
44091
|
}
|
|
43908
44092
|
return {
|
|
43909
|
-
path:
|
|
44093
|
+
path: path42,
|
|
43910
44094
|
score: 1,
|
|
43911
44095
|
// Don't penalize missing optional fields
|
|
43912
44096
|
weight: 0,
|
|
43913
44097
|
// Zero weight means it won't affect the score
|
|
43914
44098
|
hit: true,
|
|
43915
|
-
message: `${
|
|
44099
|
+
message: `${path42}: optional field missing`
|
|
43916
44100
|
};
|
|
43917
44101
|
}
|
|
43918
44102
|
switch (match) {
|
|
43919
44103
|
case "exact":
|
|
43920
|
-
return this.compareExact(
|
|
44104
|
+
return this.compareExact(path42, candidateValue, expectedValue, weight);
|
|
43921
44105
|
case "numeric_tolerance":
|
|
43922
44106
|
return this.compareNumericTolerance(
|
|
43923
|
-
|
|
44107
|
+
path42,
|
|
43924
44108
|
candidateValue,
|
|
43925
44109
|
expectedValue,
|
|
43926
44110
|
fieldConfig,
|
|
43927
44111
|
weight
|
|
43928
44112
|
);
|
|
43929
44113
|
case "date":
|
|
43930
|
-
return this.compareDate(
|
|
44114
|
+
return this.compareDate(path42, candidateValue, expectedValue, fieldConfig, weight);
|
|
43931
44115
|
default:
|
|
43932
44116
|
return {
|
|
43933
|
-
path:
|
|
44117
|
+
path: path42,
|
|
43934
44118
|
score: 0,
|
|
43935
44119
|
weight,
|
|
43936
44120
|
hit: false,
|
|
43937
|
-
message: `${
|
|
44121
|
+
message: `${path42}: unknown match type "${match}"`
|
|
43938
44122
|
};
|
|
43939
44123
|
}
|
|
43940
44124
|
}
|
|
43941
44125
|
/**
|
|
43942
44126
|
* Exact equality comparison.
|
|
43943
44127
|
*/
|
|
43944
|
-
compareExact(
|
|
44128
|
+
compareExact(path42, candidateValue, expectedValue, weight) {
|
|
43945
44129
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
43946
44130
|
return {
|
|
43947
|
-
path:
|
|
44131
|
+
path: path42,
|
|
43948
44132
|
score: 1,
|
|
43949
44133
|
weight,
|
|
43950
44134
|
hit: true,
|
|
43951
|
-
message:
|
|
44135
|
+
message: path42
|
|
43952
44136
|
};
|
|
43953
44137
|
}
|
|
43954
44138
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
43955
44139
|
return {
|
|
43956
|
-
path:
|
|
44140
|
+
path: path42,
|
|
43957
44141
|
score: 0,
|
|
43958
44142
|
weight,
|
|
43959
44143
|
hit: false,
|
|
43960
|
-
message: `${
|
|
44144
|
+
message: `${path42} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
43961
44145
|
};
|
|
43962
44146
|
}
|
|
43963
44147
|
return {
|
|
43964
|
-
path:
|
|
44148
|
+
path: path42,
|
|
43965
44149
|
score: 0,
|
|
43966
44150
|
weight,
|
|
43967
44151
|
hit: false,
|
|
43968
|
-
message: `${
|
|
44152
|
+
message: `${path42} (value mismatch)`
|
|
43969
44153
|
};
|
|
43970
44154
|
}
|
|
43971
44155
|
/**
|
|
43972
44156
|
* Numeric comparison with absolute or relative tolerance.
|
|
43973
44157
|
*/
|
|
43974
|
-
compareNumericTolerance(
|
|
44158
|
+
compareNumericTolerance(path42, candidateValue, expectedValue, fieldConfig, weight) {
|
|
43975
44159
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
43976
44160
|
const candidateNum = toNumber2(candidateValue);
|
|
43977
44161
|
const expectedNum = toNumber2(expectedValue);
|
|
43978
44162
|
if (candidateNum === null || expectedNum === null) {
|
|
43979
44163
|
return {
|
|
43980
|
-
path:
|
|
44164
|
+
path: path42,
|
|
43981
44165
|
score: 0,
|
|
43982
44166
|
weight,
|
|
43983
44167
|
hit: false,
|
|
43984
|
-
message: `${
|
|
44168
|
+
message: `${path42} (non-numeric value)`
|
|
43985
44169
|
};
|
|
43986
44170
|
}
|
|
43987
44171
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
43988
44172
|
return {
|
|
43989
|
-
path:
|
|
44173
|
+
path: path42,
|
|
43990
44174
|
score: 0,
|
|
43991
44175
|
weight,
|
|
43992
44176
|
hit: false,
|
|
43993
|
-
message: `${
|
|
44177
|
+
message: `${path42} (invalid numeric value)`
|
|
43994
44178
|
};
|
|
43995
44179
|
}
|
|
43996
44180
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -44003,61 +44187,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
44003
44187
|
}
|
|
44004
44188
|
if (withinTolerance) {
|
|
44005
44189
|
return {
|
|
44006
|
-
path:
|
|
44190
|
+
path: path42,
|
|
44007
44191
|
score: 1,
|
|
44008
44192
|
weight,
|
|
44009
44193
|
hit: true,
|
|
44010
|
-
message: `${
|
|
44194
|
+
message: `${path42} (within tolerance: diff=${diff.toFixed(2)})`
|
|
44011
44195
|
};
|
|
44012
44196
|
}
|
|
44013
44197
|
return {
|
|
44014
|
-
path:
|
|
44198
|
+
path: path42,
|
|
44015
44199
|
score: 0,
|
|
44016
44200
|
weight,
|
|
44017
44201
|
hit: false,
|
|
44018
|
-
message: `${
|
|
44202
|
+
message: `${path42} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
44019
44203
|
};
|
|
44020
44204
|
}
|
|
44021
44205
|
/**
|
|
44022
44206
|
* Date comparison with format normalization.
|
|
44023
44207
|
*/
|
|
44024
|
-
compareDate(
|
|
44208
|
+
compareDate(path42, candidateValue, expectedValue, fieldConfig, weight) {
|
|
44025
44209
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
44026
44210
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
44027
44211
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
44028
44212
|
if (candidateDate === null) {
|
|
44029
44213
|
return {
|
|
44030
|
-
path:
|
|
44214
|
+
path: path42,
|
|
44031
44215
|
score: 0,
|
|
44032
44216
|
weight,
|
|
44033
44217
|
hit: false,
|
|
44034
|
-
message: `${
|
|
44218
|
+
message: `${path42} (unparseable candidate date)`
|
|
44035
44219
|
};
|
|
44036
44220
|
}
|
|
44037
44221
|
if (expectedDate === null) {
|
|
44038
44222
|
return {
|
|
44039
|
-
path:
|
|
44223
|
+
path: path42,
|
|
44040
44224
|
score: 0,
|
|
44041
44225
|
weight,
|
|
44042
44226
|
hit: false,
|
|
44043
|
-
message: `${
|
|
44227
|
+
message: `${path42} (unparseable expected date)`
|
|
44044
44228
|
};
|
|
44045
44229
|
}
|
|
44046
44230
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
44047
44231
|
return {
|
|
44048
|
-
path:
|
|
44232
|
+
path: path42,
|
|
44049
44233
|
score: 1,
|
|
44050
44234
|
weight,
|
|
44051
44235
|
hit: true,
|
|
44052
|
-
message:
|
|
44236
|
+
message: path42
|
|
44053
44237
|
};
|
|
44054
44238
|
}
|
|
44055
44239
|
return {
|
|
44056
|
-
path:
|
|
44240
|
+
path: path42,
|
|
44057
44241
|
score: 0,
|
|
44058
44242
|
weight,
|
|
44059
44243
|
hit: false,
|
|
44060
|
-
message: `${
|
|
44244
|
+
message: `${path42} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
44061
44245
|
};
|
|
44062
44246
|
}
|
|
44063
44247
|
/**
|
|
@@ -44098,11 +44282,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
44098
44282
|
};
|
|
44099
44283
|
}
|
|
44100
44284
|
};
|
|
44101
|
-
function resolvePath(obj,
|
|
44102
|
-
if (!
|
|
44285
|
+
function resolvePath(obj, path42) {
|
|
44286
|
+
if (!path42 || !obj) {
|
|
44103
44287
|
return void 0;
|
|
44104
44288
|
}
|
|
44105
|
-
const parts =
|
|
44289
|
+
const parts = path42.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
44106
44290
|
let current = obj;
|
|
44107
44291
|
for (const part of parts) {
|
|
44108
44292
|
if (current === null || current === void 0) {
|
|
@@ -44906,8 +45090,8 @@ var TokenUsageEvaluator = class {
|
|
|
44906
45090
|
};
|
|
44907
45091
|
}
|
|
44908
45092
|
};
|
|
44909
|
-
function getNestedValue(obj,
|
|
44910
|
-
const parts =
|
|
45093
|
+
function getNestedValue(obj, path42) {
|
|
45094
|
+
const parts = path42.split(".");
|
|
44911
45095
|
let current = obj;
|
|
44912
45096
|
for (const part of parts) {
|
|
44913
45097
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -46302,8 +46486,6 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
|
46302
46486
|
}
|
|
46303
46487
|
}
|
|
46304
46488
|
var execFileAsync = promisify5(execFile);
|
|
46305
|
-
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
46306
|
-
var LOCK_TIMEOUT_MS = 6e4;
|
|
46307
46489
|
function gitEnv() {
|
|
46308
46490
|
const env = { ...process.env };
|
|
46309
46491
|
for (const key of Object.keys(env)) {
|
|
@@ -46318,65 +46500,296 @@ function gitEnv() {
|
|
|
46318
46500
|
GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
|
|
46319
46501
|
};
|
|
46320
46502
|
}
|
|
46321
|
-
function cacheKey(source) {
|
|
46322
|
-
const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
|
|
46323
|
-
return createHash("sha256").update(raw).digest("hex");
|
|
46324
|
-
}
|
|
46325
|
-
function getSourceUrl(source) {
|
|
46326
|
-
return source.type === "git" ? source.url : source.path;
|
|
46327
|
-
}
|
|
46328
46503
|
async function git(args, opts) {
|
|
46329
46504
|
const { stdout } = await execFileAsync("git", args, {
|
|
46330
46505
|
cwd: opts?.cwd,
|
|
46331
|
-
timeout: opts?.timeout ??
|
|
46506
|
+
timeout: opts?.timeout ?? 3e5,
|
|
46332
46507
|
env: gitEnv(),
|
|
46333
46508
|
maxBuffer: 50 * 1024 * 1024
|
|
46334
|
-
// 50MB
|
|
46335
46509
|
});
|
|
46336
46510
|
return stdout.trim();
|
|
46337
46511
|
}
|
|
46338
|
-
|
|
46339
|
-
const
|
|
46340
|
-
|
|
46341
|
-
|
|
46342
|
-
|
|
46343
|
-
|
|
46344
|
-
|
|
46345
|
-
|
|
46346
|
-
|
|
46512
|
+
function normalizeRepoForFingerprint(repo) {
|
|
46513
|
+
const source = repo.source.type === "git" ? { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") } : { type: "local", path: repo.source.path };
|
|
46514
|
+
const result = {
|
|
46515
|
+
path: repo.path,
|
|
46516
|
+
source,
|
|
46517
|
+
ref: repo.checkout?.ref ?? "HEAD"
|
|
46518
|
+
};
|
|
46519
|
+
if (repo.clone?.depth !== void 0) {
|
|
46520
|
+
result.depth = repo.clone.depth;
|
|
46521
|
+
}
|
|
46522
|
+
if (repo.clone?.filter !== void 0) {
|
|
46523
|
+
result.filter = repo.clone.filter;
|
|
46524
|
+
}
|
|
46525
|
+
if (repo.clone?.sparse?.length) {
|
|
46526
|
+
result.sparse = [...repo.clone.sparse].sort();
|
|
46527
|
+
}
|
|
46528
|
+
return result;
|
|
46529
|
+
}
|
|
46530
|
+
function computeWorkspaceFingerprint(templatePath, repos) {
|
|
46531
|
+
const canonical = {
|
|
46532
|
+
templatePath: templatePath ?? null,
|
|
46533
|
+
repos: [...repos].sort((a, b) => a.path.localeCompare(b.path)).map(normalizeRepoForFingerprint)
|
|
46534
|
+
};
|
|
46535
|
+
return createHash("sha256").update(JSON.stringify(canonical)).digest("hex");
|
|
46536
|
+
}
|
|
46537
|
+
async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
46538
|
+
await mkdir11(dest, { recursive: true });
|
|
46539
|
+
const entries = await readdir4(src, { withFileTypes: true });
|
|
46540
|
+
for (const entry of entries) {
|
|
46541
|
+
const srcPath = path36.join(src, entry.name);
|
|
46542
|
+
const destPath = path36.join(dest, entry.name);
|
|
46543
|
+
if (entry.name === ".git") {
|
|
46544
|
+
continue;
|
|
46545
|
+
}
|
|
46546
|
+
if (entry.isDirectory()) {
|
|
46547
|
+
if (skipDirs?.has(entry.name)) {
|
|
46347
46548
|
continue;
|
|
46348
46549
|
}
|
|
46349
|
-
|
|
46550
|
+
await copyDirectoryRecursive2(srcPath, destPath, skipDirs);
|
|
46551
|
+
} else {
|
|
46552
|
+
await cp2(srcPath, destPath, { preserveTimestamps: true, force: true });
|
|
46350
46553
|
}
|
|
46351
46554
|
}
|
|
46352
|
-
throw new Error(`Timed out waiting for lock: ${lockPath}`);
|
|
46353
46555
|
}
|
|
46354
|
-
|
|
46355
|
-
|
|
46356
|
-
|
|
46357
|
-
|
|
46556
|
+
var WorkspacePoolManager = class {
|
|
46557
|
+
poolRoot;
|
|
46558
|
+
constructor(poolRoot) {
|
|
46559
|
+
this.poolRoot = poolRoot ?? getWorkspacePoolRoot();
|
|
46560
|
+
}
|
|
46561
|
+
/**
|
|
46562
|
+
* Acquire a workspace slot from the pool.
|
|
46563
|
+
*
|
|
46564
|
+
* 1. Compute fingerprint from template + repos
|
|
46565
|
+
* 2. Check drift (compare stored metadata.json fingerprint vs computed)
|
|
46566
|
+
* 3. If drift: warn, remove all slots, rematerialize
|
|
46567
|
+
* 4. Acquire a slot (try-lock slot-0, slot-1, ..., up to maxSlots)
|
|
46568
|
+
* 5. If slot exists: reset repos, re-copy template files (skip repo directories)
|
|
46569
|
+
* 6. If new slot: copy template, materialize all repos, write metadata.json
|
|
46570
|
+
* 7. Return the slot (with path, index, isExisting)
|
|
46571
|
+
*/
|
|
46572
|
+
async acquireWorkspace(options) {
|
|
46573
|
+
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
46574
|
+
const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
|
|
46575
|
+
const poolDir = path36.join(this.poolRoot, fingerprint);
|
|
46576
|
+
await mkdir11(poolDir, { recursive: true });
|
|
46577
|
+
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
46578
|
+
if (drifted) {
|
|
46579
|
+
console.warn(
|
|
46580
|
+
`[workspace-pool] Drift detected for fingerprint ${fingerprint.slice(0, 12)}... Removing stale slots.`
|
|
46581
|
+
);
|
|
46582
|
+
await this.removeAllSlots(poolDir);
|
|
46583
|
+
}
|
|
46584
|
+
for (let i = 0; i < maxSlots; i++) {
|
|
46585
|
+
const slotPath = path36.join(poolDir, `slot-${i}`);
|
|
46586
|
+
const lockPath = `${slotPath}.lock`;
|
|
46587
|
+
const locked = await this.tryLock(lockPath);
|
|
46588
|
+
if (!locked) {
|
|
46589
|
+
continue;
|
|
46590
|
+
}
|
|
46591
|
+
const slotExists = existsSync2(slotPath);
|
|
46592
|
+
if (slotExists) {
|
|
46593
|
+
await this.resetSlot(slotPath, templatePath, repos, poolReset);
|
|
46594
|
+
return {
|
|
46595
|
+
index: i,
|
|
46596
|
+
path: slotPath,
|
|
46597
|
+
isExisting: true,
|
|
46598
|
+
lockPath,
|
|
46599
|
+
fingerprint,
|
|
46600
|
+
poolDir
|
|
46601
|
+
};
|
|
46602
|
+
}
|
|
46603
|
+
await mkdir11(slotPath, { recursive: true });
|
|
46604
|
+
if (templatePath) {
|
|
46605
|
+
await copyDirectoryRecursive2(templatePath, slotPath);
|
|
46606
|
+
}
|
|
46607
|
+
if (repos.length > 0) {
|
|
46608
|
+
await repoManager.materializeAll(repos, slotPath);
|
|
46609
|
+
}
|
|
46610
|
+
await this.writeMetadata(poolDir, fingerprint, templatePath ?? null, repos);
|
|
46611
|
+
return {
|
|
46612
|
+
index: i,
|
|
46613
|
+
path: slotPath,
|
|
46614
|
+
isExisting: false,
|
|
46615
|
+
lockPath,
|
|
46616
|
+
fingerprint,
|
|
46617
|
+
poolDir
|
|
46618
|
+
};
|
|
46619
|
+
}
|
|
46620
|
+
throw new Error(
|
|
46621
|
+
`All ${maxSlots} pool slots are locked for fingerprint ${fingerprint.slice(0, 12)}...`
|
|
46622
|
+
);
|
|
46623
|
+
}
|
|
46624
|
+
/** Remove lock file to release a slot. */
|
|
46625
|
+
async releaseSlot(slot) {
|
|
46626
|
+
try {
|
|
46627
|
+
await unlink(slot.lockPath);
|
|
46628
|
+
} catch {
|
|
46629
|
+
}
|
|
46630
|
+
}
|
|
46631
|
+
/**
|
|
46632
|
+
* Try to acquire a PID-based lock file.
|
|
46633
|
+
* On EEXIST, read PID and check if process is alive. If dead, stale lock — remove and retry.
|
|
46634
|
+
* Returns true if lock acquired, false if slot is actively locked.
|
|
46635
|
+
* Uses a bounded loop (max 3 attempts) to avoid unbounded recursion.
|
|
46636
|
+
*/
|
|
46637
|
+
async tryLock(lockPath) {
|
|
46638
|
+
for (let attempt = 0; attempt < 3; attempt++) {
|
|
46639
|
+
try {
|
|
46640
|
+
await writeFile7(lockPath, String(process.pid), { flag: "wx" });
|
|
46641
|
+
return true;
|
|
46642
|
+
} catch (err) {
|
|
46643
|
+
if (err.code !== "EEXIST") {
|
|
46644
|
+
throw err;
|
|
46645
|
+
}
|
|
46646
|
+
try {
|
|
46647
|
+
const pidStr = await readFile11(lockPath, "utf-8");
|
|
46648
|
+
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
46649
|
+
if (!Number.isNaN(pid)) {
|
|
46650
|
+
try {
|
|
46651
|
+
process.kill(pid, 0);
|
|
46652
|
+
return false;
|
|
46653
|
+
} catch {
|
|
46654
|
+
await unlink(lockPath).catch(() => {
|
|
46655
|
+
});
|
|
46656
|
+
continue;
|
|
46657
|
+
}
|
|
46658
|
+
}
|
|
46659
|
+
} catch {
|
|
46660
|
+
}
|
|
46661
|
+
return false;
|
|
46662
|
+
}
|
|
46663
|
+
}
|
|
46664
|
+
return false;
|
|
46665
|
+
}
|
|
46666
|
+
/**
|
|
46667
|
+
* Check if the stored fingerprint in metadata.json differs from the computed one.
|
|
46668
|
+
* Returns true if drifted, false otherwise.
|
|
46669
|
+
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
46670
|
+
*/
|
|
46671
|
+
async checkDrift(poolDir, fingerprint) {
|
|
46672
|
+
const metadataPath = path36.join(poolDir, "metadata.json");
|
|
46673
|
+
try {
|
|
46674
|
+
const raw = await readFile11(metadataPath, "utf-8");
|
|
46675
|
+
const metadata = JSON.parse(raw);
|
|
46676
|
+
return metadata.fingerprint !== fingerprint;
|
|
46677
|
+
} catch {
|
|
46678
|
+
return false;
|
|
46679
|
+
}
|
|
46680
|
+
}
|
|
46681
|
+
/** Write metadata.json with fingerprint, inputs, and timestamp. */
|
|
46682
|
+
async writeMetadata(poolDir, fingerprint, templatePath, repos) {
|
|
46683
|
+
const metadata = {
|
|
46684
|
+
fingerprint,
|
|
46685
|
+
templatePath,
|
|
46686
|
+
repos,
|
|
46687
|
+
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
46688
|
+
};
|
|
46689
|
+
await writeFile7(path36.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
46690
|
+
}
|
|
46691
|
+
/** Remove all slot directories and their lock files from a pool directory. */
|
|
46692
|
+
async removeAllSlots(poolDir) {
|
|
46693
|
+
const entries = await readdir4(poolDir);
|
|
46694
|
+
for (const entry of entries) {
|
|
46695
|
+
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
46696
|
+
const lockPath = path36.join(poolDir, `${entry}.lock`);
|
|
46697
|
+
if (existsSync2(lockPath)) {
|
|
46698
|
+
try {
|
|
46699
|
+
const pidStr = await readFile11(lockPath, "utf-8");
|
|
46700
|
+
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
46701
|
+
if (!Number.isNaN(pid)) {
|
|
46702
|
+
try {
|
|
46703
|
+
process.kill(pid, 0);
|
|
46704
|
+
console.warn(`[workspace-pool] Skipping slot ${entry}: locked by PID ${pid}`);
|
|
46705
|
+
continue;
|
|
46706
|
+
} catch {
|
|
46707
|
+
}
|
|
46708
|
+
}
|
|
46709
|
+
} catch {
|
|
46710
|
+
}
|
|
46711
|
+
}
|
|
46712
|
+
await rm5(path36.join(poolDir, entry), { recursive: true, force: true });
|
|
46713
|
+
await rm5(lockPath, { force: true }).catch(() => {
|
|
46714
|
+
});
|
|
46715
|
+
}
|
|
46716
|
+
}
|
|
46717
|
+
await rm5(path36.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
46718
|
+
});
|
|
46719
|
+
}
|
|
46720
|
+
/**
|
|
46721
|
+
* Reset an existing slot for reuse:
|
|
46722
|
+
* 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
|
|
46723
|
+
* 2. Re-copy template files (skip repo directories)
|
|
46724
|
+
*/
|
|
46725
|
+
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
46726
|
+
for (const repo of repos) {
|
|
46727
|
+
const repoDir = path36.join(slotPath, repo.path);
|
|
46728
|
+
if (!existsSync2(repoDir)) {
|
|
46729
|
+
continue;
|
|
46730
|
+
}
|
|
46731
|
+
if (poolReset === "none") {
|
|
46732
|
+
continue;
|
|
46733
|
+
}
|
|
46734
|
+
const ref = repo.checkout?.ref ?? "HEAD";
|
|
46735
|
+
await git(["reset", "--hard", ref], { cwd: repoDir });
|
|
46736
|
+
const cleanFlag = poolReset === "strict" ? "-fdx" : "-fd";
|
|
46737
|
+
await git(["clean", cleanFlag], { cwd: repoDir });
|
|
46738
|
+
}
|
|
46739
|
+
if (templatePath) {
|
|
46740
|
+
const repoDirNames = new Set(
|
|
46741
|
+
repos.map((r) => {
|
|
46742
|
+
const normalized = r.path.replace(/^\.\//, "");
|
|
46743
|
+
return normalized.split("/")[0];
|
|
46744
|
+
})
|
|
46745
|
+
);
|
|
46746
|
+
await copyDirectoryRecursive2(templatePath, slotPath, repoDirNames);
|
|
46747
|
+
}
|
|
46358
46748
|
}
|
|
46749
|
+
};
|
|
46750
|
+
var execFileAsync2 = promisify6(execFile2);
|
|
46751
|
+
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
46752
|
+
function gitEnv2() {
|
|
46753
|
+
const env = { ...process.env };
|
|
46754
|
+
for (const key of Object.keys(env)) {
|
|
46755
|
+
if (key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND") {
|
|
46756
|
+
delete env[key];
|
|
46757
|
+
}
|
|
46758
|
+
}
|
|
46759
|
+
return {
|
|
46760
|
+
...env,
|
|
46761
|
+
GIT_TERMINAL_PROMPT: "0",
|
|
46762
|
+
GIT_ASKPASS: "",
|
|
46763
|
+
GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
|
|
46764
|
+
};
|
|
46765
|
+
}
|
|
46766
|
+
function getSourceUrl(source) {
|
|
46767
|
+
return source.type === "git" ? source.url : source.path;
|
|
46768
|
+
}
|
|
46769
|
+
async function git2(args, opts) {
|
|
46770
|
+
const { stdout } = await execFileAsync2("git", args, {
|
|
46771
|
+
cwd: opts?.cwd,
|
|
46772
|
+
timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
|
|
46773
|
+
env: gitEnv2(),
|
|
46774
|
+
maxBuffer: 50 * 1024 * 1024
|
|
46775
|
+
// 50MB
|
|
46776
|
+
});
|
|
46777
|
+
return stdout.trim();
|
|
46359
46778
|
}
|
|
46360
46779
|
var RepoManager = class {
|
|
46361
|
-
cacheDir;
|
|
46362
46780
|
verbose;
|
|
46363
|
-
constructor(
|
|
46364
|
-
this.cacheDir = cacheDir ?? getGitCacheRoot();
|
|
46781
|
+
constructor(verbose = false) {
|
|
46365
46782
|
this.verbose = verbose;
|
|
46366
46783
|
}
|
|
46367
46784
|
async runGit(args, opts) {
|
|
46368
46785
|
const startedAt = Date.now();
|
|
46369
46786
|
if (this.verbose) {
|
|
46370
|
-
console.log(
|
|
46371
|
-
`[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`
|
|
46372
|
-
);
|
|
46787
|
+
console.log(`[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`);
|
|
46373
46788
|
}
|
|
46374
46789
|
try {
|
|
46375
|
-
const output = await
|
|
46790
|
+
const output = await git2(args, opts);
|
|
46376
46791
|
if (this.verbose) {
|
|
46377
|
-
console.log(
|
|
46378
|
-
`[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`
|
|
46379
|
-
);
|
|
46792
|
+
console.log(`[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`);
|
|
46380
46793
|
}
|
|
46381
46794
|
return output;
|
|
46382
46795
|
} catch (error40) {
|
|
@@ -46390,88 +46803,18 @@ var RepoManager = class {
|
|
|
46390
46803
|
}
|
|
46391
46804
|
}
|
|
46392
46805
|
/**
|
|
46393
|
-
*
|
|
46394
|
-
* Creates on first access, fetches updates on subsequent calls.
|
|
46395
|
-
* Returns the absolute path to the cache directory.
|
|
46396
|
-
*/
|
|
46397
|
-
async ensureCache(source, depth, resolve2) {
|
|
46398
|
-
const key = cacheKey(source);
|
|
46399
|
-
const cachePath = path36.join(this.cacheDir, key);
|
|
46400
|
-
const lockPath = `${cachePath}.lock`;
|
|
46401
|
-
const cacheExists = existsSync2(path36.join(cachePath, "HEAD"));
|
|
46402
|
-
if (this.verbose) {
|
|
46403
|
-
console.log(
|
|
46404
|
-
`[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve2 ?? "remote"} cache=${cachePath}`
|
|
46405
|
-
);
|
|
46406
|
-
}
|
|
46407
|
-
if (resolve2 === "local") {
|
|
46408
|
-
if (cacheExists) {
|
|
46409
|
-
if (this.verbose) {
|
|
46410
|
-
console.log(`[repo] using existing local cache ${cachePath}`);
|
|
46411
|
-
}
|
|
46412
|
-
return cachePath;
|
|
46413
|
-
}
|
|
46414
|
-
const url2 = getSourceUrl(source);
|
|
46415
|
-
throw new Error(
|
|
46416
|
-
`No cache found for \`${url2}\`. Run \`agentv cache add --url ${url2} --from <local-path>\` to seed it.`
|
|
46417
|
-
);
|
|
46418
|
-
}
|
|
46419
|
-
await mkdir11(this.cacheDir, { recursive: true });
|
|
46420
|
-
const lockStartedAt = Date.now();
|
|
46421
|
-
await acquireLock(lockPath);
|
|
46422
|
-
if (this.verbose) {
|
|
46423
|
-
console.log(
|
|
46424
|
-
`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`
|
|
46425
|
-
);
|
|
46426
|
-
}
|
|
46427
|
-
try {
|
|
46428
|
-
if (cacheExists) {
|
|
46429
|
-
if (this.verbose) {
|
|
46430
|
-
console.log(`[repo] refreshing existing cache ${cachePath}`);
|
|
46431
|
-
}
|
|
46432
|
-
const fetchArgs = ["fetch", "--prune"];
|
|
46433
|
-
if (depth) {
|
|
46434
|
-
fetchArgs.push("--depth", String(depth));
|
|
46435
|
-
}
|
|
46436
|
-
await this.runGit(fetchArgs, { cwd: cachePath });
|
|
46437
|
-
} else {
|
|
46438
|
-
if (this.verbose) {
|
|
46439
|
-
console.log(`[repo] creating new cache ${cachePath}`);
|
|
46440
|
-
}
|
|
46441
|
-
const cloneArgs = ["clone", "--mirror", "--bare"];
|
|
46442
|
-
if (depth) {
|
|
46443
|
-
cloneArgs.push("--depth", String(depth));
|
|
46444
|
-
}
|
|
46445
|
-
const sourceUrl = getSourceUrl(source);
|
|
46446
|
-
const cloneUrl = depth && source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
|
|
46447
|
-
cloneArgs.push(cloneUrl, cachePath);
|
|
46448
|
-
await this.runGit(cloneArgs);
|
|
46449
|
-
}
|
|
46450
|
-
} finally {
|
|
46451
|
-
await releaseLock(lockPath);
|
|
46452
|
-
if (this.verbose) {
|
|
46453
|
-
console.log(`[repo] lock released path=${lockPath}`);
|
|
46454
|
-
}
|
|
46455
|
-
}
|
|
46456
|
-
return cachePath;
|
|
46457
|
-
}
|
|
46458
|
-
/**
|
|
46459
|
-
* Clone a repo from cache into the workspace at the configured path.
|
|
46806
|
+
* Clone a repo directly from source into the workspace at the configured path.
|
|
46460
46807
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
46461
46808
|
*/
|
|
46462
46809
|
async materialize(repo, workspacePath) {
|
|
46463
|
-
const targetDir =
|
|
46810
|
+
const targetDir = path37.join(workspacePath, repo.path);
|
|
46811
|
+
const sourceUrl = getSourceUrl(repo.source);
|
|
46464
46812
|
const startedAt = Date.now();
|
|
46465
46813
|
if (this.verbose) {
|
|
46466
46814
|
console.log(
|
|
46467
|
-
`[repo] materialize start path=${repo.path} source=${
|
|
46815
|
+
`[repo] materialize start path=${repo.path} source=${sourceUrl} workspace=${workspacePath}`
|
|
46468
46816
|
);
|
|
46469
46817
|
}
|
|
46470
|
-
const cachePath = await this.ensureCache(
|
|
46471
|
-
repo.source,
|
|
46472
|
-
repo.clone?.depth,
|
|
46473
|
-
repo.checkout?.resolve
|
|
46474
|
-
);
|
|
46475
46818
|
const cloneArgs = ["clone"];
|
|
46476
46819
|
if (repo.clone?.depth) {
|
|
46477
46820
|
cloneArgs.push("--depth", String(repo.clone.depth));
|
|
@@ -46480,7 +46823,7 @@ var RepoManager = class {
|
|
|
46480
46823
|
cloneArgs.push("--filter", repo.clone.filter);
|
|
46481
46824
|
}
|
|
46482
46825
|
cloneArgs.push("--no-checkout");
|
|
46483
|
-
const cloneUrl = repo.clone?.depth || repo.clone?.filter ? `file://${
|
|
46826
|
+
const cloneUrl = (repo.clone?.depth || repo.clone?.filter) && repo.source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
|
|
46484
46827
|
cloneArgs.push(cloneUrl, targetDir);
|
|
46485
46828
|
await this.runGit(cloneArgs);
|
|
46486
46829
|
if (repo.clone?.sparse?.length) {
|
|
@@ -46552,81 +46895,43 @@ var RepoManager = class {
|
|
|
46552
46895
|
}
|
|
46553
46896
|
}
|
|
46554
46897
|
/** Reset repos in workspace to their checkout state. */
|
|
46555
|
-
async reset(repos, workspacePath,
|
|
46556
|
-
|
|
46557
|
-
for (const repo of repos) {
|
|
46558
|
-
const targetDir = path36.join(workspacePath, repo.path);
|
|
46559
|
-
await rm5(targetDir, { recursive: true, force: true });
|
|
46560
|
-
}
|
|
46561
|
-
await this.materializeAll(repos, workspacePath);
|
|
46562
|
-
return;
|
|
46563
|
-
}
|
|
46898
|
+
async reset(repos, workspacePath, reset) {
|
|
46899
|
+
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
46564
46900
|
for (const repo of repos) {
|
|
46565
|
-
const targetDir =
|
|
46901
|
+
const targetDir = path37.join(workspacePath, repo.path);
|
|
46566
46902
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
46567
|
-
await this.runGit(["clean",
|
|
46903
|
+
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
46568
46904
|
}
|
|
46569
46905
|
}
|
|
46570
|
-
/**
|
|
46571
|
-
* Seed the cache from a local repository, setting the remote to a given URL.
|
|
46572
|
-
* Useful for avoiding slow network clones when a local clone already exists.
|
|
46573
|
-
*/
|
|
46574
|
-
async seedCache(localPath, remoteUrl, opts) {
|
|
46575
|
-
const source = { type: "git", url: remoteUrl };
|
|
46576
|
-
const key = cacheKey(source);
|
|
46577
|
-
const cachePath = path36.join(this.cacheDir, key);
|
|
46578
|
-
const lockPath = `${cachePath}.lock`;
|
|
46579
|
-
await mkdir11(this.cacheDir, { recursive: true });
|
|
46580
|
-
await acquireLock(lockPath);
|
|
46581
|
-
try {
|
|
46582
|
-
if (existsSync2(path36.join(cachePath, "HEAD"))) {
|
|
46583
|
-
if (!opts?.force) {
|
|
46584
|
-
throw new Error(
|
|
46585
|
-
`Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
|
|
46586
|
-
);
|
|
46587
|
-
}
|
|
46588
|
-
await rm5(cachePath, { recursive: true, force: true });
|
|
46589
|
-
}
|
|
46590
|
-
await git(["clone", "--mirror", "--bare", localPath, cachePath]);
|
|
46591
|
-
await git(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
|
|
46592
|
-
} finally {
|
|
46593
|
-
await releaseLock(lockPath);
|
|
46594
|
-
}
|
|
46595
|
-
return cachePath;
|
|
46596
|
-
}
|
|
46597
|
-
/** Remove the entire cache directory. */
|
|
46598
|
-
async cleanCache() {
|
|
46599
|
-
await rm5(this.cacheDir, { recursive: true, force: true });
|
|
46600
|
-
}
|
|
46601
46906
|
};
|
|
46602
46907
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
46603
46908
|
if (!templatePath) {
|
|
46604
46909
|
return void 0;
|
|
46605
46910
|
}
|
|
46606
|
-
const resolved =
|
|
46911
|
+
const resolved = path38.resolve(templatePath);
|
|
46607
46912
|
const stats = await stat6(resolved);
|
|
46608
46913
|
if (stats.isFile()) {
|
|
46609
46914
|
return {
|
|
46610
|
-
dir:
|
|
46915
|
+
dir: path38.dirname(resolved),
|
|
46611
46916
|
workspaceFile: resolved
|
|
46612
46917
|
};
|
|
46613
46918
|
}
|
|
46614
46919
|
if (!stats.isDirectory()) {
|
|
46615
46920
|
throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
|
|
46616
46921
|
}
|
|
46617
|
-
const entries = await
|
|
46922
|
+
const entries = await readdir5(resolved);
|
|
46618
46923
|
const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
|
|
46619
46924
|
if (workspaceFiles.length === 1) {
|
|
46620
46925
|
return {
|
|
46621
46926
|
dir: resolved,
|
|
46622
|
-
workspaceFile:
|
|
46927
|
+
workspaceFile: path38.join(resolved, workspaceFiles[0])
|
|
46623
46928
|
};
|
|
46624
46929
|
}
|
|
46625
46930
|
if (workspaceFiles.length > 1) {
|
|
46626
46931
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
46627
46932
|
return {
|
|
46628
46933
|
dir: resolved,
|
|
46629
|
-
workspaceFile: conventionFile ?
|
|
46934
|
+
workspaceFile: conventionFile ? path38.join(resolved, conventionFile) : void 0
|
|
46630
46935
|
};
|
|
46631
46936
|
}
|
|
46632
46937
|
return { dir: resolved };
|
|
@@ -46674,6 +46979,22 @@ function classifyQualityStatus(score) {
|
|
|
46674
46979
|
function usesFileReferencePrompt(provider) {
|
|
46675
46980
|
return isAgentProvider(provider) || provider.kind === "cli";
|
|
46676
46981
|
}
|
|
46982
|
+
function toScriptConfig(hook, hookName, context) {
|
|
46983
|
+
const command = hook.command ?? hook.script;
|
|
46984
|
+
if (!command || command.length === 0) {
|
|
46985
|
+
throw new Error(`${hookName} hook in ${context} requires command or script`);
|
|
46986
|
+
}
|
|
46987
|
+
return {
|
|
46988
|
+
command,
|
|
46989
|
+
...hook.timeout_ms !== void 0 && { timeout_ms: hook.timeout_ms },
|
|
46990
|
+
...hook.timeoutMs !== void 0 && { timeoutMs: hook.timeoutMs },
|
|
46991
|
+
...hook.cwd !== void 0 && { cwd: hook.cwd },
|
|
46992
|
+
...hook.script !== void 0 && { script: hook.script }
|
|
46993
|
+
};
|
|
46994
|
+
}
|
|
46995
|
+
function hasHookCommand(hook) {
|
|
46996
|
+
return !!(hook?.command && hook.command.length > 0 || hook?.script && hook.script.length > 0);
|
|
46997
|
+
}
|
|
46677
46998
|
function getWorkspaceTemplate(target) {
|
|
46678
46999
|
const config2 = target.config;
|
|
46679
47000
|
if ("workspaceTemplate" in config2 && typeof config2.workspaceTemplate === "string") {
|
|
@@ -46704,7 +47025,15 @@ async function runEvaluation(options) {
|
|
|
46704
47025
|
trials,
|
|
46705
47026
|
streamCallbacks,
|
|
46706
47027
|
totalBudgetUsd,
|
|
46707
|
-
failOnError
|
|
47028
|
+
failOnError,
|
|
47029
|
+
poolWorkspaces,
|
|
47030
|
+
poolMaxSlots: configPoolMaxSlots,
|
|
47031
|
+
workspace: legacyWorkspacePath,
|
|
47032
|
+
workspaceMode,
|
|
47033
|
+
workspacePath,
|
|
47034
|
+
workspaceClean,
|
|
47035
|
+
retainOnSuccess,
|
|
47036
|
+
retainOnFailure
|
|
46708
47037
|
} = options;
|
|
46709
47038
|
let useCache = options.useCache;
|
|
46710
47039
|
if (trials && trials.count > 1 && useCache) {
|
|
@@ -46778,7 +47107,7 @@ async function runEvaluation(options) {
|
|
|
46778
47107
|
];
|
|
46779
47108
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveJudgeProvider);
|
|
46780
47109
|
const typeRegistry = createBuiltinRegistry();
|
|
46781
|
-
const discoveryBaseDir = evalFilePath ?
|
|
47110
|
+
const discoveryBaseDir = evalFilePath ? path39.dirname(path39.resolve(evalFilePath)) : process.cwd();
|
|
46782
47111
|
const evalDir = discoveryBaseDir;
|
|
46783
47112
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
46784
47113
|
const providerRegistry = createBuiltinProviderRegistry();
|
|
@@ -46840,13 +47169,29 @@ async function runEvaluation(options) {
|
|
|
46840
47169
|
}
|
|
46841
47170
|
};
|
|
46842
47171
|
const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
|
|
46843
|
-
const
|
|
47172
|
+
const configuredMode = suiteWorkspace?.mode ?? workspaceMode;
|
|
47173
|
+
const configuredStaticPath = suiteWorkspace?.static_path ?? workspacePath ?? legacyWorkspacePath;
|
|
47174
|
+
const useStaticWorkspace = configuredMode === "static" || !!configuredStaticPath && !configuredMode;
|
|
47175
|
+
if (useStaticWorkspace && isPerTestIsolation) {
|
|
47176
|
+
throw new Error(
|
|
47177
|
+
"static workspace mode is incompatible with isolation: per_test. Use isolation: shared (default)."
|
|
47178
|
+
);
|
|
47179
|
+
}
|
|
47180
|
+
if (configuredMode === "static" && !configuredStaticPath) {
|
|
47181
|
+
throw new Error("workspace.mode=static requires workspace.static_path or --workspace-path");
|
|
47182
|
+
}
|
|
47183
|
+
const hasSharedWorkspace = !!(useStaticWorkspace || workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation);
|
|
47184
|
+
const poolEnabled = configuredMode === "pooled" ? true : configuredMode === "ephemeral" || useStaticWorkspace ? false : suiteWorkspace?.pool ?? poolWorkspaces ?? true;
|
|
47185
|
+
const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
|
|
47186
|
+
const finishCleanPolicy = suiteWorkspace?.hooks?.on_finish?.clean;
|
|
47187
|
+
const resolvedRetainOnSuccess = (finishCleanPolicy === "always" || finishCleanPolicy === "on_success" ? "cleanup" : finishCleanPolicy === "on_failure" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
|
|
47188
|
+
const resolvedRetainOnFailure = (finishCleanPolicy === "always" || finishCleanPolicy === "on_failure" ? "cleanup" : finishCleanPolicy === "on_success" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
|
|
46844
47189
|
const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
|
|
46845
|
-
const workers = hasSharedWorkspace ? 1 : requestedWorkers;
|
|
47190
|
+
const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
|
|
46846
47191
|
setupLog(
|
|
46847
|
-
`sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
|
|
47192
|
+
`sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
|
|
46848
47193
|
);
|
|
46849
|
-
if (hasSharedWorkspace && requestedWorkers > 1) {
|
|
47194
|
+
if (hasSharedWorkspace && !usePool && requestedWorkers > 1) {
|
|
46850
47195
|
console.warn(
|
|
46851
47196
|
`Warning: Shared workspace requires sequential execution. Overriding workers from ${requestedWorkers} to 1.`
|
|
46852
47197
|
);
|
|
@@ -46855,7 +47200,38 @@ async function runEvaluation(options) {
|
|
|
46855
47200
|
let sharedWorkspacePath;
|
|
46856
47201
|
let sharedBaselineCommit;
|
|
46857
47202
|
let beforeAllOutput;
|
|
46858
|
-
|
|
47203
|
+
let poolManager;
|
|
47204
|
+
let poolSlot;
|
|
47205
|
+
const poolSlots = [];
|
|
47206
|
+
const availablePoolSlots = [];
|
|
47207
|
+
const poolSlotBaselines = /* @__PURE__ */ new Map();
|
|
47208
|
+
const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
|
|
47209
|
+
if (useStaticWorkspace && configuredStaticPath) {
|
|
47210
|
+
sharedWorkspacePath = configuredStaticPath;
|
|
47211
|
+
setupLog(`using static workspace: ${configuredStaticPath}`);
|
|
47212
|
+
} else if (usePool && suiteWorkspace?.repos) {
|
|
47213
|
+
const slotsNeeded = workers;
|
|
47214
|
+
setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
|
|
47215
|
+
poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
|
|
47216
|
+
const poolRepoManager = new RepoManager(verbose);
|
|
47217
|
+
for (let i = 0; i < slotsNeeded; i++) {
|
|
47218
|
+
const slot = await poolManager.acquireWorkspace({
|
|
47219
|
+
templatePath: workspaceTemplate,
|
|
47220
|
+
repos: suiteWorkspace.repos,
|
|
47221
|
+
maxSlots: poolMaxSlots,
|
|
47222
|
+
repoManager: poolRepoManager,
|
|
47223
|
+
poolReset: (workspaceClean === "full" ? "strict" : workspaceClean === "standard" ? "fast" : null) ?? suiteWorkspace.hooks?.on_reuse?.reset ?? "fast"
|
|
47224
|
+
});
|
|
47225
|
+
poolSlots.push(slot);
|
|
47226
|
+
setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
|
|
47227
|
+
}
|
|
47228
|
+
if (slotsNeeded === 1) {
|
|
47229
|
+
poolSlot = poolSlots[0];
|
|
47230
|
+
sharedWorkspacePath = poolSlot.path;
|
|
47231
|
+
} else {
|
|
47232
|
+
availablePoolSlots.push(...poolSlots);
|
|
47233
|
+
}
|
|
47234
|
+
} else if (workspaceTemplate) {
|
|
46859
47235
|
setupLog(`creating shared workspace from template: ${workspaceTemplate}`);
|
|
46860
47236
|
try {
|
|
46861
47237
|
sharedWorkspacePath = await createTempWorkspace(workspaceTemplate, evalRunId, "shared");
|
|
@@ -46864,288 +47240,359 @@ async function runEvaluation(options) {
|
|
|
46864
47240
|
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
46865
47241
|
throw new Error(`Failed to create shared workspace: ${message}`);
|
|
46866
47242
|
}
|
|
47243
|
+
} else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
|
|
47244
|
+
sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
|
|
47245
|
+
await mkdir12(sharedWorkspacePath, { recursive: true });
|
|
47246
|
+
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
47247
|
+
}
|
|
47248
|
+
try {
|
|
46867
47249
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
46868
|
-
const copiedWorkspaceFile =
|
|
47250
|
+
const copiedWorkspaceFile = path39.join(sharedWorkspacePath, path39.basename(suiteWorkspaceFile));
|
|
46869
47251
|
try {
|
|
46870
47252
|
await stat7(copiedWorkspaceFile);
|
|
46871
47253
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
46872
47254
|
} catch {
|
|
46873
47255
|
}
|
|
46874
47256
|
}
|
|
46875
|
-
|
|
46876
|
-
sharedWorkspacePath
|
|
46877
|
-
|
|
46878
|
-
|
|
46879
|
-
|
|
46880
|
-
|
|
46881
|
-
|
|
46882
|
-
|
|
46883
|
-
|
|
46884
|
-
|
|
46885
|
-
|
|
46886
|
-
|
|
46887
|
-
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
46888
|
-
if (sharedWorkspacePath) {
|
|
46889
|
-
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
46890
|
-
});
|
|
46891
|
-
}
|
|
46892
|
-
throw new Error(`Failed to materialize repos: ${message}`);
|
|
46893
|
-
}
|
|
46894
|
-
}
|
|
46895
|
-
if (sharedWorkspacePath && suiteWorkspace?.before_all) {
|
|
46896
|
-
const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
|
|
46897
|
-
setupLog(
|
|
46898
|
-
`running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
46899
|
-
);
|
|
46900
|
-
const scriptContext = {
|
|
46901
|
-
workspacePath: sharedWorkspacePath,
|
|
46902
|
-
testId: "__before_all__",
|
|
46903
|
-
evalRunId,
|
|
46904
|
-
evalDir
|
|
46905
|
-
};
|
|
46906
|
-
try {
|
|
46907
|
-
beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
|
|
46908
|
-
setupLog("shared before_all completed");
|
|
46909
|
-
} catch (error40) {
|
|
46910
|
-
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
46911
|
-
if (sharedWorkspacePath) {
|
|
46912
|
-
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
46913
|
-
});
|
|
46914
|
-
}
|
|
46915
|
-
throw new Error(`before_all script failed: ${message}`);
|
|
46916
|
-
}
|
|
46917
|
-
}
|
|
46918
|
-
if (sharedWorkspacePath) {
|
|
46919
|
-
try {
|
|
46920
|
-
sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
|
|
46921
|
-
setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
|
|
46922
|
-
} catch {
|
|
46923
|
-
setupLog("shared baseline initialization skipped (non-fatal)");
|
|
46924
|
-
}
|
|
46925
|
-
}
|
|
46926
|
-
let nextWorkerId = 1;
|
|
46927
|
-
const workerIdByEvalId = /* @__PURE__ */ new Map();
|
|
46928
|
-
let beforeAllOutputAttached = false;
|
|
46929
|
-
let cumulativeBudgetCost = 0;
|
|
46930
|
-
let budgetExhausted = false;
|
|
46931
|
-
let failOnErrorTriggered = false;
|
|
46932
|
-
const promises = filteredEvalCases.map(
|
|
46933
|
-
(evalCase) => limit(async () => {
|
|
46934
|
-
const workerId = nextWorkerId++;
|
|
46935
|
-
workerIdByEvalId.set(evalCase.id, workerId);
|
|
46936
|
-
if (totalBudgetUsd !== void 0 && budgetExhausted) {
|
|
46937
|
-
const budgetResult = {
|
|
46938
|
-
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
46939
|
-
testId: evalCase.id,
|
|
46940
|
-
dataset: evalCase.dataset,
|
|
46941
|
-
score: 0,
|
|
46942
|
-
hits: [],
|
|
46943
|
-
misses: [],
|
|
46944
|
-
answer: "",
|
|
46945
|
-
target: target.name,
|
|
46946
|
-
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
46947
|
-
budgetExceeded: true,
|
|
46948
|
-
executionStatus: "execution_error",
|
|
46949
|
-
failureStage: "setup",
|
|
46950
|
-
failureReasonCode: "budget_exceeded",
|
|
46951
|
-
executionError: {
|
|
46952
|
-
message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
46953
|
-
stage: "setup"
|
|
46954
|
-
}
|
|
46955
|
-
};
|
|
46956
|
-
if (onProgress) {
|
|
46957
|
-
await onProgress({
|
|
46958
|
-
workerId,
|
|
46959
|
-
testId: evalCase.id,
|
|
46960
|
-
status: "failed",
|
|
46961
|
-
completedAt: Date.now(),
|
|
46962
|
-
error: budgetResult.error
|
|
47257
|
+
const repoManager = suiteWorkspace?.repos?.length && !usePool && !useStaticWorkspace ? new RepoManager(verbose) : void 0;
|
|
47258
|
+
if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
|
|
47259
|
+
setupLog(
|
|
47260
|
+
`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
|
|
47261
|
+
);
|
|
47262
|
+
try {
|
|
47263
|
+
await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
|
|
47264
|
+
setupLog("shared repo materialization complete");
|
|
47265
|
+
} catch (error40) {
|
|
47266
|
+
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
47267
|
+
if (sharedWorkspacePath && !useStaticWorkspace) {
|
|
47268
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
46963
47269
|
});
|
|
46964
47270
|
}
|
|
46965
|
-
|
|
46966
|
-
await onResult(budgetResult);
|
|
46967
|
-
}
|
|
46968
|
-
return budgetResult;
|
|
47271
|
+
throw new Error(`Failed to materialize repos: ${message}`);
|
|
46969
47272
|
}
|
|
46970
|
-
|
|
46971
|
-
|
|
46972
|
-
|
|
46973
|
-
|
|
46974
|
-
|
|
46975
|
-
|
|
46976
|
-
|
|
46977
|
-
|
|
46978
|
-
|
|
46979
|
-
|
|
46980
|
-
|
|
46981
|
-
|
|
46982
|
-
|
|
46983
|
-
|
|
46984
|
-
|
|
46985
|
-
|
|
46986
|
-
|
|
46987
|
-
|
|
46988
|
-
|
|
46989
|
-
|
|
46990
|
-
|
|
46991
|
-
|
|
46992
|
-
|
|
46993
|
-
|
|
47273
|
+
}
|
|
47274
|
+
const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all_tests;
|
|
47275
|
+
if (sharedWorkspacePath && hasHookCommand(suiteBeforeAllHook)) {
|
|
47276
|
+
const beforeAllHook = suiteBeforeAllHook;
|
|
47277
|
+
const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
|
|
47278
|
+
setupLog(
|
|
47279
|
+
`running shared before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
47280
|
+
);
|
|
47281
|
+
const scriptContext = {
|
|
47282
|
+
workspacePath: sharedWorkspacePath,
|
|
47283
|
+
testId: "__before_all__",
|
|
47284
|
+
evalRunId,
|
|
47285
|
+
evalDir
|
|
47286
|
+
};
|
|
47287
|
+
try {
|
|
47288
|
+
beforeAllOutput = await executeWorkspaceScript(
|
|
47289
|
+
toScriptConfig(beforeAllHook, "before_all_tests", "suite workspace"),
|
|
47290
|
+
scriptContext
|
|
47291
|
+
);
|
|
47292
|
+
setupLog("shared before_all completed");
|
|
47293
|
+
} catch (error40) {
|
|
47294
|
+
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
47295
|
+
if (sharedWorkspacePath && !useStaticWorkspace) {
|
|
47296
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
46994
47297
|
});
|
|
46995
47298
|
}
|
|
46996
|
-
|
|
46997
|
-
await onResult(haltResult);
|
|
46998
|
-
}
|
|
46999
|
-
return haltResult;
|
|
47299
|
+
throw new Error(`before_all script failed: ${message}`);
|
|
47000
47300
|
}
|
|
47001
|
-
|
|
47002
|
-
|
|
47003
|
-
|
|
47004
|
-
|
|
47005
|
-
|
|
47006
|
-
|
|
47007
|
-
|
|
47008
|
-
|
|
47009
|
-
try {
|
|
47010
|
-
const judgeProvider = await resolveJudgeProvider(target);
|
|
47011
|
-
const runCaseOptions = {
|
|
47012
|
-
evalCase,
|
|
47013
|
-
provider: primaryProvider,
|
|
47014
|
-
target,
|
|
47015
|
-
evaluators: evaluatorRegistry,
|
|
47016
|
-
maxRetries,
|
|
47017
|
-
agentTimeoutMs,
|
|
47018
|
-
cache,
|
|
47019
|
-
useCache,
|
|
47020
|
-
now,
|
|
47021
|
-
judgeProvider,
|
|
47022
|
-
targetResolver,
|
|
47023
|
-
availableTargets,
|
|
47301
|
+
}
|
|
47302
|
+
if (availablePoolSlots.length > 0 && hasHookCommand(suiteBeforeAllHook)) {
|
|
47303
|
+
const beforeAllHook = suiteBeforeAllHook;
|
|
47304
|
+
for (const slot of availablePoolSlots) {
|
|
47305
|
+
setupLog(`running before_all on pool slot ${slot.index}`);
|
|
47306
|
+
const scriptContext = {
|
|
47307
|
+
workspacePath: slot.path,
|
|
47308
|
+
testId: "__before_all__",
|
|
47024
47309
|
evalRunId,
|
|
47025
|
-
keepWorkspaces,
|
|
47026
|
-
cleanupWorkspaces,
|
|
47027
|
-
sharedWorkspacePath,
|
|
47028
|
-
sharedBaselineCommit,
|
|
47029
|
-
suiteWorkspaceFile,
|
|
47030
|
-
streamCallbacks,
|
|
47031
|
-
typeRegistry,
|
|
47032
|
-
repoManager,
|
|
47033
47310
|
evalDir
|
|
47034
47311
|
};
|
|
47035
|
-
|
|
47036
|
-
|
|
47037
|
-
|
|
47038
|
-
|
|
47039
|
-
|
|
47040
|
-
|
|
47041
|
-
|
|
47312
|
+
try {
|
|
47313
|
+
const output = await executeWorkspaceScript(
|
|
47314
|
+
toScriptConfig(beforeAllHook, "before_all_tests", "suite workspace"),
|
|
47315
|
+
scriptContext
|
|
47316
|
+
);
|
|
47317
|
+
if (!beforeAllOutput) beforeAllOutput = output;
|
|
47318
|
+
setupLog(`before_all completed on pool slot ${slot.index}`);
|
|
47319
|
+
} catch (error40) {
|
|
47320
|
+
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
47321
|
+
throw new Error(`before_all script failed on pool slot ${slot.index}: ${message}`);
|
|
47322
|
+
}
|
|
47323
|
+
}
|
|
47324
|
+
}
|
|
47325
|
+
if (sharedWorkspacePath) {
|
|
47326
|
+
try {
|
|
47327
|
+
sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
|
|
47328
|
+
setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
|
|
47329
|
+
} catch {
|
|
47330
|
+
setupLog("shared baseline initialization skipped (non-fatal)");
|
|
47331
|
+
}
|
|
47332
|
+
}
|
|
47333
|
+
if (availablePoolSlots.length > 0) {
|
|
47334
|
+
for (const slot of availablePoolSlots) {
|
|
47335
|
+
try {
|
|
47336
|
+
const baseline = await initializeBaseline(slot.path);
|
|
47337
|
+
poolSlotBaselines.set(slot.path, baseline);
|
|
47338
|
+
setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
|
|
47339
|
+
} catch {
|
|
47340
|
+
setupLog(`pool slot ${slot.index} baseline initialization skipped (non-fatal)`);
|
|
47341
|
+
}
|
|
47342
|
+
}
|
|
47343
|
+
}
|
|
47344
|
+
let nextWorkerId = 1;
|
|
47345
|
+
const workerIdByEvalId = /* @__PURE__ */ new Map();
|
|
47346
|
+
let beforeAllOutputAttached = false;
|
|
47347
|
+
let cumulativeBudgetCost = 0;
|
|
47348
|
+
let budgetExhausted = false;
|
|
47349
|
+
let failOnErrorTriggered = false;
|
|
47350
|
+
const promises = filteredEvalCases.map(
|
|
47351
|
+
(evalCase) => limit(async () => {
|
|
47352
|
+
const workerId = nextWorkerId++;
|
|
47353
|
+
workerIdByEvalId.set(evalCase.id, workerId);
|
|
47354
|
+
if (totalBudgetUsd !== void 0 && budgetExhausted) {
|
|
47355
|
+
const budgetResult = {
|
|
47356
|
+
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
47357
|
+
testId: evalCase.id,
|
|
47358
|
+
dataset: evalCase.dataset,
|
|
47359
|
+
score: 0,
|
|
47360
|
+
hits: [],
|
|
47361
|
+
misses: [],
|
|
47362
|
+
answer: "",
|
|
47363
|
+
target: target.name,
|
|
47364
|
+
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
47365
|
+
budgetExceeded: true,
|
|
47366
|
+
executionStatus: "execution_error",
|
|
47367
|
+
failureStage: "setup",
|
|
47368
|
+
failureReasonCode: "budget_exceeded",
|
|
47369
|
+
executionError: {
|
|
47370
|
+
message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
47371
|
+
stage: "setup"
|
|
47042
47372
|
}
|
|
47043
|
-
}
|
|
47044
|
-
|
|
47373
|
+
};
|
|
47374
|
+
if (onProgress) {
|
|
47375
|
+
await onProgress({
|
|
47376
|
+
workerId,
|
|
47377
|
+
testId: evalCase.id,
|
|
47378
|
+
status: "failed",
|
|
47379
|
+
completedAt: Date.now(),
|
|
47380
|
+
error: budgetResult.error
|
|
47381
|
+
});
|
|
47045
47382
|
}
|
|
47046
|
-
if (
|
|
47047
|
-
|
|
47048
|
-
if (cumulativeBudgetCost >= totalBudgetUsd) {
|
|
47049
|
-
budgetExhausted = true;
|
|
47050
|
-
}
|
|
47383
|
+
if (onResult) {
|
|
47384
|
+
await onResult(budgetResult);
|
|
47051
47385
|
}
|
|
47386
|
+
return budgetResult;
|
|
47052
47387
|
}
|
|
47053
|
-
if (failOnError === true &&
|
|
47054
|
-
|
|
47055
|
-
|
|
47056
|
-
|
|
47057
|
-
|
|
47058
|
-
|
|
47388
|
+
if (failOnError === true && failOnErrorTriggered) {
|
|
47389
|
+
const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
|
|
47390
|
+
const haltResult = {
|
|
47391
|
+
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
47392
|
+
testId: evalCase.id,
|
|
47393
|
+
dataset: evalCase.dataset,
|
|
47394
|
+
score: 0,
|
|
47395
|
+
hits: [],
|
|
47396
|
+
misses: [],
|
|
47397
|
+
answer: "",
|
|
47398
|
+
target: target.name,
|
|
47399
|
+
error: errorMsg,
|
|
47400
|
+
executionStatus: "execution_error",
|
|
47401
|
+
failureStage: "setup",
|
|
47402
|
+
failureReasonCode: "error_threshold_exceeded",
|
|
47403
|
+
executionError: { message: errorMsg, stage: "setup" }
|
|
47404
|
+
};
|
|
47405
|
+
if (onProgress) {
|
|
47406
|
+
await onProgress({
|
|
47407
|
+
workerId,
|
|
47408
|
+
testId: evalCase.id,
|
|
47409
|
+
status: "failed",
|
|
47410
|
+
completedAt: Date.now(),
|
|
47411
|
+
error: haltResult.error
|
|
47412
|
+
});
|
|
47413
|
+
}
|
|
47414
|
+
if (onResult) {
|
|
47415
|
+
await onResult(haltResult);
|
|
47416
|
+
}
|
|
47417
|
+
return haltResult;
|
|
47059
47418
|
}
|
|
47060
47419
|
if (onProgress) {
|
|
47061
47420
|
await onProgress({
|
|
47062
47421
|
workerId,
|
|
47063
47422
|
testId: evalCase.id,
|
|
47064
|
-
status:
|
|
47065
|
-
startedAt:
|
|
47066
|
-
// Not used for completed status
|
|
47067
|
-
completedAt: Date.now(),
|
|
47068
|
-
error: result.error
|
|
47423
|
+
status: "running",
|
|
47424
|
+
startedAt: Date.now()
|
|
47069
47425
|
});
|
|
47070
47426
|
}
|
|
47071
|
-
|
|
47072
|
-
|
|
47427
|
+
const testPoolSlot = availablePoolSlots.length > 0 ? availablePoolSlots.pop() : void 0;
|
|
47428
|
+
const testWorkspacePath = testPoolSlot?.path ?? sharedWorkspacePath;
|
|
47429
|
+
const testBaselineCommit = testPoolSlot ? poolSlotBaselines.get(testPoolSlot.path) : sharedBaselineCommit;
|
|
47430
|
+
try {
|
|
47431
|
+
const judgeProvider = await resolveJudgeProvider(target);
|
|
47432
|
+
const runCaseOptions = {
|
|
47433
|
+
evalCase,
|
|
47434
|
+
provider: primaryProvider,
|
|
47435
|
+
target,
|
|
47436
|
+
evaluators: evaluatorRegistry,
|
|
47437
|
+
maxRetries,
|
|
47438
|
+
agentTimeoutMs,
|
|
47439
|
+
cache,
|
|
47440
|
+
useCache,
|
|
47441
|
+
now,
|
|
47442
|
+
judgeProvider,
|
|
47443
|
+
targetResolver,
|
|
47444
|
+
availableTargets,
|
|
47445
|
+
evalRunId,
|
|
47446
|
+
keepWorkspaces,
|
|
47447
|
+
cleanupWorkspaces,
|
|
47448
|
+
retainOnSuccess: resolvedRetainOnSuccess,
|
|
47449
|
+
retainOnFailure: resolvedRetainOnFailure,
|
|
47450
|
+
sharedWorkspacePath: testWorkspacePath,
|
|
47451
|
+
sharedBaselineCommit: testBaselineCommit,
|
|
47452
|
+
suiteWorkspaceFile,
|
|
47453
|
+
streamCallbacks,
|
|
47454
|
+
typeRegistry,
|
|
47455
|
+
repoManager,
|
|
47456
|
+
evalDir
|
|
47457
|
+
};
|
|
47458
|
+
let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
|
|
47459
|
+
if (totalBudgetUsd !== void 0) {
|
|
47460
|
+
let caseCost;
|
|
47461
|
+
if (result.trials && result.trials.length > 0) {
|
|
47462
|
+
const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
|
|
47463
|
+
if (trialCostSum > 0) {
|
|
47464
|
+
caseCost = trialCostSum;
|
|
47465
|
+
}
|
|
47466
|
+
} else {
|
|
47467
|
+
caseCost = result.costUsd;
|
|
47468
|
+
}
|
|
47469
|
+
if (caseCost !== void 0) {
|
|
47470
|
+
cumulativeBudgetCost += caseCost;
|
|
47471
|
+
if (cumulativeBudgetCost >= totalBudgetUsd) {
|
|
47472
|
+
budgetExhausted = true;
|
|
47473
|
+
}
|
|
47474
|
+
}
|
|
47475
|
+
}
|
|
47476
|
+
if (failOnError === true && result.executionStatus === "execution_error") {
|
|
47477
|
+
failOnErrorTriggered = true;
|
|
47478
|
+
}
|
|
47479
|
+
if (beforeAllOutput && !beforeAllOutputAttached) {
|
|
47480
|
+
result = { ...result, beforeAllOutput };
|
|
47481
|
+
beforeAllOutputAttached = true;
|
|
47482
|
+
}
|
|
47483
|
+
if (onProgress) {
|
|
47484
|
+
await onProgress({
|
|
47485
|
+
workerId,
|
|
47486
|
+
testId: evalCase.id,
|
|
47487
|
+
status: result.error ? "failed" : "completed",
|
|
47488
|
+
startedAt: 0,
|
|
47489
|
+
// Not used for completed status
|
|
47490
|
+
completedAt: Date.now(),
|
|
47491
|
+
error: result.error
|
|
47492
|
+
});
|
|
47493
|
+
}
|
|
47494
|
+
if (onResult) {
|
|
47495
|
+
await onResult(result);
|
|
47496
|
+
}
|
|
47497
|
+
return result;
|
|
47498
|
+
} catch (error40) {
|
|
47499
|
+
if (onProgress) {
|
|
47500
|
+
await onProgress({
|
|
47501
|
+
workerId,
|
|
47502
|
+
testId: evalCase.id,
|
|
47503
|
+
status: "failed",
|
|
47504
|
+
completedAt: Date.now(),
|
|
47505
|
+
error: error40 instanceof Error ? error40.message : String(error40)
|
|
47506
|
+
});
|
|
47507
|
+
}
|
|
47508
|
+
throw error40;
|
|
47509
|
+
} finally {
|
|
47510
|
+
if (testPoolSlot) {
|
|
47511
|
+
availablePoolSlots.push(testPoolSlot);
|
|
47512
|
+
}
|
|
47073
47513
|
}
|
|
47074
|
-
|
|
47075
|
-
|
|
47076
|
-
|
|
47077
|
-
|
|
47078
|
-
|
|
47079
|
-
|
|
47080
|
-
|
|
47081
|
-
|
|
47082
|
-
|
|
47083
|
-
|
|
47514
|
+
})
|
|
47515
|
+
);
|
|
47516
|
+
const settled = await Promise.allSettled(promises);
|
|
47517
|
+
const results = [];
|
|
47518
|
+
for (let i = 0; i < settled.length; i++) {
|
|
47519
|
+
const outcome = settled[i];
|
|
47520
|
+
if (outcome.status === "fulfilled") {
|
|
47521
|
+
results.push(outcome.value);
|
|
47522
|
+
} else {
|
|
47523
|
+
const evalCase = filteredEvalCases[i];
|
|
47524
|
+
const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
|
|
47525
|
+
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
47526
|
+
const errorResult = buildErrorResult(
|
|
47527
|
+
evalCase,
|
|
47528
|
+
target.name,
|
|
47529
|
+
(now ?? (() => /* @__PURE__ */ new Date()))(),
|
|
47530
|
+
outcome.reason,
|
|
47531
|
+
promptInputs,
|
|
47532
|
+
primaryProvider,
|
|
47533
|
+
"agent",
|
|
47534
|
+
"provider_error"
|
|
47535
|
+
);
|
|
47536
|
+
results.push(errorResult);
|
|
47537
|
+
if (onResult) {
|
|
47538
|
+
await onResult(errorResult);
|
|
47084
47539
|
}
|
|
47085
|
-
throw error40;
|
|
47086
47540
|
}
|
|
47087
|
-
}
|
|
47088
|
-
|
|
47089
|
-
|
|
47090
|
-
|
|
47091
|
-
|
|
47092
|
-
|
|
47093
|
-
|
|
47094
|
-
|
|
47095
|
-
|
|
47096
|
-
|
|
47097
|
-
|
|
47098
|
-
|
|
47099
|
-
|
|
47100
|
-
|
|
47101
|
-
|
|
47102
|
-
|
|
47103
|
-
|
|
47104
|
-
|
|
47105
|
-
|
|
47106
|
-
|
|
47107
|
-
|
|
47108
|
-
|
|
47109
|
-
|
|
47110
|
-
if (onResult) {
|
|
47111
|
-
await onResult(errorResult);
|
|
47541
|
+
}
|
|
47542
|
+
const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
|
|
47543
|
+
const suiteAfterAllHook = suiteWorkspace?.hooks?.after_all_tests;
|
|
47544
|
+
if (afterAllWorkspaces.length > 0 && hasHookCommand(suiteAfterAllHook)) {
|
|
47545
|
+
const afterAllHook = suiteAfterAllHook;
|
|
47546
|
+
for (const wsPath of afterAllWorkspaces) {
|
|
47547
|
+
const scriptContext = {
|
|
47548
|
+
workspacePath: wsPath,
|
|
47549
|
+
testId: "__after_all__",
|
|
47550
|
+
evalRunId,
|
|
47551
|
+
evalDir
|
|
47552
|
+
};
|
|
47553
|
+
try {
|
|
47554
|
+
const afterAllOutput = await executeWorkspaceScript(
|
|
47555
|
+
toScriptConfig(afterAllHook, "after_all_tests", "suite workspace"),
|
|
47556
|
+
scriptContext,
|
|
47557
|
+
"warn"
|
|
47558
|
+
);
|
|
47559
|
+
if (afterAllOutput && results.length > 0 && wsPath === afterAllWorkspaces[0]) {
|
|
47560
|
+
results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
|
|
47561
|
+
}
|
|
47562
|
+
} catch {
|
|
47563
|
+
}
|
|
47112
47564
|
}
|
|
47113
47565
|
}
|
|
47114
|
-
|
|
47115
|
-
|
|
47116
|
-
|
|
47117
|
-
|
|
47118
|
-
|
|
47119
|
-
|
|
47120
|
-
|
|
47121
|
-
|
|
47122
|
-
|
|
47123
|
-
|
|
47124
|
-
suiteWorkspace.after_all,
|
|
47125
|
-
scriptContext,
|
|
47126
|
-
"warn"
|
|
47127
|
-
);
|
|
47128
|
-
if (afterAllOutput && results.length > 0) {
|
|
47129
|
-
results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
|
|
47566
|
+
if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !useStaticWorkspace) {
|
|
47567
|
+
const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
|
|
47568
|
+
if (hasFailure) {
|
|
47569
|
+
if (resolvedRetainOnFailure === "cleanup") {
|
|
47570
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
47571
|
+
});
|
|
47572
|
+
}
|
|
47573
|
+
} else if (resolvedRetainOnSuccess === "cleanup") {
|
|
47574
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
47575
|
+
});
|
|
47130
47576
|
}
|
|
47131
|
-
} catch {
|
|
47132
47577
|
}
|
|
47133
|
-
}
|
|
47134
|
-
if (sharedWorkspacePath) {
|
|
47135
|
-
const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
|
|
47136
47578
|
if (cleanupWorkspaces) {
|
|
47137
|
-
await
|
|
47138
|
-
});
|
|
47139
|
-
} else if (!hasFailure && !keepWorkspaces) {
|
|
47140
|
-
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
47579
|
+
await cleanupEvalWorkspaces(evalRunId).catch(() => {
|
|
47141
47580
|
});
|
|
47142
47581
|
}
|
|
47582
|
+
return results;
|
|
47583
|
+
} finally {
|
|
47584
|
+
if (poolManager) {
|
|
47585
|
+
if (poolSlot) {
|
|
47586
|
+
await poolManager.releaseSlot(poolSlot);
|
|
47587
|
+
}
|
|
47588
|
+
for (const slot of poolSlots) {
|
|
47589
|
+
if (slot !== poolSlot) {
|
|
47590
|
+
await poolManager.releaseSlot(slot).catch(() => {
|
|
47591
|
+
});
|
|
47592
|
+
}
|
|
47593
|
+
}
|
|
47594
|
+
}
|
|
47143
47595
|
}
|
|
47144
|
-
if (cleanupWorkspaces) {
|
|
47145
|
-
await cleanupEvalWorkspaces(evalRunId).catch(() => {
|
|
47146
|
-
});
|
|
47147
|
-
}
|
|
47148
|
-
return results;
|
|
47149
47596
|
}
|
|
47150
47597
|
async function runBatchEvaluation(options) {
|
|
47151
47598
|
const {
|
|
@@ -47317,6 +47764,8 @@ async function runEvalCase(options) {
|
|
|
47317
47764
|
evalRunId,
|
|
47318
47765
|
keepWorkspaces,
|
|
47319
47766
|
cleanupWorkspaces: forceCleanup,
|
|
47767
|
+
retainOnSuccess,
|
|
47768
|
+
retainOnFailure,
|
|
47320
47769
|
sharedWorkspacePath,
|
|
47321
47770
|
sharedBaselineCommit,
|
|
47322
47771
|
suiteWorkspaceFile,
|
|
@@ -47328,10 +47777,10 @@ async function runEvalCase(options) {
|
|
|
47328
47777
|
const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
|
|
47329
47778
|
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
47330
47779
|
const typeRegistry = providedTypeRegistry ?? createBuiltinRegistry();
|
|
47331
|
-
const
|
|
47780
|
+
const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
|
|
47332
47781
|
let cachedResponse;
|
|
47333
|
-
if (
|
|
47334
|
-
cachedResponse = await cache.get(
|
|
47782
|
+
if (cacheKey && cache) {
|
|
47783
|
+
cachedResponse = await cache.get(cacheKey);
|
|
47335
47784
|
}
|
|
47336
47785
|
const nowFn = now ?? (() => /* @__PURE__ */ new Date());
|
|
47337
47786
|
let workspacePath = sharedWorkspacePath;
|
|
@@ -47362,7 +47811,7 @@ async function runEvalCase(options) {
|
|
|
47362
47811
|
);
|
|
47363
47812
|
}
|
|
47364
47813
|
if (caseWorkspaceFile && workspacePath) {
|
|
47365
|
-
const copiedFile =
|
|
47814
|
+
const copiedFile = path39.join(workspacePath, path39.basename(caseWorkspaceFile));
|
|
47366
47815
|
try {
|
|
47367
47816
|
await stat7(copiedFile);
|
|
47368
47817
|
caseWorkspaceFile = copiedFile;
|
|
@@ -47370,12 +47819,12 @@ async function runEvalCase(options) {
|
|
|
47370
47819
|
}
|
|
47371
47820
|
}
|
|
47372
47821
|
}
|
|
47373
|
-
if (!workspacePath && (evalCase.workspace?.
|
|
47822
|
+
if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
|
|
47374
47823
|
workspacePath = getWorkspacePath(evalRunId, evalCase.id);
|
|
47375
47824
|
await mkdir12(workspacePath, { recursive: true });
|
|
47376
47825
|
}
|
|
47377
47826
|
if (evalCase.workspace?.repos?.length && workspacePath) {
|
|
47378
|
-
const perCaseRepoManager = new RepoManager(
|
|
47827
|
+
const perCaseRepoManager = new RepoManager(setupDebug);
|
|
47379
47828
|
try {
|
|
47380
47829
|
if (setupDebug) {
|
|
47381
47830
|
console.log(
|
|
@@ -47400,11 +47849,13 @@ async function runEvalCase(options) {
|
|
|
47400
47849
|
);
|
|
47401
47850
|
}
|
|
47402
47851
|
}
|
|
47403
|
-
|
|
47404
|
-
|
|
47852
|
+
const caseBeforeAllHook = evalCase.workspace?.hooks?.before_all_tests;
|
|
47853
|
+
if (workspacePath && hasHookCommand(caseBeforeAllHook)) {
|
|
47854
|
+
const beforeAllHook = caseBeforeAllHook;
|
|
47855
|
+
const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
|
|
47405
47856
|
if (setupDebug) {
|
|
47406
47857
|
console.log(
|
|
47407
|
-
`[setup] test=${evalCase.id} running before_all in cwd=${
|
|
47858
|
+
`[setup] test=${evalCase.id} running before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
47408
47859
|
);
|
|
47409
47860
|
}
|
|
47410
47861
|
const scriptContext = {
|
|
@@ -47417,7 +47868,7 @@ async function runEvalCase(options) {
|
|
|
47417
47868
|
};
|
|
47418
47869
|
try {
|
|
47419
47870
|
beforeAllOutput = await executeWorkspaceScript(
|
|
47420
|
-
evalCase.
|
|
47871
|
+
toScriptConfig(beforeAllHook, "before_all_tests", `test '${evalCase.id}'`),
|
|
47421
47872
|
scriptContext
|
|
47422
47873
|
);
|
|
47423
47874
|
if (setupDebug) {
|
|
@@ -47442,7 +47893,9 @@ async function runEvalCase(options) {
|
|
|
47442
47893
|
}
|
|
47443
47894
|
}
|
|
47444
47895
|
}
|
|
47445
|
-
|
|
47896
|
+
const caseBeforeEachHook = evalCase.workspace?.hooks?.before_each_test;
|
|
47897
|
+
if (workspacePath && hasHookCommand(caseBeforeEachHook)) {
|
|
47898
|
+
const beforeEachHook = caseBeforeEachHook;
|
|
47446
47899
|
const scriptContext = {
|
|
47447
47900
|
workspacePath,
|
|
47448
47901
|
testId: evalCase.id,
|
|
@@ -47453,7 +47906,7 @@ async function runEvalCase(options) {
|
|
|
47453
47906
|
};
|
|
47454
47907
|
try {
|
|
47455
47908
|
beforeEachOutput = await executeWorkspaceScript(
|
|
47456
|
-
evalCase.
|
|
47909
|
+
toScriptConfig(beforeEachHook, "before_each_test", `test '${evalCase.id}'`),
|
|
47457
47910
|
scriptContext
|
|
47458
47911
|
);
|
|
47459
47912
|
} catch (error40) {
|
|
@@ -47541,8 +47994,8 @@ async function runEvalCase(options) {
|
|
|
47541
47994
|
}
|
|
47542
47995
|
return errorResult;
|
|
47543
47996
|
}
|
|
47544
|
-
if (
|
|
47545
|
-
await cache.set(
|
|
47997
|
+
if (cacheKey && cache && !cachedResponse) {
|
|
47998
|
+
await cache.set(cacheKey, providerResponse);
|
|
47546
47999
|
}
|
|
47547
48000
|
const output = providerResponse.output;
|
|
47548
48001
|
const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
|
|
@@ -47570,17 +48023,19 @@ async function runEvalCase(options) {
|
|
|
47570
48023
|
}
|
|
47571
48024
|
}
|
|
47572
48025
|
const providerError = extractProviderError(providerResponse);
|
|
47573
|
-
if (repoManager && workspacePath && evalCase.workspace?.reset
|
|
48026
|
+
if (repoManager && workspacePath && evalCase.workspace?.hooks?.after_each_test?.reset && evalCase.workspace.hooks.after_each_test.reset !== "none" && evalCase.workspace.repos) {
|
|
47574
48027
|
try {
|
|
47575
48028
|
await repoManager.reset(
|
|
47576
48029
|
evalCase.workspace.repos,
|
|
47577
48030
|
workspacePath,
|
|
47578
|
-
evalCase.workspace.reset
|
|
48031
|
+
evalCase.workspace.hooks.after_each_test.reset
|
|
47579
48032
|
);
|
|
47580
48033
|
} catch {
|
|
47581
48034
|
}
|
|
47582
48035
|
}
|
|
47583
|
-
|
|
48036
|
+
const caseAfterEachHook = evalCase.workspace?.hooks?.after_each_test;
|
|
48037
|
+
if (workspacePath && hasHookCommand(caseAfterEachHook)) {
|
|
48038
|
+
const afterEachHook = caseAfterEachHook;
|
|
47584
48039
|
const scriptContext = {
|
|
47585
48040
|
workspacePath,
|
|
47586
48041
|
testId: evalCase.id,
|
|
@@ -47591,7 +48046,7 @@ async function runEvalCase(options) {
|
|
|
47591
48046
|
};
|
|
47592
48047
|
try {
|
|
47593
48048
|
afterEachOutput = await executeWorkspaceScript(
|
|
47594
|
-
evalCase.
|
|
48049
|
+
toScriptConfig(afterEachHook, "after_each_test", `test '${evalCase.id}'`),
|
|
47595
48050
|
scriptContext,
|
|
47596
48051
|
"warn"
|
|
47597
48052
|
);
|
|
@@ -47641,8 +48096,13 @@ async function runEvalCase(options) {
|
|
|
47641
48096
|
await cleanupWorkspace(workspacePath).catch(() => {
|
|
47642
48097
|
});
|
|
47643
48098
|
} else if (isFailure) {
|
|
47644
|
-
|
|
47645
|
-
|
|
48099
|
+
if ((retainOnFailure ?? "keep") === "cleanup") {
|
|
48100
|
+
await cleanupWorkspace(workspacePath).catch(() => {
|
|
48101
|
+
});
|
|
48102
|
+
} else {
|
|
48103
|
+
return { ...finalResult, workspacePath };
|
|
48104
|
+
}
|
|
48105
|
+
} else if ((retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup")) !== "keep") {
|
|
47646
48106
|
await cleanupWorkspace(workspacePath).catch(() => {
|
|
47647
48107
|
});
|
|
47648
48108
|
}
|
|
@@ -47660,11 +48120,12 @@ async function runEvalCase(options) {
|
|
|
47660
48120
|
"evaluator_error"
|
|
47661
48121
|
);
|
|
47662
48122
|
if (workspacePath && !isSharedWorkspace) {
|
|
47663
|
-
if (forceCleanup) {
|
|
48123
|
+
if (forceCleanup || (retainOnFailure ?? "keep") === "cleanup") {
|
|
47664
48124
|
await cleanupWorkspace(workspacePath).catch(() => {
|
|
47665
48125
|
});
|
|
48126
|
+
} else {
|
|
48127
|
+
return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
|
|
47666
48128
|
}
|
|
47667
|
-
return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
|
|
47668
48129
|
}
|
|
47669
48130
|
return { ...errorResult, beforeEachOutput, afterEachOutput };
|
|
47670
48131
|
}
|
|
@@ -47683,7 +48144,9 @@ async function runEvalCaseWithTrials(options, trialsConfig) {
|
|
|
47683
48144
|
useCache: false,
|
|
47684
48145
|
// Force cleanup for intermediate trials
|
|
47685
48146
|
cleanupWorkspaces: isLastDeclaredTrial ? options.cleanupWorkspaces : true,
|
|
47686
|
-
keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false
|
|
48147
|
+
keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false,
|
|
48148
|
+
retainOnSuccess: isLastDeclaredTrial ? options.retainOnSuccess : "cleanup",
|
|
48149
|
+
retainOnFailure: isLastDeclaredTrial ? options.retainOnFailure : "cleanup"
|
|
47687
48150
|
};
|
|
47688
48151
|
const result = await runEvalCase(trialOptions);
|
|
47689
48152
|
allResults.push(result);
|
|
@@ -47972,7 +48435,7 @@ async function runEvaluatorList(options) {
|
|
|
47972
48435
|
fileChanges,
|
|
47973
48436
|
workspacePath
|
|
47974
48437
|
};
|
|
47975
|
-
const evalFileDir = evalCase.guideline_paths[0] ?
|
|
48438
|
+
const evalFileDir = evalCase.guideline_paths[0] ? path39.dirname(evalCase.guideline_paths[0]) : process.cwd();
|
|
47976
48439
|
const dispatchContext = {
|
|
47977
48440
|
judgeProvider,
|
|
47978
48441
|
targetResolver,
|
|
@@ -48293,13 +48756,13 @@ async function evaluate(config2) {
|
|
|
48293
48756
|
let evalCases;
|
|
48294
48757
|
let testFilePath;
|
|
48295
48758
|
if (config2.specFile) {
|
|
48296
|
-
testFilePath =
|
|
48759
|
+
testFilePath = path40.resolve(config2.specFile);
|
|
48297
48760
|
evalCases = await loadTests(testFilePath, repoRoot, {
|
|
48298
48761
|
verbose: config2.verbose,
|
|
48299
48762
|
filter: config2.filter
|
|
48300
48763
|
});
|
|
48301
48764
|
} else {
|
|
48302
|
-
testFilePath =
|
|
48765
|
+
testFilePath = path40.join(process.cwd(), "__programmatic__.yaml");
|
|
48303
48766
|
evalCases = (config2.tests ?? []).map((test) => {
|
|
48304
48767
|
const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
|
|
48305
48768
|
const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
|
|
@@ -48385,10 +48848,10 @@ function computeSummary(results, durationMs) {
|
|
|
48385
48848
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
48386
48849
|
async function discoverDefaultTarget(repoRoot) {
|
|
48387
48850
|
const cwd = process.cwd();
|
|
48388
|
-
const chain = buildDirectoryChain(
|
|
48851
|
+
const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
|
|
48389
48852
|
for (const dir of chain) {
|
|
48390
48853
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
48391
|
-
const targetsPath =
|
|
48854
|
+
const targetsPath = path40.join(dir, candidate);
|
|
48392
48855
|
if (!existsSync3(targetsPath)) continue;
|
|
48393
48856
|
try {
|
|
48394
48857
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
@@ -48403,10 +48866,10 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
48403
48866
|
async function loadEnvHierarchy(repoRoot) {
|
|
48404
48867
|
const { readFileSync: readFileSync2 } = await import("node:fs");
|
|
48405
48868
|
const cwd = process.cwd();
|
|
48406
|
-
const chain = buildDirectoryChain(
|
|
48869
|
+
const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
|
|
48407
48870
|
const envFiles = [];
|
|
48408
48871
|
for (const dir of chain) {
|
|
48409
|
-
const envPath =
|
|
48872
|
+
const envPath = path40.join(dir, ".env");
|
|
48410
48873
|
if (existsSync3(envPath)) envFiles.push(envPath);
|
|
48411
48874
|
}
|
|
48412
48875
|
for (let i = envFiles.length - 1; i >= 0; i--) {
|
|
@@ -48587,7 +49050,7 @@ var ResponseCache = class {
|
|
|
48587
49050
|
async get(key) {
|
|
48588
49051
|
const filePath = this.keyToPath(key);
|
|
48589
49052
|
try {
|
|
48590
|
-
const data = await
|
|
49053
|
+
const data = await readFile12(filePath, "utf8");
|
|
48591
49054
|
return JSON.parse(data);
|
|
48592
49055
|
} catch {
|
|
48593
49056
|
return void 0;
|
|
@@ -48595,13 +49058,13 @@ var ResponseCache = class {
|
|
|
48595
49058
|
}
|
|
48596
49059
|
async set(key, value) {
|
|
48597
49060
|
const filePath = this.keyToPath(key);
|
|
48598
|
-
const dir =
|
|
49061
|
+
const dir = path41.dirname(filePath);
|
|
48599
49062
|
await mkdir13(dir, { recursive: true });
|
|
48600
49063
|
await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
48601
49064
|
}
|
|
48602
49065
|
keyToPath(key) {
|
|
48603
49066
|
const prefix = key.slice(0, 2);
|
|
48604
|
-
return
|
|
49067
|
+
return path41.join(this.cachePath, prefix, `${key}.json`);
|
|
48605
49068
|
}
|
|
48606
49069
|
};
|
|
48607
49070
|
function shouldEnableCache(params) {
|
|
@@ -49107,9 +49570,9 @@ export {
|
|
|
49107
49570
|
ProviderRegistry,
|
|
49108
49571
|
getAgentvHome,
|
|
49109
49572
|
getWorkspacesRoot,
|
|
49110
|
-
getGitCacheRoot,
|
|
49111
49573
|
getSubagentsRoot,
|
|
49112
49574
|
getTraceStateRoot,
|
|
49575
|
+
getWorkspacePoolRoot,
|
|
49113
49576
|
ensureVSCodeSubagents,
|
|
49114
49577
|
readTargetDefinitions,
|
|
49115
49578
|
listTargetNames,
|
|
@@ -49171,6 +49634,8 @@ export {
|
|
|
49171
49634
|
createTempWorkspace,
|
|
49172
49635
|
cleanupWorkspace,
|
|
49173
49636
|
cleanupEvalWorkspaces,
|
|
49637
|
+
computeWorkspaceFingerprint,
|
|
49638
|
+
WorkspacePoolManager,
|
|
49174
49639
|
RepoManager,
|
|
49175
49640
|
resolveWorkspaceTemplate,
|
|
49176
49641
|
executeWorkspaceScript,
|
|
@@ -49189,4 +49654,4 @@ export {
|
|
|
49189
49654
|
OtelStreamingObserver,
|
|
49190
49655
|
createAgentKernel
|
|
49191
49656
|
};
|
|
49192
|
-
//# sourceMappingURL=chunk-
|
|
49657
|
+
//# sourceMappingURL=chunk-LZ5MPQFM.js.map
|