agentv 2.14.3 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-D3WKMO6D.js → chunk-IKGJTJSU.js} +200 -13
- package/dist/chunk-IKGJTJSU.js.map +1 -0
- package/dist/{chunk-TK4PB62M.js → chunk-VBK7BJLE.js} +962 -462
- package/dist/chunk-VBK7BJLE.js.map +1 -0
- package/dist/{chunk-MDAFSKDI.js → chunk-ZDSLKUCM.js} +28 -5
- package/dist/chunk-ZDSLKUCM.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-F62E44UQ.js → dist-XZPR3AGM.js} +8 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-SQSP4PL3.js → interactive-SSGXAAKA.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-D3WKMO6D.js.map +0 -1
- package/dist/chunk-MDAFSKDI.js.map +0 -1
- package/dist/chunk-TK4PB62M.js.map +0 -1
- /package/dist/{dist-F62E44UQ.js.map → dist-XZPR3AGM.js.map} +0 -0
- /package/dist/{interactive-SQSP4PL3.js.map → interactive-SSGXAAKA.js.map} +0 -0
|
@@ -632,8 +632,8 @@ function getErrorMap() {
|
|
|
632
632
|
|
|
633
633
|
// ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
|
|
634
634
|
var makeIssue = (params) => {
|
|
635
|
-
const { data, path:
|
|
636
|
-
const fullPath = [...
|
|
635
|
+
const { data, path: path42, errorMaps, issueData } = params;
|
|
636
|
+
const fullPath = [...path42, ...issueData.path || []];
|
|
637
637
|
const fullIssue = {
|
|
638
638
|
...issueData,
|
|
639
639
|
path: fullPath
|
|
@@ -749,11 +749,11 @@ var errorUtil;
|
|
|
749
749
|
|
|
750
750
|
// ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
|
|
751
751
|
var ParseInputLazyPath = class {
|
|
752
|
-
constructor(parent, value,
|
|
752
|
+
constructor(parent, value, path42, key) {
|
|
753
753
|
this._cachedPath = [];
|
|
754
754
|
this.parent = parent;
|
|
755
755
|
this.data = value;
|
|
756
|
-
this._path =
|
|
756
|
+
this._path = path42;
|
|
757
757
|
this._key = key;
|
|
758
758
|
}
|
|
759
759
|
get path() {
|
|
@@ -6661,10 +6661,10 @@ function assignProp(target, prop, value) {
|
|
|
6661
6661
|
configurable: true
|
|
6662
6662
|
});
|
|
6663
6663
|
}
|
|
6664
|
-
function getElementAtPath(obj,
|
|
6665
|
-
if (!
|
|
6664
|
+
function getElementAtPath(obj, path42) {
|
|
6665
|
+
if (!path42)
|
|
6666
6666
|
return obj;
|
|
6667
|
-
return
|
|
6667
|
+
return path42.reduce((acc, key) => acc?.[key], obj);
|
|
6668
6668
|
}
|
|
6669
6669
|
function promiseAllObject(promisesObj) {
|
|
6670
6670
|
const keys = Object.keys(promisesObj);
|
|
@@ -6984,11 +6984,11 @@ function aborted(x, startIndex = 0) {
|
|
|
6984
6984
|
}
|
|
6985
6985
|
return false;
|
|
6986
6986
|
}
|
|
6987
|
-
function prefixIssues(
|
|
6987
|
+
function prefixIssues(path42, issues) {
|
|
6988
6988
|
return issues.map((iss) => {
|
|
6989
6989
|
var _a17;
|
|
6990
6990
|
(_a17 = iss).path ?? (_a17.path = []);
|
|
6991
|
-
iss.path.unshift(
|
|
6991
|
+
iss.path.unshift(path42);
|
|
6992
6992
|
return iss;
|
|
6993
6993
|
});
|
|
6994
6994
|
}
|
|
@@ -7125,7 +7125,7 @@ function treeifyError(error40, _mapper) {
|
|
|
7125
7125
|
return issue2.message;
|
|
7126
7126
|
};
|
|
7127
7127
|
const result = { errors: [] };
|
|
7128
|
-
const processError = (error41,
|
|
7128
|
+
const processError = (error41, path42 = []) => {
|
|
7129
7129
|
var _a17, _b8;
|
|
7130
7130
|
for (const issue2 of error41.issues) {
|
|
7131
7131
|
if (issue2.code === "invalid_union" && issue2.errors.length) {
|
|
@@ -7135,7 +7135,7 @@ function treeifyError(error40, _mapper) {
|
|
|
7135
7135
|
} else if (issue2.code === "invalid_element") {
|
|
7136
7136
|
processError({ issues: issue2.issues }, issue2.path);
|
|
7137
7137
|
} else {
|
|
7138
|
-
const fullpath = [...
|
|
7138
|
+
const fullpath = [...path42, ...issue2.path];
|
|
7139
7139
|
if (fullpath.length === 0) {
|
|
7140
7140
|
result.errors.push(mapper(issue2));
|
|
7141
7141
|
continue;
|
|
@@ -7165,9 +7165,9 @@ function treeifyError(error40, _mapper) {
|
|
|
7165
7165
|
processError(error40);
|
|
7166
7166
|
return result;
|
|
7167
7167
|
}
|
|
7168
|
-
function toDotPath(
|
|
7168
|
+
function toDotPath(path42) {
|
|
7169
7169
|
const segs = [];
|
|
7170
|
-
for (const seg of
|
|
7170
|
+
for (const seg of path42) {
|
|
7171
7171
|
if (typeof seg === "number")
|
|
7172
7172
|
segs.push(`[${seg}]`);
|
|
7173
7173
|
else if (typeof seg === "symbol")
|
|
@@ -26720,14 +26720,14 @@ function createAzure(options = {}) {
|
|
|
26720
26720
|
description: "Azure OpenAI resource name"
|
|
26721
26721
|
});
|
|
26722
26722
|
const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
|
|
26723
|
-
const url2 = ({ path:
|
|
26723
|
+
const url2 = ({ path: path42, modelId }) => {
|
|
26724
26724
|
var _a24;
|
|
26725
26725
|
const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
|
|
26726
26726
|
let fullUrl;
|
|
26727
26727
|
if (options.useDeploymentBasedUrls) {
|
|
26728
|
-
fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${
|
|
26728
|
+
fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path42}`);
|
|
26729
26729
|
} else {
|
|
26730
|
-
fullUrl = new URL(`${baseUrlPrefix}/v1${
|
|
26730
|
+
fullUrl = new URL(`${baseUrlPrefix}/v1${path42}`);
|
|
26731
26731
|
}
|
|
26732
26732
|
fullUrl.searchParams.set("api-version", apiVersion);
|
|
26733
26733
|
return fullUrl.toString();
|
|
@@ -33960,9 +33960,9 @@ import { randomBytes } from "node:crypto";
|
|
|
33960
33960
|
import { createServer } from "node:http";
|
|
33961
33961
|
import fs2 from "node:fs/promises";
|
|
33962
33962
|
import path31 from "node:path";
|
|
33963
|
-
import { createHash as
|
|
33964
|
-
import { mkdir as
|
|
33965
|
-
import
|
|
33963
|
+
import { createHash as createHash3, randomUUID as randomUUID7 } from "node:crypto";
|
|
33964
|
+
import { mkdir as mkdir13, stat as stat7 } from "node:fs/promises";
|
|
33965
|
+
import path39 from "node:path";
|
|
33966
33966
|
import micromatch4 from "micromatch";
|
|
33967
33967
|
import { readFileSync } from "node:fs";
|
|
33968
33968
|
import path322 from "node:path";
|
|
@@ -33977,15 +33977,21 @@ import path35 from "node:path";
|
|
|
33977
33977
|
import { execFile } from "node:child_process";
|
|
33978
33978
|
import { createHash } from "node:crypto";
|
|
33979
33979
|
import { existsSync as existsSync2 } from "node:fs";
|
|
33980
|
-
import { mkdir as mkdir11, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
33980
|
+
import { cp as cp2, mkdir as mkdir11, readFile as readFile11, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
33981
33981
|
import path36 from "node:path";
|
|
33982
33982
|
import { promisify as promisify5 } from "node:util";
|
|
33983
|
-
import {
|
|
33984
|
-
import
|
|
33983
|
+
import { execFile as execFile2 } from "node:child_process";
|
|
33984
|
+
import { createHash as createHash2 } from "node:crypto";
|
|
33985
33985
|
import { existsSync as existsSync3 } from "node:fs";
|
|
33986
|
-
import
|
|
33987
|
-
import
|
|
33986
|
+
import { mkdir as mkdir12, rm as rm6, unlink as unlink2, writeFile as writeFile8 } from "node:fs/promises";
|
|
33987
|
+
import path37 from "node:path";
|
|
33988
|
+
import { promisify as promisify6 } from "node:util";
|
|
33989
|
+
import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
|
|
33990
|
+
import path38 from "node:path";
|
|
33991
|
+
import { existsSync as existsSync4 } from "node:fs";
|
|
33988
33992
|
import path40 from "node:path";
|
|
33993
|
+
import { mkdir as mkdir14, readFile as readFile12, writeFile as writeFile9 } from "node:fs/promises";
|
|
33994
|
+
import path41 from "node:path";
|
|
33989
33995
|
function computeTraceSummary(messages) {
|
|
33990
33996
|
const toolCallCounts = {};
|
|
33991
33997
|
const toolDurations = {};
|
|
@@ -34546,6 +34552,17 @@ function parseExecutionDefaults(raw, configPath) {
|
|
|
34546
34552
|
} else if (otelFile !== void 0) {
|
|
34547
34553
|
logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
|
|
34548
34554
|
}
|
|
34555
|
+
if (typeof obj.pool_workspaces === "boolean") {
|
|
34556
|
+
result.pool_workspaces = obj.pool_workspaces;
|
|
34557
|
+
} else if (obj.pool_workspaces !== void 0) {
|
|
34558
|
+
logWarning(`Invalid execution.pool_workspaces in ${configPath}, expected boolean`);
|
|
34559
|
+
}
|
|
34560
|
+
const poolSlots = obj.pool_slots;
|
|
34561
|
+
if (typeof poolSlots === "number" && Number.isInteger(poolSlots) && poolSlots >= 1 && poolSlots <= 50) {
|
|
34562
|
+
result.pool_slots = poolSlots;
|
|
34563
|
+
} else if (poolSlots !== void 0) {
|
|
34564
|
+
logWarning(`Invalid execution.pool_slots in ${configPath}, expected integer 1-50`);
|
|
34565
|
+
}
|
|
34549
34566
|
return Object.keys(result).length > 0 ? result : void 0;
|
|
34550
34567
|
}
|
|
34551
34568
|
function logWarning(message) {
|
|
@@ -35961,6 +35978,7 @@ async function processMessages(options) {
|
|
|
35961
35978
|
repoRootPath,
|
|
35962
35979
|
guidelinePatterns,
|
|
35963
35980
|
guidelinePaths,
|
|
35981
|
+
treatFileSegmentsAsGuidelines,
|
|
35964
35982
|
textParts,
|
|
35965
35983
|
messageType,
|
|
35966
35984
|
verbose
|
|
@@ -36008,16 +36026,20 @@ async function processMessages(options) {
|
|
|
36008
36026
|
}
|
|
36009
36027
|
try {
|
|
36010
36028
|
const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
36011
|
-
|
|
36012
|
-
|
|
36013
|
-
|
|
36014
|
-
|
|
36015
|
-
|
|
36016
|
-
|
|
36017
|
-
|
|
36018
|
-
|
|
36019
|
-
|
|
36029
|
+
const classifyAsGuideline = shouldTreatAsGuideline({
|
|
36030
|
+
messageType,
|
|
36031
|
+
resolvedPath,
|
|
36032
|
+
repoRootPath,
|
|
36033
|
+
guidelinePatterns,
|
|
36034
|
+
treatFileSegmentsAsGuidelines
|
|
36035
|
+
});
|
|
36036
|
+
if (classifyAsGuideline && guidelinePaths) {
|
|
36037
|
+
guidelinePaths.push(path5.resolve(resolvedPath));
|
|
36038
|
+
if (verbose) {
|
|
36039
|
+
console.log(` [Guideline] Found: ${displayPath}`);
|
|
36040
|
+
console.log(` Resolved to: ${resolvedPath}`);
|
|
36020
36041
|
}
|
|
36042
|
+
continue;
|
|
36021
36043
|
}
|
|
36022
36044
|
segments.push({
|
|
36023
36045
|
type: "file",
|
|
@@ -36046,6 +36068,26 @@ async function processMessages(options) {
|
|
|
36046
36068
|
}
|
|
36047
36069
|
return segments;
|
|
36048
36070
|
}
|
|
36071
|
+
function shouldTreatAsGuideline(options) {
|
|
36072
|
+
const {
|
|
36073
|
+
messageType,
|
|
36074
|
+
resolvedPath,
|
|
36075
|
+
repoRootPath,
|
|
36076
|
+
guidelinePatterns,
|
|
36077
|
+
treatFileSegmentsAsGuidelines
|
|
36078
|
+
} = options;
|
|
36079
|
+
if (messageType !== "input") {
|
|
36080
|
+
return false;
|
|
36081
|
+
}
|
|
36082
|
+
if (treatFileSegmentsAsGuidelines) {
|
|
36083
|
+
return true;
|
|
36084
|
+
}
|
|
36085
|
+
if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
36086
|
+
return false;
|
|
36087
|
+
}
|
|
36088
|
+
const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
|
|
36089
|
+
return isGuidelineFile(relativeToRepo, guidelinePatterns);
|
|
36090
|
+
}
|
|
36049
36091
|
function asString3(value) {
|
|
36050
36092
|
return typeof value === "string" ? value : void 0;
|
|
36051
36093
|
}
|
|
@@ -36380,6 +36422,8 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
36380
36422
|
for (const guidelinePath of testCase.guideline_paths) {
|
|
36381
36423
|
console.log(` - ${guidelinePath}`);
|
|
36382
36424
|
}
|
|
36425
|
+
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
36426
|
+
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
36383
36427
|
} else {
|
|
36384
36428
|
console.log(" No guidelines found");
|
|
36385
36429
|
}
|
|
@@ -36740,7 +36784,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36740
36784
|
} else {
|
|
36741
36785
|
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
|
|
36742
36786
|
}
|
|
36743
|
-
const suiteWorkspace =
|
|
36787
|
+
const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
|
|
36744
36788
|
const suiteInputMessages = expandInputShorthand(suite.input);
|
|
36745
36789
|
const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
36746
36790
|
const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
|
|
@@ -36776,12 +36820,24 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36776
36820
|
}
|
|
36777
36821
|
const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
|
|
36778
36822
|
const skipDefaults = caseExecution?.skip_defaults === true;
|
|
36779
|
-
const
|
|
36823
|
+
const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
|
|
36824
|
+
const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
|
|
36780
36825
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
36781
36826
|
const guidelinePaths = [];
|
|
36782
36827
|
const inputTextParts = [];
|
|
36783
|
-
const
|
|
36784
|
-
messages:
|
|
36828
|
+
const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
|
|
36829
|
+
messages: effectiveSuiteInputMessages,
|
|
36830
|
+
searchRoots,
|
|
36831
|
+
repoRootPath,
|
|
36832
|
+
guidelinePatterns,
|
|
36833
|
+
guidelinePaths,
|
|
36834
|
+
treatFileSegmentsAsGuidelines: true,
|
|
36835
|
+
textParts: inputTextParts,
|
|
36836
|
+
messageType: "input",
|
|
36837
|
+
verbose
|
|
36838
|
+
}) : [];
|
|
36839
|
+
const testInputSegments = await processMessages({
|
|
36840
|
+
messages: testInputMessages,
|
|
36785
36841
|
searchRoots,
|
|
36786
36842
|
repoRootPath,
|
|
36787
36843
|
guidelinePatterns,
|
|
@@ -36790,6 +36846,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36790
36846
|
messageType: "input",
|
|
36791
36847
|
verbose
|
|
36792
36848
|
});
|
|
36849
|
+
const inputSegments = [...suiteInputSegments, ...testInputSegments];
|
|
36793
36850
|
const outputSegments = hasExpectedMessages ? await processExpectedMessages({
|
|
36794
36851
|
messages: expectedMessages,
|
|
36795
36852
|
searchRoots,
|
|
@@ -36837,7 +36894,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36837
36894
|
...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
|
|
36838
36895
|
...userFilePaths
|
|
36839
36896
|
];
|
|
36840
|
-
const caseWorkspace =
|
|
36897
|
+
const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
|
|
36841
36898
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
36842
36899
|
const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
|
|
36843
36900
|
const caseTargets = extractTargetsFromTestCase(evalcase);
|
|
@@ -36868,6 +36925,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36868
36925
|
for (const guidelinePath of testCase.guideline_paths) {
|
|
36869
36926
|
console.log(` - ${guidelinePath}`);
|
|
36870
36927
|
}
|
|
36928
|
+
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
36929
|
+
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
36871
36930
|
} else {
|
|
36872
36931
|
console.log(" No guidelines found");
|
|
36873
36932
|
}
|
|
@@ -36967,6 +37026,26 @@ function parseResetConfig(raw) {
|
|
|
36967
37026
|
...afterEach !== void 0 && { after_each: afterEach }
|
|
36968
37027
|
};
|
|
36969
37028
|
}
|
|
37029
|
+
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
37030
|
+
if (typeof raw === "string") {
|
|
37031
|
+
const workspaceFilePath = path8.resolve(evalFileDir, raw);
|
|
37032
|
+
let content;
|
|
37033
|
+
try {
|
|
37034
|
+
content = await readFile7(workspaceFilePath, "utf8");
|
|
37035
|
+
} catch {
|
|
37036
|
+
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
37037
|
+
}
|
|
37038
|
+
const parsed = parse22(content);
|
|
37039
|
+
if (!isJsonObject(parsed)) {
|
|
37040
|
+
throw new Error(
|
|
37041
|
+
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
37042
|
+
);
|
|
37043
|
+
}
|
|
37044
|
+
const workspaceFileDir = path8.dirname(workspaceFilePath);
|
|
37045
|
+
return parseWorkspaceConfig(parsed, workspaceFileDir);
|
|
37046
|
+
}
|
|
37047
|
+
return parseWorkspaceConfig(raw, evalFileDir);
|
|
37048
|
+
}
|
|
36970
37049
|
function parseWorkspaceConfig(raw, evalFileDir) {
|
|
36971
37050
|
if (!isJsonObject(raw)) return void 0;
|
|
36972
37051
|
const obj = raw;
|
|
@@ -40938,6 +41017,9 @@ function getSubagentsRoot() {
|
|
|
40938
41017
|
function getTraceStateRoot() {
|
|
40939
41018
|
return path21.join(getAgentvHome(), "trace-state");
|
|
40940
41019
|
}
|
|
41020
|
+
function getWorkspacePoolRoot() {
|
|
41021
|
+
return path21.join(getAgentvHome(), "workspace-pool");
|
|
41022
|
+
}
|
|
40941
41023
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
40942
41024
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
40943
41025
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
@@ -41738,8 +41820,6 @@ var AGENTV_REQUEST_TEMPLATE = `[[ ## task ## ]]
|
|
|
41738
41820
|
|
|
41739
41821
|
**IMPORTANT**: Follow these exact steps:
|
|
41740
41822
|
1. Create and write your complete response to: {{responseFileTmp}}
|
|
41741
|
-
- All intended file outputs/changes MUST be written in your response file.
|
|
41742
|
-
- For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
|
|
41743
41823
|
2. When completely finished, run these PowerShell commands to signal completion:
|
|
41744
41824
|
\`\`\`
|
|
41745
41825
|
Move-Item -LiteralPath '{{responseFileTmp}}' -Destination '{{responseFileFinal}}'
|
|
@@ -41756,8 +41836,6 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
|
|
|
41756
41836
|
|
|
41757
41837
|
**IMPORTANT**: Follow these exact steps:
|
|
41758
41838
|
1. Create and write your complete response to: {{responseFileTmp}}
|
|
41759
|
-
- All intended file outputs/changes MUST be written in your response file.
|
|
41760
|
-
- For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
|
|
41761
41839
|
2. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
|
|
41762
41840
|
3. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
|
|
41763
41841
|
`;
|
|
@@ -42347,16 +42425,16 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
42347
42425
|
});
|
|
42348
42426
|
}
|
|
42349
42427
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
42350
|
-
const { mkdir:
|
|
42428
|
+
const { mkdir: mkdir15, readFile: readFile13, rm: rm7, writeFile: writeFile10 } = await import("node:fs/promises");
|
|
42351
42429
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
42352
|
-
const
|
|
42430
|
+
const path42 = await import("node:path");
|
|
42353
42431
|
const { randomUUID: randomUUID8 } = await import("node:crypto");
|
|
42354
|
-
const dir =
|
|
42355
|
-
await
|
|
42356
|
-
const stdinPath =
|
|
42357
|
-
const stdoutPath =
|
|
42358
|
-
const stderrPath =
|
|
42359
|
-
await
|
|
42432
|
+
const dir = path42.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
|
|
42433
|
+
await mkdir15(dir, { recursive: true });
|
|
42434
|
+
const stdinPath = path42.join(dir, "stdin.txt");
|
|
42435
|
+
const stdoutPath = path42.join(dir, "stdout.txt");
|
|
42436
|
+
const stderrPath = path42.join(dir, "stderr.txt");
|
|
42437
|
+
await writeFile10(stdinPath, stdinPayload, "utf8");
|
|
42360
42438
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
42361
42439
|
const { spawn: spawn4 } = await import("node:child_process");
|
|
42362
42440
|
try {
|
|
@@ -42385,11 +42463,11 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
42385
42463
|
resolve2(code ?? 0);
|
|
42386
42464
|
});
|
|
42387
42465
|
});
|
|
42388
|
-
const stdout = (await
|
|
42389
|
-
const stderr = (await
|
|
42466
|
+
const stdout = (await readFile13(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
42467
|
+
const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
42390
42468
|
return { stdout, stderr, exitCode };
|
|
42391
42469
|
} finally {
|
|
42392
|
-
await
|
|
42470
|
+
await rm7(dir, { recursive: true, force: true });
|
|
42393
42471
|
}
|
|
42394
42472
|
}
|
|
42395
42473
|
var DEFAULT_MAX_CALLS = 50;
|
|
@@ -42699,7 +42777,7 @@ var CodeEvaluator = class {
|
|
|
42699
42777
|
outputPath,
|
|
42700
42778
|
guidelineFiles: context.evalCase.guideline_paths,
|
|
42701
42779
|
inputFiles: context.evalCase.file_paths.filter(
|
|
42702
|
-
(
|
|
42780
|
+
(path42) => !context.evalCase.guideline_paths.includes(path42)
|
|
42703
42781
|
),
|
|
42704
42782
|
input: context.evalCase.input,
|
|
42705
42783
|
trace: context.trace ?? null,
|
|
@@ -42942,6 +43020,8 @@ ${context.fileChanges}`;
|
|
|
42942
43020
|
};
|
|
42943
43021
|
} catch (e) {
|
|
42944
43022
|
const message = e instanceof Error ? e.message : String(e);
|
|
43023
|
+
const evalName = context.evaluator?.name ?? "llm-judge";
|
|
43024
|
+
console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
|
|
42945
43025
|
return {
|
|
42946
43026
|
score: 0,
|
|
42947
43027
|
verdict: "skip",
|
|
@@ -42970,24 +43050,39 @@ ${context.fileChanges}`;
|
|
|
42970
43050
|
systemPrompt,
|
|
42971
43051
|
target: judgeProvider.targetName
|
|
42972
43052
|
};
|
|
42973
|
-
|
|
42974
|
-
|
|
42975
|
-
|
|
42976
|
-
|
|
42977
|
-
|
|
42978
|
-
|
|
42979
|
-
|
|
42980
|
-
|
|
42981
|
-
|
|
42982
|
-
|
|
42983
|
-
|
|
42984
|
-
|
|
42985
|
-
|
|
42986
|
-
|
|
42987
|
-
|
|
42988
|
-
|
|
42989
|
-
|
|
42990
|
-
|
|
43053
|
+
try {
|
|
43054
|
+
const { data, tokenUsage } = await this.runWithRetry({
|
|
43055
|
+
context,
|
|
43056
|
+
judgeProvider,
|
|
43057
|
+
systemPrompt,
|
|
43058
|
+
userPrompt: prompt,
|
|
43059
|
+
schema: rubricEvaluationSchema
|
|
43060
|
+
});
|
|
43061
|
+
const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
|
|
43062
|
+
return {
|
|
43063
|
+
score,
|
|
43064
|
+
verdict,
|
|
43065
|
+
hits,
|
|
43066
|
+
misses,
|
|
43067
|
+
expectedAspectCount: rubrics.length,
|
|
43068
|
+
reasoning: data.overall_reasoning,
|
|
43069
|
+
evaluatorRawRequest,
|
|
43070
|
+
tokenUsage
|
|
43071
|
+
};
|
|
43072
|
+
} catch (e) {
|
|
43073
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
43074
|
+
const evalName = context.evaluator?.name ?? "llm-judge";
|
|
43075
|
+
console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
|
|
43076
|
+
return {
|
|
43077
|
+
score: 0,
|
|
43078
|
+
verdict: "skip",
|
|
43079
|
+
hits: [],
|
|
43080
|
+
misses: [`Judge parse failure after 3 attempts: ${message}`],
|
|
43081
|
+
expectedAspectCount: rubrics.length,
|
|
43082
|
+
reasoning: `Judge parse failure after 3 attempts: ${message}`,
|
|
43083
|
+
evaluatorRawRequest
|
|
43084
|
+
};
|
|
43085
|
+
}
|
|
42991
43086
|
}
|
|
42992
43087
|
/**
|
|
42993
43088
|
* Evaluate using score-range rubrics (analytic rubric scoring).
|
|
@@ -43001,25 +43096,40 @@ ${context.fileChanges}`;
|
|
|
43001
43096
|
systemPrompt,
|
|
43002
43097
|
target: judgeProvider.targetName
|
|
43003
43098
|
};
|
|
43004
|
-
|
|
43005
|
-
|
|
43006
|
-
|
|
43007
|
-
|
|
43008
|
-
|
|
43009
|
-
|
|
43010
|
-
|
|
43011
|
-
|
|
43012
|
-
|
|
43013
|
-
|
|
43014
|
-
|
|
43015
|
-
|
|
43016
|
-
|
|
43017
|
-
|
|
43018
|
-
|
|
43019
|
-
|
|
43020
|
-
|
|
43021
|
-
|
|
43022
|
-
|
|
43099
|
+
try {
|
|
43100
|
+
const { data, tokenUsage } = await this.runWithRetry({
|
|
43101
|
+
context,
|
|
43102
|
+
judgeProvider,
|
|
43103
|
+
systemPrompt,
|
|
43104
|
+
userPrompt: prompt,
|
|
43105
|
+
schema: scoreRangeEvaluationSchema
|
|
43106
|
+
});
|
|
43107
|
+
const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
|
|
43108
|
+
return {
|
|
43109
|
+
score,
|
|
43110
|
+
verdict,
|
|
43111
|
+
hits,
|
|
43112
|
+
misses,
|
|
43113
|
+
expectedAspectCount: rubrics.length,
|
|
43114
|
+
reasoning: data.overall_reasoning,
|
|
43115
|
+
evaluatorRawRequest,
|
|
43116
|
+
details,
|
|
43117
|
+
tokenUsage
|
|
43118
|
+
};
|
|
43119
|
+
} catch (e) {
|
|
43120
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
43121
|
+
const evalName = context.evaluator?.name ?? "llm-judge";
|
|
43122
|
+
console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
|
|
43123
|
+
return {
|
|
43124
|
+
score: 0,
|
|
43125
|
+
verdict: "skip",
|
|
43126
|
+
hits: [],
|
|
43127
|
+
misses: [`Judge parse failure after 3 attempts: ${message}`],
|
|
43128
|
+
expectedAspectCount: rubrics.length,
|
|
43129
|
+
reasoning: `Judge parse failure after 3 attempts: ${message}`,
|
|
43130
|
+
evaluatorRawRequest
|
|
43131
|
+
};
|
|
43132
|
+
}
|
|
43023
43133
|
}
|
|
43024
43134
|
/**
|
|
43025
43135
|
* Build prompt for score-range rubric evaluation.
|
|
@@ -43303,19 +43413,13 @@ var CompositeEvaluator = class {
|
|
|
43303
43413
|
runWeightedAverage(results, weights) {
|
|
43304
43414
|
let totalWeight = 0;
|
|
43305
43415
|
let weightedSum = 0;
|
|
43416
|
+
let evaluatedCount = 0;
|
|
43306
43417
|
const allHits = [];
|
|
43307
43418
|
const allMisses = [];
|
|
43308
43419
|
const reasoningParts = [];
|
|
43309
43420
|
const scores = [];
|
|
43310
43421
|
for (const member of results) {
|
|
43311
43422
|
const weight = weights?.[member.id] ?? 1;
|
|
43312
|
-
totalWeight += weight;
|
|
43313
|
-
weightedSum += member.result.score * weight;
|
|
43314
|
-
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
43315
|
-
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
43316
|
-
if (member.result.reasoning) {
|
|
43317
|
-
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
43318
|
-
}
|
|
43319
43423
|
scores.push({
|
|
43320
43424
|
name: member.id,
|
|
43321
43425
|
type: member.type,
|
|
@@ -43330,6 +43434,32 @@ var CompositeEvaluator = class {
|
|
|
43330
43434
|
details: member.result.details,
|
|
43331
43435
|
tokenUsage: member.result.tokenUsage
|
|
43332
43436
|
});
|
|
43437
|
+
if (member.result.verdict === "skip") {
|
|
43438
|
+
continue;
|
|
43439
|
+
}
|
|
43440
|
+
evaluatedCount++;
|
|
43441
|
+
totalWeight += weight;
|
|
43442
|
+
weightedSum += member.result.score * weight;
|
|
43443
|
+
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
43444
|
+
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
43445
|
+
if (member.result.reasoning) {
|
|
43446
|
+
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
43447
|
+
}
|
|
43448
|
+
}
|
|
43449
|
+
if (evaluatedCount === 0 && results.length > 0) {
|
|
43450
|
+
return {
|
|
43451
|
+
score: 0,
|
|
43452
|
+
verdict: "skip",
|
|
43453
|
+
hits: [],
|
|
43454
|
+
misses: [],
|
|
43455
|
+
expectedAspectCount: 1,
|
|
43456
|
+
reasoning: "All evaluators skipped (infrastructure failure)",
|
|
43457
|
+
evaluatorRawRequest: {
|
|
43458
|
+
aggregator: "weighted_average",
|
|
43459
|
+
...weights ? { weights } : {}
|
|
43460
|
+
},
|
|
43461
|
+
scores
|
|
43462
|
+
};
|
|
43333
43463
|
}
|
|
43334
43464
|
const finalScore = totalWeight > 0 ? weightedSum / totalWeight : 0;
|
|
43335
43465
|
return {
|
|
@@ -43353,19 +43483,8 @@ var CompositeEvaluator = class {
|
|
|
43353
43483
|
const reasoningParts = [];
|
|
43354
43484
|
let passingCount = 0;
|
|
43355
43485
|
let borderlineCount = 0;
|
|
43486
|
+
let evaluatedCount = 0;
|
|
43356
43487
|
for (const member of results) {
|
|
43357
|
-
const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
|
|
43358
|
-
if (isPassing) {
|
|
43359
|
-
passingCount++;
|
|
43360
|
-
if (member.result.verdict === "borderline") {
|
|
43361
|
-
borderlineCount++;
|
|
43362
|
-
}
|
|
43363
|
-
}
|
|
43364
|
-
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
43365
|
-
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
43366
|
-
if (member.result.reasoning) {
|
|
43367
|
-
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
43368
|
-
}
|
|
43369
43488
|
scores.push({
|
|
43370
43489
|
name: member.id,
|
|
43371
43490
|
type: member.type,
|
|
@@ -43379,8 +43498,39 @@ var CompositeEvaluator = class {
|
|
|
43379
43498
|
details: member.result.details,
|
|
43380
43499
|
tokenUsage: member.result.tokenUsage
|
|
43381
43500
|
});
|
|
43501
|
+
if (member.result.verdict === "skip") {
|
|
43502
|
+
continue;
|
|
43503
|
+
}
|
|
43504
|
+
evaluatedCount++;
|
|
43505
|
+
const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
|
|
43506
|
+
if (isPassing) {
|
|
43507
|
+
passingCount++;
|
|
43508
|
+
if (member.result.verdict === "borderline") {
|
|
43509
|
+
borderlineCount++;
|
|
43510
|
+
}
|
|
43511
|
+
}
|
|
43512
|
+
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
43513
|
+
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
43514
|
+
if (member.result.reasoning) {
|
|
43515
|
+
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
43516
|
+
}
|
|
43517
|
+
}
|
|
43518
|
+
if (evaluatedCount === 0 && results.length > 0) {
|
|
43519
|
+
return {
|
|
43520
|
+
score: 0,
|
|
43521
|
+
verdict: "skip",
|
|
43522
|
+
hits: [],
|
|
43523
|
+
misses: [],
|
|
43524
|
+
expectedAspectCount: 1,
|
|
43525
|
+
reasoning: "All evaluators skipped (infrastructure failure)",
|
|
43526
|
+
evaluatorRawRequest: {
|
|
43527
|
+
aggregator: "threshold",
|
|
43528
|
+
threshold
|
|
43529
|
+
},
|
|
43530
|
+
scores
|
|
43531
|
+
};
|
|
43382
43532
|
}
|
|
43383
|
-
const totalCount =
|
|
43533
|
+
const totalCount = evaluatedCount;
|
|
43384
43534
|
const score = totalCount > 0 ? passingCount / totalCount : 0;
|
|
43385
43535
|
const pass = score >= threshold;
|
|
43386
43536
|
if (pass && borderlineCount > 0) {
|
|
@@ -43882,115 +44032,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
43882
44032
|
* Evaluate a single field against the expected value.
|
|
43883
44033
|
*/
|
|
43884
44034
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
43885
|
-
const { path:
|
|
43886
|
-
const candidateValue = resolvePath(candidateData,
|
|
43887
|
-
const expectedValue = resolvePath(expectedData,
|
|
44035
|
+
const { path: path42, match, required: required2 = true, weight = 1 } = fieldConfig;
|
|
44036
|
+
const candidateValue = resolvePath(candidateData, path42);
|
|
44037
|
+
const expectedValue = resolvePath(expectedData, path42);
|
|
43888
44038
|
if (expectedValue === void 0) {
|
|
43889
44039
|
return {
|
|
43890
|
-
path:
|
|
44040
|
+
path: path42,
|
|
43891
44041
|
score: 1,
|
|
43892
44042
|
// No expected value means no comparison needed
|
|
43893
44043
|
weight,
|
|
43894
44044
|
hit: true,
|
|
43895
|
-
message: `${
|
|
44045
|
+
message: `${path42}: no expected value`
|
|
43896
44046
|
};
|
|
43897
44047
|
}
|
|
43898
44048
|
if (candidateValue === void 0) {
|
|
43899
44049
|
if (required2) {
|
|
43900
44050
|
return {
|
|
43901
|
-
path:
|
|
44051
|
+
path: path42,
|
|
43902
44052
|
score: 0,
|
|
43903
44053
|
weight,
|
|
43904
44054
|
hit: false,
|
|
43905
|
-
message: `${
|
|
44055
|
+
message: `${path42} (required, missing)`
|
|
43906
44056
|
};
|
|
43907
44057
|
}
|
|
43908
44058
|
return {
|
|
43909
|
-
path:
|
|
44059
|
+
path: path42,
|
|
43910
44060
|
score: 1,
|
|
43911
44061
|
// Don't penalize missing optional fields
|
|
43912
44062
|
weight: 0,
|
|
43913
44063
|
// Zero weight means it won't affect the score
|
|
43914
44064
|
hit: true,
|
|
43915
|
-
message: `${
|
|
44065
|
+
message: `${path42}: optional field missing`
|
|
43916
44066
|
};
|
|
43917
44067
|
}
|
|
43918
44068
|
switch (match) {
|
|
43919
44069
|
case "exact":
|
|
43920
|
-
return this.compareExact(
|
|
44070
|
+
return this.compareExact(path42, candidateValue, expectedValue, weight);
|
|
43921
44071
|
case "numeric_tolerance":
|
|
43922
44072
|
return this.compareNumericTolerance(
|
|
43923
|
-
|
|
44073
|
+
path42,
|
|
43924
44074
|
candidateValue,
|
|
43925
44075
|
expectedValue,
|
|
43926
44076
|
fieldConfig,
|
|
43927
44077
|
weight
|
|
43928
44078
|
);
|
|
43929
44079
|
case "date":
|
|
43930
|
-
return this.compareDate(
|
|
44080
|
+
return this.compareDate(path42, candidateValue, expectedValue, fieldConfig, weight);
|
|
43931
44081
|
default:
|
|
43932
44082
|
return {
|
|
43933
|
-
path:
|
|
44083
|
+
path: path42,
|
|
43934
44084
|
score: 0,
|
|
43935
44085
|
weight,
|
|
43936
44086
|
hit: false,
|
|
43937
|
-
message: `${
|
|
44087
|
+
message: `${path42}: unknown match type "${match}"`
|
|
43938
44088
|
};
|
|
43939
44089
|
}
|
|
43940
44090
|
}
|
|
43941
44091
|
/**
|
|
43942
44092
|
* Exact equality comparison.
|
|
43943
44093
|
*/
|
|
43944
|
-
compareExact(
|
|
44094
|
+
compareExact(path42, candidateValue, expectedValue, weight) {
|
|
43945
44095
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
43946
44096
|
return {
|
|
43947
|
-
path:
|
|
44097
|
+
path: path42,
|
|
43948
44098
|
score: 1,
|
|
43949
44099
|
weight,
|
|
43950
44100
|
hit: true,
|
|
43951
|
-
message:
|
|
44101
|
+
message: path42
|
|
43952
44102
|
};
|
|
43953
44103
|
}
|
|
43954
44104
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
43955
44105
|
return {
|
|
43956
|
-
path:
|
|
44106
|
+
path: path42,
|
|
43957
44107
|
score: 0,
|
|
43958
44108
|
weight,
|
|
43959
44109
|
hit: false,
|
|
43960
|
-
message: `${
|
|
44110
|
+
message: `${path42} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
43961
44111
|
};
|
|
43962
44112
|
}
|
|
43963
44113
|
return {
|
|
43964
|
-
path:
|
|
44114
|
+
path: path42,
|
|
43965
44115
|
score: 0,
|
|
43966
44116
|
weight,
|
|
43967
44117
|
hit: false,
|
|
43968
|
-
message: `${
|
|
44118
|
+
message: `${path42} (value mismatch)`
|
|
43969
44119
|
};
|
|
43970
44120
|
}
|
|
43971
44121
|
/**
|
|
43972
44122
|
* Numeric comparison with absolute or relative tolerance.
|
|
43973
44123
|
*/
|
|
43974
|
-
compareNumericTolerance(
|
|
44124
|
+
compareNumericTolerance(path42, candidateValue, expectedValue, fieldConfig, weight) {
|
|
43975
44125
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
43976
44126
|
const candidateNum = toNumber2(candidateValue);
|
|
43977
44127
|
const expectedNum = toNumber2(expectedValue);
|
|
43978
44128
|
if (candidateNum === null || expectedNum === null) {
|
|
43979
44129
|
return {
|
|
43980
|
-
path:
|
|
44130
|
+
path: path42,
|
|
43981
44131
|
score: 0,
|
|
43982
44132
|
weight,
|
|
43983
44133
|
hit: false,
|
|
43984
|
-
message: `${
|
|
44134
|
+
message: `${path42} (non-numeric value)`
|
|
43985
44135
|
};
|
|
43986
44136
|
}
|
|
43987
44137
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
43988
44138
|
return {
|
|
43989
|
-
path:
|
|
44139
|
+
path: path42,
|
|
43990
44140
|
score: 0,
|
|
43991
44141
|
weight,
|
|
43992
44142
|
hit: false,
|
|
43993
|
-
message: `${
|
|
44143
|
+
message: `${path42} (invalid numeric value)`
|
|
43994
44144
|
};
|
|
43995
44145
|
}
|
|
43996
44146
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -44003,61 +44153,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
44003
44153
|
}
|
|
44004
44154
|
if (withinTolerance) {
|
|
44005
44155
|
return {
|
|
44006
|
-
path:
|
|
44156
|
+
path: path42,
|
|
44007
44157
|
score: 1,
|
|
44008
44158
|
weight,
|
|
44009
44159
|
hit: true,
|
|
44010
|
-
message: `${
|
|
44160
|
+
message: `${path42} (within tolerance: diff=${diff.toFixed(2)})`
|
|
44011
44161
|
};
|
|
44012
44162
|
}
|
|
44013
44163
|
return {
|
|
44014
|
-
path:
|
|
44164
|
+
path: path42,
|
|
44015
44165
|
score: 0,
|
|
44016
44166
|
weight,
|
|
44017
44167
|
hit: false,
|
|
44018
|
-
message: `${
|
|
44168
|
+
message: `${path42} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
44019
44169
|
};
|
|
44020
44170
|
}
|
|
44021
44171
|
/**
|
|
44022
44172
|
* Date comparison with format normalization.
|
|
44023
44173
|
*/
|
|
44024
|
-
compareDate(
|
|
44174
|
+
compareDate(path42, candidateValue, expectedValue, fieldConfig, weight) {
|
|
44025
44175
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
44026
44176
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
44027
44177
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
44028
44178
|
if (candidateDate === null) {
|
|
44029
44179
|
return {
|
|
44030
|
-
path:
|
|
44180
|
+
path: path42,
|
|
44031
44181
|
score: 0,
|
|
44032
44182
|
weight,
|
|
44033
44183
|
hit: false,
|
|
44034
|
-
message: `${
|
|
44184
|
+
message: `${path42} (unparseable candidate date)`
|
|
44035
44185
|
};
|
|
44036
44186
|
}
|
|
44037
44187
|
if (expectedDate === null) {
|
|
44038
44188
|
return {
|
|
44039
|
-
path:
|
|
44189
|
+
path: path42,
|
|
44040
44190
|
score: 0,
|
|
44041
44191
|
weight,
|
|
44042
44192
|
hit: false,
|
|
44043
|
-
message: `${
|
|
44193
|
+
message: `${path42} (unparseable expected date)`
|
|
44044
44194
|
};
|
|
44045
44195
|
}
|
|
44046
44196
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
44047
44197
|
return {
|
|
44048
|
-
path:
|
|
44198
|
+
path: path42,
|
|
44049
44199
|
score: 1,
|
|
44050
44200
|
weight,
|
|
44051
44201
|
hit: true,
|
|
44052
|
-
message:
|
|
44202
|
+
message: path42
|
|
44053
44203
|
};
|
|
44054
44204
|
}
|
|
44055
44205
|
return {
|
|
44056
|
-
path:
|
|
44206
|
+
path: path42,
|
|
44057
44207
|
score: 0,
|
|
44058
44208
|
weight,
|
|
44059
44209
|
hit: false,
|
|
44060
|
-
message: `${
|
|
44210
|
+
message: `${path42} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
44061
44211
|
};
|
|
44062
44212
|
}
|
|
44063
44213
|
/**
|
|
@@ -44098,11 +44248,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
44098
44248
|
};
|
|
44099
44249
|
}
|
|
44100
44250
|
};
|
|
44101
|
-
function resolvePath(obj,
|
|
44102
|
-
if (!
|
|
44251
|
+
function resolvePath(obj, path42) {
|
|
44252
|
+
if (!path42 || !obj) {
|
|
44103
44253
|
return void 0;
|
|
44104
44254
|
}
|
|
44105
|
-
const parts =
|
|
44255
|
+
const parts = path42.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
44106
44256
|
let current = obj;
|
|
44107
44257
|
for (const part of parts) {
|
|
44108
44258
|
if (current === null || current === void 0) {
|
|
@@ -44906,8 +45056,8 @@ var TokenUsageEvaluator = class {
|
|
|
44906
45056
|
};
|
|
44907
45057
|
}
|
|
44908
45058
|
};
|
|
44909
|
-
function getNestedValue(obj,
|
|
44910
|
-
const parts =
|
|
45059
|
+
function getNestedValue(obj, path42) {
|
|
45060
|
+
const parts = path42.split(".");
|
|
44911
45061
|
let current = obj;
|
|
44912
45062
|
for (const part of parts) {
|
|
44913
45063
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -46302,9 +46452,267 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
|
46302
46452
|
}
|
|
46303
46453
|
}
|
|
46304
46454
|
var execFileAsync = promisify5(execFile);
|
|
46455
|
+
function gitEnv() {
|
|
46456
|
+
const env = { ...process.env };
|
|
46457
|
+
for (const key of Object.keys(env)) {
|
|
46458
|
+
if (key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND") {
|
|
46459
|
+
delete env[key];
|
|
46460
|
+
}
|
|
46461
|
+
}
|
|
46462
|
+
return {
|
|
46463
|
+
...env,
|
|
46464
|
+
GIT_TERMINAL_PROMPT: "0",
|
|
46465
|
+
GIT_ASKPASS: "",
|
|
46466
|
+
GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
|
|
46467
|
+
};
|
|
46468
|
+
}
|
|
46469
|
+
async function git(args, opts) {
|
|
46470
|
+
const { stdout } = await execFileAsync("git", args, {
|
|
46471
|
+
cwd: opts?.cwd,
|
|
46472
|
+
timeout: opts?.timeout ?? 3e5,
|
|
46473
|
+
env: gitEnv(),
|
|
46474
|
+
maxBuffer: 50 * 1024 * 1024
|
|
46475
|
+
});
|
|
46476
|
+
return stdout.trim();
|
|
46477
|
+
}
|
|
46478
|
+
function normalizeRepoForFingerprint(repo) {
|
|
46479
|
+
const source = repo.source.type === "git" ? { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") } : { type: "local", path: repo.source.path };
|
|
46480
|
+
const result = {
|
|
46481
|
+
path: repo.path,
|
|
46482
|
+
source,
|
|
46483
|
+
ref: repo.checkout?.ref ?? "HEAD"
|
|
46484
|
+
};
|
|
46485
|
+
if (repo.clone?.depth !== void 0) {
|
|
46486
|
+
result.depth = repo.clone.depth;
|
|
46487
|
+
}
|
|
46488
|
+
if (repo.clone?.filter !== void 0) {
|
|
46489
|
+
result.filter = repo.clone.filter;
|
|
46490
|
+
}
|
|
46491
|
+
if (repo.clone?.sparse?.length) {
|
|
46492
|
+
result.sparse = [...repo.clone.sparse].sort();
|
|
46493
|
+
}
|
|
46494
|
+
return result;
|
|
46495
|
+
}
|
|
46496
|
+
function computeWorkspaceFingerprint(templatePath, repos) {
|
|
46497
|
+
const canonical = {
|
|
46498
|
+
templatePath: templatePath ?? null,
|
|
46499
|
+
repos: [...repos].sort((a, b) => a.path.localeCompare(b.path)).map(normalizeRepoForFingerprint)
|
|
46500
|
+
};
|
|
46501
|
+
return createHash("sha256").update(JSON.stringify(canonical)).digest("hex");
|
|
46502
|
+
}
|
|
46503
|
+
async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
46504
|
+
await mkdir11(dest, { recursive: true });
|
|
46505
|
+
const entries = await readdir4(src, { withFileTypes: true });
|
|
46506
|
+
for (const entry of entries) {
|
|
46507
|
+
const srcPath = path36.join(src, entry.name);
|
|
46508
|
+
const destPath = path36.join(dest, entry.name);
|
|
46509
|
+
if (entry.name === ".git") {
|
|
46510
|
+
continue;
|
|
46511
|
+
}
|
|
46512
|
+
if (entry.isDirectory()) {
|
|
46513
|
+
if (skipDirs?.has(entry.name)) {
|
|
46514
|
+
continue;
|
|
46515
|
+
}
|
|
46516
|
+
await copyDirectoryRecursive2(srcPath, destPath, skipDirs);
|
|
46517
|
+
} else {
|
|
46518
|
+
await cp2(srcPath, destPath, { preserveTimestamps: true, force: true });
|
|
46519
|
+
}
|
|
46520
|
+
}
|
|
46521
|
+
}
|
|
46522
|
+
var WorkspacePoolManager = class {
|
|
46523
|
+
poolRoot;
|
|
46524
|
+
constructor(poolRoot) {
|
|
46525
|
+
this.poolRoot = poolRoot ?? getWorkspacePoolRoot();
|
|
46526
|
+
}
|
|
46527
|
+
/**
|
|
46528
|
+
* Acquire a workspace slot from the pool.
|
|
46529
|
+
*
|
|
46530
|
+
* 1. Compute fingerprint from template + repos
|
|
46531
|
+
* 2. Check drift (compare stored metadata.json fingerprint vs computed)
|
|
46532
|
+
* 3. If drift: warn, remove all slots, rematerialize
|
|
46533
|
+
* 4. Acquire a slot (try-lock slot-0, slot-1, ..., up to maxSlots)
|
|
46534
|
+
* 5. If slot exists: reset repos, re-copy template files (skip repo directories)
|
|
46535
|
+
* 6. If new slot: copy template, materialize all repos, write metadata.json
|
|
46536
|
+
* 7. Return the slot (with path, index, isExisting)
|
|
46537
|
+
*/
|
|
46538
|
+
async acquireWorkspace(options) {
|
|
46539
|
+
const { templatePath, repos, maxSlots, repoManager } = options;
|
|
46540
|
+
const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
|
|
46541
|
+
const poolDir = path36.join(this.poolRoot, fingerprint);
|
|
46542
|
+
await mkdir11(poolDir, { recursive: true });
|
|
46543
|
+
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
46544
|
+
if (drifted) {
|
|
46545
|
+
console.warn(
|
|
46546
|
+
`[workspace-pool] Drift detected for fingerprint ${fingerprint.slice(0, 12)}... Removing stale slots.`
|
|
46547
|
+
);
|
|
46548
|
+
await this.removeAllSlots(poolDir);
|
|
46549
|
+
}
|
|
46550
|
+
for (let i = 0; i < maxSlots; i++) {
|
|
46551
|
+
const slotPath = path36.join(poolDir, `slot-${i}`);
|
|
46552
|
+
const lockPath = `${slotPath}.lock`;
|
|
46553
|
+
const locked = await this.tryLock(lockPath);
|
|
46554
|
+
if (!locked) {
|
|
46555
|
+
continue;
|
|
46556
|
+
}
|
|
46557
|
+
const slotExists = existsSync2(slotPath);
|
|
46558
|
+
if (slotExists) {
|
|
46559
|
+
await this.resetSlot(slotPath, templatePath, repos);
|
|
46560
|
+
return {
|
|
46561
|
+
index: i,
|
|
46562
|
+
path: slotPath,
|
|
46563
|
+
isExisting: true,
|
|
46564
|
+
lockPath,
|
|
46565
|
+
fingerprint,
|
|
46566
|
+
poolDir
|
|
46567
|
+
};
|
|
46568
|
+
}
|
|
46569
|
+
await mkdir11(slotPath, { recursive: true });
|
|
46570
|
+
if (templatePath) {
|
|
46571
|
+
await copyDirectoryRecursive2(templatePath, slotPath);
|
|
46572
|
+
}
|
|
46573
|
+
if (repos.length > 0) {
|
|
46574
|
+
await repoManager.materializeAll(repos, slotPath);
|
|
46575
|
+
}
|
|
46576
|
+
await this.writeMetadata(poolDir, fingerprint, templatePath ?? null, repos);
|
|
46577
|
+
return {
|
|
46578
|
+
index: i,
|
|
46579
|
+
path: slotPath,
|
|
46580
|
+
isExisting: false,
|
|
46581
|
+
lockPath,
|
|
46582
|
+
fingerprint,
|
|
46583
|
+
poolDir
|
|
46584
|
+
};
|
|
46585
|
+
}
|
|
46586
|
+
throw new Error(
|
|
46587
|
+
`All ${maxSlots} pool slots are locked for fingerprint ${fingerprint.slice(0, 12)}...`
|
|
46588
|
+
);
|
|
46589
|
+
}
|
|
46590
|
+
/** Remove lock file to release a slot. */
|
|
46591
|
+
async releaseSlot(slot) {
|
|
46592
|
+
try {
|
|
46593
|
+
await unlink(slot.lockPath);
|
|
46594
|
+
} catch {
|
|
46595
|
+
}
|
|
46596
|
+
}
|
|
46597
|
+
/**
|
|
46598
|
+
* Try to acquire a PID-based lock file.
|
|
46599
|
+
* On EEXIST, read PID and check if process is alive. If dead, stale lock — remove and retry.
|
|
46600
|
+
* Returns true if lock acquired, false if slot is actively locked.
|
|
46601
|
+
* Uses a bounded loop (max 3 attempts) to avoid unbounded recursion.
|
|
46602
|
+
*/
|
|
46603
|
+
async tryLock(lockPath) {
|
|
46604
|
+
for (let attempt = 0; attempt < 3; attempt++) {
|
|
46605
|
+
try {
|
|
46606
|
+
await writeFile7(lockPath, String(process.pid), { flag: "wx" });
|
|
46607
|
+
return true;
|
|
46608
|
+
} catch (err) {
|
|
46609
|
+
if (err.code !== "EEXIST") {
|
|
46610
|
+
throw err;
|
|
46611
|
+
}
|
|
46612
|
+
try {
|
|
46613
|
+
const pidStr = await readFile11(lockPath, "utf-8");
|
|
46614
|
+
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
46615
|
+
if (!Number.isNaN(pid)) {
|
|
46616
|
+
try {
|
|
46617
|
+
process.kill(pid, 0);
|
|
46618
|
+
return false;
|
|
46619
|
+
} catch {
|
|
46620
|
+
await unlink(lockPath).catch(() => {
|
|
46621
|
+
});
|
|
46622
|
+
continue;
|
|
46623
|
+
}
|
|
46624
|
+
}
|
|
46625
|
+
} catch {
|
|
46626
|
+
}
|
|
46627
|
+
return false;
|
|
46628
|
+
}
|
|
46629
|
+
}
|
|
46630
|
+
return false;
|
|
46631
|
+
}
|
|
46632
|
+
/**
|
|
46633
|
+
* Check if the stored fingerprint in metadata.json differs from the computed one.
|
|
46634
|
+
* Returns true if drifted, false otherwise.
|
|
46635
|
+
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
46636
|
+
*/
|
|
46637
|
+
async checkDrift(poolDir, fingerprint) {
|
|
46638
|
+
const metadataPath = path36.join(poolDir, "metadata.json");
|
|
46639
|
+
try {
|
|
46640
|
+
const raw = await readFile11(metadataPath, "utf-8");
|
|
46641
|
+
const metadata = JSON.parse(raw);
|
|
46642
|
+
return metadata.fingerprint !== fingerprint;
|
|
46643
|
+
} catch {
|
|
46644
|
+
return false;
|
|
46645
|
+
}
|
|
46646
|
+
}
|
|
46647
|
+
/** Write metadata.json with fingerprint, inputs, and timestamp. */
|
|
46648
|
+
async writeMetadata(poolDir, fingerprint, templatePath, repos) {
|
|
46649
|
+
const metadata = {
|
|
46650
|
+
fingerprint,
|
|
46651
|
+
templatePath,
|
|
46652
|
+
repos,
|
|
46653
|
+
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
46654
|
+
};
|
|
46655
|
+
await writeFile7(path36.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
46656
|
+
}
|
|
46657
|
+
/** Remove all slot directories and their lock files from a pool directory. */
|
|
46658
|
+
async removeAllSlots(poolDir) {
|
|
46659
|
+
const entries = await readdir4(poolDir);
|
|
46660
|
+
for (const entry of entries) {
|
|
46661
|
+
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
46662
|
+
const lockPath = path36.join(poolDir, `${entry}.lock`);
|
|
46663
|
+
if (existsSync2(lockPath)) {
|
|
46664
|
+
try {
|
|
46665
|
+
const pidStr = await readFile11(lockPath, "utf-8");
|
|
46666
|
+
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
46667
|
+
if (!Number.isNaN(pid)) {
|
|
46668
|
+
try {
|
|
46669
|
+
process.kill(pid, 0);
|
|
46670
|
+
console.warn(`[workspace-pool] Skipping slot ${entry}: locked by PID ${pid}`);
|
|
46671
|
+
continue;
|
|
46672
|
+
} catch {
|
|
46673
|
+
}
|
|
46674
|
+
}
|
|
46675
|
+
} catch {
|
|
46676
|
+
}
|
|
46677
|
+
}
|
|
46678
|
+
await rm5(path36.join(poolDir, entry), { recursive: true, force: true });
|
|
46679
|
+
await rm5(lockPath, { force: true }).catch(() => {
|
|
46680
|
+
});
|
|
46681
|
+
}
|
|
46682
|
+
}
|
|
46683
|
+
await rm5(path36.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
46684
|
+
});
|
|
46685
|
+
}
|
|
46686
|
+
/**
|
|
46687
|
+
* Reset an existing slot for reuse:
|
|
46688
|
+
* 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
|
|
46689
|
+
* 2. Re-copy template files (skip repo directories)
|
|
46690
|
+
*/
|
|
46691
|
+
async resetSlot(slotPath, templatePath, repos) {
|
|
46692
|
+
for (const repo of repos) {
|
|
46693
|
+
const repoDir = path36.join(slotPath, repo.path);
|
|
46694
|
+
if (!existsSync2(repoDir)) {
|
|
46695
|
+
continue;
|
|
46696
|
+
}
|
|
46697
|
+
const ref = repo.checkout?.ref ?? "HEAD";
|
|
46698
|
+
await git(["reset", "--hard", ref], { cwd: repoDir });
|
|
46699
|
+
await git(["clean", "-fd"], { cwd: repoDir });
|
|
46700
|
+
}
|
|
46701
|
+
if (templatePath) {
|
|
46702
|
+
const repoDirNames = new Set(
|
|
46703
|
+
repos.map((r) => {
|
|
46704
|
+
const normalized = r.path.replace(/^\.\//, "");
|
|
46705
|
+
return normalized.split("/")[0];
|
|
46706
|
+
})
|
|
46707
|
+
);
|
|
46708
|
+
await copyDirectoryRecursive2(templatePath, slotPath, repoDirNames);
|
|
46709
|
+
}
|
|
46710
|
+
}
|
|
46711
|
+
};
|
|
46712
|
+
var execFileAsync2 = promisify6(execFile2);
|
|
46305
46713
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
46306
46714
|
var LOCK_TIMEOUT_MS = 6e4;
|
|
46307
|
-
function
|
|
46715
|
+
function gitEnv2() {
|
|
46308
46716
|
const env = { ...process.env };
|
|
46309
46717
|
for (const key of Object.keys(env)) {
|
|
46310
46718
|
if (key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND") {
|
|
@@ -46320,16 +46728,16 @@ function gitEnv() {
|
|
|
46320
46728
|
}
|
|
46321
46729
|
function cacheKey(source) {
|
|
46322
46730
|
const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
|
|
46323
|
-
return
|
|
46731
|
+
return createHash2("sha256").update(raw).digest("hex");
|
|
46324
46732
|
}
|
|
46325
46733
|
function getSourceUrl(source) {
|
|
46326
46734
|
return source.type === "git" ? source.url : source.path;
|
|
46327
46735
|
}
|
|
46328
|
-
async function
|
|
46329
|
-
const { stdout } = await
|
|
46736
|
+
async function git2(args, opts) {
|
|
46737
|
+
const { stdout } = await execFileAsync2("git", args, {
|
|
46330
46738
|
cwd: opts?.cwd,
|
|
46331
46739
|
timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
|
|
46332
|
-
env:
|
|
46740
|
+
env: gitEnv2(),
|
|
46333
46741
|
maxBuffer: 50 * 1024 * 1024
|
|
46334
46742
|
// 50MB
|
|
46335
46743
|
});
|
|
@@ -46339,7 +46747,7 @@ async function acquireLock(lockPath) {
|
|
|
46339
46747
|
const start = Date.now();
|
|
46340
46748
|
while (Date.now() - start < LOCK_TIMEOUT_MS) {
|
|
46341
46749
|
try {
|
|
46342
|
-
await
|
|
46750
|
+
await writeFile8(lockPath, String(process.pid), { flag: "wx" });
|
|
46343
46751
|
return;
|
|
46344
46752
|
} catch (err) {
|
|
46345
46753
|
if (err.code === "EEXIST") {
|
|
@@ -46353,7 +46761,7 @@ async function acquireLock(lockPath) {
|
|
|
46353
46761
|
}
|
|
46354
46762
|
async function releaseLock(lockPath) {
|
|
46355
46763
|
try {
|
|
46356
|
-
await
|
|
46764
|
+
await unlink2(lockPath);
|
|
46357
46765
|
} catch {
|
|
46358
46766
|
}
|
|
46359
46767
|
}
|
|
@@ -46367,16 +46775,12 @@ var RepoManager = class {
|
|
|
46367
46775
|
async runGit(args, opts) {
|
|
46368
46776
|
const startedAt = Date.now();
|
|
46369
46777
|
if (this.verbose) {
|
|
46370
|
-
console.log(
|
|
46371
|
-
`[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`
|
|
46372
|
-
);
|
|
46778
|
+
console.log(`[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`);
|
|
46373
46779
|
}
|
|
46374
46780
|
try {
|
|
46375
|
-
const output = await
|
|
46781
|
+
const output = await git2(args, opts);
|
|
46376
46782
|
if (this.verbose) {
|
|
46377
|
-
console.log(
|
|
46378
|
-
`[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`
|
|
46379
|
-
);
|
|
46783
|
+
console.log(`[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`);
|
|
46380
46784
|
}
|
|
46381
46785
|
return output;
|
|
46382
46786
|
} catch (error40) {
|
|
@@ -46396,9 +46800,9 @@ var RepoManager = class {
|
|
|
46396
46800
|
*/
|
|
46397
46801
|
async ensureCache(source, depth, resolve2) {
|
|
46398
46802
|
const key = cacheKey(source);
|
|
46399
|
-
const cachePath =
|
|
46803
|
+
const cachePath = path37.join(this.cacheDir, key);
|
|
46400
46804
|
const lockPath = `${cachePath}.lock`;
|
|
46401
|
-
const cacheExists =
|
|
46805
|
+
const cacheExists = existsSync3(path37.join(cachePath, "HEAD"));
|
|
46402
46806
|
if (this.verbose) {
|
|
46403
46807
|
console.log(
|
|
46404
46808
|
`[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve2 ?? "remote"} cache=${cachePath}`
|
|
@@ -46416,13 +46820,11 @@ var RepoManager = class {
|
|
|
46416
46820
|
`No cache found for \`${url2}\`. Run \`agentv cache add --url ${url2} --from <local-path>\` to seed it.`
|
|
46417
46821
|
);
|
|
46418
46822
|
}
|
|
46419
|
-
await
|
|
46823
|
+
await mkdir12(this.cacheDir, { recursive: true });
|
|
46420
46824
|
const lockStartedAt = Date.now();
|
|
46421
46825
|
await acquireLock(lockPath);
|
|
46422
46826
|
if (this.verbose) {
|
|
46423
|
-
console.log(
|
|
46424
|
-
`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`
|
|
46425
|
-
);
|
|
46827
|
+
console.log(`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`);
|
|
46426
46828
|
}
|
|
46427
46829
|
try {
|
|
46428
46830
|
if (cacheExists) {
|
|
@@ -46460,7 +46862,7 @@ var RepoManager = class {
|
|
|
46460
46862
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
46461
46863
|
*/
|
|
46462
46864
|
async materialize(repo, workspacePath) {
|
|
46463
|
-
const targetDir =
|
|
46865
|
+
const targetDir = path37.join(workspacePath, repo.path);
|
|
46464
46866
|
const startedAt = Date.now();
|
|
46465
46867
|
if (this.verbose) {
|
|
46466
46868
|
console.log(
|
|
@@ -46555,14 +46957,14 @@ var RepoManager = class {
|
|
|
46555
46957
|
async reset(repos, workspacePath, strategy) {
|
|
46556
46958
|
if (strategy === "recreate") {
|
|
46557
46959
|
for (const repo of repos) {
|
|
46558
|
-
const targetDir =
|
|
46559
|
-
await
|
|
46960
|
+
const targetDir = path37.join(workspacePath, repo.path);
|
|
46961
|
+
await rm6(targetDir, { recursive: true, force: true });
|
|
46560
46962
|
}
|
|
46561
46963
|
await this.materializeAll(repos, workspacePath);
|
|
46562
46964
|
return;
|
|
46563
46965
|
}
|
|
46564
46966
|
for (const repo of repos) {
|
|
46565
|
-
const targetDir =
|
|
46967
|
+
const targetDir = path37.join(workspacePath, repo.path);
|
|
46566
46968
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
46567
46969
|
await this.runGit(["clean", "-fd"], { cwd: targetDir });
|
|
46568
46970
|
}
|
|
@@ -46574,21 +46976,21 @@ var RepoManager = class {
|
|
|
46574
46976
|
async seedCache(localPath, remoteUrl, opts) {
|
|
46575
46977
|
const source = { type: "git", url: remoteUrl };
|
|
46576
46978
|
const key = cacheKey(source);
|
|
46577
|
-
const cachePath =
|
|
46979
|
+
const cachePath = path37.join(this.cacheDir, key);
|
|
46578
46980
|
const lockPath = `${cachePath}.lock`;
|
|
46579
|
-
await
|
|
46981
|
+
await mkdir12(this.cacheDir, { recursive: true });
|
|
46580
46982
|
await acquireLock(lockPath);
|
|
46581
46983
|
try {
|
|
46582
|
-
if (
|
|
46984
|
+
if (existsSync3(path37.join(cachePath, "HEAD"))) {
|
|
46583
46985
|
if (!opts?.force) {
|
|
46584
46986
|
throw new Error(
|
|
46585
46987
|
`Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
|
|
46586
46988
|
);
|
|
46587
46989
|
}
|
|
46588
|
-
await
|
|
46990
|
+
await rm6(cachePath, { recursive: true, force: true });
|
|
46589
46991
|
}
|
|
46590
|
-
await
|
|
46591
|
-
await
|
|
46992
|
+
await git2(["clone", "--mirror", "--bare", localPath, cachePath]);
|
|
46993
|
+
await git2(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
|
|
46592
46994
|
} finally {
|
|
46593
46995
|
await releaseLock(lockPath);
|
|
46594
46996
|
}
|
|
@@ -46596,37 +46998,37 @@ var RepoManager = class {
|
|
|
46596
46998
|
}
|
|
46597
46999
|
/** Remove the entire cache directory. */
|
|
46598
47000
|
async cleanCache() {
|
|
46599
|
-
await
|
|
47001
|
+
await rm6(this.cacheDir, { recursive: true, force: true });
|
|
46600
47002
|
}
|
|
46601
47003
|
};
|
|
46602
47004
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
46603
47005
|
if (!templatePath) {
|
|
46604
47006
|
return void 0;
|
|
46605
47007
|
}
|
|
46606
|
-
const resolved =
|
|
47008
|
+
const resolved = path38.resolve(templatePath);
|
|
46607
47009
|
const stats = await stat6(resolved);
|
|
46608
47010
|
if (stats.isFile()) {
|
|
46609
47011
|
return {
|
|
46610
|
-
dir:
|
|
47012
|
+
dir: path38.dirname(resolved),
|
|
46611
47013
|
workspaceFile: resolved
|
|
46612
47014
|
};
|
|
46613
47015
|
}
|
|
46614
47016
|
if (!stats.isDirectory()) {
|
|
46615
47017
|
throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
|
|
46616
47018
|
}
|
|
46617
|
-
const entries = await
|
|
47019
|
+
const entries = await readdir5(resolved);
|
|
46618
47020
|
const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
|
|
46619
47021
|
if (workspaceFiles.length === 1) {
|
|
46620
47022
|
return {
|
|
46621
47023
|
dir: resolved,
|
|
46622
|
-
workspaceFile:
|
|
47024
|
+
workspaceFile: path38.join(resolved, workspaceFiles[0])
|
|
46623
47025
|
};
|
|
46624
47026
|
}
|
|
46625
47027
|
if (workspaceFiles.length > 1) {
|
|
46626
47028
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
46627
47029
|
return {
|
|
46628
47030
|
dir: resolved,
|
|
46629
|
-
workspaceFile: conventionFile ?
|
|
47031
|
+
workspaceFile: conventionFile ? path38.join(resolved, conventionFile) : void 0
|
|
46630
47032
|
};
|
|
46631
47033
|
}
|
|
46632
47034
|
return { dir: resolved };
|
|
@@ -46704,7 +47106,10 @@ async function runEvaluation(options) {
|
|
|
46704
47106
|
trials,
|
|
46705
47107
|
streamCallbacks,
|
|
46706
47108
|
totalBudgetUsd,
|
|
46707
|
-
failOnError
|
|
47109
|
+
failOnError,
|
|
47110
|
+
poolWorkspaces,
|
|
47111
|
+
poolMaxSlots: configPoolMaxSlots,
|
|
47112
|
+
workspace: userWorkspacePath
|
|
46708
47113
|
} = options;
|
|
46709
47114
|
let useCache = options.useCache;
|
|
46710
47115
|
if (trials && trials.count > 1 && useCache) {
|
|
@@ -46778,7 +47183,7 @@ async function runEvaluation(options) {
|
|
|
46778
47183
|
];
|
|
46779
47184
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveJudgeProvider);
|
|
46780
47185
|
const typeRegistry = createBuiltinRegistry();
|
|
46781
|
-
const discoveryBaseDir = evalFilePath ?
|
|
47186
|
+
const discoveryBaseDir = evalFilePath ? path39.dirname(path39.resolve(evalFilePath)) : process.cwd();
|
|
46782
47187
|
const evalDir = discoveryBaseDir;
|
|
46783
47188
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
46784
47189
|
const providerRegistry = createBuiltinProviderRegistry();
|
|
@@ -46840,13 +47245,19 @@ async function runEvaluation(options) {
|
|
|
46840
47245
|
}
|
|
46841
47246
|
};
|
|
46842
47247
|
const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
|
|
46843
|
-
|
|
47248
|
+
if (userWorkspacePath && isPerTestIsolation) {
|
|
47249
|
+
throw new Error(
|
|
47250
|
+
"--workspace is incompatible with isolation: per_test. Use isolation: shared (default)."
|
|
47251
|
+
);
|
|
47252
|
+
}
|
|
47253
|
+
const hasSharedWorkspace = !!(userWorkspacePath || workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
|
|
47254
|
+
const usePool = poolWorkspaces === true && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !userWorkspacePath;
|
|
46844
47255
|
const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
|
|
46845
|
-
const workers = hasSharedWorkspace ? 1 : requestedWorkers;
|
|
47256
|
+
const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
|
|
46846
47257
|
setupLog(
|
|
46847
|
-
`sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
|
|
47258
|
+
`sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
|
|
46848
47259
|
);
|
|
46849
|
-
if (hasSharedWorkspace && requestedWorkers > 1) {
|
|
47260
|
+
if (hasSharedWorkspace && !usePool && requestedWorkers > 1) {
|
|
46850
47261
|
console.warn(
|
|
46851
47262
|
`Warning: Shared workspace requires sequential execution. Overriding workers from ${requestedWorkers} to 1.`
|
|
46852
47263
|
);
|
|
@@ -46855,7 +47266,37 @@ async function runEvaluation(options) {
|
|
|
46855
47266
|
let sharedWorkspacePath;
|
|
46856
47267
|
let sharedBaselineCommit;
|
|
46857
47268
|
let beforeAllOutput;
|
|
46858
|
-
|
|
47269
|
+
let poolManager;
|
|
47270
|
+
let poolSlot;
|
|
47271
|
+
const poolSlots = [];
|
|
47272
|
+
const availablePoolSlots = [];
|
|
47273
|
+
const poolSlotBaselines = /* @__PURE__ */ new Map();
|
|
47274
|
+
const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
|
|
47275
|
+
if (userWorkspacePath) {
|
|
47276
|
+
sharedWorkspacePath = userWorkspacePath;
|
|
47277
|
+
setupLog(`using user-provided workspace: ${userWorkspacePath}`);
|
|
47278
|
+
} else if (usePool && suiteWorkspace?.repos) {
|
|
47279
|
+
const slotsNeeded = workers;
|
|
47280
|
+
setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
|
|
47281
|
+
poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
|
|
47282
|
+
const poolRepoManager = new RepoManager(void 0, verbose);
|
|
47283
|
+
for (let i = 0; i < slotsNeeded; i++) {
|
|
47284
|
+
const slot = await poolManager.acquireWorkspace({
|
|
47285
|
+
templatePath: workspaceTemplate,
|
|
47286
|
+
repos: suiteWorkspace.repos,
|
|
47287
|
+
maxSlots: poolMaxSlots,
|
|
47288
|
+
repoManager: poolRepoManager
|
|
47289
|
+
});
|
|
47290
|
+
poolSlots.push(slot);
|
|
47291
|
+
setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
|
|
47292
|
+
}
|
|
47293
|
+
if (slotsNeeded === 1) {
|
|
47294
|
+
poolSlot = poolSlots[0];
|
|
47295
|
+
sharedWorkspacePath = poolSlot.path;
|
|
47296
|
+
} else {
|
|
47297
|
+
availablePoolSlots.push(...poolSlots);
|
|
47298
|
+
}
|
|
47299
|
+
} else if (workspaceTemplate) {
|
|
46859
47300
|
setupLog(`creating shared workspace from template: ${workspaceTemplate}`);
|
|
46860
47301
|
try {
|
|
46861
47302
|
sharedWorkspacePath = await createTempWorkspace(workspaceTemplate, evalRunId, "shared");
|
|
@@ -46864,288 +47305,344 @@ async function runEvaluation(options) {
|
|
|
46864
47305
|
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
46865
47306
|
throw new Error(`Failed to create shared workspace: ${message}`);
|
|
46866
47307
|
}
|
|
47308
|
+
} else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
|
|
47309
|
+
sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
|
|
47310
|
+
await mkdir13(sharedWorkspacePath, { recursive: true });
|
|
47311
|
+
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
47312
|
+
}
|
|
47313
|
+
try {
|
|
46867
47314
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
46868
|
-
const copiedWorkspaceFile =
|
|
47315
|
+
const copiedWorkspaceFile = path39.join(sharedWorkspacePath, path39.basename(suiteWorkspaceFile));
|
|
46869
47316
|
try {
|
|
46870
47317
|
await stat7(copiedWorkspaceFile);
|
|
46871
47318
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
46872
47319
|
} catch {
|
|
46873
47320
|
}
|
|
46874
47321
|
}
|
|
46875
|
-
|
|
46876
|
-
sharedWorkspacePath
|
|
46877
|
-
|
|
46878
|
-
|
|
46879
|
-
|
|
46880
|
-
|
|
46881
|
-
|
|
46882
|
-
|
|
46883
|
-
|
|
46884
|
-
|
|
46885
|
-
|
|
46886
|
-
|
|
46887
|
-
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
46888
|
-
if (sharedWorkspacePath) {
|
|
46889
|
-
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
46890
|
-
});
|
|
46891
|
-
}
|
|
46892
|
-
throw new Error(`Failed to materialize repos: ${message}`);
|
|
46893
|
-
}
|
|
46894
|
-
}
|
|
46895
|
-
if (sharedWorkspacePath && suiteWorkspace?.before_all) {
|
|
46896
|
-
const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
|
|
46897
|
-
setupLog(
|
|
46898
|
-
`running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
46899
|
-
);
|
|
46900
|
-
const scriptContext = {
|
|
46901
|
-
workspacePath: sharedWorkspacePath,
|
|
46902
|
-
testId: "__before_all__",
|
|
46903
|
-
evalRunId,
|
|
46904
|
-
evalDir
|
|
46905
|
-
};
|
|
46906
|
-
try {
|
|
46907
|
-
beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
|
|
46908
|
-
setupLog("shared before_all completed");
|
|
46909
|
-
} catch (error40) {
|
|
46910
|
-
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
46911
|
-
if (sharedWorkspacePath) {
|
|
46912
|
-
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
46913
|
-
});
|
|
46914
|
-
}
|
|
46915
|
-
throw new Error(`before_all script failed: ${message}`);
|
|
46916
|
-
}
|
|
46917
|
-
}
|
|
46918
|
-
if (sharedWorkspacePath) {
|
|
46919
|
-
try {
|
|
46920
|
-
sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
|
|
46921
|
-
setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
|
|
46922
|
-
} catch {
|
|
46923
|
-
setupLog("shared baseline initialization skipped (non-fatal)");
|
|
46924
|
-
}
|
|
46925
|
-
}
|
|
46926
|
-
let nextWorkerId = 1;
|
|
46927
|
-
const workerIdByEvalId = /* @__PURE__ */ new Map();
|
|
46928
|
-
let beforeAllOutputAttached = false;
|
|
46929
|
-
let cumulativeBudgetCost = 0;
|
|
46930
|
-
let budgetExhausted = false;
|
|
46931
|
-
let failOnErrorTriggered = false;
|
|
46932
|
-
const promises = filteredEvalCases.map(
|
|
46933
|
-
(evalCase) => limit(async () => {
|
|
46934
|
-
const workerId = nextWorkerId++;
|
|
46935
|
-
workerIdByEvalId.set(evalCase.id, workerId);
|
|
46936
|
-
if (totalBudgetUsd !== void 0 && budgetExhausted) {
|
|
46937
|
-
const budgetResult = {
|
|
46938
|
-
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
46939
|
-
testId: evalCase.id,
|
|
46940
|
-
dataset: evalCase.dataset,
|
|
46941
|
-
score: 0,
|
|
46942
|
-
hits: [],
|
|
46943
|
-
misses: [],
|
|
46944
|
-
answer: "",
|
|
46945
|
-
target: target.name,
|
|
46946
|
-
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
46947
|
-
budgetExceeded: true,
|
|
46948
|
-
executionStatus: "execution_error",
|
|
46949
|
-
failureStage: "setup",
|
|
46950
|
-
failureReasonCode: "budget_exceeded",
|
|
46951
|
-
executionError: {
|
|
46952
|
-
message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
46953
|
-
stage: "setup"
|
|
46954
|
-
}
|
|
46955
|
-
};
|
|
46956
|
-
if (onProgress) {
|
|
46957
|
-
await onProgress({
|
|
46958
|
-
workerId,
|
|
46959
|
-
testId: evalCase.id,
|
|
46960
|
-
status: "failed",
|
|
46961
|
-
completedAt: Date.now(),
|
|
46962
|
-
error: budgetResult.error
|
|
47322
|
+
const repoManager = suiteWorkspace?.repos?.length && !usePool && !userWorkspacePath ? new RepoManager(void 0, verbose) : void 0;
|
|
47323
|
+
if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
|
|
47324
|
+
setupLog(
|
|
47325
|
+
`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
|
|
47326
|
+
);
|
|
47327
|
+
try {
|
|
47328
|
+
await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
|
|
47329
|
+
setupLog("shared repo materialization complete");
|
|
47330
|
+
} catch (error40) {
|
|
47331
|
+
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
47332
|
+
if (sharedWorkspacePath && !userWorkspacePath) {
|
|
47333
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
46963
47334
|
});
|
|
46964
47335
|
}
|
|
46965
|
-
|
|
46966
|
-
await onResult(budgetResult);
|
|
46967
|
-
}
|
|
46968
|
-
return budgetResult;
|
|
47336
|
+
throw new Error(`Failed to materialize repos: ${message}`);
|
|
46969
47337
|
}
|
|
46970
|
-
|
|
46971
|
-
|
|
46972
|
-
|
|
46973
|
-
|
|
46974
|
-
|
|
46975
|
-
|
|
46976
|
-
|
|
46977
|
-
|
|
46978
|
-
|
|
46979
|
-
|
|
46980
|
-
|
|
46981
|
-
|
|
46982
|
-
|
|
46983
|
-
|
|
46984
|
-
|
|
46985
|
-
|
|
46986
|
-
|
|
46987
|
-
if (
|
|
46988
|
-
await
|
|
46989
|
-
workerId,
|
|
46990
|
-
testId: evalCase.id,
|
|
46991
|
-
status: "failed",
|
|
46992
|
-
completedAt: Date.now(),
|
|
46993
|
-
error: haltResult.error
|
|
47338
|
+
}
|
|
47339
|
+
if (sharedWorkspacePath && suiteWorkspace?.before_all) {
|
|
47340
|
+
const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
|
|
47341
|
+
setupLog(
|
|
47342
|
+
`running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
47343
|
+
);
|
|
47344
|
+
const scriptContext = {
|
|
47345
|
+
workspacePath: sharedWorkspacePath,
|
|
47346
|
+
testId: "__before_all__",
|
|
47347
|
+
evalRunId,
|
|
47348
|
+
evalDir
|
|
47349
|
+
};
|
|
47350
|
+
try {
|
|
47351
|
+
beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
|
|
47352
|
+
setupLog("shared before_all completed");
|
|
47353
|
+
} catch (error40) {
|
|
47354
|
+
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
47355
|
+
if (sharedWorkspacePath && !userWorkspacePath) {
|
|
47356
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
46994
47357
|
});
|
|
46995
47358
|
}
|
|
46996
|
-
|
|
46997
|
-
await onResult(haltResult);
|
|
46998
|
-
}
|
|
46999
|
-
return haltResult;
|
|
47000
|
-
}
|
|
47001
|
-
if (onProgress) {
|
|
47002
|
-
await onProgress({
|
|
47003
|
-
workerId,
|
|
47004
|
-
testId: evalCase.id,
|
|
47005
|
-
status: "running",
|
|
47006
|
-
startedAt: Date.now()
|
|
47007
|
-
});
|
|
47359
|
+
throw new Error(`before_all script failed: ${message}`);
|
|
47008
47360
|
}
|
|
47009
|
-
|
|
47010
|
-
|
|
47011
|
-
|
|
47012
|
-
|
|
47013
|
-
|
|
47014
|
-
|
|
47015
|
-
|
|
47016
|
-
maxRetries,
|
|
47017
|
-
agentTimeoutMs,
|
|
47018
|
-
cache,
|
|
47019
|
-
useCache,
|
|
47020
|
-
now,
|
|
47021
|
-
judgeProvider,
|
|
47022
|
-
targetResolver,
|
|
47023
|
-
availableTargets,
|
|
47361
|
+
}
|
|
47362
|
+
if (availablePoolSlots.length > 0 && suiteWorkspace?.before_all) {
|
|
47363
|
+
for (const slot of availablePoolSlots) {
|
|
47364
|
+
setupLog(`running before_all on pool slot ${slot.index}`);
|
|
47365
|
+
const scriptContext = {
|
|
47366
|
+
workspacePath: slot.path,
|
|
47367
|
+
testId: "__before_all__",
|
|
47024
47368
|
evalRunId,
|
|
47025
|
-
keepWorkspaces,
|
|
47026
|
-
cleanupWorkspaces,
|
|
47027
|
-
sharedWorkspacePath,
|
|
47028
|
-
sharedBaselineCommit,
|
|
47029
|
-
suiteWorkspaceFile,
|
|
47030
|
-
streamCallbacks,
|
|
47031
|
-
typeRegistry,
|
|
47032
|
-
repoManager,
|
|
47033
47369
|
evalDir
|
|
47034
47370
|
};
|
|
47035
|
-
|
|
47036
|
-
|
|
47037
|
-
|
|
47038
|
-
|
|
47039
|
-
|
|
47040
|
-
|
|
47041
|
-
|
|
47371
|
+
try {
|
|
47372
|
+
const output = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
|
|
47373
|
+
if (!beforeAllOutput) beforeAllOutput = output;
|
|
47374
|
+
setupLog(`before_all completed on pool slot ${slot.index}`);
|
|
47375
|
+
} catch (error40) {
|
|
47376
|
+
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
47377
|
+
throw new Error(`before_all script failed on pool slot ${slot.index}: ${message}`);
|
|
47378
|
+
}
|
|
47379
|
+
}
|
|
47380
|
+
}
|
|
47381
|
+
if (sharedWorkspacePath) {
|
|
47382
|
+
try {
|
|
47383
|
+
sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
|
|
47384
|
+
setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
|
|
47385
|
+
} catch {
|
|
47386
|
+
setupLog("shared baseline initialization skipped (non-fatal)");
|
|
47387
|
+
}
|
|
47388
|
+
}
|
|
47389
|
+
if (availablePoolSlots.length > 0) {
|
|
47390
|
+
for (const slot of availablePoolSlots) {
|
|
47391
|
+
try {
|
|
47392
|
+
const baseline = await initializeBaseline(slot.path);
|
|
47393
|
+
poolSlotBaselines.set(slot.path, baseline);
|
|
47394
|
+
setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
|
|
47395
|
+
} catch {
|
|
47396
|
+
setupLog(`pool slot ${slot.index} baseline initialization skipped (non-fatal)`);
|
|
47397
|
+
}
|
|
47398
|
+
}
|
|
47399
|
+
}
|
|
47400
|
+
let nextWorkerId = 1;
|
|
47401
|
+
const workerIdByEvalId = /* @__PURE__ */ new Map();
|
|
47402
|
+
let beforeAllOutputAttached = false;
|
|
47403
|
+
let cumulativeBudgetCost = 0;
|
|
47404
|
+
let budgetExhausted = false;
|
|
47405
|
+
let failOnErrorTriggered = false;
|
|
47406
|
+
const promises = filteredEvalCases.map(
|
|
47407
|
+
(evalCase) => limit(async () => {
|
|
47408
|
+
const workerId = nextWorkerId++;
|
|
47409
|
+
workerIdByEvalId.set(evalCase.id, workerId);
|
|
47410
|
+
if (totalBudgetUsd !== void 0 && budgetExhausted) {
|
|
47411
|
+
const budgetResult = {
|
|
47412
|
+
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
47413
|
+
testId: evalCase.id,
|
|
47414
|
+
dataset: evalCase.dataset,
|
|
47415
|
+
score: 0,
|
|
47416
|
+
hits: [],
|
|
47417
|
+
misses: [],
|
|
47418
|
+
answer: "",
|
|
47419
|
+
target: target.name,
|
|
47420
|
+
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
47421
|
+
budgetExceeded: true,
|
|
47422
|
+
executionStatus: "execution_error",
|
|
47423
|
+
failureStage: "setup",
|
|
47424
|
+
failureReasonCode: "budget_exceeded",
|
|
47425
|
+
executionError: {
|
|
47426
|
+
message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
47427
|
+
stage: "setup"
|
|
47042
47428
|
}
|
|
47043
|
-
}
|
|
47044
|
-
|
|
47429
|
+
};
|
|
47430
|
+
if (onProgress) {
|
|
47431
|
+
await onProgress({
|
|
47432
|
+
workerId,
|
|
47433
|
+
testId: evalCase.id,
|
|
47434
|
+
status: "failed",
|
|
47435
|
+
completedAt: Date.now(),
|
|
47436
|
+
error: budgetResult.error
|
|
47437
|
+
});
|
|
47045
47438
|
}
|
|
47046
|
-
if (
|
|
47047
|
-
|
|
47048
|
-
if (cumulativeBudgetCost >= totalBudgetUsd) {
|
|
47049
|
-
budgetExhausted = true;
|
|
47050
|
-
}
|
|
47439
|
+
if (onResult) {
|
|
47440
|
+
await onResult(budgetResult);
|
|
47051
47441
|
}
|
|
47442
|
+
return budgetResult;
|
|
47052
47443
|
}
|
|
47053
|
-
if (failOnError === true &&
|
|
47054
|
-
|
|
47055
|
-
|
|
47056
|
-
|
|
47057
|
-
|
|
47058
|
-
|
|
47444
|
+
if (failOnError === true && failOnErrorTriggered) {
|
|
47445
|
+
const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
|
|
47446
|
+
const haltResult = {
|
|
47447
|
+
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
47448
|
+
testId: evalCase.id,
|
|
47449
|
+
dataset: evalCase.dataset,
|
|
47450
|
+
score: 0,
|
|
47451
|
+
hits: [],
|
|
47452
|
+
misses: [],
|
|
47453
|
+
answer: "",
|
|
47454
|
+
target: target.name,
|
|
47455
|
+
error: errorMsg,
|
|
47456
|
+
executionStatus: "execution_error",
|
|
47457
|
+
failureStage: "setup",
|
|
47458
|
+
failureReasonCode: "error_threshold_exceeded",
|
|
47459
|
+
executionError: { message: errorMsg, stage: "setup" }
|
|
47460
|
+
};
|
|
47461
|
+
if (onProgress) {
|
|
47462
|
+
await onProgress({
|
|
47463
|
+
workerId,
|
|
47464
|
+
testId: evalCase.id,
|
|
47465
|
+
status: "failed",
|
|
47466
|
+
completedAt: Date.now(),
|
|
47467
|
+
error: haltResult.error
|
|
47468
|
+
});
|
|
47469
|
+
}
|
|
47470
|
+
if (onResult) {
|
|
47471
|
+
await onResult(haltResult);
|
|
47472
|
+
}
|
|
47473
|
+
return haltResult;
|
|
47059
47474
|
}
|
|
47060
47475
|
if (onProgress) {
|
|
47061
47476
|
await onProgress({
|
|
47062
47477
|
workerId,
|
|
47063
47478
|
testId: evalCase.id,
|
|
47064
|
-
status:
|
|
47065
|
-
startedAt:
|
|
47066
|
-
// Not used for completed status
|
|
47067
|
-
completedAt: Date.now(),
|
|
47068
|
-
error: result.error
|
|
47479
|
+
status: "running",
|
|
47480
|
+
startedAt: Date.now()
|
|
47069
47481
|
});
|
|
47070
47482
|
}
|
|
47071
|
-
|
|
47072
|
-
|
|
47483
|
+
const testPoolSlot = availablePoolSlots.length > 0 ? availablePoolSlots.pop() : void 0;
|
|
47484
|
+
const testWorkspacePath = testPoolSlot?.path ?? sharedWorkspacePath;
|
|
47485
|
+
const testBaselineCommit = testPoolSlot ? poolSlotBaselines.get(testPoolSlot.path) : sharedBaselineCommit;
|
|
47486
|
+
try {
|
|
47487
|
+
const judgeProvider = await resolveJudgeProvider(target);
|
|
47488
|
+
const runCaseOptions = {
|
|
47489
|
+
evalCase,
|
|
47490
|
+
provider: primaryProvider,
|
|
47491
|
+
target,
|
|
47492
|
+
evaluators: evaluatorRegistry,
|
|
47493
|
+
maxRetries,
|
|
47494
|
+
agentTimeoutMs,
|
|
47495
|
+
cache,
|
|
47496
|
+
useCache,
|
|
47497
|
+
now,
|
|
47498
|
+
judgeProvider,
|
|
47499
|
+
targetResolver,
|
|
47500
|
+
availableTargets,
|
|
47501
|
+
evalRunId,
|
|
47502
|
+
keepWorkspaces,
|
|
47503
|
+
cleanupWorkspaces,
|
|
47504
|
+
sharedWorkspacePath: testWorkspacePath,
|
|
47505
|
+
sharedBaselineCommit: testBaselineCommit,
|
|
47506
|
+
suiteWorkspaceFile,
|
|
47507
|
+
streamCallbacks,
|
|
47508
|
+
typeRegistry,
|
|
47509
|
+
repoManager,
|
|
47510
|
+
evalDir
|
|
47511
|
+
};
|
|
47512
|
+
let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
|
|
47513
|
+
if (totalBudgetUsd !== void 0) {
|
|
47514
|
+
let caseCost;
|
|
47515
|
+
if (result.trials && result.trials.length > 0) {
|
|
47516
|
+
const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
|
|
47517
|
+
if (trialCostSum > 0) {
|
|
47518
|
+
caseCost = trialCostSum;
|
|
47519
|
+
}
|
|
47520
|
+
} else {
|
|
47521
|
+
caseCost = result.costUsd;
|
|
47522
|
+
}
|
|
47523
|
+
if (caseCost !== void 0) {
|
|
47524
|
+
cumulativeBudgetCost += caseCost;
|
|
47525
|
+
if (cumulativeBudgetCost >= totalBudgetUsd) {
|
|
47526
|
+
budgetExhausted = true;
|
|
47527
|
+
}
|
|
47528
|
+
}
|
|
47529
|
+
}
|
|
47530
|
+
if (failOnError === true && result.executionStatus === "execution_error") {
|
|
47531
|
+
failOnErrorTriggered = true;
|
|
47532
|
+
}
|
|
47533
|
+
if (beforeAllOutput && !beforeAllOutputAttached) {
|
|
47534
|
+
result = { ...result, beforeAllOutput };
|
|
47535
|
+
beforeAllOutputAttached = true;
|
|
47536
|
+
}
|
|
47537
|
+
if (onProgress) {
|
|
47538
|
+
await onProgress({
|
|
47539
|
+
workerId,
|
|
47540
|
+
testId: evalCase.id,
|
|
47541
|
+
status: result.error ? "failed" : "completed",
|
|
47542
|
+
startedAt: 0,
|
|
47543
|
+
// Not used for completed status
|
|
47544
|
+
completedAt: Date.now(),
|
|
47545
|
+
error: result.error
|
|
47546
|
+
});
|
|
47547
|
+
}
|
|
47548
|
+
if (onResult) {
|
|
47549
|
+
await onResult(result);
|
|
47550
|
+
}
|
|
47551
|
+
return result;
|
|
47552
|
+
} catch (error40) {
|
|
47553
|
+
if (onProgress) {
|
|
47554
|
+
await onProgress({
|
|
47555
|
+
workerId,
|
|
47556
|
+
testId: evalCase.id,
|
|
47557
|
+
status: "failed",
|
|
47558
|
+
completedAt: Date.now(),
|
|
47559
|
+
error: error40 instanceof Error ? error40.message : String(error40)
|
|
47560
|
+
});
|
|
47561
|
+
}
|
|
47562
|
+
throw error40;
|
|
47563
|
+
} finally {
|
|
47564
|
+
if (testPoolSlot) {
|
|
47565
|
+
availablePoolSlots.push(testPoolSlot);
|
|
47566
|
+
}
|
|
47073
47567
|
}
|
|
47074
|
-
|
|
47075
|
-
|
|
47076
|
-
|
|
47077
|
-
|
|
47078
|
-
|
|
47079
|
-
|
|
47080
|
-
|
|
47081
|
-
|
|
47082
|
-
|
|
47083
|
-
|
|
47568
|
+
})
|
|
47569
|
+
);
|
|
47570
|
+
const settled = await Promise.allSettled(promises);
|
|
47571
|
+
const results = [];
|
|
47572
|
+
for (let i = 0; i < settled.length; i++) {
|
|
47573
|
+
const outcome = settled[i];
|
|
47574
|
+
if (outcome.status === "fulfilled") {
|
|
47575
|
+
results.push(outcome.value);
|
|
47576
|
+
} else {
|
|
47577
|
+
const evalCase = filteredEvalCases[i];
|
|
47578
|
+
const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
|
|
47579
|
+
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
47580
|
+
const errorResult = buildErrorResult(
|
|
47581
|
+
evalCase,
|
|
47582
|
+
target.name,
|
|
47583
|
+
(now ?? (() => /* @__PURE__ */ new Date()))(),
|
|
47584
|
+
outcome.reason,
|
|
47585
|
+
promptInputs,
|
|
47586
|
+
primaryProvider,
|
|
47587
|
+
"agent",
|
|
47588
|
+
"provider_error"
|
|
47589
|
+
);
|
|
47590
|
+
results.push(errorResult);
|
|
47591
|
+
if (onResult) {
|
|
47592
|
+
await onResult(errorResult);
|
|
47084
47593
|
}
|
|
47085
|
-
throw error40;
|
|
47086
47594
|
}
|
|
47087
|
-
}
|
|
47088
|
-
|
|
47089
|
-
|
|
47090
|
-
|
|
47091
|
-
|
|
47092
|
-
|
|
47093
|
-
|
|
47094
|
-
|
|
47095
|
-
|
|
47096
|
-
|
|
47097
|
-
|
|
47098
|
-
|
|
47099
|
-
|
|
47100
|
-
|
|
47101
|
-
|
|
47102
|
-
|
|
47103
|
-
|
|
47104
|
-
|
|
47105
|
-
|
|
47106
|
-
|
|
47107
|
-
|
|
47108
|
-
);
|
|
47109
|
-
results.push(errorResult);
|
|
47110
|
-
if (onResult) {
|
|
47111
|
-
await onResult(errorResult);
|
|
47595
|
+
}
|
|
47596
|
+
const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
|
|
47597
|
+
if (afterAllWorkspaces.length > 0 && suiteWorkspace?.after_all) {
|
|
47598
|
+
for (const wsPath of afterAllWorkspaces) {
|
|
47599
|
+
const scriptContext = {
|
|
47600
|
+
workspacePath: wsPath,
|
|
47601
|
+
testId: "__after_all__",
|
|
47602
|
+
evalRunId,
|
|
47603
|
+
evalDir
|
|
47604
|
+
};
|
|
47605
|
+
try {
|
|
47606
|
+
const afterAllOutput = await executeWorkspaceScript(
|
|
47607
|
+
suiteWorkspace.after_all,
|
|
47608
|
+
scriptContext,
|
|
47609
|
+
"warn"
|
|
47610
|
+
);
|
|
47611
|
+
if (afterAllOutput && results.length > 0 && wsPath === afterAllWorkspaces[0]) {
|
|
47612
|
+
results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
|
|
47613
|
+
}
|
|
47614
|
+
} catch {
|
|
47615
|
+
}
|
|
47112
47616
|
}
|
|
47113
47617
|
}
|
|
47114
|
-
|
|
47115
|
-
|
|
47116
|
-
|
|
47117
|
-
|
|
47118
|
-
|
|
47119
|
-
|
|
47120
|
-
|
|
47121
|
-
|
|
47122
|
-
try {
|
|
47123
|
-
const afterAllOutput = await executeWorkspaceScript(
|
|
47124
|
-
suiteWorkspace.after_all,
|
|
47125
|
-
scriptContext,
|
|
47126
|
-
"warn"
|
|
47127
|
-
);
|
|
47128
|
-
if (afterAllOutput && results.length > 0) {
|
|
47129
|
-
results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
|
|
47618
|
+
if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !userWorkspacePath) {
|
|
47619
|
+
const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
|
|
47620
|
+
if (cleanupWorkspaces) {
|
|
47621
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
47622
|
+
});
|
|
47623
|
+
} else if (!hasFailure && !keepWorkspaces) {
|
|
47624
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
47625
|
+
});
|
|
47130
47626
|
}
|
|
47131
|
-
} catch {
|
|
47132
47627
|
}
|
|
47133
|
-
}
|
|
47134
|
-
if (sharedWorkspacePath) {
|
|
47135
|
-
const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
|
|
47136
47628
|
if (cleanupWorkspaces) {
|
|
47137
|
-
await
|
|
47138
|
-
});
|
|
47139
|
-
} else if (!hasFailure && !keepWorkspaces) {
|
|
47140
|
-
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
47629
|
+
await cleanupEvalWorkspaces(evalRunId).catch(() => {
|
|
47141
47630
|
});
|
|
47142
47631
|
}
|
|
47632
|
+
return results;
|
|
47633
|
+
} finally {
|
|
47634
|
+
if (poolManager) {
|
|
47635
|
+
if (poolSlot) {
|
|
47636
|
+
await poolManager.releaseSlot(poolSlot);
|
|
47637
|
+
}
|
|
47638
|
+
for (const slot of poolSlots) {
|
|
47639
|
+
if (slot !== poolSlot) {
|
|
47640
|
+
await poolManager.releaseSlot(slot).catch(() => {
|
|
47641
|
+
});
|
|
47642
|
+
}
|
|
47643
|
+
}
|
|
47644
|
+
}
|
|
47143
47645
|
}
|
|
47144
|
-
if (cleanupWorkspaces) {
|
|
47145
|
-
await cleanupEvalWorkspaces(evalRunId).catch(() => {
|
|
47146
|
-
});
|
|
47147
|
-
}
|
|
47148
|
-
return results;
|
|
47149
47646
|
}
|
|
47150
47647
|
async function runBatchEvaluation(options) {
|
|
47151
47648
|
const {
|
|
@@ -47362,7 +47859,7 @@ async function runEvalCase(options) {
|
|
|
47362
47859
|
);
|
|
47363
47860
|
}
|
|
47364
47861
|
if (caseWorkspaceFile && workspacePath) {
|
|
47365
|
-
const copiedFile =
|
|
47862
|
+
const copiedFile = path39.join(workspacePath, path39.basename(caseWorkspaceFile));
|
|
47366
47863
|
try {
|
|
47367
47864
|
await stat7(copiedFile);
|
|
47368
47865
|
caseWorkspaceFile = copiedFile;
|
|
@@ -47372,7 +47869,7 @@ async function runEvalCase(options) {
|
|
|
47372
47869
|
}
|
|
47373
47870
|
if (!workspacePath && (evalCase.workspace?.before_all || evalCase.workspace?.repos?.length) && evalRunId) {
|
|
47374
47871
|
workspacePath = getWorkspacePath(evalRunId, evalCase.id);
|
|
47375
|
-
await
|
|
47872
|
+
await mkdir13(workspacePath, { recursive: true });
|
|
47376
47873
|
}
|
|
47377
47874
|
if (evalCase.workspace?.repos?.length && workspacePath) {
|
|
47378
47875
|
const perCaseRepoManager = new RepoManager(void 0, setupDebug);
|
|
@@ -47972,7 +48469,7 @@ async function runEvaluatorList(options) {
|
|
|
47972
48469
|
fileChanges,
|
|
47973
48470
|
workspacePath
|
|
47974
48471
|
};
|
|
47975
|
-
const evalFileDir = evalCase.guideline_paths[0] ?
|
|
48472
|
+
const evalFileDir = evalCase.guideline_paths[0] ? path39.dirname(evalCase.guideline_paths[0]) : process.cwd();
|
|
47976
48473
|
const dispatchContext = {
|
|
47977
48474
|
judgeProvider,
|
|
47978
48475
|
targetResolver,
|
|
@@ -48206,7 +48703,7 @@ function extractProviderError(response) {
|
|
|
48206
48703
|
return trimmed.length > 0 ? trimmed : void 0;
|
|
48207
48704
|
}
|
|
48208
48705
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
48209
|
-
const hash =
|
|
48706
|
+
const hash = createHash3("sha256");
|
|
48210
48707
|
hash.update(provider.id);
|
|
48211
48708
|
hash.update(target.name);
|
|
48212
48709
|
hash.update(evalCase.id);
|
|
@@ -48293,13 +48790,13 @@ async function evaluate(config2) {
|
|
|
48293
48790
|
let evalCases;
|
|
48294
48791
|
let testFilePath;
|
|
48295
48792
|
if (config2.specFile) {
|
|
48296
|
-
testFilePath =
|
|
48793
|
+
testFilePath = path40.resolve(config2.specFile);
|
|
48297
48794
|
evalCases = await loadTests(testFilePath, repoRoot, {
|
|
48298
48795
|
verbose: config2.verbose,
|
|
48299
48796
|
filter: config2.filter
|
|
48300
48797
|
});
|
|
48301
48798
|
} else {
|
|
48302
|
-
testFilePath =
|
|
48799
|
+
testFilePath = path40.join(process.cwd(), "__programmatic__.yaml");
|
|
48303
48800
|
evalCases = (config2.tests ?? []).map((test) => {
|
|
48304
48801
|
const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
|
|
48305
48802
|
const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
|
|
@@ -48385,11 +48882,11 @@ function computeSummary(results, durationMs) {
|
|
|
48385
48882
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
48386
48883
|
async function discoverDefaultTarget(repoRoot) {
|
|
48387
48884
|
const cwd = process.cwd();
|
|
48388
|
-
const chain = buildDirectoryChain(
|
|
48885
|
+
const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
|
|
48389
48886
|
for (const dir of chain) {
|
|
48390
48887
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
48391
|
-
const targetsPath =
|
|
48392
|
-
if (!
|
|
48888
|
+
const targetsPath = path40.join(dir, candidate);
|
|
48889
|
+
if (!existsSync4(targetsPath)) continue;
|
|
48393
48890
|
try {
|
|
48394
48891
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
48395
48892
|
const defaultTarget = definitions.find((d) => d.name === "default");
|
|
@@ -48403,11 +48900,11 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
48403
48900
|
async function loadEnvHierarchy(repoRoot) {
|
|
48404
48901
|
const { readFileSync: readFileSync2 } = await import("node:fs");
|
|
48405
48902
|
const cwd = process.cwd();
|
|
48406
|
-
const chain = buildDirectoryChain(
|
|
48903
|
+
const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
|
|
48407
48904
|
const envFiles = [];
|
|
48408
48905
|
for (const dir of chain) {
|
|
48409
|
-
const envPath =
|
|
48410
|
-
if (
|
|
48906
|
+
const envPath = path40.join(dir, ".env");
|
|
48907
|
+
if (existsSync4(envPath)) envFiles.push(envPath);
|
|
48411
48908
|
}
|
|
48412
48909
|
for (let i = envFiles.length - 1; i >= 0; i--) {
|
|
48413
48910
|
try {
|
|
@@ -48482,12 +48979,12 @@ var CONFIG_FILE_NAMES = [
|
|
|
48482
48979
|
".agentv/config.js"
|
|
48483
48980
|
];
|
|
48484
48981
|
async function loadTsConfig(projectRoot) {
|
|
48485
|
-
const { existsSync:
|
|
48982
|
+
const { existsSync: existsSync5 } = await import("node:fs");
|
|
48486
48983
|
const { pathToFileURL } = await import("node:url");
|
|
48487
48984
|
const { join: join2 } = await import("node:path");
|
|
48488
48985
|
for (const fileName of CONFIG_FILE_NAMES) {
|
|
48489
48986
|
const filePath = join2(projectRoot, fileName);
|
|
48490
|
-
if (!
|
|
48987
|
+
if (!existsSync5(filePath)) {
|
|
48491
48988
|
continue;
|
|
48492
48989
|
}
|
|
48493
48990
|
try {
|
|
@@ -48587,7 +49084,7 @@ var ResponseCache = class {
|
|
|
48587
49084
|
async get(key) {
|
|
48588
49085
|
const filePath = this.keyToPath(key);
|
|
48589
49086
|
try {
|
|
48590
|
-
const data = await
|
|
49087
|
+
const data = await readFile12(filePath, "utf8");
|
|
48591
49088
|
return JSON.parse(data);
|
|
48592
49089
|
} catch {
|
|
48593
49090
|
return void 0;
|
|
@@ -48595,13 +49092,13 @@ var ResponseCache = class {
|
|
|
48595
49092
|
}
|
|
48596
49093
|
async set(key, value) {
|
|
48597
49094
|
const filePath = this.keyToPath(key);
|
|
48598
|
-
const dir =
|
|
48599
|
-
await
|
|
48600
|
-
await
|
|
49095
|
+
const dir = path41.dirname(filePath);
|
|
49096
|
+
await mkdir14(dir, { recursive: true });
|
|
49097
|
+
await writeFile9(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
48601
49098
|
}
|
|
48602
49099
|
keyToPath(key) {
|
|
48603
49100
|
const prefix = key.slice(0, 2);
|
|
48604
|
-
return
|
|
49101
|
+
return path41.join(this.cachePath, prefix, `${key}.json`);
|
|
48605
49102
|
}
|
|
48606
49103
|
};
|
|
48607
49104
|
function shouldEnableCache(params) {
|
|
@@ -49110,6 +49607,7 @@ export {
|
|
|
49110
49607
|
getGitCacheRoot,
|
|
49111
49608
|
getSubagentsRoot,
|
|
49112
49609
|
getTraceStateRoot,
|
|
49610
|
+
getWorkspacePoolRoot,
|
|
49113
49611
|
ensureVSCodeSubagents,
|
|
49114
49612
|
readTargetDefinitions,
|
|
49115
49613
|
listTargetNames,
|
|
@@ -49171,6 +49669,8 @@ export {
|
|
|
49171
49669
|
createTempWorkspace,
|
|
49172
49670
|
cleanupWorkspace,
|
|
49173
49671
|
cleanupEvalWorkspaces,
|
|
49672
|
+
computeWorkspaceFingerprint,
|
|
49673
|
+
WorkspacePoolManager,
|
|
49174
49674
|
RepoManager,
|
|
49175
49675
|
resolveWorkspaceTemplate,
|
|
49176
49676
|
executeWorkspaceScript,
|
|
@@ -49189,4 +49689,4 @@ export {
|
|
|
49189
49689
|
OtelStreamingObserver,
|
|
49190
49690
|
createAgentKernel
|
|
49191
49691
|
};
|
|
49192
|
-
//# sourceMappingURL=chunk-
|
|
49692
|
+
//# sourceMappingURL=chunk-VBK7BJLE.js.map
|