agentv 2.14.2 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-IO5LMHZH.js → chunk-IKGJTJSU.js} +204 -16
- package/dist/chunk-IKGJTJSU.js.map +1 -0
- package/dist/{chunk-OQN2GDEU.js → chunk-VBK7BJLE.js} +1230 -578
- package/dist/chunk-VBK7BJLE.js.map +1 -0
- package/dist/{chunk-LHDGRK2S.js → chunk-ZDSLKUCM.js} +28 -5
- package/dist/chunk-ZDSLKUCM.js.map +1 -0
- package/dist/cli.js +11 -4
- package/dist/cli.js.map +1 -1
- package/dist/{dist-QR5OZ4DH.js → dist-XZPR3AGM.js} +18 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-R22BRR6A.js → interactive-SSGXAAKA.js} +5 -5
- package/dist/{interactive-R22BRR6A.js.map → interactive-SSGXAAKA.js.map} +1 -1
- package/package.json +1 -1
- package/dist/chunk-IO5LMHZH.js.map +0 -1
- package/dist/chunk-LHDGRK2S.js.map +0 -1
- package/dist/chunk-OQN2GDEU.js.map +0 -1
- /package/dist/{dist-QR5OZ4DH.js.map → dist-XZPR3AGM.js.map} +0 -0
|
@@ -632,8 +632,8 @@ function getErrorMap() {
|
|
|
632
632
|
|
|
633
633
|
// ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
|
|
634
634
|
var makeIssue = (params) => {
|
|
635
|
-
const { data, path:
|
|
636
|
-
const fullPath = [...
|
|
635
|
+
const { data, path: path42, errorMaps, issueData } = params;
|
|
636
|
+
const fullPath = [...path42, ...issueData.path || []];
|
|
637
637
|
const fullIssue = {
|
|
638
638
|
...issueData,
|
|
639
639
|
path: fullPath
|
|
@@ -749,11 +749,11 @@ var errorUtil;
|
|
|
749
749
|
|
|
750
750
|
// ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
|
|
751
751
|
var ParseInputLazyPath = class {
|
|
752
|
-
constructor(parent, value,
|
|
752
|
+
constructor(parent, value, path42, key) {
|
|
753
753
|
this._cachedPath = [];
|
|
754
754
|
this.parent = parent;
|
|
755
755
|
this.data = value;
|
|
756
|
-
this._path =
|
|
756
|
+
this._path = path42;
|
|
757
757
|
this._key = key;
|
|
758
758
|
}
|
|
759
759
|
get path() {
|
|
@@ -6661,10 +6661,10 @@ function assignProp(target, prop, value) {
|
|
|
6661
6661
|
configurable: true
|
|
6662
6662
|
});
|
|
6663
6663
|
}
|
|
6664
|
-
function getElementAtPath(obj,
|
|
6665
|
-
if (!
|
|
6664
|
+
function getElementAtPath(obj, path42) {
|
|
6665
|
+
if (!path42)
|
|
6666
6666
|
return obj;
|
|
6667
|
-
return
|
|
6667
|
+
return path42.reduce((acc, key) => acc?.[key], obj);
|
|
6668
6668
|
}
|
|
6669
6669
|
function promiseAllObject(promisesObj) {
|
|
6670
6670
|
const keys = Object.keys(promisesObj);
|
|
@@ -6984,11 +6984,11 @@ function aborted(x, startIndex = 0) {
|
|
|
6984
6984
|
}
|
|
6985
6985
|
return false;
|
|
6986
6986
|
}
|
|
6987
|
-
function prefixIssues(
|
|
6987
|
+
function prefixIssues(path42, issues) {
|
|
6988
6988
|
return issues.map((iss) => {
|
|
6989
6989
|
var _a17;
|
|
6990
6990
|
(_a17 = iss).path ?? (_a17.path = []);
|
|
6991
|
-
iss.path.unshift(
|
|
6991
|
+
iss.path.unshift(path42);
|
|
6992
6992
|
return iss;
|
|
6993
6993
|
});
|
|
6994
6994
|
}
|
|
@@ -7125,7 +7125,7 @@ function treeifyError(error40, _mapper) {
|
|
|
7125
7125
|
return issue2.message;
|
|
7126
7126
|
};
|
|
7127
7127
|
const result = { errors: [] };
|
|
7128
|
-
const processError = (error41,
|
|
7128
|
+
const processError = (error41, path42 = []) => {
|
|
7129
7129
|
var _a17, _b8;
|
|
7130
7130
|
for (const issue2 of error41.issues) {
|
|
7131
7131
|
if (issue2.code === "invalid_union" && issue2.errors.length) {
|
|
@@ -7135,7 +7135,7 @@ function treeifyError(error40, _mapper) {
|
|
|
7135
7135
|
} else if (issue2.code === "invalid_element") {
|
|
7136
7136
|
processError({ issues: issue2.issues }, issue2.path);
|
|
7137
7137
|
} else {
|
|
7138
|
-
const fullpath = [...
|
|
7138
|
+
const fullpath = [...path42, ...issue2.path];
|
|
7139
7139
|
if (fullpath.length === 0) {
|
|
7140
7140
|
result.errors.push(mapper(issue2));
|
|
7141
7141
|
continue;
|
|
@@ -7165,9 +7165,9 @@ function treeifyError(error40, _mapper) {
|
|
|
7165
7165
|
processError(error40);
|
|
7166
7166
|
return result;
|
|
7167
7167
|
}
|
|
7168
|
-
function toDotPath(
|
|
7168
|
+
function toDotPath(path42) {
|
|
7169
7169
|
const segs = [];
|
|
7170
|
-
for (const seg of
|
|
7170
|
+
for (const seg of path42) {
|
|
7171
7171
|
if (typeof seg === "number")
|
|
7172
7172
|
segs.push(`[${seg}]`);
|
|
7173
7173
|
else if (typeof seg === "symbol")
|
|
@@ -26720,14 +26720,14 @@ function createAzure(options = {}) {
|
|
|
26720
26720
|
description: "Azure OpenAI resource name"
|
|
26721
26721
|
});
|
|
26722
26722
|
const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
|
|
26723
|
-
const url2 = ({ path:
|
|
26723
|
+
const url2 = ({ path: path42, modelId }) => {
|
|
26724
26724
|
var _a24;
|
|
26725
26725
|
const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
|
|
26726
26726
|
let fullUrl;
|
|
26727
26727
|
if (options.useDeploymentBasedUrls) {
|
|
26728
|
-
fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${
|
|
26728
|
+
fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path42}`);
|
|
26729
26729
|
} else {
|
|
26730
|
-
fullUrl = new URL(`${baseUrlPrefix}/v1${
|
|
26730
|
+
fullUrl = new URL(`${baseUrlPrefix}/v1${path42}`);
|
|
26731
26731
|
}
|
|
26732
26732
|
fullUrl.searchParams.set("api-version", apiVersion);
|
|
26733
26733
|
return fullUrl.toString();
|
|
@@ -33923,10 +33923,10 @@ import { tmpdir } from "node:os";
|
|
|
33923
33923
|
import path16 from "node:path";
|
|
33924
33924
|
import { exec as exec2 } from "node:child_process";
|
|
33925
33925
|
import { constants as constants3, access as access3, stat as stat4 } from "node:fs/promises";
|
|
33926
|
-
import
|
|
33926
|
+
import path28 from "node:path";
|
|
33927
33927
|
import { promisify as promisify3 } from "node:util";
|
|
33928
33928
|
import { stat as stat3, writeFile as writeFile4 } from "node:fs/promises";
|
|
33929
|
-
import
|
|
33929
|
+
import path26 from "node:path";
|
|
33930
33930
|
import { constants as constants22 } from "node:fs";
|
|
33931
33931
|
import { access as access22, mkdir as mkdir6, readdir, rm as rm2, stat } from "node:fs/promises";
|
|
33932
33932
|
import path17 from "node:path";
|
|
@@ -33936,21 +33936,22 @@ import { readFile as readFile8 } from "node:fs/promises";
|
|
|
33936
33936
|
import path20 from "node:path";
|
|
33937
33937
|
import { exec, spawn as spawn3 } from "node:child_process";
|
|
33938
33938
|
import { mkdir as mkdir7, writeFile as writeFile2 } from "node:fs/promises";
|
|
33939
|
-
import
|
|
33939
|
+
import path23 from "node:path";
|
|
33940
33940
|
import { promisify as promisify2 } from "node:util";
|
|
33941
|
+
import path222 from "node:path";
|
|
33941
33942
|
import os2 from "node:os";
|
|
33942
33943
|
import path21 from "node:path";
|
|
33943
33944
|
import { copyFile, mkdir as mkdir8, readFile as readFile9, readdir as readdir2, stat as stat2, writeFile as writeFile3 } from "node:fs/promises";
|
|
33945
|
+
import path25 from "node:path";
|
|
33944
33946
|
import path24 from "node:path";
|
|
33945
|
-
import path23 from "node:path";
|
|
33946
33947
|
import JSON5 from "json5";
|
|
33947
33948
|
import { writeFile as writeFile5 } from "node:fs/promises";
|
|
33948
|
-
import
|
|
33949
|
+
import path27 from "node:path";
|
|
33949
33950
|
import { constants as constants4 } from "node:fs";
|
|
33950
33951
|
import { access as access4, readFile as readFile10 } from "node:fs/promises";
|
|
33951
|
-
import path28 from "node:path";
|
|
33952
|
-
import { parse as parse32 } from "yaml";
|
|
33953
33952
|
import path29 from "node:path";
|
|
33953
|
+
import { parse as parse32 } from "yaml";
|
|
33954
|
+
import path30 from "node:path";
|
|
33954
33955
|
import fg2 from "fast-glob";
|
|
33955
33956
|
import { mkdtemp as mkdtemp2, rm as rm3, writeFile as writeFile6 } from "node:fs/promises";
|
|
33956
33957
|
import { tmpdir as tmpdir2 } from "node:os";
|
|
@@ -33958,35 +33959,39 @@ import { dirname, join } from "node:path";
|
|
|
33958
33959
|
import { randomBytes } from "node:crypto";
|
|
33959
33960
|
import { createServer } from "node:http";
|
|
33960
33961
|
import fs2 from "node:fs/promises";
|
|
33961
|
-
import
|
|
33962
|
-
import { createHash as
|
|
33963
|
-
import { mkdir as
|
|
33964
|
-
import
|
|
33962
|
+
import path31 from "node:path";
|
|
33963
|
+
import { createHash as createHash3, randomUUID as randomUUID7 } from "node:crypto";
|
|
33964
|
+
import { mkdir as mkdir13, stat as stat7 } from "node:fs/promises";
|
|
33965
|
+
import path39 from "node:path";
|
|
33965
33966
|
import micromatch4 from "micromatch";
|
|
33966
33967
|
import { readFileSync } from "node:fs";
|
|
33967
|
-
import path31 from "node:path";
|
|
33968
33968
|
import path322 from "node:path";
|
|
33969
|
+
import path33 from "node:path";
|
|
33969
33970
|
import fg3 from "fast-glob";
|
|
33970
33971
|
import { exec as execCallback } from "node:child_process";
|
|
33971
33972
|
import { readdirSync as readdirSync2, statSync } from "node:fs";
|
|
33972
|
-
import
|
|
33973
|
+
import path34 from "node:path";
|
|
33973
33974
|
import { promisify as promisify4 } from "node:util";
|
|
33974
33975
|
import { cp, mkdir as mkdir10, readdir as readdir3, rm as rm4, stat as stat5 } from "node:fs/promises";
|
|
33975
|
-
import
|
|
33976
|
-
import path34 from "node:path";
|
|
33976
|
+
import path35 from "node:path";
|
|
33977
33977
|
import { execFile } from "node:child_process";
|
|
33978
33978
|
import { createHash } from "node:crypto";
|
|
33979
33979
|
import { existsSync as existsSync2 } from "node:fs";
|
|
33980
|
-
import { mkdir as mkdir11, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
33981
|
-
import os4 from "node:os";
|
|
33982
|
-
import path35 from "node:path";
|
|
33983
|
-
import { promisify as promisify5 } from "node:util";
|
|
33984
|
-
import { readdir as readdir4, stat as stat6 } from "node:fs/promises";
|
|
33980
|
+
import { cp as cp2, mkdir as mkdir11, readFile as readFile11, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
33985
33981
|
import path36 from "node:path";
|
|
33982
|
+
import { promisify as promisify5 } from "node:util";
|
|
33983
|
+
import { execFile as execFile2 } from "node:child_process";
|
|
33984
|
+
import { createHash as createHash2 } from "node:crypto";
|
|
33986
33985
|
import { existsSync as existsSync3 } from "node:fs";
|
|
33986
|
+
import { mkdir as mkdir12, rm as rm6, unlink as unlink2, writeFile as writeFile8 } from "node:fs/promises";
|
|
33987
|
+
import path37 from "node:path";
|
|
33988
|
+
import { promisify as promisify6 } from "node:util";
|
|
33989
|
+
import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
|
|
33987
33990
|
import path38 from "node:path";
|
|
33988
|
-
import {
|
|
33989
|
-
import
|
|
33991
|
+
import { existsSync as existsSync4 } from "node:fs";
|
|
33992
|
+
import path40 from "node:path";
|
|
33993
|
+
import { mkdir as mkdir14, readFile as readFile12, writeFile as writeFile9 } from "node:fs/promises";
|
|
33994
|
+
import path41 from "node:path";
|
|
33990
33995
|
function computeTraceSummary(messages) {
|
|
33991
33996
|
const toolCallCounts = {};
|
|
33992
33997
|
const toolDurations = {};
|
|
@@ -34183,7 +34188,7 @@ async function resolveFileReference2(ref, evalFileDir) {
|
|
|
34183
34188
|
const rawPath = extractFilePath(ref);
|
|
34184
34189
|
const absolutePattern = path3.resolve(evalFileDir, rawPath);
|
|
34185
34190
|
if (isGlobPattern(rawPath)) {
|
|
34186
|
-
const matches = await fg(absolutePattern, {
|
|
34191
|
+
const matches = await fg(absolutePattern.replaceAll("\\", "/"), {
|
|
34187
34192
|
onlyFiles: true,
|
|
34188
34193
|
absolute: true
|
|
34189
34194
|
});
|
|
@@ -34547,6 +34552,17 @@ function parseExecutionDefaults(raw, configPath) {
|
|
|
34547
34552
|
} else if (otelFile !== void 0) {
|
|
34548
34553
|
logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
|
|
34549
34554
|
}
|
|
34555
|
+
if (typeof obj.pool_workspaces === "boolean") {
|
|
34556
|
+
result.pool_workspaces = obj.pool_workspaces;
|
|
34557
|
+
} else if (obj.pool_workspaces !== void 0) {
|
|
34558
|
+
logWarning(`Invalid execution.pool_workspaces in ${configPath}, expected boolean`);
|
|
34559
|
+
}
|
|
34560
|
+
const poolSlots = obj.pool_slots;
|
|
34561
|
+
if (typeof poolSlots === "number" && Number.isInteger(poolSlots) && poolSlots >= 1 && poolSlots <= 50) {
|
|
34562
|
+
result.pool_slots = poolSlots;
|
|
34563
|
+
} else if (poolSlots !== void 0) {
|
|
34564
|
+
logWarning(`Invalid execution.pool_slots in ${configPath}, expected integer 1-50`);
|
|
34565
|
+
}
|
|
34550
34566
|
return Object.keys(result).length > 0 ? result : void 0;
|
|
34551
34567
|
}
|
|
34552
34568
|
function logWarning(message) {
|
|
@@ -35962,6 +35978,7 @@ async function processMessages(options) {
|
|
|
35962
35978
|
repoRootPath,
|
|
35963
35979
|
guidelinePatterns,
|
|
35964
35980
|
guidelinePaths,
|
|
35981
|
+
treatFileSegmentsAsGuidelines,
|
|
35965
35982
|
textParts,
|
|
35966
35983
|
messageType,
|
|
35967
35984
|
verbose
|
|
@@ -36009,16 +36026,20 @@ async function processMessages(options) {
|
|
|
36009
36026
|
}
|
|
36010
36027
|
try {
|
|
36011
36028
|
const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
36012
|
-
|
|
36013
|
-
|
|
36014
|
-
|
|
36015
|
-
|
|
36016
|
-
|
|
36017
|
-
|
|
36018
|
-
|
|
36019
|
-
|
|
36020
|
-
|
|
36029
|
+
const classifyAsGuideline = shouldTreatAsGuideline({
|
|
36030
|
+
messageType,
|
|
36031
|
+
resolvedPath,
|
|
36032
|
+
repoRootPath,
|
|
36033
|
+
guidelinePatterns,
|
|
36034
|
+
treatFileSegmentsAsGuidelines
|
|
36035
|
+
});
|
|
36036
|
+
if (classifyAsGuideline && guidelinePaths) {
|
|
36037
|
+
guidelinePaths.push(path5.resolve(resolvedPath));
|
|
36038
|
+
if (verbose) {
|
|
36039
|
+
console.log(` [Guideline] Found: ${displayPath}`);
|
|
36040
|
+
console.log(` Resolved to: ${resolvedPath}`);
|
|
36021
36041
|
}
|
|
36042
|
+
continue;
|
|
36022
36043
|
}
|
|
36023
36044
|
segments.push({
|
|
36024
36045
|
type: "file",
|
|
@@ -36047,6 +36068,26 @@ async function processMessages(options) {
|
|
|
36047
36068
|
}
|
|
36048
36069
|
return segments;
|
|
36049
36070
|
}
|
|
36071
|
+
function shouldTreatAsGuideline(options) {
|
|
36072
|
+
const {
|
|
36073
|
+
messageType,
|
|
36074
|
+
resolvedPath,
|
|
36075
|
+
repoRootPath,
|
|
36076
|
+
guidelinePatterns,
|
|
36077
|
+
treatFileSegmentsAsGuidelines
|
|
36078
|
+
} = options;
|
|
36079
|
+
if (messageType !== "input") {
|
|
36080
|
+
return false;
|
|
36081
|
+
}
|
|
36082
|
+
if (treatFileSegmentsAsGuidelines) {
|
|
36083
|
+
return true;
|
|
36084
|
+
}
|
|
36085
|
+
if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
36086
|
+
return false;
|
|
36087
|
+
}
|
|
36088
|
+
const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
|
|
36089
|
+
return isGuidelineFile(relativeToRepo, guidelinePatterns);
|
|
36090
|
+
}
|
|
36050
36091
|
function asString3(value) {
|
|
36051
36092
|
return typeof value === "string" ? value : void 0;
|
|
36052
36093
|
}
|
|
@@ -36381,6 +36422,8 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
36381
36422
|
for (const guidelinePath of testCase.guideline_paths) {
|
|
36382
36423
|
console.log(` - ${guidelinePath}`);
|
|
36383
36424
|
}
|
|
36425
|
+
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
36426
|
+
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
36384
36427
|
} else {
|
|
36385
36428
|
console.log(" No guidelines found");
|
|
36386
36429
|
}
|
|
@@ -36741,7 +36784,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36741
36784
|
} else {
|
|
36742
36785
|
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
|
|
36743
36786
|
}
|
|
36744
|
-
const suiteWorkspace =
|
|
36787
|
+
const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
|
|
36745
36788
|
const suiteInputMessages = expandInputShorthand(suite.input);
|
|
36746
36789
|
const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
36747
36790
|
const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
|
|
@@ -36777,12 +36820,24 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36777
36820
|
}
|
|
36778
36821
|
const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
|
|
36779
36822
|
const skipDefaults = caseExecution?.skip_defaults === true;
|
|
36780
|
-
const
|
|
36823
|
+
const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
|
|
36824
|
+
const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
|
|
36781
36825
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
36782
36826
|
const guidelinePaths = [];
|
|
36783
36827
|
const inputTextParts = [];
|
|
36784
|
-
const
|
|
36785
|
-
messages:
|
|
36828
|
+
const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
|
|
36829
|
+
messages: effectiveSuiteInputMessages,
|
|
36830
|
+
searchRoots,
|
|
36831
|
+
repoRootPath,
|
|
36832
|
+
guidelinePatterns,
|
|
36833
|
+
guidelinePaths,
|
|
36834
|
+
treatFileSegmentsAsGuidelines: true,
|
|
36835
|
+
textParts: inputTextParts,
|
|
36836
|
+
messageType: "input",
|
|
36837
|
+
verbose
|
|
36838
|
+
}) : [];
|
|
36839
|
+
const testInputSegments = await processMessages({
|
|
36840
|
+
messages: testInputMessages,
|
|
36786
36841
|
searchRoots,
|
|
36787
36842
|
repoRootPath,
|
|
36788
36843
|
guidelinePatterns,
|
|
@@ -36791,6 +36846,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36791
36846
|
messageType: "input",
|
|
36792
36847
|
verbose
|
|
36793
36848
|
});
|
|
36849
|
+
const inputSegments = [...suiteInputSegments, ...testInputSegments];
|
|
36794
36850
|
const outputSegments = hasExpectedMessages ? await processExpectedMessages({
|
|
36795
36851
|
messages: expectedMessages,
|
|
36796
36852
|
searchRoots,
|
|
@@ -36838,7 +36894,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36838
36894
|
...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
|
|
36839
36895
|
...userFilePaths
|
|
36840
36896
|
];
|
|
36841
|
-
const caseWorkspace =
|
|
36897
|
+
const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
|
|
36842
36898
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
36843
36899
|
const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
|
|
36844
36900
|
const caseTargets = extractTargetsFromTestCase(evalcase);
|
|
@@ -36869,6 +36925,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
36869
36925
|
for (const guidelinePath of testCase.guideline_paths) {
|
|
36870
36926
|
console.log(` - ${guidelinePath}`);
|
|
36871
36927
|
}
|
|
36928
|
+
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
36929
|
+
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
36872
36930
|
} else {
|
|
36873
36931
|
console.log(" No guidelines found");
|
|
36874
36932
|
}
|
|
@@ -36968,6 +37026,26 @@ function parseResetConfig(raw) {
|
|
|
36968
37026
|
...afterEach !== void 0 && { after_each: afterEach }
|
|
36969
37027
|
};
|
|
36970
37028
|
}
|
|
37029
|
+
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
37030
|
+
if (typeof raw === "string") {
|
|
37031
|
+
const workspaceFilePath = path8.resolve(evalFileDir, raw);
|
|
37032
|
+
let content;
|
|
37033
|
+
try {
|
|
37034
|
+
content = await readFile7(workspaceFilePath, "utf8");
|
|
37035
|
+
} catch {
|
|
37036
|
+
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
37037
|
+
}
|
|
37038
|
+
const parsed = parse22(content);
|
|
37039
|
+
if (!isJsonObject(parsed)) {
|
|
37040
|
+
throw new Error(
|
|
37041
|
+
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
37042
|
+
);
|
|
37043
|
+
}
|
|
37044
|
+
const workspaceFileDir = path8.dirname(workspaceFilePath);
|
|
37045
|
+
return parseWorkspaceConfig(parsed, workspaceFileDir);
|
|
37046
|
+
}
|
|
37047
|
+
return parseWorkspaceConfig(raw, evalFileDir);
|
|
37048
|
+
}
|
|
36971
37049
|
function parseWorkspaceConfig(raw, evalFileDir) {
|
|
36972
37050
|
if (!isJsonObject(raw)) return void 0;
|
|
36973
37051
|
const obj = raw;
|
|
@@ -38891,7 +38969,7 @@ function subscribeToCopilotCliLogEntries(listener) {
|
|
|
38891
38969
|
};
|
|
38892
38970
|
}
|
|
38893
38971
|
function resolvePlatformCliPath() {
|
|
38894
|
-
const
|
|
38972
|
+
const os3 = platform();
|
|
38895
38973
|
const cpu = arch();
|
|
38896
38974
|
const platformMap = {
|
|
38897
38975
|
linux: "linux",
|
|
@@ -38902,13 +38980,13 @@ function resolvePlatformCliPath() {
|
|
|
38902
38980
|
x64: "x64",
|
|
38903
38981
|
arm64: "arm64"
|
|
38904
38982
|
};
|
|
38905
|
-
const osPart = platformMap[
|
|
38983
|
+
const osPart = platformMap[os3];
|
|
38906
38984
|
const archPart = archMap[cpu];
|
|
38907
38985
|
if (!osPart || !archPart) {
|
|
38908
38986
|
return void 0;
|
|
38909
38987
|
}
|
|
38910
38988
|
const packageName = `@github/copilot-${osPart}-${archPart}`;
|
|
38911
|
-
const binaryName =
|
|
38989
|
+
const binaryName = os3 === "win32" ? "copilot.exe" : "copilot";
|
|
38912
38990
|
try {
|
|
38913
38991
|
const resolved = import.meta.resolve(`${packageName}/package.json`);
|
|
38914
38992
|
const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath2(resolved) : resolved;
|
|
@@ -40915,11 +40993,38 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
40915
40993
|
}
|
|
40916
40994
|
return true;
|
|
40917
40995
|
}
|
|
40996
|
+
var logged = false;
|
|
40997
|
+
function getAgentvHome() {
|
|
40998
|
+
const envHome = process.env.AGENTV_HOME;
|
|
40999
|
+
if (envHome) {
|
|
41000
|
+
if (!logged) {
|
|
41001
|
+
logged = true;
|
|
41002
|
+
console.warn(`Using AGENTV_HOME: ${envHome}`);
|
|
41003
|
+
}
|
|
41004
|
+
return envHome;
|
|
41005
|
+
}
|
|
41006
|
+
return path21.join(os2.homedir(), ".agentv");
|
|
41007
|
+
}
|
|
41008
|
+
function getWorkspacesRoot() {
|
|
41009
|
+
return path21.join(getAgentvHome(), "workspaces");
|
|
41010
|
+
}
|
|
41011
|
+
function getGitCacheRoot() {
|
|
41012
|
+
return path21.join(getAgentvHome(), "git-cache");
|
|
41013
|
+
}
|
|
41014
|
+
function getSubagentsRoot() {
|
|
41015
|
+
return path21.join(getAgentvHome(), "subagents");
|
|
41016
|
+
}
|
|
41017
|
+
function getTraceStateRoot() {
|
|
41018
|
+
return path21.join(getAgentvHome(), "trace-state");
|
|
41019
|
+
}
|
|
41020
|
+
function getWorkspacePoolRoot() {
|
|
41021
|
+
return path21.join(getAgentvHome(), "workspace-pool");
|
|
41022
|
+
}
|
|
40918
41023
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
40919
41024
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
40920
41025
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
40921
41026
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
40922
|
-
return
|
|
41027
|
+
return path222.join(getSubagentsRoot(), folder);
|
|
40923
41028
|
}
|
|
40924
41029
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
40925
41030
|
var execAsync2 = promisify2(exec);
|
|
@@ -40931,12 +41036,19 @@ description: 'Wake-up Signal'
|
|
|
40931
41036
|
model: Grok Code Fast 1 (copilot)
|
|
40932
41037
|
---`;
|
|
40933
41038
|
function spawnVsCode(vscodeCmd, args, options) {
|
|
40934
|
-
const
|
|
41039
|
+
const useShell = options?.shell ?? true;
|
|
41040
|
+
const command = useShell ? shellQuote(vscodeCmd) : vscodeCmd;
|
|
41041
|
+
const child = spawn3(command, args, {
|
|
40935
41042
|
windowsHide: true,
|
|
40936
|
-
shell:
|
|
41043
|
+
shell: useShell,
|
|
40937
41044
|
detached: false
|
|
40938
41045
|
});
|
|
40939
|
-
child.on("error", () => {
|
|
41046
|
+
child.on("error", (error40) => {
|
|
41047
|
+
const label = options?.label ?? "spawn";
|
|
41048
|
+
const renderedArgs = args.map((value) => JSON.stringify(value)).join(" ");
|
|
41049
|
+
console.error(
|
|
41050
|
+
`[vscode] ${label} failed: command=${JSON.stringify(vscodeCmd)} args=${renderedArgs} error=${error40.message}`
|
|
41051
|
+
);
|
|
40940
41052
|
});
|
|
40941
41053
|
return child;
|
|
40942
41054
|
}
|
|
@@ -40973,16 +41085,20 @@ async function checkWorkspaceOpened(workspaceName, vscodeCmd) {
|
|
|
40973
41085
|
async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir, vscodeCmd, pollInterval = 1, timeout = 60) {
|
|
40974
41086
|
const alreadyOpen = await checkWorkspaceOpened(workspaceName, vscodeCmd);
|
|
40975
41087
|
if (alreadyOpen) {
|
|
40976
|
-
spawnVsCode(
|
|
41088
|
+
const child = spawnVsCode(vscodeCmd, [workspacePath], { label: "focus-existing-workspace" });
|
|
41089
|
+
await raceSpawnError(child);
|
|
40977
41090
|
return true;
|
|
40978
41091
|
}
|
|
40979
|
-
const aliveFile =
|
|
41092
|
+
const aliveFile = path23.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
40980
41093
|
await removeIfExists(aliveFile);
|
|
40981
|
-
const githubAgentsDir =
|
|
41094
|
+
const githubAgentsDir = path23.join(subagentDir, ".github", "agents");
|
|
40982
41095
|
await mkdir7(githubAgentsDir, { recursive: true });
|
|
40983
|
-
const wakeupDst =
|
|
41096
|
+
const wakeupDst = path23.join(githubAgentsDir, "wakeup.md");
|
|
40984
41097
|
await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
40985
|
-
spawnVsCode(
|
|
41098
|
+
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
41099
|
+
label: "open-workspace"
|
|
41100
|
+
});
|
|
41101
|
+
await raceSpawnError(workspaceChild);
|
|
40986
41102
|
await sleep2(100);
|
|
40987
41103
|
const wakeupChatId = "wakeup";
|
|
40988
41104
|
const chatArgs = [
|
|
@@ -40990,9 +41106,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
40990
41106
|
"chat",
|
|
40991
41107
|
"-m",
|
|
40992
41108
|
wakeupChatId,
|
|
40993
|
-
`create a file named .alive in the ${
|
|
41109
|
+
`create a file named .alive in the ${path23.basename(subagentDir)} folder`
|
|
40994
41110
|
];
|
|
40995
|
-
spawnVsCode(
|
|
41111
|
+
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
41112
|
+
await raceSpawnError(wakeupChild);
|
|
40996
41113
|
const start = Date.now();
|
|
40997
41114
|
while (!await pathExists(aliveFile)) {
|
|
40998
41115
|
if (Date.now() - start > timeout * 1e3) {
|
|
@@ -41004,10 +41121,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
41004
41121
|
return true;
|
|
41005
41122
|
}
|
|
41006
41123
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
41007
|
-
const workspacePath =
|
|
41008
|
-
const messagesDir =
|
|
41124
|
+
const workspacePath = path23.join(subagentDir, `${path23.basename(subagentDir)}.code-workspace`);
|
|
41125
|
+
const messagesDir = path23.join(subagentDir, "messages");
|
|
41009
41126
|
await mkdir7(messagesDir, { recursive: true });
|
|
41010
|
-
const reqFile =
|
|
41127
|
+
const reqFile = path23.join(messagesDir, `${timestamp}_req.md`);
|
|
41011
41128
|
await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
|
|
41012
41129
|
const reqUri = pathToFileUri2(reqFile);
|
|
41013
41130
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
@@ -41015,25 +41132,25 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
41015
41132
|
chatArgs.push("-a", attachment);
|
|
41016
41133
|
}
|
|
41017
41134
|
chatArgs.push("-a", reqFile);
|
|
41018
|
-
chatArgs.push(`Follow instructions in [${
|
|
41135
|
+
chatArgs.push(`Follow instructions in [${path23.basename(reqFile)}](${reqUri})`);
|
|
41019
41136
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
41020
41137
|
workspacePath,
|
|
41021
|
-
|
|
41138
|
+
path23.basename(subagentDir),
|
|
41022
41139
|
subagentDir,
|
|
41023
41140
|
vscodeCmd
|
|
41024
41141
|
);
|
|
41025
41142
|
if (!workspaceReady) {
|
|
41026
41143
|
throw new Error(
|
|
41027
|
-
`VS Code workspace '${
|
|
41144
|
+
`VS Code workspace '${path23.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
41028
41145
|
);
|
|
41029
41146
|
}
|
|
41030
41147
|
await sleep2(500);
|
|
41031
|
-
const child = spawnVsCode(
|
|
41148
|
+
const child = spawnVsCode(vscodeCmd, chatArgs, { label: "send-chat" });
|
|
41032
41149
|
await raceSpawnError(child);
|
|
41033
41150
|
}
|
|
41034
41151
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
41035
|
-
const workspacePath =
|
|
41036
|
-
const messagesDir =
|
|
41152
|
+
const workspacePath = path23.join(subagentDir, `${path23.basename(subagentDir)}.code-workspace`);
|
|
41153
|
+
const messagesDir = path23.join(subagentDir, "messages");
|
|
41037
41154
|
await mkdir7(messagesDir, { recursive: true });
|
|
41038
41155
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
41039
41156
|
for (const attachment of attachmentPaths) {
|
|
@@ -41042,17 +41159,17 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
41042
41159
|
chatArgs.push(chatInstruction);
|
|
41043
41160
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
41044
41161
|
workspacePath,
|
|
41045
|
-
|
|
41162
|
+
path23.basename(subagentDir),
|
|
41046
41163
|
subagentDir,
|
|
41047
41164
|
vscodeCmd
|
|
41048
41165
|
);
|
|
41049
41166
|
if (!workspaceReady) {
|
|
41050
41167
|
throw new Error(
|
|
41051
|
-
`VS Code workspace '${
|
|
41168
|
+
`VS Code workspace '${path23.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
41052
41169
|
);
|
|
41053
41170
|
}
|
|
41054
41171
|
await sleep2(500);
|
|
41055
|
-
const child = spawnVsCode(
|
|
41172
|
+
const child = spawnVsCode(vscodeCmd, chatArgs, { label: "send-batch-chat" });
|
|
41056
41173
|
await raceSpawnError(child);
|
|
41057
41174
|
}
|
|
41058
41175
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
@@ -41070,10 +41187,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
41070
41187
|
}
|
|
41071
41188
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
41072
41189
|
const folderPath = folder.path;
|
|
41073
|
-
if (
|
|
41190
|
+
if (path24.isAbsolute(folderPath)) {
|
|
41074
41191
|
return folder;
|
|
41075
41192
|
}
|
|
41076
|
-
const absolutePath =
|
|
41193
|
+
const absolutePath = path24.resolve(templateDir, folderPath);
|
|
41077
41194
|
return {
|
|
41078
41195
|
...folder,
|
|
41079
41196
|
path: absolutePath
|
|
@@ -41095,19 +41212,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
41095
41212
|
if (locationMap && typeof locationMap === "object") {
|
|
41096
41213
|
const transformedMap = {};
|
|
41097
41214
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
41098
|
-
const isAbsolute =
|
|
41215
|
+
const isAbsolute = path24.isAbsolute(locationPath);
|
|
41099
41216
|
if (isAbsolute) {
|
|
41100
41217
|
transformedMap[locationPath] = value;
|
|
41101
41218
|
} else {
|
|
41102
41219
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
41103
41220
|
if (firstGlobIndex === -1) {
|
|
41104
|
-
const resolvedPath =
|
|
41221
|
+
const resolvedPath = path24.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
41105
41222
|
transformedMap[resolvedPath] = value;
|
|
41106
41223
|
} else {
|
|
41107
41224
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
41108
41225
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
41109
41226
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
41110
|
-
const resolvedPath = (
|
|
41227
|
+
const resolvedPath = (path24.resolve(templateDir, basePath) + patternPath).replace(
|
|
41111
41228
|
/\\/g,
|
|
41112
41229
|
"/"
|
|
41113
41230
|
);
|
|
@@ -41146,7 +41263,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
41146
41263
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
41147
41264
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
41148
41265
|
for (const subagent of subagents) {
|
|
41149
|
-
const lockFile =
|
|
41266
|
+
const lockFile = path25.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
41150
41267
|
if (!await pathExists(lockFile)) {
|
|
41151
41268
|
return subagent.absolutePath;
|
|
41152
41269
|
}
|
|
@@ -41156,7 +41273,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
41156
41273
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
41157
41274
|
let workspaceContent;
|
|
41158
41275
|
if (workspaceTemplate) {
|
|
41159
|
-
const workspaceSrc =
|
|
41276
|
+
const workspaceSrc = path25.resolve(workspaceTemplate);
|
|
41160
41277
|
if (!await pathExists(workspaceSrc)) {
|
|
41161
41278
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
41162
41279
|
}
|
|
@@ -41169,13 +41286,13 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
41169
41286
|
} else {
|
|
41170
41287
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
41171
41288
|
}
|
|
41172
|
-
const workspaceName = `${
|
|
41173
|
-
const workspaceDst =
|
|
41174
|
-
const templateDir = workspaceTemplate ?
|
|
41289
|
+
const workspaceName = `${path25.basename(subagentDir)}.code-workspace`;
|
|
41290
|
+
const workspaceDst = path25.join(subagentDir, workspaceName);
|
|
41291
|
+
const templateDir = workspaceTemplate ? path25.dirname(path25.resolve(workspaceTemplate)) : subagentDir;
|
|
41175
41292
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
41176
41293
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
41177
41294
|
if (cwd) {
|
|
41178
|
-
const absCwd =
|
|
41295
|
+
const absCwd = path25.resolve(cwd);
|
|
41179
41296
|
const parsed = JSON.parse(transformedContent);
|
|
41180
41297
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
41181
41298
|
if (!alreadyPresent) {
|
|
@@ -41184,35 +41301,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
41184
41301
|
}
|
|
41185
41302
|
}
|
|
41186
41303
|
await writeFile3(workspaceDst, transformedContent, "utf8");
|
|
41187
|
-
const messagesDir =
|
|
41304
|
+
const messagesDir = path25.join(subagentDir, "messages");
|
|
41188
41305
|
await mkdir8(messagesDir, { recursive: true });
|
|
41189
41306
|
return { workspace: workspaceDst, messagesDir };
|
|
41190
41307
|
}
|
|
41191
41308
|
async function createSubagentLock(subagentDir) {
|
|
41192
|
-
const messagesDir =
|
|
41309
|
+
const messagesDir = path25.join(subagentDir, "messages");
|
|
41193
41310
|
if (await pathExists(messagesDir)) {
|
|
41194
41311
|
const files = await readdir2(messagesDir);
|
|
41195
41312
|
await Promise.all(
|
|
41196
41313
|
files.map(async (file2) => {
|
|
41197
|
-
const target =
|
|
41314
|
+
const target = path25.join(messagesDir, file2);
|
|
41198
41315
|
await removeIfExists(target);
|
|
41199
41316
|
})
|
|
41200
41317
|
);
|
|
41201
41318
|
}
|
|
41202
|
-
const githubAgentsDir =
|
|
41319
|
+
const githubAgentsDir = path25.join(subagentDir, ".github", "agents");
|
|
41203
41320
|
if (await pathExists(githubAgentsDir)) {
|
|
41204
41321
|
const agentFiles = await readdir2(githubAgentsDir);
|
|
41205
41322
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
41206
41323
|
await Promise.all(
|
|
41207
|
-
agentFiles.filter((file2) => file2.endsWith(".md") && !preservedFiles.has(file2)).map((file2) => removeIfExists(
|
|
41324
|
+
agentFiles.filter((file2) => file2.endsWith(".md") && !preservedFiles.has(file2)).map((file2) => removeIfExists(path25.join(githubAgentsDir, file2)))
|
|
41208
41325
|
);
|
|
41209
41326
|
}
|
|
41210
|
-
const lockFile =
|
|
41327
|
+
const lockFile = path25.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
41211
41328
|
await writeFile3(lockFile, "", { encoding: "utf8" });
|
|
41212
41329
|
return lockFile;
|
|
41213
41330
|
}
|
|
41214
41331
|
async function removeSubagentLock(subagentDir) {
|
|
41215
|
-
const lockFile =
|
|
41332
|
+
const lockFile = path25.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
41216
41333
|
await removeIfExists(lockFile);
|
|
41217
41334
|
}
|
|
41218
41335
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -41232,9 +41349,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
41232
41349
|
return 1;
|
|
41233
41350
|
}
|
|
41234
41351
|
if (promptFile) {
|
|
41235
|
-
const githubAgentsDir =
|
|
41352
|
+
const githubAgentsDir = path25.join(subagentDir, ".github", "agents");
|
|
41236
41353
|
await mkdir8(githubAgentsDir, { recursive: true });
|
|
41237
|
-
const agentFile =
|
|
41354
|
+
const agentFile = path25.join(githubAgentsDir, `${chatId}.md`);
|
|
41238
41355
|
try {
|
|
41239
41356
|
await copyFile(promptFile, agentFile);
|
|
41240
41357
|
} catch (error40) {
|
|
@@ -41251,7 +41368,7 @@ async function resolvePromptFile(promptFile) {
|
|
|
41251
41368
|
if (!promptFile) {
|
|
41252
41369
|
return void 0;
|
|
41253
41370
|
}
|
|
41254
|
-
const resolvedPrompt =
|
|
41371
|
+
const resolvedPrompt = path26.resolve(promptFile);
|
|
41255
41372
|
if (!await pathExists(resolvedPrompt)) {
|
|
41256
41373
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
41257
41374
|
}
|
|
@@ -41267,7 +41384,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
41267
41384
|
}
|
|
41268
41385
|
const resolved = [];
|
|
41269
41386
|
for (const attachment of extraAttachments) {
|
|
41270
|
-
const resolvedPath =
|
|
41387
|
+
const resolvedPath = path26.resolve(attachment);
|
|
41271
41388
|
if (!await pathExists(resolvedPath)) {
|
|
41272
41389
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
41273
41390
|
}
|
|
@@ -41309,7 +41426,7 @@ async function dispatchAgentSession(options) {
|
|
|
41309
41426
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
41310
41427
|
};
|
|
41311
41428
|
}
|
|
41312
|
-
const subagentName =
|
|
41429
|
+
const subagentName = path26.basename(subagentDir);
|
|
41313
41430
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
41314
41431
|
const preparationResult = await prepareSubagentDirectory(
|
|
41315
41432
|
subagentDir,
|
|
@@ -41337,9 +41454,9 @@ async function dispatchAgentSession(options) {
|
|
|
41337
41454
|
};
|
|
41338
41455
|
}
|
|
41339
41456
|
const timestamp = generateTimestamp();
|
|
41340
|
-
const messagesDir =
|
|
41341
|
-
const responseFileTmp =
|
|
41342
|
-
const responseFileFinal =
|
|
41457
|
+
const messagesDir = path26.join(subagentDir, "messages");
|
|
41458
|
+
const responseFileTmp = path26.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
41459
|
+
const responseFileFinal = path26.join(messagesDir, `${timestamp}_res.md`);
|
|
41343
41460
|
const requestInstructions = createRequestPrompt(
|
|
41344
41461
|
userQuery,
|
|
41345
41462
|
responseFileTmp,
|
|
@@ -41444,7 +41561,7 @@ async function dispatchBatchAgent(options) {
|
|
|
41444
41561
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
41445
41562
|
};
|
|
41446
41563
|
}
|
|
41447
|
-
subagentName =
|
|
41564
|
+
subagentName = path26.basename(subagentDir);
|
|
41448
41565
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
41449
41566
|
const preparationResult = await prepareSubagentDirectory(
|
|
41450
41567
|
subagentDir,
|
|
@@ -41475,17 +41592,17 @@ async function dispatchBatchAgent(options) {
|
|
|
41475
41592
|
};
|
|
41476
41593
|
}
|
|
41477
41594
|
const timestamp = generateTimestamp();
|
|
41478
|
-
const messagesDir =
|
|
41595
|
+
const messagesDir = path26.join(subagentDir, "messages");
|
|
41479
41596
|
requestFiles = userQueries.map(
|
|
41480
|
-
(_, index) =>
|
|
41597
|
+
(_, index) => path26.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
41481
41598
|
);
|
|
41482
41599
|
const responseTmpFiles = userQueries.map(
|
|
41483
|
-
(_, index) =>
|
|
41600
|
+
(_, index) => path26.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
41484
41601
|
);
|
|
41485
41602
|
responseFilesFinal = userQueries.map(
|
|
41486
|
-
(_, index) =>
|
|
41603
|
+
(_, index) => path26.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
41487
41604
|
);
|
|
41488
|
-
const orchestratorFile =
|
|
41605
|
+
const orchestratorFile = path26.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
41489
41606
|
if (!dryRun) {
|
|
41490
41607
|
await Promise.all(
|
|
41491
41608
|
userQueries.map((query, index) => {
|
|
@@ -41598,7 +41715,7 @@ async function provisionSubagents(options) {
|
|
|
41598
41715
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
41599
41716
|
throw new Error("subagents must be a positive integer");
|
|
41600
41717
|
}
|
|
41601
|
-
const targetPath =
|
|
41718
|
+
const targetPath = path27.resolve(targetRoot);
|
|
41602
41719
|
if (!dryRun) {
|
|
41603
41720
|
await ensureDir(targetPath);
|
|
41604
41721
|
}
|
|
@@ -41618,7 +41735,7 @@ async function provisionSubagents(options) {
|
|
|
41618
41735
|
continue;
|
|
41619
41736
|
}
|
|
41620
41737
|
highestNumber = Math.max(highestNumber, parsed);
|
|
41621
|
-
const lockFile =
|
|
41738
|
+
const lockFile = path27.join(entry.absolutePath, lockName);
|
|
41622
41739
|
const locked = await pathExists(lockFile);
|
|
41623
41740
|
if (locked) {
|
|
41624
41741
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -41635,10 +41752,10 @@ async function provisionSubagents(options) {
|
|
|
41635
41752
|
break;
|
|
41636
41753
|
}
|
|
41637
41754
|
const subagentDir = subagent.absolutePath;
|
|
41638
|
-
const githubAgentsDir =
|
|
41639
|
-
const lockFile =
|
|
41640
|
-
const workspaceDst =
|
|
41641
|
-
const wakeupDst =
|
|
41755
|
+
const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
|
|
41756
|
+
const lockFile = path27.join(subagentDir, lockName);
|
|
41757
|
+
const workspaceDst = path27.join(subagentDir, `${path27.basename(subagentDir)}.code-workspace`);
|
|
41758
|
+
const wakeupDst = path27.join(githubAgentsDir, "wakeup.md");
|
|
41642
41759
|
const isLocked = await pathExists(lockFile);
|
|
41643
41760
|
if (isLocked && !force) {
|
|
41644
41761
|
continue;
|
|
@@ -41676,10 +41793,10 @@ async function provisionSubagents(options) {
|
|
|
41676
41793
|
let nextIndex = highestNumber;
|
|
41677
41794
|
while (subagentsProvisioned < subagents) {
|
|
41678
41795
|
nextIndex += 1;
|
|
41679
|
-
const subagentDir =
|
|
41680
|
-
const githubAgentsDir =
|
|
41681
|
-
const workspaceDst =
|
|
41682
|
-
const wakeupDst =
|
|
41796
|
+
const subagentDir = path27.join(targetPath, `subagent-${nextIndex}`);
|
|
41797
|
+
const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
|
|
41798
|
+
const workspaceDst = path27.join(subagentDir, `${path27.basename(subagentDir)}.code-workspace`);
|
|
41799
|
+
const wakeupDst = path27.join(githubAgentsDir, "wakeup.md");
|
|
41683
41800
|
if (!dryRun) {
|
|
41684
41801
|
await ensureDir(subagentDir);
|
|
41685
41802
|
await ensureDir(githubAgentsDir);
|
|
@@ -41703,8 +41820,6 @@ var AGENTV_REQUEST_TEMPLATE = `[[ ## task ## ]]
|
|
|
41703
41820
|
|
|
41704
41821
|
**IMPORTANT**: Follow these exact steps:
|
|
41705
41822
|
1. Create and write your complete response to: {{responseFileTmp}}
|
|
41706
|
-
- All intended file outputs/changes MUST be written in your response file.
|
|
41707
|
-
- For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
|
|
41708
41823
|
2. When completely finished, run these PowerShell commands to signal completion:
|
|
41709
41824
|
\`\`\`
|
|
41710
41825
|
Move-Item -LiteralPath '{{responseFileTmp}}' -Destination '{{responseFileFinal}}'
|
|
@@ -41721,8 +41836,6 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
|
|
|
41721
41836
|
|
|
41722
41837
|
**IMPORTANT**: Follow these exact steps:
|
|
41723
41838
|
1. Create and write your complete response to: {{responseFileTmp}}
|
|
41724
|
-
- All intended file outputs/changes MUST be written in your response file.
|
|
41725
|
-
- For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
|
|
41726
41839
|
2. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
|
|
41727
41840
|
3. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
|
|
41728
41841
|
`;
|
|
@@ -41869,7 +41982,7 @@ var VSCodeProvider = class {
|
|
|
41869
41982
|
async function locateVSCodeExecutable(candidate) {
|
|
41870
41983
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
41871
41984
|
if (includesPathSeparator) {
|
|
41872
|
-
const resolved =
|
|
41985
|
+
const resolved = path28.isAbsolute(candidate) ? candidate : path28.resolve(candidate);
|
|
41873
41986
|
try {
|
|
41874
41987
|
await access3(resolved, constants3.F_OK);
|
|
41875
41988
|
return resolved;
|
|
@@ -41898,7 +42011,7 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
41898
42011
|
return void 0;
|
|
41899
42012
|
}
|
|
41900
42013
|
try {
|
|
41901
|
-
const stats = await stat4(
|
|
42014
|
+
const stats = await stat4(path28.resolve(template));
|
|
41902
42015
|
return stats.isFile() ? template : void 0;
|
|
41903
42016
|
} catch {
|
|
41904
42017
|
return template;
|
|
@@ -41924,7 +42037,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
|
|
|
41924
42037
|
return "";
|
|
41925
42038
|
}
|
|
41926
42039
|
const buildList = (files) => files.map((absolutePath) => {
|
|
41927
|
-
const fileName =
|
|
42040
|
+
const fileName = path28.basename(absolutePath);
|
|
41928
42041
|
const fileUri = pathToFileUri3(absolutePath);
|
|
41929
42042
|
return `* [${fileName}](${fileUri})`;
|
|
41930
42043
|
});
|
|
@@ -41949,8 +42062,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
|
|
|
41949
42062
|
}
|
|
41950
42063
|
const unique = /* @__PURE__ */ new Map();
|
|
41951
42064
|
for (const attachment of attachments) {
|
|
41952
|
-
const absolutePath =
|
|
41953
|
-
const normalized = absolutePath.split(
|
|
42065
|
+
const absolutePath = path28.resolve(attachment);
|
|
42066
|
+
const normalized = absolutePath.split(path28.sep).join("/");
|
|
41954
42067
|
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
41955
42068
|
if (!unique.has(absolutePath)) {
|
|
41956
42069
|
unique.set(absolutePath, absolutePath);
|
|
@@ -41965,7 +42078,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
41965
42078
|
}
|
|
41966
42079
|
const unique = /* @__PURE__ */ new Map();
|
|
41967
42080
|
for (const attachment of attachments) {
|
|
41968
|
-
const absolutePath =
|
|
42081
|
+
const absolutePath = path28.resolve(attachment);
|
|
41969
42082
|
if (!unique.has(absolutePath)) {
|
|
41970
42083
|
unique.set(absolutePath, absolutePath);
|
|
41971
42084
|
}
|
|
@@ -41973,7 +42086,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
41973
42086
|
return Array.from(unique.values());
|
|
41974
42087
|
}
|
|
41975
42088
|
function pathToFileUri3(filePath) {
|
|
41976
|
-
const absolutePath =
|
|
42089
|
+
const absolutePath = path28.isAbsolute(filePath) ? filePath : path28.resolve(filePath);
|
|
41977
42090
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
41978
42091
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
41979
42092
|
return `file:///${normalizedPath}`;
|
|
@@ -41986,7 +42099,7 @@ function normalizeAttachments(attachments) {
|
|
|
41986
42099
|
}
|
|
41987
42100
|
const deduped = /* @__PURE__ */ new Set();
|
|
41988
42101
|
for (const attachment of attachments) {
|
|
41989
|
-
deduped.add(
|
|
42102
|
+
deduped.add(path28.resolve(attachment));
|
|
41990
42103
|
}
|
|
41991
42104
|
return Array.from(deduped);
|
|
41992
42105
|
}
|
|
@@ -41995,7 +42108,7 @@ function mergeAttachments(all) {
|
|
|
41995
42108
|
for (const list of all) {
|
|
41996
42109
|
if (!list) continue;
|
|
41997
42110
|
for (const inputFile of list) {
|
|
41998
|
-
deduped.add(
|
|
42111
|
+
deduped.add(path28.resolve(inputFile));
|
|
41999
42112
|
}
|
|
42000
42113
|
}
|
|
42001
42114
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -42075,7 +42188,7 @@ async function fileExists3(filePath) {
|
|
|
42075
42188
|
}
|
|
42076
42189
|
}
|
|
42077
42190
|
async function readTargetDefinitions(filePath) {
|
|
42078
|
-
const absolutePath =
|
|
42191
|
+
const absolutePath = path29.resolve(filePath);
|
|
42079
42192
|
if (!await fileExists3(absolutePath)) {
|
|
42080
42193
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
42081
42194
|
}
|
|
@@ -42096,11 +42209,11 @@ function listTargetNames(definitions) {
|
|
|
42096
42209
|
async function discoverProviders(registry2, baseDir) {
|
|
42097
42210
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
42098
42211
|
const candidateDirs = [];
|
|
42099
|
-
let dir =
|
|
42100
|
-
const root =
|
|
42212
|
+
let dir = path30.resolve(baseDir);
|
|
42213
|
+
const root = path30.parse(dir).root;
|
|
42101
42214
|
while (dir !== root) {
|
|
42102
|
-
candidateDirs.push(
|
|
42103
|
-
dir =
|
|
42215
|
+
candidateDirs.push(path30.join(dir, ".agentv", "providers"));
|
|
42216
|
+
dir = path30.dirname(dir);
|
|
42104
42217
|
}
|
|
42105
42218
|
let files = [];
|
|
42106
42219
|
for (const providersDir of candidateDirs) {
|
|
@@ -42116,7 +42229,7 @@ async function discoverProviders(registry2, baseDir) {
|
|
|
42116
42229
|
}
|
|
42117
42230
|
const discoveredKinds = [];
|
|
42118
42231
|
for (const filePath of files) {
|
|
42119
|
-
const basename =
|
|
42232
|
+
const basename = path30.basename(filePath);
|
|
42120
42233
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
42121
42234
|
if (registry2.has(kindName)) {
|
|
42122
42235
|
continue;
|
|
@@ -42312,16 +42425,16 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
42312
42425
|
});
|
|
42313
42426
|
}
|
|
42314
42427
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
42315
|
-
const { mkdir:
|
|
42428
|
+
const { mkdir: mkdir15, readFile: readFile13, rm: rm7, writeFile: writeFile10 } = await import("node:fs/promises");
|
|
42316
42429
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
42317
|
-
const
|
|
42430
|
+
const path42 = await import("node:path");
|
|
42318
42431
|
const { randomUUID: randomUUID8 } = await import("node:crypto");
|
|
42319
|
-
const dir =
|
|
42320
|
-
await
|
|
42321
|
-
const stdinPath =
|
|
42322
|
-
const stdoutPath =
|
|
42323
|
-
const stderrPath =
|
|
42324
|
-
await
|
|
42432
|
+
const dir = path42.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
|
|
42433
|
+
await mkdir15(dir, { recursive: true });
|
|
42434
|
+
const stdinPath = path42.join(dir, "stdin.txt");
|
|
42435
|
+
const stdoutPath = path42.join(dir, "stdout.txt");
|
|
42436
|
+
const stderrPath = path42.join(dir, "stderr.txt");
|
|
42437
|
+
await writeFile10(stdinPath, stdinPayload, "utf8");
|
|
42325
42438
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
42326
42439
|
const { spawn: spawn4 } = await import("node:child_process");
|
|
42327
42440
|
try {
|
|
@@ -42350,11 +42463,11 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
42350
42463
|
resolve2(code ?? 0);
|
|
42351
42464
|
});
|
|
42352
42465
|
});
|
|
42353
|
-
const stdout = (await
|
|
42354
|
-
const stderr = (await
|
|
42466
|
+
const stdout = (await readFile13(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
42467
|
+
const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
42355
42468
|
return { stdout, stderr, exitCode };
|
|
42356
42469
|
} finally {
|
|
42357
|
-
await
|
|
42470
|
+
await rm7(dir, { recursive: true, force: true });
|
|
42358
42471
|
}
|
|
42359
42472
|
}
|
|
42360
42473
|
var DEFAULT_MAX_CALLS = 50;
|
|
@@ -42664,7 +42777,7 @@ var CodeEvaluator = class {
|
|
|
42664
42777
|
outputPath,
|
|
42665
42778
|
guidelineFiles: context.evalCase.guideline_paths,
|
|
42666
42779
|
inputFiles: context.evalCase.file_paths.filter(
|
|
42667
|
-
(
|
|
42780
|
+
(path42) => !context.evalCase.guideline_paths.includes(path42)
|
|
42668
42781
|
),
|
|
42669
42782
|
input: context.evalCase.input,
|
|
42670
42783
|
trace: context.trace ?? null,
|
|
@@ -42907,6 +43020,8 @@ ${context.fileChanges}`;
|
|
|
42907
43020
|
};
|
|
42908
43021
|
} catch (e) {
|
|
42909
43022
|
const message = e instanceof Error ? e.message : String(e);
|
|
43023
|
+
const evalName = context.evaluator?.name ?? "llm-judge";
|
|
43024
|
+
console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
|
|
42910
43025
|
return {
|
|
42911
43026
|
score: 0,
|
|
42912
43027
|
verdict: "skip",
|
|
@@ -42935,24 +43050,39 @@ ${context.fileChanges}`;
|
|
|
42935
43050
|
systemPrompt,
|
|
42936
43051
|
target: judgeProvider.targetName
|
|
42937
43052
|
};
|
|
42938
|
-
|
|
42939
|
-
|
|
42940
|
-
|
|
42941
|
-
|
|
42942
|
-
|
|
42943
|
-
|
|
42944
|
-
|
|
42945
|
-
|
|
42946
|
-
|
|
42947
|
-
|
|
42948
|
-
|
|
42949
|
-
|
|
42950
|
-
|
|
42951
|
-
|
|
42952
|
-
|
|
42953
|
-
|
|
42954
|
-
|
|
42955
|
-
|
|
43053
|
+
try {
|
|
43054
|
+
const { data, tokenUsage } = await this.runWithRetry({
|
|
43055
|
+
context,
|
|
43056
|
+
judgeProvider,
|
|
43057
|
+
systemPrompt,
|
|
43058
|
+
userPrompt: prompt,
|
|
43059
|
+
schema: rubricEvaluationSchema
|
|
43060
|
+
});
|
|
43061
|
+
const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
|
|
43062
|
+
return {
|
|
43063
|
+
score,
|
|
43064
|
+
verdict,
|
|
43065
|
+
hits,
|
|
43066
|
+
misses,
|
|
43067
|
+
expectedAspectCount: rubrics.length,
|
|
43068
|
+
reasoning: data.overall_reasoning,
|
|
43069
|
+
evaluatorRawRequest,
|
|
43070
|
+
tokenUsage
|
|
43071
|
+
};
|
|
43072
|
+
} catch (e) {
|
|
43073
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
43074
|
+
const evalName = context.evaluator?.name ?? "llm-judge";
|
|
43075
|
+
console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
|
|
43076
|
+
return {
|
|
43077
|
+
score: 0,
|
|
43078
|
+
verdict: "skip",
|
|
43079
|
+
hits: [],
|
|
43080
|
+
misses: [`Judge parse failure after 3 attempts: ${message}`],
|
|
43081
|
+
expectedAspectCount: rubrics.length,
|
|
43082
|
+
reasoning: `Judge parse failure after 3 attempts: ${message}`,
|
|
43083
|
+
evaluatorRawRequest
|
|
43084
|
+
};
|
|
43085
|
+
}
|
|
42956
43086
|
}
|
|
42957
43087
|
/**
|
|
42958
43088
|
* Evaluate using score-range rubrics (analytic rubric scoring).
|
|
@@ -42966,25 +43096,40 @@ ${context.fileChanges}`;
|
|
|
42966
43096
|
systemPrompt,
|
|
42967
43097
|
target: judgeProvider.targetName
|
|
42968
43098
|
};
|
|
42969
|
-
|
|
42970
|
-
|
|
42971
|
-
|
|
42972
|
-
|
|
42973
|
-
|
|
42974
|
-
|
|
42975
|
-
|
|
42976
|
-
|
|
42977
|
-
|
|
42978
|
-
|
|
42979
|
-
|
|
42980
|
-
|
|
42981
|
-
|
|
42982
|
-
|
|
42983
|
-
|
|
42984
|
-
|
|
42985
|
-
|
|
42986
|
-
|
|
42987
|
-
|
|
43099
|
+
try {
|
|
43100
|
+
const { data, tokenUsage } = await this.runWithRetry({
|
|
43101
|
+
context,
|
|
43102
|
+
judgeProvider,
|
|
43103
|
+
systemPrompt,
|
|
43104
|
+
userPrompt: prompt,
|
|
43105
|
+
schema: scoreRangeEvaluationSchema
|
|
43106
|
+
});
|
|
43107
|
+
const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
|
|
43108
|
+
return {
|
|
43109
|
+
score,
|
|
43110
|
+
verdict,
|
|
43111
|
+
hits,
|
|
43112
|
+
misses,
|
|
43113
|
+
expectedAspectCount: rubrics.length,
|
|
43114
|
+
reasoning: data.overall_reasoning,
|
|
43115
|
+
evaluatorRawRequest,
|
|
43116
|
+
details,
|
|
43117
|
+
tokenUsage
|
|
43118
|
+
};
|
|
43119
|
+
} catch (e) {
|
|
43120
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
43121
|
+
const evalName = context.evaluator?.name ?? "llm-judge";
|
|
43122
|
+
console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
|
|
43123
|
+
return {
|
|
43124
|
+
score: 0,
|
|
43125
|
+
verdict: "skip",
|
|
43126
|
+
hits: [],
|
|
43127
|
+
misses: [`Judge parse failure after 3 attempts: ${message}`],
|
|
43128
|
+
expectedAspectCount: rubrics.length,
|
|
43129
|
+
reasoning: `Judge parse failure after 3 attempts: ${message}`,
|
|
43130
|
+
evaluatorRawRequest
|
|
43131
|
+
};
|
|
43132
|
+
}
|
|
42988
43133
|
}
|
|
42989
43134
|
/**
|
|
42990
43135
|
* Build prompt for score-range rubric evaluation.
|
|
@@ -43268,19 +43413,13 @@ var CompositeEvaluator = class {
|
|
|
43268
43413
|
runWeightedAverage(results, weights) {
|
|
43269
43414
|
let totalWeight = 0;
|
|
43270
43415
|
let weightedSum = 0;
|
|
43416
|
+
let evaluatedCount = 0;
|
|
43271
43417
|
const allHits = [];
|
|
43272
43418
|
const allMisses = [];
|
|
43273
43419
|
const reasoningParts = [];
|
|
43274
43420
|
const scores = [];
|
|
43275
43421
|
for (const member of results) {
|
|
43276
43422
|
const weight = weights?.[member.id] ?? 1;
|
|
43277
|
-
totalWeight += weight;
|
|
43278
|
-
weightedSum += member.result.score * weight;
|
|
43279
|
-
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
43280
|
-
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
43281
|
-
if (member.result.reasoning) {
|
|
43282
|
-
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
43283
|
-
}
|
|
43284
43423
|
scores.push({
|
|
43285
43424
|
name: member.id,
|
|
43286
43425
|
type: member.type,
|
|
@@ -43295,6 +43434,32 @@ var CompositeEvaluator = class {
|
|
|
43295
43434
|
details: member.result.details,
|
|
43296
43435
|
tokenUsage: member.result.tokenUsage
|
|
43297
43436
|
});
|
|
43437
|
+
if (member.result.verdict === "skip") {
|
|
43438
|
+
continue;
|
|
43439
|
+
}
|
|
43440
|
+
evaluatedCount++;
|
|
43441
|
+
totalWeight += weight;
|
|
43442
|
+
weightedSum += member.result.score * weight;
|
|
43443
|
+
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
43444
|
+
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
43445
|
+
if (member.result.reasoning) {
|
|
43446
|
+
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
43447
|
+
}
|
|
43448
|
+
}
|
|
43449
|
+
if (evaluatedCount === 0 && results.length > 0) {
|
|
43450
|
+
return {
|
|
43451
|
+
score: 0,
|
|
43452
|
+
verdict: "skip",
|
|
43453
|
+
hits: [],
|
|
43454
|
+
misses: [],
|
|
43455
|
+
expectedAspectCount: 1,
|
|
43456
|
+
reasoning: "All evaluators skipped (infrastructure failure)",
|
|
43457
|
+
evaluatorRawRequest: {
|
|
43458
|
+
aggregator: "weighted_average",
|
|
43459
|
+
...weights ? { weights } : {}
|
|
43460
|
+
},
|
|
43461
|
+
scores
|
|
43462
|
+
};
|
|
43298
43463
|
}
|
|
43299
43464
|
const finalScore = totalWeight > 0 ? weightedSum / totalWeight : 0;
|
|
43300
43465
|
return {
|
|
@@ -43318,19 +43483,8 @@ var CompositeEvaluator = class {
|
|
|
43318
43483
|
const reasoningParts = [];
|
|
43319
43484
|
let passingCount = 0;
|
|
43320
43485
|
let borderlineCount = 0;
|
|
43486
|
+
let evaluatedCount = 0;
|
|
43321
43487
|
for (const member of results) {
|
|
43322
|
-
const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
|
|
43323
|
-
if (isPassing) {
|
|
43324
|
-
passingCount++;
|
|
43325
|
-
if (member.result.verdict === "borderline") {
|
|
43326
|
-
borderlineCount++;
|
|
43327
|
-
}
|
|
43328
|
-
}
|
|
43329
|
-
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
43330
|
-
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
43331
|
-
if (member.result.reasoning) {
|
|
43332
|
-
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
43333
|
-
}
|
|
43334
43488
|
scores.push({
|
|
43335
43489
|
name: member.id,
|
|
43336
43490
|
type: member.type,
|
|
@@ -43344,8 +43498,39 @@ var CompositeEvaluator = class {
|
|
|
43344
43498
|
details: member.result.details,
|
|
43345
43499
|
tokenUsage: member.result.tokenUsage
|
|
43346
43500
|
});
|
|
43501
|
+
if (member.result.verdict === "skip") {
|
|
43502
|
+
continue;
|
|
43503
|
+
}
|
|
43504
|
+
evaluatedCount++;
|
|
43505
|
+
const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
|
|
43506
|
+
if (isPassing) {
|
|
43507
|
+
passingCount++;
|
|
43508
|
+
if (member.result.verdict === "borderline") {
|
|
43509
|
+
borderlineCount++;
|
|
43510
|
+
}
|
|
43511
|
+
}
|
|
43512
|
+
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
43513
|
+
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
43514
|
+
if (member.result.reasoning) {
|
|
43515
|
+
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
43516
|
+
}
|
|
43517
|
+
}
|
|
43518
|
+
if (evaluatedCount === 0 && results.length > 0) {
|
|
43519
|
+
return {
|
|
43520
|
+
score: 0,
|
|
43521
|
+
verdict: "skip",
|
|
43522
|
+
hits: [],
|
|
43523
|
+
misses: [],
|
|
43524
|
+
expectedAspectCount: 1,
|
|
43525
|
+
reasoning: "All evaluators skipped (infrastructure failure)",
|
|
43526
|
+
evaluatorRawRequest: {
|
|
43527
|
+
aggregator: "threshold",
|
|
43528
|
+
threshold
|
|
43529
|
+
},
|
|
43530
|
+
scores
|
|
43531
|
+
};
|
|
43347
43532
|
}
|
|
43348
|
-
const totalCount =
|
|
43533
|
+
const totalCount = evaluatedCount;
|
|
43349
43534
|
const score = totalCount > 0 ? passingCount / totalCount : 0;
|
|
43350
43535
|
const pass = score >= threshold;
|
|
43351
43536
|
if (pass && borderlineCount > 0) {
|
|
@@ -43847,115 +44032,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
43847
44032
|
* Evaluate a single field against the expected value.
|
|
43848
44033
|
*/
|
|
43849
44034
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
43850
|
-
const { path:
|
|
43851
|
-
const candidateValue = resolvePath(candidateData,
|
|
43852
|
-
const expectedValue = resolvePath(expectedData,
|
|
44035
|
+
const { path: path42, match, required: required2 = true, weight = 1 } = fieldConfig;
|
|
44036
|
+
const candidateValue = resolvePath(candidateData, path42);
|
|
44037
|
+
const expectedValue = resolvePath(expectedData, path42);
|
|
43853
44038
|
if (expectedValue === void 0) {
|
|
43854
44039
|
return {
|
|
43855
|
-
path:
|
|
44040
|
+
path: path42,
|
|
43856
44041
|
score: 1,
|
|
43857
44042
|
// No expected value means no comparison needed
|
|
43858
44043
|
weight,
|
|
43859
44044
|
hit: true,
|
|
43860
|
-
message: `${
|
|
44045
|
+
message: `${path42}: no expected value`
|
|
43861
44046
|
};
|
|
43862
44047
|
}
|
|
43863
44048
|
if (candidateValue === void 0) {
|
|
43864
44049
|
if (required2) {
|
|
43865
44050
|
return {
|
|
43866
|
-
path:
|
|
44051
|
+
path: path42,
|
|
43867
44052
|
score: 0,
|
|
43868
44053
|
weight,
|
|
43869
44054
|
hit: false,
|
|
43870
|
-
message: `${
|
|
44055
|
+
message: `${path42} (required, missing)`
|
|
43871
44056
|
};
|
|
43872
44057
|
}
|
|
43873
44058
|
return {
|
|
43874
|
-
path:
|
|
44059
|
+
path: path42,
|
|
43875
44060
|
score: 1,
|
|
43876
44061
|
// Don't penalize missing optional fields
|
|
43877
44062
|
weight: 0,
|
|
43878
44063
|
// Zero weight means it won't affect the score
|
|
43879
44064
|
hit: true,
|
|
43880
|
-
message: `${
|
|
44065
|
+
message: `${path42}: optional field missing`
|
|
43881
44066
|
};
|
|
43882
44067
|
}
|
|
43883
44068
|
switch (match) {
|
|
43884
44069
|
case "exact":
|
|
43885
|
-
return this.compareExact(
|
|
44070
|
+
return this.compareExact(path42, candidateValue, expectedValue, weight);
|
|
43886
44071
|
case "numeric_tolerance":
|
|
43887
44072
|
return this.compareNumericTolerance(
|
|
43888
|
-
|
|
44073
|
+
path42,
|
|
43889
44074
|
candidateValue,
|
|
43890
44075
|
expectedValue,
|
|
43891
44076
|
fieldConfig,
|
|
43892
44077
|
weight
|
|
43893
44078
|
);
|
|
43894
44079
|
case "date":
|
|
43895
|
-
return this.compareDate(
|
|
44080
|
+
return this.compareDate(path42, candidateValue, expectedValue, fieldConfig, weight);
|
|
43896
44081
|
default:
|
|
43897
44082
|
return {
|
|
43898
|
-
path:
|
|
44083
|
+
path: path42,
|
|
43899
44084
|
score: 0,
|
|
43900
44085
|
weight,
|
|
43901
44086
|
hit: false,
|
|
43902
|
-
message: `${
|
|
44087
|
+
message: `${path42}: unknown match type "${match}"`
|
|
43903
44088
|
};
|
|
43904
44089
|
}
|
|
43905
44090
|
}
|
|
43906
44091
|
/**
|
|
43907
44092
|
* Exact equality comparison.
|
|
43908
44093
|
*/
|
|
43909
|
-
compareExact(
|
|
44094
|
+
compareExact(path42, candidateValue, expectedValue, weight) {
|
|
43910
44095
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
43911
44096
|
return {
|
|
43912
|
-
path:
|
|
44097
|
+
path: path42,
|
|
43913
44098
|
score: 1,
|
|
43914
44099
|
weight,
|
|
43915
44100
|
hit: true,
|
|
43916
|
-
message:
|
|
44101
|
+
message: path42
|
|
43917
44102
|
};
|
|
43918
44103
|
}
|
|
43919
44104
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
43920
44105
|
return {
|
|
43921
|
-
path:
|
|
44106
|
+
path: path42,
|
|
43922
44107
|
score: 0,
|
|
43923
44108
|
weight,
|
|
43924
44109
|
hit: false,
|
|
43925
|
-
message: `${
|
|
44110
|
+
message: `${path42} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
43926
44111
|
};
|
|
43927
44112
|
}
|
|
43928
44113
|
return {
|
|
43929
|
-
path:
|
|
44114
|
+
path: path42,
|
|
43930
44115
|
score: 0,
|
|
43931
44116
|
weight,
|
|
43932
44117
|
hit: false,
|
|
43933
|
-
message: `${
|
|
44118
|
+
message: `${path42} (value mismatch)`
|
|
43934
44119
|
};
|
|
43935
44120
|
}
|
|
43936
44121
|
/**
|
|
43937
44122
|
* Numeric comparison with absolute or relative tolerance.
|
|
43938
44123
|
*/
|
|
43939
|
-
compareNumericTolerance(
|
|
44124
|
+
compareNumericTolerance(path42, candidateValue, expectedValue, fieldConfig, weight) {
|
|
43940
44125
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
43941
44126
|
const candidateNum = toNumber2(candidateValue);
|
|
43942
44127
|
const expectedNum = toNumber2(expectedValue);
|
|
43943
44128
|
if (candidateNum === null || expectedNum === null) {
|
|
43944
44129
|
return {
|
|
43945
|
-
path:
|
|
44130
|
+
path: path42,
|
|
43946
44131
|
score: 0,
|
|
43947
44132
|
weight,
|
|
43948
44133
|
hit: false,
|
|
43949
|
-
message: `${
|
|
44134
|
+
message: `${path42} (non-numeric value)`
|
|
43950
44135
|
};
|
|
43951
44136
|
}
|
|
43952
44137
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
43953
44138
|
return {
|
|
43954
|
-
path:
|
|
44139
|
+
path: path42,
|
|
43955
44140
|
score: 0,
|
|
43956
44141
|
weight,
|
|
43957
44142
|
hit: false,
|
|
43958
|
-
message: `${
|
|
44143
|
+
message: `${path42} (invalid numeric value)`
|
|
43959
44144
|
};
|
|
43960
44145
|
}
|
|
43961
44146
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -43968,61 +44153,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
43968
44153
|
}
|
|
43969
44154
|
if (withinTolerance) {
|
|
43970
44155
|
return {
|
|
43971
|
-
path:
|
|
44156
|
+
path: path42,
|
|
43972
44157
|
score: 1,
|
|
43973
44158
|
weight,
|
|
43974
44159
|
hit: true,
|
|
43975
|
-
message: `${
|
|
44160
|
+
message: `${path42} (within tolerance: diff=${diff.toFixed(2)})`
|
|
43976
44161
|
};
|
|
43977
44162
|
}
|
|
43978
44163
|
return {
|
|
43979
|
-
path:
|
|
44164
|
+
path: path42,
|
|
43980
44165
|
score: 0,
|
|
43981
44166
|
weight,
|
|
43982
44167
|
hit: false,
|
|
43983
|
-
message: `${
|
|
44168
|
+
message: `${path42} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
43984
44169
|
};
|
|
43985
44170
|
}
|
|
43986
44171
|
/**
|
|
43987
44172
|
* Date comparison with format normalization.
|
|
43988
44173
|
*/
|
|
43989
|
-
compareDate(
|
|
44174
|
+
compareDate(path42, candidateValue, expectedValue, fieldConfig, weight) {
|
|
43990
44175
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
43991
44176
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
43992
44177
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
43993
44178
|
if (candidateDate === null) {
|
|
43994
44179
|
return {
|
|
43995
|
-
path:
|
|
44180
|
+
path: path42,
|
|
43996
44181
|
score: 0,
|
|
43997
44182
|
weight,
|
|
43998
44183
|
hit: false,
|
|
43999
|
-
message: `${
|
|
44184
|
+
message: `${path42} (unparseable candidate date)`
|
|
44000
44185
|
};
|
|
44001
44186
|
}
|
|
44002
44187
|
if (expectedDate === null) {
|
|
44003
44188
|
return {
|
|
44004
|
-
path:
|
|
44189
|
+
path: path42,
|
|
44005
44190
|
score: 0,
|
|
44006
44191
|
weight,
|
|
44007
44192
|
hit: false,
|
|
44008
|
-
message: `${
|
|
44193
|
+
message: `${path42} (unparseable expected date)`
|
|
44009
44194
|
};
|
|
44010
44195
|
}
|
|
44011
44196
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
44012
44197
|
return {
|
|
44013
|
-
path:
|
|
44198
|
+
path: path42,
|
|
44014
44199
|
score: 1,
|
|
44015
44200
|
weight,
|
|
44016
44201
|
hit: true,
|
|
44017
|
-
message:
|
|
44202
|
+
message: path42
|
|
44018
44203
|
};
|
|
44019
44204
|
}
|
|
44020
44205
|
return {
|
|
44021
|
-
path:
|
|
44206
|
+
path: path42,
|
|
44022
44207
|
score: 0,
|
|
44023
44208
|
weight,
|
|
44024
44209
|
hit: false,
|
|
44025
|
-
message: `${
|
|
44210
|
+
message: `${path42} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
44026
44211
|
};
|
|
44027
44212
|
}
|
|
44028
44213
|
/**
|
|
@@ -44063,11 +44248,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
44063
44248
|
};
|
|
44064
44249
|
}
|
|
44065
44250
|
};
|
|
44066
|
-
function resolvePath(obj,
|
|
44067
|
-
if (!
|
|
44251
|
+
function resolvePath(obj, path42) {
|
|
44252
|
+
if (!path42 || !obj) {
|
|
44068
44253
|
return void 0;
|
|
44069
44254
|
}
|
|
44070
|
-
const parts =
|
|
44255
|
+
const parts = path42.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
44071
44256
|
let current = obj;
|
|
44072
44257
|
for (const part of parts) {
|
|
44073
44258
|
if (current === null || current === void 0) {
|
|
@@ -44540,8 +44725,8 @@ ${outputSchema2}`;
|
|
|
44540
44725
|
}
|
|
44541
44726
|
};
|
|
44542
44727
|
function resolveSandboxed(basePath, relativePath) {
|
|
44543
|
-
const resolved =
|
|
44544
|
-
if (!resolved.startsWith(basePath +
|
|
44728
|
+
const resolved = path31.resolve(basePath, relativePath);
|
|
44729
|
+
if (!resolved.startsWith(basePath + path31.sep) && resolved !== basePath) {
|
|
44545
44730
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
44546
44731
|
}
|
|
44547
44732
|
return resolved;
|
|
@@ -44624,11 +44809,11 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
44624
44809
|
for (const entry of entries) {
|
|
44625
44810
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
44626
44811
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
44627
|
-
const fullPath =
|
|
44812
|
+
const fullPath = path31.join(dirPath, entry.name);
|
|
44628
44813
|
if (entry.isDirectory()) {
|
|
44629
44814
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
44630
44815
|
} else if (entry.isFile()) {
|
|
44631
|
-
const ext =
|
|
44816
|
+
const ext = path31.extname(entry.name).toLowerCase();
|
|
44632
44817
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
44633
44818
|
try {
|
|
44634
44819
|
const stat8 = await fs2.stat(fullPath);
|
|
@@ -44640,7 +44825,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
44640
44825
|
regex.lastIndex = 0;
|
|
44641
44826
|
if (regex.test(lines[i])) {
|
|
44642
44827
|
matches.push({
|
|
44643
|
-
file:
|
|
44828
|
+
file: path31.relative(workspacePath, fullPath),
|
|
44644
44829
|
line: i + 1,
|
|
44645
44830
|
text: lines[i].substring(0, 200)
|
|
44646
44831
|
});
|
|
@@ -44871,8 +45056,8 @@ var TokenUsageEvaluator = class {
|
|
|
44871
45056
|
};
|
|
44872
45057
|
}
|
|
44873
45058
|
};
|
|
44874
|
-
function getNestedValue(obj,
|
|
44875
|
-
const parts =
|
|
45059
|
+
function getNestedValue(obj, path42) {
|
|
45060
|
+
const parts = path42.split(".");
|
|
44876
45061
|
let current = obj;
|
|
44877
45062
|
for (const part of parts) {
|
|
44878
45063
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -45655,7 +45840,7 @@ async function executePromptTemplate(script, context, config2, timeoutMs) {
|
|
|
45655
45840
|
};
|
|
45656
45841
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
45657
45842
|
const scriptPath = script[script.length - 1];
|
|
45658
|
-
const cwd =
|
|
45843
|
+
const cwd = path322.dirname(scriptPath);
|
|
45659
45844
|
try {
|
|
45660
45845
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
45661
45846
|
const prompt = stdout.trim();
|
|
@@ -45947,11 +46132,11 @@ function createBuiltinRegistry() {
|
|
|
45947
46132
|
async function discoverAssertions(registry2, baseDir) {
|
|
45948
46133
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
45949
46134
|
const candidateDirs = [];
|
|
45950
|
-
let dir =
|
|
45951
|
-
const root =
|
|
46135
|
+
let dir = path33.resolve(baseDir);
|
|
46136
|
+
const root = path33.parse(dir).root;
|
|
45952
46137
|
while (dir !== root) {
|
|
45953
|
-
candidateDirs.push(
|
|
45954
|
-
dir =
|
|
46138
|
+
candidateDirs.push(path33.join(dir, ".agentv", "assertions"));
|
|
46139
|
+
dir = path33.dirname(dir);
|
|
45955
46140
|
}
|
|
45956
46141
|
let files = [];
|
|
45957
46142
|
for (const assertionsDir of candidateDirs) {
|
|
@@ -45967,7 +46152,7 @@ async function discoverAssertions(registry2, baseDir) {
|
|
|
45967
46152
|
}
|
|
45968
46153
|
const discoveredTypes = [];
|
|
45969
46154
|
for (const filePath of files) {
|
|
45970
|
-
const basename =
|
|
46155
|
+
const basename = path33.basename(filePath);
|
|
45971
46156
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
45972
46157
|
if (registry2.has(typeName)) {
|
|
45973
46158
|
continue;
|
|
@@ -46153,10 +46338,10 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
46153
46338
|
}
|
|
46154
46339
|
for (const entry of entries) {
|
|
46155
46340
|
if (entry === ".git" || entry === "node_modules") continue;
|
|
46156
|
-
const childPath =
|
|
46341
|
+
const childPath = path34.join(workspacePath, entry);
|
|
46157
46342
|
try {
|
|
46158
46343
|
if (!statSync(childPath).isDirectory()) continue;
|
|
46159
|
-
if (!statSync(
|
|
46344
|
+
if (!statSync(path34.join(childPath, ".git")).isDirectory()) continue;
|
|
46160
46345
|
} catch {
|
|
46161
46346
|
continue;
|
|
46162
46347
|
}
|
|
@@ -46164,7 +46349,6 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
46164
46349
|
await execAsync4("git add -A", childOpts);
|
|
46165
46350
|
}
|
|
46166
46351
|
}
|
|
46167
|
-
var DEFAULT_WORKSPACE_ROOT = path34.join(os3.homedir(), ".agentv", "workspaces");
|
|
46168
46352
|
var TemplateNotFoundError = class extends Error {
|
|
46169
46353
|
constructor(templatePath) {
|
|
46170
46354
|
super(`Workspace template not found: ${templatePath}`);
|
|
@@ -46193,15 +46377,15 @@ async function isDirectory(filePath) {
|
|
|
46193
46377
|
}
|
|
46194
46378
|
}
|
|
46195
46379
|
function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
46196
|
-
const root = workspaceRoot ??
|
|
46197
|
-
return
|
|
46380
|
+
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
46381
|
+
return path35.join(root, evalRunId, caseId);
|
|
46198
46382
|
}
|
|
46199
46383
|
async function copyDirectoryRecursive(src, dest) {
|
|
46200
46384
|
await mkdir10(dest, { recursive: true });
|
|
46201
46385
|
const entries = await readdir3(src, { withFileTypes: true });
|
|
46202
46386
|
for (const entry of entries) {
|
|
46203
|
-
const srcPath =
|
|
46204
|
-
const destPath =
|
|
46387
|
+
const srcPath = path35.join(src, entry.name);
|
|
46388
|
+
const destPath = path35.join(dest, entry.name);
|
|
46205
46389
|
if (entry.name === ".git") {
|
|
46206
46390
|
continue;
|
|
46207
46391
|
}
|
|
@@ -46213,7 +46397,7 @@ async function copyDirectoryRecursive(src, dest) {
|
|
|
46213
46397
|
}
|
|
46214
46398
|
}
|
|
46215
46399
|
async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
|
|
46216
|
-
const resolvedTemplatePath =
|
|
46400
|
+
const resolvedTemplatePath = path35.resolve(templatePath);
|
|
46217
46401
|
if (!await fileExists(resolvedTemplatePath)) {
|
|
46218
46402
|
throw new TemplateNotFoundError(resolvedTemplatePath);
|
|
46219
46403
|
}
|
|
@@ -46261,17 +46445,274 @@ async function cleanupWorkspace(workspacePath) {
|
|
|
46261
46445
|
}
|
|
46262
46446
|
}
|
|
46263
46447
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
46264
|
-
const root = workspaceRoot ??
|
|
46265
|
-
const evalDir =
|
|
46448
|
+
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
46449
|
+
const evalDir = path35.join(root, evalRunId);
|
|
46266
46450
|
if (await fileExists(evalDir)) {
|
|
46267
46451
|
await rm4(evalDir, { recursive: true, force: true });
|
|
46268
46452
|
}
|
|
46269
46453
|
}
|
|
46270
46454
|
var execFileAsync = promisify5(execFile);
|
|
46271
|
-
|
|
46455
|
+
function gitEnv() {
|
|
46456
|
+
const env = { ...process.env };
|
|
46457
|
+
for (const key of Object.keys(env)) {
|
|
46458
|
+
if (key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND") {
|
|
46459
|
+
delete env[key];
|
|
46460
|
+
}
|
|
46461
|
+
}
|
|
46462
|
+
return {
|
|
46463
|
+
...env,
|
|
46464
|
+
GIT_TERMINAL_PROMPT: "0",
|
|
46465
|
+
GIT_ASKPASS: "",
|
|
46466
|
+
GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
|
|
46467
|
+
};
|
|
46468
|
+
}
|
|
46469
|
+
async function git(args, opts) {
|
|
46470
|
+
const { stdout } = await execFileAsync("git", args, {
|
|
46471
|
+
cwd: opts?.cwd,
|
|
46472
|
+
timeout: opts?.timeout ?? 3e5,
|
|
46473
|
+
env: gitEnv(),
|
|
46474
|
+
maxBuffer: 50 * 1024 * 1024
|
|
46475
|
+
});
|
|
46476
|
+
return stdout.trim();
|
|
46477
|
+
}
|
|
46478
|
+
function normalizeRepoForFingerprint(repo) {
|
|
46479
|
+
const source = repo.source.type === "git" ? { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") } : { type: "local", path: repo.source.path };
|
|
46480
|
+
const result = {
|
|
46481
|
+
path: repo.path,
|
|
46482
|
+
source,
|
|
46483
|
+
ref: repo.checkout?.ref ?? "HEAD"
|
|
46484
|
+
};
|
|
46485
|
+
if (repo.clone?.depth !== void 0) {
|
|
46486
|
+
result.depth = repo.clone.depth;
|
|
46487
|
+
}
|
|
46488
|
+
if (repo.clone?.filter !== void 0) {
|
|
46489
|
+
result.filter = repo.clone.filter;
|
|
46490
|
+
}
|
|
46491
|
+
if (repo.clone?.sparse?.length) {
|
|
46492
|
+
result.sparse = [...repo.clone.sparse].sort();
|
|
46493
|
+
}
|
|
46494
|
+
return result;
|
|
46495
|
+
}
|
|
46496
|
+
function computeWorkspaceFingerprint(templatePath, repos) {
|
|
46497
|
+
const canonical = {
|
|
46498
|
+
templatePath: templatePath ?? null,
|
|
46499
|
+
repos: [...repos].sort((a, b) => a.path.localeCompare(b.path)).map(normalizeRepoForFingerprint)
|
|
46500
|
+
};
|
|
46501
|
+
return createHash("sha256").update(JSON.stringify(canonical)).digest("hex");
|
|
46502
|
+
}
|
|
46503
|
+
async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
46504
|
+
await mkdir11(dest, { recursive: true });
|
|
46505
|
+
const entries = await readdir4(src, { withFileTypes: true });
|
|
46506
|
+
for (const entry of entries) {
|
|
46507
|
+
const srcPath = path36.join(src, entry.name);
|
|
46508
|
+
const destPath = path36.join(dest, entry.name);
|
|
46509
|
+
if (entry.name === ".git") {
|
|
46510
|
+
continue;
|
|
46511
|
+
}
|
|
46512
|
+
if (entry.isDirectory()) {
|
|
46513
|
+
if (skipDirs?.has(entry.name)) {
|
|
46514
|
+
continue;
|
|
46515
|
+
}
|
|
46516
|
+
await copyDirectoryRecursive2(srcPath, destPath, skipDirs);
|
|
46517
|
+
} else {
|
|
46518
|
+
await cp2(srcPath, destPath, { preserveTimestamps: true, force: true });
|
|
46519
|
+
}
|
|
46520
|
+
}
|
|
46521
|
+
}
|
|
46522
|
+
var WorkspacePoolManager = class {
|
|
46523
|
+
poolRoot;
|
|
46524
|
+
constructor(poolRoot) {
|
|
46525
|
+
this.poolRoot = poolRoot ?? getWorkspacePoolRoot();
|
|
46526
|
+
}
|
|
46527
|
+
/**
|
|
46528
|
+
* Acquire a workspace slot from the pool.
|
|
46529
|
+
*
|
|
46530
|
+
* 1. Compute fingerprint from template + repos
|
|
46531
|
+
* 2. Check drift (compare stored metadata.json fingerprint vs computed)
|
|
46532
|
+
* 3. If drift: warn, remove all slots, rematerialize
|
|
46533
|
+
* 4. Acquire a slot (try-lock slot-0, slot-1, ..., up to maxSlots)
|
|
46534
|
+
* 5. If slot exists: reset repos, re-copy template files (skip repo directories)
|
|
46535
|
+
* 6. If new slot: copy template, materialize all repos, write metadata.json
|
|
46536
|
+
* 7. Return the slot (with path, index, isExisting)
|
|
46537
|
+
*/
|
|
46538
|
+
async acquireWorkspace(options) {
|
|
46539
|
+
const { templatePath, repos, maxSlots, repoManager } = options;
|
|
46540
|
+
const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
|
|
46541
|
+
const poolDir = path36.join(this.poolRoot, fingerprint);
|
|
46542
|
+
await mkdir11(poolDir, { recursive: true });
|
|
46543
|
+
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
46544
|
+
if (drifted) {
|
|
46545
|
+
console.warn(
|
|
46546
|
+
`[workspace-pool] Drift detected for fingerprint ${fingerprint.slice(0, 12)}... Removing stale slots.`
|
|
46547
|
+
);
|
|
46548
|
+
await this.removeAllSlots(poolDir);
|
|
46549
|
+
}
|
|
46550
|
+
for (let i = 0; i < maxSlots; i++) {
|
|
46551
|
+
const slotPath = path36.join(poolDir, `slot-${i}`);
|
|
46552
|
+
const lockPath = `${slotPath}.lock`;
|
|
46553
|
+
const locked = await this.tryLock(lockPath);
|
|
46554
|
+
if (!locked) {
|
|
46555
|
+
continue;
|
|
46556
|
+
}
|
|
46557
|
+
const slotExists = existsSync2(slotPath);
|
|
46558
|
+
if (slotExists) {
|
|
46559
|
+
await this.resetSlot(slotPath, templatePath, repos);
|
|
46560
|
+
return {
|
|
46561
|
+
index: i,
|
|
46562
|
+
path: slotPath,
|
|
46563
|
+
isExisting: true,
|
|
46564
|
+
lockPath,
|
|
46565
|
+
fingerprint,
|
|
46566
|
+
poolDir
|
|
46567
|
+
};
|
|
46568
|
+
}
|
|
46569
|
+
await mkdir11(slotPath, { recursive: true });
|
|
46570
|
+
if (templatePath) {
|
|
46571
|
+
await copyDirectoryRecursive2(templatePath, slotPath);
|
|
46572
|
+
}
|
|
46573
|
+
if (repos.length > 0) {
|
|
46574
|
+
await repoManager.materializeAll(repos, slotPath);
|
|
46575
|
+
}
|
|
46576
|
+
await this.writeMetadata(poolDir, fingerprint, templatePath ?? null, repos);
|
|
46577
|
+
return {
|
|
46578
|
+
index: i,
|
|
46579
|
+
path: slotPath,
|
|
46580
|
+
isExisting: false,
|
|
46581
|
+
lockPath,
|
|
46582
|
+
fingerprint,
|
|
46583
|
+
poolDir
|
|
46584
|
+
};
|
|
46585
|
+
}
|
|
46586
|
+
throw new Error(
|
|
46587
|
+
`All ${maxSlots} pool slots are locked for fingerprint ${fingerprint.slice(0, 12)}...`
|
|
46588
|
+
);
|
|
46589
|
+
}
|
|
46590
|
+
/** Remove lock file to release a slot. */
|
|
46591
|
+
async releaseSlot(slot) {
|
|
46592
|
+
try {
|
|
46593
|
+
await unlink(slot.lockPath);
|
|
46594
|
+
} catch {
|
|
46595
|
+
}
|
|
46596
|
+
}
|
|
46597
|
+
/**
|
|
46598
|
+
* Try to acquire a PID-based lock file.
|
|
46599
|
+
* On EEXIST, read PID and check if process is alive. If dead, stale lock — remove and retry.
|
|
46600
|
+
* Returns true if lock acquired, false if slot is actively locked.
|
|
46601
|
+
* Uses a bounded loop (max 3 attempts) to avoid unbounded recursion.
|
|
46602
|
+
*/
|
|
46603
|
+
async tryLock(lockPath) {
|
|
46604
|
+
for (let attempt = 0; attempt < 3; attempt++) {
|
|
46605
|
+
try {
|
|
46606
|
+
await writeFile7(lockPath, String(process.pid), { flag: "wx" });
|
|
46607
|
+
return true;
|
|
46608
|
+
} catch (err) {
|
|
46609
|
+
if (err.code !== "EEXIST") {
|
|
46610
|
+
throw err;
|
|
46611
|
+
}
|
|
46612
|
+
try {
|
|
46613
|
+
const pidStr = await readFile11(lockPath, "utf-8");
|
|
46614
|
+
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
46615
|
+
if (!Number.isNaN(pid)) {
|
|
46616
|
+
try {
|
|
46617
|
+
process.kill(pid, 0);
|
|
46618
|
+
return false;
|
|
46619
|
+
} catch {
|
|
46620
|
+
await unlink(lockPath).catch(() => {
|
|
46621
|
+
});
|
|
46622
|
+
continue;
|
|
46623
|
+
}
|
|
46624
|
+
}
|
|
46625
|
+
} catch {
|
|
46626
|
+
}
|
|
46627
|
+
return false;
|
|
46628
|
+
}
|
|
46629
|
+
}
|
|
46630
|
+
return false;
|
|
46631
|
+
}
|
|
46632
|
+
/**
|
|
46633
|
+
* Check if the stored fingerprint in metadata.json differs from the computed one.
|
|
46634
|
+
* Returns true if drifted, false otherwise.
|
|
46635
|
+
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
46636
|
+
*/
|
|
46637
|
+
async checkDrift(poolDir, fingerprint) {
|
|
46638
|
+
const metadataPath = path36.join(poolDir, "metadata.json");
|
|
46639
|
+
try {
|
|
46640
|
+
const raw = await readFile11(metadataPath, "utf-8");
|
|
46641
|
+
const metadata = JSON.parse(raw);
|
|
46642
|
+
return metadata.fingerprint !== fingerprint;
|
|
46643
|
+
} catch {
|
|
46644
|
+
return false;
|
|
46645
|
+
}
|
|
46646
|
+
}
|
|
46647
|
+
/** Write metadata.json with fingerprint, inputs, and timestamp. */
|
|
46648
|
+
async writeMetadata(poolDir, fingerprint, templatePath, repos) {
|
|
46649
|
+
const metadata = {
|
|
46650
|
+
fingerprint,
|
|
46651
|
+
templatePath,
|
|
46652
|
+
repos,
|
|
46653
|
+
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
46654
|
+
};
|
|
46655
|
+
await writeFile7(path36.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
46656
|
+
}
|
|
46657
|
+
/** Remove all slot directories and their lock files from a pool directory. */
|
|
46658
|
+
async removeAllSlots(poolDir) {
|
|
46659
|
+
const entries = await readdir4(poolDir);
|
|
46660
|
+
for (const entry of entries) {
|
|
46661
|
+
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
46662
|
+
const lockPath = path36.join(poolDir, `${entry}.lock`);
|
|
46663
|
+
if (existsSync2(lockPath)) {
|
|
46664
|
+
try {
|
|
46665
|
+
const pidStr = await readFile11(lockPath, "utf-8");
|
|
46666
|
+
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
46667
|
+
if (!Number.isNaN(pid)) {
|
|
46668
|
+
try {
|
|
46669
|
+
process.kill(pid, 0);
|
|
46670
|
+
console.warn(`[workspace-pool] Skipping slot ${entry}: locked by PID ${pid}`);
|
|
46671
|
+
continue;
|
|
46672
|
+
} catch {
|
|
46673
|
+
}
|
|
46674
|
+
}
|
|
46675
|
+
} catch {
|
|
46676
|
+
}
|
|
46677
|
+
}
|
|
46678
|
+
await rm5(path36.join(poolDir, entry), { recursive: true, force: true });
|
|
46679
|
+
await rm5(lockPath, { force: true }).catch(() => {
|
|
46680
|
+
});
|
|
46681
|
+
}
|
|
46682
|
+
}
|
|
46683
|
+
await rm5(path36.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
46684
|
+
});
|
|
46685
|
+
}
|
|
46686
|
+
/**
|
|
46687
|
+
* Reset an existing slot for reuse:
|
|
46688
|
+
* 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
|
|
46689
|
+
* 2. Re-copy template files (skip repo directories)
|
|
46690
|
+
*/
|
|
46691
|
+
async resetSlot(slotPath, templatePath, repos) {
|
|
46692
|
+
for (const repo of repos) {
|
|
46693
|
+
const repoDir = path36.join(slotPath, repo.path);
|
|
46694
|
+
if (!existsSync2(repoDir)) {
|
|
46695
|
+
continue;
|
|
46696
|
+
}
|
|
46697
|
+
const ref = repo.checkout?.ref ?? "HEAD";
|
|
46698
|
+
await git(["reset", "--hard", ref], { cwd: repoDir });
|
|
46699
|
+
await git(["clean", "-fd"], { cwd: repoDir });
|
|
46700
|
+
}
|
|
46701
|
+
if (templatePath) {
|
|
46702
|
+
const repoDirNames = new Set(
|
|
46703
|
+
repos.map((r) => {
|
|
46704
|
+
const normalized = r.path.replace(/^\.\//, "");
|
|
46705
|
+
return normalized.split("/")[0];
|
|
46706
|
+
})
|
|
46707
|
+
);
|
|
46708
|
+
await copyDirectoryRecursive2(templatePath, slotPath, repoDirNames);
|
|
46709
|
+
}
|
|
46710
|
+
}
|
|
46711
|
+
};
|
|
46712
|
+
var execFileAsync2 = promisify6(execFile2);
|
|
46272
46713
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
46273
46714
|
var LOCK_TIMEOUT_MS = 6e4;
|
|
46274
|
-
function
|
|
46715
|
+
function gitEnv2() {
|
|
46275
46716
|
const env = { ...process.env };
|
|
46276
46717
|
for (const key of Object.keys(env)) {
|
|
46277
46718
|
if (key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND") {
|
|
@@ -46287,16 +46728,16 @@ function gitEnv() {
|
|
|
46287
46728
|
}
|
|
46288
46729
|
function cacheKey(source) {
|
|
46289
46730
|
const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
|
|
46290
|
-
return
|
|
46731
|
+
return createHash2("sha256").update(raw).digest("hex");
|
|
46291
46732
|
}
|
|
46292
46733
|
function getSourceUrl(source) {
|
|
46293
46734
|
return source.type === "git" ? source.url : source.path;
|
|
46294
46735
|
}
|
|
46295
|
-
async function
|
|
46296
|
-
const { stdout } = await
|
|
46736
|
+
async function git2(args, opts) {
|
|
46737
|
+
const { stdout } = await execFileAsync2("git", args, {
|
|
46297
46738
|
cwd: opts?.cwd,
|
|
46298
46739
|
timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
|
|
46299
|
-
env:
|
|
46740
|
+
env: gitEnv2(),
|
|
46300
46741
|
maxBuffer: 50 * 1024 * 1024
|
|
46301
46742
|
// 50MB
|
|
46302
46743
|
});
|
|
@@ -46306,7 +46747,7 @@ async function acquireLock(lockPath) {
|
|
|
46306
46747
|
const start = Date.now();
|
|
46307
46748
|
while (Date.now() - start < LOCK_TIMEOUT_MS) {
|
|
46308
46749
|
try {
|
|
46309
|
-
await
|
|
46750
|
+
await writeFile8(lockPath, String(process.pid), { flag: "wx" });
|
|
46310
46751
|
return;
|
|
46311
46752
|
} catch (err) {
|
|
46312
46753
|
if (err.code === "EEXIST") {
|
|
@@ -46320,14 +46761,37 @@ async function acquireLock(lockPath) {
|
|
|
46320
46761
|
}
|
|
46321
46762
|
async function releaseLock(lockPath) {
|
|
46322
46763
|
try {
|
|
46323
|
-
await
|
|
46764
|
+
await unlink2(lockPath);
|
|
46324
46765
|
} catch {
|
|
46325
46766
|
}
|
|
46326
46767
|
}
|
|
46327
46768
|
var RepoManager = class {
|
|
46328
46769
|
cacheDir;
|
|
46329
|
-
|
|
46330
|
-
|
|
46770
|
+
verbose;
|
|
46771
|
+
constructor(cacheDir, verbose = false) {
|
|
46772
|
+
this.cacheDir = cacheDir ?? getGitCacheRoot();
|
|
46773
|
+
this.verbose = verbose;
|
|
46774
|
+
}
|
|
46775
|
+
async runGit(args, opts) {
|
|
46776
|
+
const startedAt = Date.now();
|
|
46777
|
+
if (this.verbose) {
|
|
46778
|
+
console.log(`[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`);
|
|
46779
|
+
}
|
|
46780
|
+
try {
|
|
46781
|
+
const output = await git2(args, opts);
|
|
46782
|
+
if (this.verbose) {
|
|
46783
|
+
console.log(`[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`);
|
|
46784
|
+
}
|
|
46785
|
+
return output;
|
|
46786
|
+
} catch (error40) {
|
|
46787
|
+
if (this.verbose) {
|
|
46788
|
+
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
46789
|
+
console.log(
|
|
46790
|
+
`[repo] git fail durationMs=${Date.now() - startedAt} args=${args.join(" ")} error=${message}`
|
|
46791
|
+
);
|
|
46792
|
+
}
|
|
46793
|
+
throw error40;
|
|
46794
|
+
}
|
|
46331
46795
|
}
|
|
46332
46796
|
/**
|
|
46333
46797
|
* Ensure a bare mirror cache exists for the given source.
|
|
@@ -46336,11 +46800,19 @@ var RepoManager = class {
|
|
|
46336
46800
|
*/
|
|
46337
46801
|
async ensureCache(source, depth, resolve2) {
|
|
46338
46802
|
const key = cacheKey(source);
|
|
46339
|
-
const cachePath =
|
|
46803
|
+
const cachePath = path37.join(this.cacheDir, key);
|
|
46340
46804
|
const lockPath = `${cachePath}.lock`;
|
|
46341
|
-
const cacheExists =
|
|
46805
|
+
const cacheExists = existsSync3(path37.join(cachePath, "HEAD"));
|
|
46806
|
+
if (this.verbose) {
|
|
46807
|
+
console.log(
|
|
46808
|
+
`[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve2 ?? "remote"} cache=${cachePath}`
|
|
46809
|
+
);
|
|
46810
|
+
}
|
|
46342
46811
|
if (resolve2 === "local") {
|
|
46343
46812
|
if (cacheExists) {
|
|
46813
|
+
if (this.verbose) {
|
|
46814
|
+
console.log(`[repo] using existing local cache ${cachePath}`);
|
|
46815
|
+
}
|
|
46344
46816
|
return cachePath;
|
|
46345
46817
|
}
|
|
46346
46818
|
const url2 = getSourceUrl(source);
|
|
@@ -46348,16 +46820,26 @@ var RepoManager = class {
|
|
|
46348
46820
|
`No cache found for \`${url2}\`. Run \`agentv cache add --url ${url2} --from <local-path>\` to seed it.`
|
|
46349
46821
|
);
|
|
46350
46822
|
}
|
|
46351
|
-
await
|
|
46823
|
+
await mkdir12(this.cacheDir, { recursive: true });
|
|
46824
|
+
const lockStartedAt = Date.now();
|
|
46352
46825
|
await acquireLock(lockPath);
|
|
46826
|
+
if (this.verbose) {
|
|
46827
|
+
console.log(`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`);
|
|
46828
|
+
}
|
|
46353
46829
|
try {
|
|
46354
46830
|
if (cacheExists) {
|
|
46831
|
+
if (this.verbose) {
|
|
46832
|
+
console.log(`[repo] refreshing existing cache ${cachePath}`);
|
|
46833
|
+
}
|
|
46355
46834
|
const fetchArgs = ["fetch", "--prune"];
|
|
46356
46835
|
if (depth) {
|
|
46357
46836
|
fetchArgs.push("--depth", String(depth));
|
|
46358
46837
|
}
|
|
46359
|
-
await
|
|
46838
|
+
await this.runGit(fetchArgs, { cwd: cachePath });
|
|
46360
46839
|
} else {
|
|
46840
|
+
if (this.verbose) {
|
|
46841
|
+
console.log(`[repo] creating new cache ${cachePath}`);
|
|
46842
|
+
}
|
|
46361
46843
|
const cloneArgs = ["clone", "--mirror", "--bare"];
|
|
46362
46844
|
if (depth) {
|
|
46363
46845
|
cloneArgs.push("--depth", String(depth));
|
|
@@ -46365,10 +46847,13 @@ var RepoManager = class {
|
|
|
46365
46847
|
const sourceUrl = getSourceUrl(source);
|
|
46366
46848
|
const cloneUrl = depth && source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
|
|
46367
46849
|
cloneArgs.push(cloneUrl, cachePath);
|
|
46368
|
-
await
|
|
46850
|
+
await this.runGit(cloneArgs);
|
|
46369
46851
|
}
|
|
46370
46852
|
} finally {
|
|
46371
46853
|
await releaseLock(lockPath);
|
|
46854
|
+
if (this.verbose) {
|
|
46855
|
+
console.log(`[repo] lock released path=${lockPath}`);
|
|
46856
|
+
}
|
|
46372
46857
|
}
|
|
46373
46858
|
return cachePath;
|
|
46374
46859
|
}
|
|
@@ -46377,7 +46862,13 @@ var RepoManager = class {
|
|
|
46377
46862
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
46378
46863
|
*/
|
|
46379
46864
|
async materialize(repo, workspacePath) {
|
|
46380
|
-
const targetDir =
|
|
46865
|
+
const targetDir = path37.join(workspacePath, repo.path);
|
|
46866
|
+
const startedAt = Date.now();
|
|
46867
|
+
if (this.verbose) {
|
|
46868
|
+
console.log(
|
|
46869
|
+
`[repo] materialize start path=${repo.path} source=${getSourceUrl(repo.source)} workspace=${workspacePath}`
|
|
46870
|
+
);
|
|
46871
|
+
}
|
|
46381
46872
|
const cachePath = await this.ensureCache(
|
|
46382
46873
|
repo.source,
|
|
46383
46874
|
repo.clone?.depth,
|
|
@@ -46393,10 +46884,10 @@ var RepoManager = class {
|
|
|
46393
46884
|
cloneArgs.push("--no-checkout");
|
|
46394
46885
|
const cloneUrl = repo.clone?.depth || repo.clone?.filter ? `file://${cachePath}` : cachePath;
|
|
46395
46886
|
cloneArgs.push(cloneUrl, targetDir);
|
|
46396
|
-
await
|
|
46887
|
+
await this.runGit(cloneArgs);
|
|
46397
46888
|
if (repo.clone?.sparse?.length) {
|
|
46398
|
-
await
|
|
46399
|
-
await
|
|
46889
|
+
await this.runGit(["sparse-checkout", "init", "--cone"], { cwd: targetDir });
|
|
46890
|
+
await this.runGit(["sparse-checkout", "set", ...repo.clone.sparse], { cwd: targetDir });
|
|
46400
46891
|
}
|
|
46401
46892
|
const ref = repo.checkout?.ref ?? "HEAD";
|
|
46402
46893
|
const resolve2 = repo.checkout?.resolve ?? "remote";
|
|
@@ -46404,7 +46895,7 @@ var RepoManager = class {
|
|
|
46404
46895
|
if (resolve2 === "remote" && repo.source.type === "git") {
|
|
46405
46896
|
const url2 = getSourceUrl(repo.source);
|
|
46406
46897
|
try {
|
|
46407
|
-
const lsOutput = await
|
|
46898
|
+
const lsOutput = await this.runGit(["ls-remote", url2, ref]);
|
|
46408
46899
|
const match = lsOutput.split(" ")[0];
|
|
46409
46900
|
if (!match) {
|
|
46410
46901
|
throw new Error(`Ref '${ref}' not found on remote ${url2}`);
|
|
@@ -46417,17 +46908,26 @@ var RepoManager = class {
|
|
|
46417
46908
|
} else {
|
|
46418
46909
|
resolvedSha = ref;
|
|
46419
46910
|
}
|
|
46420
|
-
|
|
46911
|
+
if (this.verbose) {
|
|
46912
|
+
console.log(
|
|
46913
|
+
`[repo] checkout path=${repo.path} ref=${ref} resolved=${resolvedSha} resolve=${resolve2}`
|
|
46914
|
+
);
|
|
46915
|
+
}
|
|
46916
|
+
await this.runGit(["checkout", resolvedSha], { cwd: targetDir });
|
|
46421
46917
|
const ancestor = repo.checkout?.ancestor ?? 0;
|
|
46422
46918
|
if (ancestor > 0) {
|
|
46423
46919
|
try {
|
|
46424
|
-
const ancestorSha = await
|
|
46425
|
-
|
|
46920
|
+
const ancestorSha = await this.runGit(["rev-parse", `HEAD~${ancestor}`], {
|
|
46921
|
+
cwd: targetDir
|
|
46922
|
+
});
|
|
46923
|
+
await this.runGit(["checkout", ancestorSha], { cwd: targetDir });
|
|
46426
46924
|
} catch {
|
|
46427
46925
|
if (repo.clone?.depth) {
|
|
46428
|
-
await
|
|
46429
|
-
const ancestorSha = await
|
|
46430
|
-
|
|
46926
|
+
await this.runGit(["fetch", "--deepen", String(ancestor)], { cwd: targetDir });
|
|
46927
|
+
const ancestorSha = await this.runGit(["rev-parse", `HEAD~${ancestor}`], {
|
|
46928
|
+
cwd: targetDir
|
|
46929
|
+
});
|
|
46930
|
+
await this.runGit(["checkout", ancestorSha], { cwd: targetDir });
|
|
46431
46931
|
} else {
|
|
46432
46932
|
throw new Error(
|
|
46433
46933
|
`Cannot resolve ancestor ${ancestor} of ref '${ref}'. If using shallow clone, increase clone.depth to at least ${ancestor + 1}.`
|
|
@@ -46435,27 +46935,38 @@ var RepoManager = class {
|
|
|
46435
46935
|
}
|
|
46436
46936
|
}
|
|
46437
46937
|
}
|
|
46938
|
+
if (this.verbose) {
|
|
46939
|
+
console.log(
|
|
46940
|
+
`[repo] materialize done path=${repo.path} target=${targetDir} durationMs=${Date.now() - startedAt}`
|
|
46941
|
+
);
|
|
46942
|
+
}
|
|
46438
46943
|
}
|
|
46439
46944
|
/** Materialize all repos into the workspace. */
|
|
46440
46945
|
async materializeAll(repos, workspacePath) {
|
|
46946
|
+
if (this.verbose) {
|
|
46947
|
+
console.log(`[repo] materializeAll count=${repos.length} workspace=${workspacePath}`);
|
|
46948
|
+
}
|
|
46441
46949
|
for (const repo of repos) {
|
|
46442
46950
|
await this.materialize(repo, workspacePath);
|
|
46443
46951
|
}
|
|
46952
|
+
if (this.verbose) {
|
|
46953
|
+
console.log("[repo] materializeAll complete");
|
|
46954
|
+
}
|
|
46444
46955
|
}
|
|
46445
46956
|
/** Reset repos in workspace to their checkout state. */
|
|
46446
46957
|
async reset(repos, workspacePath, strategy) {
|
|
46447
46958
|
if (strategy === "recreate") {
|
|
46448
46959
|
for (const repo of repos) {
|
|
46449
|
-
const targetDir =
|
|
46450
|
-
await
|
|
46960
|
+
const targetDir = path37.join(workspacePath, repo.path);
|
|
46961
|
+
await rm6(targetDir, { recursive: true, force: true });
|
|
46451
46962
|
}
|
|
46452
46963
|
await this.materializeAll(repos, workspacePath);
|
|
46453
46964
|
return;
|
|
46454
46965
|
}
|
|
46455
46966
|
for (const repo of repos) {
|
|
46456
|
-
const targetDir =
|
|
46457
|
-
await
|
|
46458
|
-
await
|
|
46967
|
+
const targetDir = path37.join(workspacePath, repo.path);
|
|
46968
|
+
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
46969
|
+
await this.runGit(["clean", "-fd"], { cwd: targetDir });
|
|
46459
46970
|
}
|
|
46460
46971
|
}
|
|
46461
46972
|
/**
|
|
@@ -46465,21 +46976,21 @@ var RepoManager = class {
|
|
|
46465
46976
|
async seedCache(localPath, remoteUrl, opts) {
|
|
46466
46977
|
const source = { type: "git", url: remoteUrl };
|
|
46467
46978
|
const key = cacheKey(source);
|
|
46468
|
-
const cachePath =
|
|
46979
|
+
const cachePath = path37.join(this.cacheDir, key);
|
|
46469
46980
|
const lockPath = `${cachePath}.lock`;
|
|
46470
|
-
await
|
|
46981
|
+
await mkdir12(this.cacheDir, { recursive: true });
|
|
46471
46982
|
await acquireLock(lockPath);
|
|
46472
46983
|
try {
|
|
46473
|
-
if (
|
|
46984
|
+
if (existsSync3(path37.join(cachePath, "HEAD"))) {
|
|
46474
46985
|
if (!opts?.force) {
|
|
46475
46986
|
throw new Error(
|
|
46476
46987
|
`Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
|
|
46477
46988
|
);
|
|
46478
46989
|
}
|
|
46479
|
-
await
|
|
46990
|
+
await rm6(cachePath, { recursive: true, force: true });
|
|
46480
46991
|
}
|
|
46481
|
-
await
|
|
46482
|
-
await
|
|
46992
|
+
await git2(["clone", "--mirror", "--bare", localPath, cachePath]);
|
|
46993
|
+
await git2(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
|
|
46483
46994
|
} finally {
|
|
46484
46995
|
await releaseLock(lockPath);
|
|
46485
46996
|
}
|
|
@@ -46487,37 +46998,37 @@ var RepoManager = class {
|
|
|
46487
46998
|
}
|
|
46488
46999
|
/** Remove the entire cache directory. */
|
|
46489
47000
|
async cleanCache() {
|
|
46490
|
-
await
|
|
47001
|
+
await rm6(this.cacheDir, { recursive: true, force: true });
|
|
46491
47002
|
}
|
|
46492
47003
|
};
|
|
46493
47004
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
46494
47005
|
if (!templatePath) {
|
|
46495
47006
|
return void 0;
|
|
46496
47007
|
}
|
|
46497
|
-
const resolved =
|
|
47008
|
+
const resolved = path38.resolve(templatePath);
|
|
46498
47009
|
const stats = await stat6(resolved);
|
|
46499
47010
|
if (stats.isFile()) {
|
|
46500
47011
|
return {
|
|
46501
|
-
dir:
|
|
47012
|
+
dir: path38.dirname(resolved),
|
|
46502
47013
|
workspaceFile: resolved
|
|
46503
47014
|
};
|
|
46504
47015
|
}
|
|
46505
47016
|
if (!stats.isDirectory()) {
|
|
46506
47017
|
throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
|
|
46507
47018
|
}
|
|
46508
|
-
const entries = await
|
|
47019
|
+
const entries = await readdir5(resolved);
|
|
46509
47020
|
const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
|
|
46510
47021
|
if (workspaceFiles.length === 1) {
|
|
46511
47022
|
return {
|
|
46512
47023
|
dir: resolved,
|
|
46513
|
-
workspaceFile:
|
|
47024
|
+
workspaceFile: path38.join(resolved, workspaceFiles[0])
|
|
46514
47025
|
};
|
|
46515
47026
|
}
|
|
46516
47027
|
if (workspaceFiles.length > 1) {
|
|
46517
47028
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
46518
47029
|
return {
|
|
46519
47030
|
dir: resolved,
|
|
46520
|
-
workspaceFile: conventionFile ?
|
|
47031
|
+
workspaceFile: conventionFile ? path38.join(resolved, conventionFile) : void 0
|
|
46521
47032
|
};
|
|
46522
47033
|
}
|
|
46523
47034
|
return { dir: resolved };
|
|
@@ -46595,7 +47106,10 @@ async function runEvaluation(options) {
|
|
|
46595
47106
|
trials,
|
|
46596
47107
|
streamCallbacks,
|
|
46597
47108
|
totalBudgetUsd,
|
|
46598
|
-
failOnError
|
|
47109
|
+
failOnError,
|
|
47110
|
+
poolWorkspaces,
|
|
47111
|
+
poolMaxSlots: configPoolMaxSlots,
|
|
47112
|
+
workspace: userWorkspacePath
|
|
46599
47113
|
} = options;
|
|
46600
47114
|
let useCache = options.useCache;
|
|
46601
47115
|
if (trials && trials.count > 1 && useCache) {
|
|
@@ -46669,7 +47183,7 @@ async function runEvaluation(options) {
|
|
|
46669
47183
|
];
|
|
46670
47184
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveJudgeProvider);
|
|
46671
47185
|
const typeRegistry = createBuiltinRegistry();
|
|
46672
|
-
const discoveryBaseDir = evalFilePath ?
|
|
47186
|
+
const discoveryBaseDir = evalFilePath ? path39.dirname(path39.resolve(evalFilePath)) : process.cwd();
|
|
46673
47187
|
const evalDir = discoveryBaseDir;
|
|
46674
47188
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
46675
47189
|
const providerRegistry = createBuiltinProviderRegistry();
|
|
@@ -46725,11 +47239,25 @@ async function runEvaluation(options) {
|
|
|
46725
47239
|
const resolvedTemplate = await resolveWorkspaceTemplate(rawTemplate);
|
|
46726
47240
|
const workspaceTemplate = resolvedTemplate?.dir;
|
|
46727
47241
|
let suiteWorkspaceFile = resolvedTemplate?.workspaceFile;
|
|
47242
|
+
const setupLog = (message) => {
|
|
47243
|
+
if (verbose) {
|
|
47244
|
+
console.log(`[setup] ${message}`);
|
|
47245
|
+
}
|
|
47246
|
+
};
|
|
46728
47247
|
const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
|
|
46729
|
-
|
|
47248
|
+
if (userWorkspacePath && isPerTestIsolation) {
|
|
47249
|
+
throw new Error(
|
|
47250
|
+
"--workspace is incompatible with isolation: per_test. Use isolation: shared (default)."
|
|
47251
|
+
);
|
|
47252
|
+
}
|
|
47253
|
+
const hasSharedWorkspace = !!(userWorkspacePath || workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
|
|
47254
|
+
const usePool = poolWorkspaces === true && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !userWorkspacePath;
|
|
46730
47255
|
const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
|
|
46731
|
-
const workers = hasSharedWorkspace ? 1 : requestedWorkers;
|
|
46732
|
-
|
|
47256
|
+
const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
|
|
47257
|
+
setupLog(
|
|
47258
|
+
`sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
|
|
47259
|
+
);
|
|
47260
|
+
if (hasSharedWorkspace && !usePool && requestedWorkers > 1) {
|
|
46733
47261
|
console.warn(
|
|
46734
47262
|
`Warning: Shared workspace requires sequential execution. Overriding workers from ${requestedWorkers} to 1.`
|
|
46735
47263
|
);
|
|
@@ -46738,285 +47266,383 @@ async function runEvaluation(options) {
|
|
|
46738
47266
|
let sharedWorkspacePath;
|
|
46739
47267
|
let sharedBaselineCommit;
|
|
46740
47268
|
let beforeAllOutput;
|
|
46741
|
-
|
|
47269
|
+
let poolManager;
|
|
47270
|
+
let poolSlot;
|
|
47271
|
+
const poolSlots = [];
|
|
47272
|
+
const availablePoolSlots = [];
|
|
47273
|
+
const poolSlotBaselines = /* @__PURE__ */ new Map();
|
|
47274
|
+
const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
|
|
47275
|
+
if (userWorkspacePath) {
|
|
47276
|
+
sharedWorkspacePath = userWorkspacePath;
|
|
47277
|
+
setupLog(`using user-provided workspace: ${userWorkspacePath}`);
|
|
47278
|
+
} else if (usePool && suiteWorkspace?.repos) {
|
|
47279
|
+
const slotsNeeded = workers;
|
|
47280
|
+
setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
|
|
47281
|
+
poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
|
|
47282
|
+
const poolRepoManager = new RepoManager(void 0, verbose);
|
|
47283
|
+
for (let i = 0; i < slotsNeeded; i++) {
|
|
47284
|
+
const slot = await poolManager.acquireWorkspace({
|
|
47285
|
+
templatePath: workspaceTemplate,
|
|
47286
|
+
repos: suiteWorkspace.repos,
|
|
47287
|
+
maxSlots: poolMaxSlots,
|
|
47288
|
+
repoManager: poolRepoManager
|
|
47289
|
+
});
|
|
47290
|
+
poolSlots.push(slot);
|
|
47291
|
+
setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
|
|
47292
|
+
}
|
|
47293
|
+
if (slotsNeeded === 1) {
|
|
47294
|
+
poolSlot = poolSlots[0];
|
|
47295
|
+
sharedWorkspacePath = poolSlot.path;
|
|
47296
|
+
} else {
|
|
47297
|
+
availablePoolSlots.push(...poolSlots);
|
|
47298
|
+
}
|
|
47299
|
+
} else if (workspaceTemplate) {
|
|
47300
|
+
setupLog(`creating shared workspace from template: ${workspaceTemplate}`);
|
|
46742
47301
|
try {
|
|
46743
47302
|
sharedWorkspacePath = await createTempWorkspace(workspaceTemplate, evalRunId, "shared");
|
|
47303
|
+
setupLog(`shared workspace created at: ${sharedWorkspacePath}`);
|
|
46744
47304
|
} catch (error40) {
|
|
46745
47305
|
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
46746
47306
|
throw new Error(`Failed to create shared workspace: ${message}`);
|
|
46747
47307
|
}
|
|
47308
|
+
} else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
|
|
47309
|
+
sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
|
|
47310
|
+
await mkdir13(sharedWorkspacePath, { recursive: true });
|
|
47311
|
+
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
47312
|
+
}
|
|
47313
|
+
try {
|
|
46748
47314
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
46749
|
-
const copiedWorkspaceFile =
|
|
47315
|
+
const copiedWorkspaceFile = path39.join(sharedWorkspacePath, path39.basename(suiteWorkspaceFile));
|
|
46750
47316
|
try {
|
|
46751
47317
|
await stat7(copiedWorkspaceFile);
|
|
46752
47318
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
46753
47319
|
} catch {
|
|
46754
47320
|
}
|
|
46755
47321
|
}
|
|
46756
|
-
|
|
46757
|
-
sharedWorkspacePath
|
|
46758
|
-
|
|
46759
|
-
|
|
46760
|
-
|
|
46761
|
-
|
|
46762
|
-
|
|
46763
|
-
|
|
46764
|
-
|
|
46765
|
-
|
|
46766
|
-
|
|
46767
|
-
|
|
46768
|
-
});
|
|
46769
|
-
}
|
|
46770
|
-
throw new Error(`Failed to materialize repos: ${message}`);
|
|
46771
|
-
}
|
|
46772
|
-
}
|
|
46773
|
-
if (sharedWorkspacePath && suiteWorkspace?.before_all) {
|
|
46774
|
-
const scriptContext = {
|
|
46775
|
-
workspacePath: sharedWorkspacePath,
|
|
46776
|
-
testId: "__before_all__",
|
|
46777
|
-
evalRunId,
|
|
46778
|
-
evalDir
|
|
46779
|
-
};
|
|
46780
|
-
try {
|
|
46781
|
-
beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
|
|
46782
|
-
} catch (error40) {
|
|
46783
|
-
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
46784
|
-
if (sharedWorkspacePath) {
|
|
46785
|
-
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
46786
|
-
});
|
|
46787
|
-
}
|
|
46788
|
-
throw new Error(`before_all script failed: ${message}`);
|
|
46789
|
-
}
|
|
46790
|
-
}
|
|
46791
|
-
if (sharedWorkspacePath) {
|
|
46792
|
-
try {
|
|
46793
|
-
sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
|
|
46794
|
-
} catch {
|
|
46795
|
-
}
|
|
46796
|
-
}
|
|
46797
|
-
let nextWorkerId = 1;
|
|
46798
|
-
const workerIdByEvalId = /* @__PURE__ */ new Map();
|
|
46799
|
-
let beforeAllOutputAttached = false;
|
|
46800
|
-
let cumulativeBudgetCost = 0;
|
|
46801
|
-
let budgetExhausted = false;
|
|
46802
|
-
let failOnErrorTriggered = false;
|
|
46803
|
-
const promises = filteredEvalCases.map(
|
|
46804
|
-
(evalCase) => limit(async () => {
|
|
46805
|
-
const workerId = nextWorkerId++;
|
|
46806
|
-
workerIdByEvalId.set(evalCase.id, workerId);
|
|
46807
|
-
if (totalBudgetUsd !== void 0 && budgetExhausted) {
|
|
46808
|
-
const budgetResult = {
|
|
46809
|
-
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
46810
|
-
testId: evalCase.id,
|
|
46811
|
-
dataset: evalCase.dataset,
|
|
46812
|
-
score: 0,
|
|
46813
|
-
hits: [],
|
|
46814
|
-
misses: [],
|
|
46815
|
-
answer: "",
|
|
46816
|
-
target: target.name,
|
|
46817
|
-
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
46818
|
-
budgetExceeded: true,
|
|
46819
|
-
executionStatus: "execution_error",
|
|
46820
|
-
failureStage: "setup",
|
|
46821
|
-
failureReasonCode: "budget_exceeded",
|
|
46822
|
-
executionError: {
|
|
46823
|
-
message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
46824
|
-
stage: "setup"
|
|
46825
|
-
}
|
|
46826
|
-
};
|
|
46827
|
-
if (onProgress) {
|
|
46828
|
-
await onProgress({
|
|
46829
|
-
workerId,
|
|
46830
|
-
testId: evalCase.id,
|
|
46831
|
-
status: "failed",
|
|
46832
|
-
completedAt: Date.now(),
|
|
46833
|
-
error: budgetResult.error
|
|
47322
|
+
const repoManager = suiteWorkspace?.repos?.length && !usePool && !userWorkspacePath ? new RepoManager(void 0, verbose) : void 0;
|
|
47323
|
+
if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
|
|
47324
|
+
setupLog(
|
|
47325
|
+
`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
|
|
47326
|
+
);
|
|
47327
|
+
try {
|
|
47328
|
+
await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
|
|
47329
|
+
setupLog("shared repo materialization complete");
|
|
47330
|
+
} catch (error40) {
|
|
47331
|
+
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
47332
|
+
if (sharedWorkspacePath && !userWorkspacePath) {
|
|
47333
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
46834
47334
|
});
|
|
46835
47335
|
}
|
|
46836
|
-
|
|
46837
|
-
await onResult(budgetResult);
|
|
46838
|
-
}
|
|
46839
|
-
return budgetResult;
|
|
47336
|
+
throw new Error(`Failed to materialize repos: ${message}`);
|
|
46840
47337
|
}
|
|
46841
|
-
|
|
46842
|
-
|
|
46843
|
-
|
|
46844
|
-
|
|
46845
|
-
|
|
46846
|
-
|
|
46847
|
-
|
|
46848
|
-
|
|
46849
|
-
|
|
46850
|
-
|
|
46851
|
-
|
|
46852
|
-
|
|
46853
|
-
|
|
46854
|
-
|
|
46855
|
-
|
|
46856
|
-
|
|
46857
|
-
|
|
46858
|
-
if (
|
|
46859
|
-
await
|
|
46860
|
-
workerId,
|
|
46861
|
-
testId: evalCase.id,
|
|
46862
|
-
status: "failed",
|
|
46863
|
-
completedAt: Date.now(),
|
|
46864
|
-
error: haltResult.error
|
|
47338
|
+
}
|
|
47339
|
+
if (sharedWorkspacePath && suiteWorkspace?.before_all) {
|
|
47340
|
+
const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
|
|
47341
|
+
setupLog(
|
|
47342
|
+
`running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
47343
|
+
);
|
|
47344
|
+
const scriptContext = {
|
|
47345
|
+
workspacePath: sharedWorkspacePath,
|
|
47346
|
+
testId: "__before_all__",
|
|
47347
|
+
evalRunId,
|
|
47348
|
+
evalDir
|
|
47349
|
+
};
|
|
47350
|
+
try {
|
|
47351
|
+
beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
|
|
47352
|
+
setupLog("shared before_all completed");
|
|
47353
|
+
} catch (error40) {
|
|
47354
|
+
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
47355
|
+
if (sharedWorkspacePath && !userWorkspacePath) {
|
|
47356
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
46865
47357
|
});
|
|
46866
47358
|
}
|
|
46867
|
-
|
|
46868
|
-
await onResult(haltResult);
|
|
46869
|
-
}
|
|
46870
|
-
return haltResult;
|
|
47359
|
+
throw new Error(`before_all script failed: ${message}`);
|
|
46871
47360
|
}
|
|
46872
|
-
|
|
46873
|
-
|
|
46874
|
-
|
|
46875
|
-
|
|
46876
|
-
|
|
46877
|
-
|
|
46878
|
-
|
|
46879
|
-
}
|
|
46880
|
-
try {
|
|
46881
|
-
const judgeProvider = await resolveJudgeProvider(target);
|
|
46882
|
-
const runCaseOptions = {
|
|
46883
|
-
evalCase,
|
|
46884
|
-
provider: primaryProvider,
|
|
46885
|
-
target,
|
|
46886
|
-
evaluators: evaluatorRegistry,
|
|
46887
|
-
maxRetries,
|
|
46888
|
-
agentTimeoutMs,
|
|
46889
|
-
cache,
|
|
46890
|
-
useCache,
|
|
46891
|
-
now,
|
|
46892
|
-
judgeProvider,
|
|
46893
|
-
targetResolver,
|
|
46894
|
-
availableTargets,
|
|
47361
|
+
}
|
|
47362
|
+
if (availablePoolSlots.length > 0 && suiteWorkspace?.before_all) {
|
|
47363
|
+
for (const slot of availablePoolSlots) {
|
|
47364
|
+
setupLog(`running before_all on pool slot ${slot.index}`);
|
|
47365
|
+
const scriptContext = {
|
|
47366
|
+
workspacePath: slot.path,
|
|
47367
|
+
testId: "__before_all__",
|
|
46895
47368
|
evalRunId,
|
|
46896
|
-
keepWorkspaces,
|
|
46897
|
-
cleanupWorkspaces,
|
|
46898
|
-
sharedWorkspacePath,
|
|
46899
|
-
sharedBaselineCommit,
|
|
46900
|
-
suiteWorkspaceFile,
|
|
46901
|
-
streamCallbacks,
|
|
46902
|
-
typeRegistry,
|
|
46903
|
-
repoManager,
|
|
46904
47369
|
evalDir
|
|
46905
47370
|
};
|
|
46906
|
-
|
|
46907
|
-
|
|
46908
|
-
|
|
46909
|
-
|
|
46910
|
-
|
|
46911
|
-
|
|
46912
|
-
|
|
47371
|
+
try {
|
|
47372
|
+
const output = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
|
|
47373
|
+
if (!beforeAllOutput) beforeAllOutput = output;
|
|
47374
|
+
setupLog(`before_all completed on pool slot ${slot.index}`);
|
|
47375
|
+
} catch (error40) {
|
|
47376
|
+
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
47377
|
+
throw new Error(`before_all script failed on pool slot ${slot.index}: ${message}`);
|
|
47378
|
+
}
|
|
47379
|
+
}
|
|
47380
|
+
}
|
|
47381
|
+
if (sharedWorkspacePath) {
|
|
47382
|
+
try {
|
|
47383
|
+
sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
|
|
47384
|
+
setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
|
|
47385
|
+
} catch {
|
|
47386
|
+
setupLog("shared baseline initialization skipped (non-fatal)");
|
|
47387
|
+
}
|
|
47388
|
+
}
|
|
47389
|
+
if (availablePoolSlots.length > 0) {
|
|
47390
|
+
for (const slot of availablePoolSlots) {
|
|
47391
|
+
try {
|
|
47392
|
+
const baseline = await initializeBaseline(slot.path);
|
|
47393
|
+
poolSlotBaselines.set(slot.path, baseline);
|
|
47394
|
+
setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
|
|
47395
|
+
} catch {
|
|
47396
|
+
setupLog(`pool slot ${slot.index} baseline initialization skipped (non-fatal)`);
|
|
47397
|
+
}
|
|
47398
|
+
}
|
|
47399
|
+
}
|
|
47400
|
+
let nextWorkerId = 1;
|
|
47401
|
+
const workerIdByEvalId = /* @__PURE__ */ new Map();
|
|
47402
|
+
let beforeAllOutputAttached = false;
|
|
47403
|
+
let cumulativeBudgetCost = 0;
|
|
47404
|
+
let budgetExhausted = false;
|
|
47405
|
+
let failOnErrorTriggered = false;
|
|
47406
|
+
const promises = filteredEvalCases.map(
|
|
47407
|
+
(evalCase) => limit(async () => {
|
|
47408
|
+
const workerId = nextWorkerId++;
|
|
47409
|
+
workerIdByEvalId.set(evalCase.id, workerId);
|
|
47410
|
+
if (totalBudgetUsd !== void 0 && budgetExhausted) {
|
|
47411
|
+
const budgetResult = {
|
|
47412
|
+
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
47413
|
+
testId: evalCase.id,
|
|
47414
|
+
dataset: evalCase.dataset,
|
|
47415
|
+
score: 0,
|
|
47416
|
+
hits: [],
|
|
47417
|
+
misses: [],
|
|
47418
|
+
answer: "",
|
|
47419
|
+
target: target.name,
|
|
47420
|
+
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
47421
|
+
budgetExceeded: true,
|
|
47422
|
+
executionStatus: "execution_error",
|
|
47423
|
+
failureStage: "setup",
|
|
47424
|
+
failureReasonCode: "budget_exceeded",
|
|
47425
|
+
executionError: {
|
|
47426
|
+
message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
47427
|
+
stage: "setup"
|
|
46913
47428
|
}
|
|
46914
|
-
}
|
|
46915
|
-
|
|
47429
|
+
};
|
|
47430
|
+
if (onProgress) {
|
|
47431
|
+
await onProgress({
|
|
47432
|
+
workerId,
|
|
47433
|
+
testId: evalCase.id,
|
|
47434
|
+
status: "failed",
|
|
47435
|
+
completedAt: Date.now(),
|
|
47436
|
+
error: budgetResult.error
|
|
47437
|
+
});
|
|
46916
47438
|
}
|
|
46917
|
-
if (
|
|
46918
|
-
|
|
46919
|
-
if (cumulativeBudgetCost >= totalBudgetUsd) {
|
|
46920
|
-
budgetExhausted = true;
|
|
46921
|
-
}
|
|
47439
|
+
if (onResult) {
|
|
47440
|
+
await onResult(budgetResult);
|
|
46922
47441
|
}
|
|
47442
|
+
return budgetResult;
|
|
46923
47443
|
}
|
|
46924
|
-
if (failOnError === true &&
|
|
46925
|
-
|
|
46926
|
-
|
|
46927
|
-
|
|
46928
|
-
|
|
46929
|
-
|
|
47444
|
+
if (failOnError === true && failOnErrorTriggered) {
|
|
47445
|
+
const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
|
|
47446
|
+
const haltResult = {
|
|
47447
|
+
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
47448
|
+
testId: evalCase.id,
|
|
47449
|
+
dataset: evalCase.dataset,
|
|
47450
|
+
score: 0,
|
|
47451
|
+
hits: [],
|
|
47452
|
+
misses: [],
|
|
47453
|
+
answer: "",
|
|
47454
|
+
target: target.name,
|
|
47455
|
+
error: errorMsg,
|
|
47456
|
+
executionStatus: "execution_error",
|
|
47457
|
+
failureStage: "setup",
|
|
47458
|
+
failureReasonCode: "error_threshold_exceeded",
|
|
47459
|
+
executionError: { message: errorMsg, stage: "setup" }
|
|
47460
|
+
};
|
|
47461
|
+
if (onProgress) {
|
|
47462
|
+
await onProgress({
|
|
47463
|
+
workerId,
|
|
47464
|
+
testId: evalCase.id,
|
|
47465
|
+
status: "failed",
|
|
47466
|
+
completedAt: Date.now(),
|
|
47467
|
+
error: haltResult.error
|
|
47468
|
+
});
|
|
47469
|
+
}
|
|
47470
|
+
if (onResult) {
|
|
47471
|
+
await onResult(haltResult);
|
|
47472
|
+
}
|
|
47473
|
+
return haltResult;
|
|
46930
47474
|
}
|
|
46931
47475
|
if (onProgress) {
|
|
46932
47476
|
await onProgress({
|
|
46933
47477
|
workerId,
|
|
46934
47478
|
testId: evalCase.id,
|
|
46935
|
-
status:
|
|
46936
|
-
startedAt:
|
|
46937
|
-
// Not used for completed status
|
|
46938
|
-
completedAt: Date.now(),
|
|
46939
|
-
error: result.error
|
|
47479
|
+
status: "running",
|
|
47480
|
+
startedAt: Date.now()
|
|
46940
47481
|
});
|
|
46941
47482
|
}
|
|
46942
|
-
|
|
46943
|
-
|
|
47483
|
+
const testPoolSlot = availablePoolSlots.length > 0 ? availablePoolSlots.pop() : void 0;
|
|
47484
|
+
const testWorkspacePath = testPoolSlot?.path ?? sharedWorkspacePath;
|
|
47485
|
+
const testBaselineCommit = testPoolSlot ? poolSlotBaselines.get(testPoolSlot.path) : sharedBaselineCommit;
|
|
47486
|
+
try {
|
|
47487
|
+
const judgeProvider = await resolveJudgeProvider(target);
|
|
47488
|
+
const runCaseOptions = {
|
|
47489
|
+
evalCase,
|
|
47490
|
+
provider: primaryProvider,
|
|
47491
|
+
target,
|
|
47492
|
+
evaluators: evaluatorRegistry,
|
|
47493
|
+
maxRetries,
|
|
47494
|
+
agentTimeoutMs,
|
|
47495
|
+
cache,
|
|
47496
|
+
useCache,
|
|
47497
|
+
now,
|
|
47498
|
+
judgeProvider,
|
|
47499
|
+
targetResolver,
|
|
47500
|
+
availableTargets,
|
|
47501
|
+
evalRunId,
|
|
47502
|
+
keepWorkspaces,
|
|
47503
|
+
cleanupWorkspaces,
|
|
47504
|
+
sharedWorkspacePath: testWorkspacePath,
|
|
47505
|
+
sharedBaselineCommit: testBaselineCommit,
|
|
47506
|
+
suiteWorkspaceFile,
|
|
47507
|
+
streamCallbacks,
|
|
47508
|
+
typeRegistry,
|
|
47509
|
+
repoManager,
|
|
47510
|
+
evalDir
|
|
47511
|
+
};
|
|
47512
|
+
let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
|
|
47513
|
+
if (totalBudgetUsd !== void 0) {
|
|
47514
|
+
let caseCost;
|
|
47515
|
+
if (result.trials && result.trials.length > 0) {
|
|
47516
|
+
const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
|
|
47517
|
+
if (trialCostSum > 0) {
|
|
47518
|
+
caseCost = trialCostSum;
|
|
47519
|
+
}
|
|
47520
|
+
} else {
|
|
47521
|
+
caseCost = result.costUsd;
|
|
47522
|
+
}
|
|
47523
|
+
if (caseCost !== void 0) {
|
|
47524
|
+
cumulativeBudgetCost += caseCost;
|
|
47525
|
+
if (cumulativeBudgetCost >= totalBudgetUsd) {
|
|
47526
|
+
budgetExhausted = true;
|
|
47527
|
+
}
|
|
47528
|
+
}
|
|
47529
|
+
}
|
|
47530
|
+
if (failOnError === true && result.executionStatus === "execution_error") {
|
|
47531
|
+
failOnErrorTriggered = true;
|
|
47532
|
+
}
|
|
47533
|
+
if (beforeAllOutput && !beforeAllOutputAttached) {
|
|
47534
|
+
result = { ...result, beforeAllOutput };
|
|
47535
|
+
beforeAllOutputAttached = true;
|
|
47536
|
+
}
|
|
47537
|
+
if (onProgress) {
|
|
47538
|
+
await onProgress({
|
|
47539
|
+
workerId,
|
|
47540
|
+
testId: evalCase.id,
|
|
47541
|
+
status: result.error ? "failed" : "completed",
|
|
47542
|
+
startedAt: 0,
|
|
47543
|
+
// Not used for completed status
|
|
47544
|
+
completedAt: Date.now(),
|
|
47545
|
+
error: result.error
|
|
47546
|
+
});
|
|
47547
|
+
}
|
|
47548
|
+
if (onResult) {
|
|
47549
|
+
await onResult(result);
|
|
47550
|
+
}
|
|
47551
|
+
return result;
|
|
47552
|
+
} catch (error40) {
|
|
47553
|
+
if (onProgress) {
|
|
47554
|
+
await onProgress({
|
|
47555
|
+
workerId,
|
|
47556
|
+
testId: evalCase.id,
|
|
47557
|
+
status: "failed",
|
|
47558
|
+
completedAt: Date.now(),
|
|
47559
|
+
error: error40 instanceof Error ? error40.message : String(error40)
|
|
47560
|
+
});
|
|
47561
|
+
}
|
|
47562
|
+
throw error40;
|
|
47563
|
+
} finally {
|
|
47564
|
+
if (testPoolSlot) {
|
|
47565
|
+
availablePoolSlots.push(testPoolSlot);
|
|
47566
|
+
}
|
|
46944
47567
|
}
|
|
46945
|
-
|
|
46946
|
-
|
|
46947
|
-
|
|
46948
|
-
|
|
46949
|
-
|
|
46950
|
-
|
|
46951
|
-
|
|
46952
|
-
|
|
46953
|
-
|
|
46954
|
-
|
|
47568
|
+
})
|
|
47569
|
+
);
|
|
47570
|
+
const settled = await Promise.allSettled(promises);
|
|
47571
|
+
const results = [];
|
|
47572
|
+
for (let i = 0; i < settled.length; i++) {
|
|
47573
|
+
const outcome = settled[i];
|
|
47574
|
+
if (outcome.status === "fulfilled") {
|
|
47575
|
+
results.push(outcome.value);
|
|
47576
|
+
} else {
|
|
47577
|
+
const evalCase = filteredEvalCases[i];
|
|
47578
|
+
const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
|
|
47579
|
+
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
47580
|
+
const errorResult = buildErrorResult(
|
|
47581
|
+
evalCase,
|
|
47582
|
+
target.name,
|
|
47583
|
+
(now ?? (() => /* @__PURE__ */ new Date()))(),
|
|
47584
|
+
outcome.reason,
|
|
47585
|
+
promptInputs,
|
|
47586
|
+
primaryProvider,
|
|
47587
|
+
"agent",
|
|
47588
|
+
"provider_error"
|
|
47589
|
+
);
|
|
47590
|
+
results.push(errorResult);
|
|
47591
|
+
if (onResult) {
|
|
47592
|
+
await onResult(errorResult);
|
|
46955
47593
|
}
|
|
46956
|
-
throw error40;
|
|
46957
47594
|
}
|
|
46958
|
-
}
|
|
46959
|
-
|
|
46960
|
-
|
|
46961
|
-
|
|
46962
|
-
|
|
46963
|
-
|
|
46964
|
-
|
|
46965
|
-
|
|
46966
|
-
|
|
46967
|
-
|
|
46968
|
-
|
|
46969
|
-
|
|
46970
|
-
|
|
46971
|
-
|
|
46972
|
-
|
|
46973
|
-
|
|
46974
|
-
|
|
46975
|
-
|
|
46976
|
-
|
|
46977
|
-
|
|
46978
|
-
|
|
46979
|
-
);
|
|
46980
|
-
results.push(errorResult);
|
|
46981
|
-
if (onResult) {
|
|
46982
|
-
await onResult(errorResult);
|
|
47595
|
+
}
|
|
47596
|
+
const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
|
|
47597
|
+
if (afterAllWorkspaces.length > 0 && suiteWorkspace?.after_all) {
|
|
47598
|
+
for (const wsPath of afterAllWorkspaces) {
|
|
47599
|
+
const scriptContext = {
|
|
47600
|
+
workspacePath: wsPath,
|
|
47601
|
+
testId: "__after_all__",
|
|
47602
|
+
evalRunId,
|
|
47603
|
+
evalDir
|
|
47604
|
+
};
|
|
47605
|
+
try {
|
|
47606
|
+
const afterAllOutput = await executeWorkspaceScript(
|
|
47607
|
+
suiteWorkspace.after_all,
|
|
47608
|
+
scriptContext,
|
|
47609
|
+
"warn"
|
|
47610
|
+
);
|
|
47611
|
+
if (afterAllOutput && results.length > 0 && wsPath === afterAllWorkspaces[0]) {
|
|
47612
|
+
results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
|
|
47613
|
+
}
|
|
47614
|
+
} catch {
|
|
47615
|
+
}
|
|
46983
47616
|
}
|
|
46984
47617
|
}
|
|
46985
|
-
|
|
46986
|
-
|
|
46987
|
-
|
|
46988
|
-
|
|
46989
|
-
|
|
46990
|
-
|
|
46991
|
-
|
|
46992
|
-
|
|
46993
|
-
try {
|
|
46994
|
-
const afterAllOutput = await executeWorkspaceScript(
|
|
46995
|
-
suiteWorkspace.after_all,
|
|
46996
|
-
scriptContext,
|
|
46997
|
-
"warn"
|
|
46998
|
-
);
|
|
46999
|
-
if (afterAllOutput && results.length > 0) {
|
|
47000
|
-
results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
|
|
47618
|
+
if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !userWorkspacePath) {
|
|
47619
|
+
const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
|
|
47620
|
+
if (cleanupWorkspaces) {
|
|
47621
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
47622
|
+
});
|
|
47623
|
+
} else if (!hasFailure && !keepWorkspaces) {
|
|
47624
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
47625
|
+
});
|
|
47001
47626
|
}
|
|
47002
|
-
} catch {
|
|
47003
47627
|
}
|
|
47004
|
-
}
|
|
47005
|
-
if (sharedWorkspacePath) {
|
|
47006
|
-
const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
|
|
47007
47628
|
if (cleanupWorkspaces) {
|
|
47008
|
-
await
|
|
47009
|
-
});
|
|
47010
|
-
} else if (!hasFailure && !keepWorkspaces) {
|
|
47011
|
-
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
47629
|
+
await cleanupEvalWorkspaces(evalRunId).catch(() => {
|
|
47012
47630
|
});
|
|
47013
47631
|
}
|
|
47632
|
+
return results;
|
|
47633
|
+
} finally {
|
|
47634
|
+
if (poolManager) {
|
|
47635
|
+
if (poolSlot) {
|
|
47636
|
+
await poolManager.releaseSlot(poolSlot);
|
|
47637
|
+
}
|
|
47638
|
+
for (const slot of poolSlots) {
|
|
47639
|
+
if (slot !== poolSlot) {
|
|
47640
|
+
await poolManager.releaseSlot(slot).catch(() => {
|
|
47641
|
+
});
|
|
47642
|
+
}
|
|
47643
|
+
}
|
|
47644
|
+
}
|
|
47014
47645
|
}
|
|
47015
|
-
if (cleanupWorkspaces) {
|
|
47016
|
-
await cleanupEvalWorkspaces(evalRunId).catch(() => {
|
|
47017
|
-
});
|
|
47018
|
-
}
|
|
47019
|
-
return results;
|
|
47020
47646
|
}
|
|
47021
47647
|
async function runBatchEvaluation(options) {
|
|
47022
47648
|
const {
|
|
@@ -47195,6 +47821,7 @@ async function runEvalCase(options) {
|
|
|
47195
47821
|
repoManager,
|
|
47196
47822
|
evalDir
|
|
47197
47823
|
} = options;
|
|
47824
|
+
const setupDebug = process.env.AGENTV_SETUP_DEBUG === "1";
|
|
47198
47825
|
const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
|
|
47199
47826
|
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
47200
47827
|
const typeRegistry = providedTypeRegistry ?? createBuiltinRegistry();
|
|
@@ -47232,7 +47859,7 @@ async function runEvalCase(options) {
|
|
|
47232
47859
|
);
|
|
47233
47860
|
}
|
|
47234
47861
|
if (caseWorkspaceFile && workspacePath) {
|
|
47235
|
-
const copiedFile =
|
|
47862
|
+
const copiedFile = path39.join(workspacePath, path39.basename(caseWorkspaceFile));
|
|
47236
47863
|
try {
|
|
47237
47864
|
await stat7(copiedFile);
|
|
47238
47865
|
caseWorkspaceFile = copiedFile;
|
|
@@ -47242,12 +47869,20 @@ async function runEvalCase(options) {
|
|
|
47242
47869
|
}
|
|
47243
47870
|
if (!workspacePath && (evalCase.workspace?.before_all || evalCase.workspace?.repos?.length) && evalRunId) {
|
|
47244
47871
|
workspacePath = getWorkspacePath(evalRunId, evalCase.id);
|
|
47245
|
-
await
|
|
47872
|
+
await mkdir13(workspacePath, { recursive: true });
|
|
47246
47873
|
}
|
|
47247
47874
|
if (evalCase.workspace?.repos?.length && workspacePath) {
|
|
47248
|
-
const perCaseRepoManager = new RepoManager();
|
|
47875
|
+
const perCaseRepoManager = new RepoManager(void 0, setupDebug);
|
|
47249
47876
|
try {
|
|
47877
|
+
if (setupDebug) {
|
|
47878
|
+
console.log(
|
|
47879
|
+
`[setup] test=${evalCase.id} materializing ${evalCase.workspace.repos.length} per-test repo(s) into ${workspacePath}`
|
|
47880
|
+
);
|
|
47881
|
+
}
|
|
47250
47882
|
await perCaseRepoManager.materializeAll(evalCase.workspace.repos, workspacePath);
|
|
47883
|
+
if (setupDebug) {
|
|
47884
|
+
console.log(`[setup] test=${evalCase.id} per-test repo materialization complete`);
|
|
47885
|
+
}
|
|
47251
47886
|
} catch (error40) {
|
|
47252
47887
|
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
47253
47888
|
return buildErrorResult(
|
|
@@ -47263,6 +47898,12 @@ async function runEvalCase(options) {
|
|
|
47263
47898
|
}
|
|
47264
47899
|
}
|
|
47265
47900
|
if (workspacePath && evalCase.workspace?.before_all) {
|
|
47901
|
+
const beforeAllCommand = (evalCase.workspace.before_all.command ?? evalCase.workspace.before_all.script ?? []).join(" ");
|
|
47902
|
+
if (setupDebug) {
|
|
47903
|
+
console.log(
|
|
47904
|
+
`[setup] test=${evalCase.id} running before_all in cwd=${evalCase.workspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
47905
|
+
);
|
|
47906
|
+
}
|
|
47266
47907
|
const scriptContext = {
|
|
47267
47908
|
workspacePath,
|
|
47268
47909
|
testId: evalCase.id,
|
|
@@ -47276,6 +47917,9 @@ async function runEvalCase(options) {
|
|
|
47276
47917
|
evalCase.workspace.before_all,
|
|
47277
47918
|
scriptContext
|
|
47278
47919
|
);
|
|
47920
|
+
if (setupDebug) {
|
|
47921
|
+
console.log(`[setup] test=${evalCase.id} before_all completed`);
|
|
47922
|
+
}
|
|
47279
47923
|
} catch (error40) {
|
|
47280
47924
|
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
47281
47925
|
if (forceCleanup && workspacePath) {
|
|
@@ -47825,7 +48469,7 @@ async function runEvaluatorList(options) {
|
|
|
47825
48469
|
fileChanges,
|
|
47826
48470
|
workspacePath
|
|
47827
48471
|
};
|
|
47828
|
-
const evalFileDir = evalCase.guideline_paths[0] ?
|
|
48472
|
+
const evalFileDir = evalCase.guideline_paths[0] ? path39.dirname(evalCase.guideline_paths[0]) : process.cwd();
|
|
47829
48473
|
const dispatchContext = {
|
|
47830
48474
|
judgeProvider,
|
|
47831
48475
|
targetResolver,
|
|
@@ -48059,7 +48703,7 @@ function extractProviderError(response) {
|
|
|
48059
48703
|
return trimmed.length > 0 ? trimmed : void 0;
|
|
48060
48704
|
}
|
|
48061
48705
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
48062
|
-
const hash =
|
|
48706
|
+
const hash = createHash3("sha256");
|
|
48063
48707
|
hash.update(provider.id);
|
|
48064
48708
|
hash.update(target.name);
|
|
48065
48709
|
hash.update(evalCase.id);
|
|
@@ -48146,13 +48790,13 @@ async function evaluate(config2) {
|
|
|
48146
48790
|
let evalCases;
|
|
48147
48791
|
let testFilePath;
|
|
48148
48792
|
if (config2.specFile) {
|
|
48149
|
-
testFilePath =
|
|
48793
|
+
testFilePath = path40.resolve(config2.specFile);
|
|
48150
48794
|
evalCases = await loadTests(testFilePath, repoRoot, {
|
|
48151
48795
|
verbose: config2.verbose,
|
|
48152
48796
|
filter: config2.filter
|
|
48153
48797
|
});
|
|
48154
48798
|
} else {
|
|
48155
|
-
testFilePath =
|
|
48799
|
+
testFilePath = path40.join(process.cwd(), "__programmatic__.yaml");
|
|
48156
48800
|
evalCases = (config2.tests ?? []).map((test) => {
|
|
48157
48801
|
const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
|
|
48158
48802
|
const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
|
|
@@ -48238,11 +48882,11 @@ function computeSummary(results, durationMs) {
|
|
|
48238
48882
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
48239
48883
|
async function discoverDefaultTarget(repoRoot) {
|
|
48240
48884
|
const cwd = process.cwd();
|
|
48241
|
-
const chain = buildDirectoryChain(
|
|
48885
|
+
const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
|
|
48242
48886
|
for (const dir of chain) {
|
|
48243
48887
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
48244
|
-
const targetsPath =
|
|
48245
|
-
if (!
|
|
48888
|
+
const targetsPath = path40.join(dir, candidate);
|
|
48889
|
+
if (!existsSync4(targetsPath)) continue;
|
|
48246
48890
|
try {
|
|
48247
48891
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
48248
48892
|
const defaultTarget = definitions.find((d) => d.name === "default");
|
|
@@ -48256,11 +48900,11 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
48256
48900
|
async function loadEnvHierarchy(repoRoot) {
|
|
48257
48901
|
const { readFileSync: readFileSync2 } = await import("node:fs");
|
|
48258
48902
|
const cwd = process.cwd();
|
|
48259
|
-
const chain = buildDirectoryChain(
|
|
48903
|
+
const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
|
|
48260
48904
|
const envFiles = [];
|
|
48261
48905
|
for (const dir of chain) {
|
|
48262
|
-
const envPath =
|
|
48263
|
-
if (
|
|
48906
|
+
const envPath = path40.join(dir, ".env");
|
|
48907
|
+
if (existsSync4(envPath)) envFiles.push(envPath);
|
|
48264
48908
|
}
|
|
48265
48909
|
for (let i = envFiles.length - 1; i >= 0; i--) {
|
|
48266
48910
|
try {
|
|
@@ -48335,12 +48979,12 @@ var CONFIG_FILE_NAMES = [
|
|
|
48335
48979
|
".agentv/config.js"
|
|
48336
48980
|
];
|
|
48337
48981
|
async function loadTsConfig(projectRoot) {
|
|
48338
|
-
const { existsSync:
|
|
48982
|
+
const { existsSync: existsSync5 } = await import("node:fs");
|
|
48339
48983
|
const { pathToFileURL } = await import("node:url");
|
|
48340
48984
|
const { join: join2 } = await import("node:path");
|
|
48341
48985
|
for (const fileName of CONFIG_FILE_NAMES) {
|
|
48342
48986
|
const filePath = join2(projectRoot, fileName);
|
|
48343
|
-
if (!
|
|
48987
|
+
if (!existsSync5(filePath)) {
|
|
48344
48988
|
continue;
|
|
48345
48989
|
}
|
|
48346
48990
|
try {
|
|
@@ -48440,7 +49084,7 @@ var ResponseCache = class {
|
|
|
48440
49084
|
async get(key) {
|
|
48441
49085
|
const filePath = this.keyToPath(key);
|
|
48442
49086
|
try {
|
|
48443
|
-
const data = await
|
|
49087
|
+
const data = await readFile12(filePath, "utf8");
|
|
48444
49088
|
return JSON.parse(data);
|
|
48445
49089
|
} catch {
|
|
48446
49090
|
return void 0;
|
|
@@ -48448,13 +49092,13 @@ var ResponseCache = class {
|
|
|
48448
49092
|
}
|
|
48449
49093
|
async set(key, value) {
|
|
48450
49094
|
const filePath = this.keyToPath(key);
|
|
48451
|
-
const dir =
|
|
48452
|
-
await
|
|
48453
|
-
await
|
|
49095
|
+
const dir = path41.dirname(filePath);
|
|
49096
|
+
await mkdir14(dir, { recursive: true });
|
|
49097
|
+
await writeFile9(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
48454
49098
|
}
|
|
48455
49099
|
keyToPath(key) {
|
|
48456
49100
|
const prefix = key.slice(0, 2);
|
|
48457
|
-
return
|
|
49101
|
+
return path41.join(this.cachePath, prefix, `${key}.json`);
|
|
48458
49102
|
}
|
|
48459
49103
|
};
|
|
48460
49104
|
function shouldEnableCache(params) {
|
|
@@ -48958,6 +49602,12 @@ export {
|
|
|
48958
49602
|
consumePiLogEntries,
|
|
48959
49603
|
subscribeToPiLogEntries,
|
|
48960
49604
|
ProviderRegistry,
|
|
49605
|
+
getAgentvHome,
|
|
49606
|
+
getWorkspacesRoot,
|
|
49607
|
+
getGitCacheRoot,
|
|
49608
|
+
getSubagentsRoot,
|
|
49609
|
+
getTraceStateRoot,
|
|
49610
|
+
getWorkspacePoolRoot,
|
|
48961
49611
|
ensureVSCodeSubagents,
|
|
48962
49612
|
readTargetDefinitions,
|
|
48963
49613
|
listTargetNames,
|
|
@@ -49019,6 +49669,8 @@ export {
|
|
|
49019
49669
|
createTempWorkspace,
|
|
49020
49670
|
cleanupWorkspace,
|
|
49021
49671
|
cleanupEvalWorkspaces,
|
|
49672
|
+
computeWorkspaceFingerprint,
|
|
49673
|
+
WorkspacePoolManager,
|
|
49022
49674
|
RepoManager,
|
|
49023
49675
|
resolveWorkspaceTemplate,
|
|
49024
49676
|
executeWorkspaceScript,
|
|
@@ -49037,4 +49689,4 @@ export {
|
|
|
49037
49689
|
OtelStreamingObserver,
|
|
49038
49690
|
createAgentKernel
|
|
49039
49691
|
};
|
|
49040
|
-
//# sourceMappingURL=chunk-
|
|
49692
|
+
//# sourceMappingURL=chunk-VBK7BJLE.js.map
|