@agentv/core 2.14.2 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1270 -604
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +94 -2
- package/dist/index.d.ts +94 -2
- package/dist/index.js +1242 -584
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -234,7 +234,7 @@ async function resolveFileReference2(ref, evalFileDir) {
|
|
|
234
234
|
const rawPath = extractFilePath(ref);
|
|
235
235
|
const absolutePattern = path.resolve(evalFileDir, rawPath);
|
|
236
236
|
if (isGlobPattern(rawPath)) {
|
|
237
|
-
const matches = await fg(absolutePattern, {
|
|
237
|
+
const matches = await fg(absolutePattern.replaceAll("\\", "/"), {
|
|
238
238
|
onlyFiles: true,
|
|
239
239
|
absolute: true
|
|
240
240
|
});
|
|
@@ -612,6 +612,17 @@ function parseExecutionDefaults(raw, configPath) {
|
|
|
612
612
|
} else if (otelFile !== void 0) {
|
|
613
613
|
logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
|
|
614
614
|
}
|
|
615
|
+
if (typeof obj.pool_workspaces === "boolean") {
|
|
616
|
+
result.pool_workspaces = obj.pool_workspaces;
|
|
617
|
+
} else if (obj.pool_workspaces !== void 0) {
|
|
618
|
+
logWarning(`Invalid execution.pool_workspaces in ${configPath}, expected boolean`);
|
|
619
|
+
}
|
|
620
|
+
const poolSlots = obj.pool_slots;
|
|
621
|
+
if (typeof poolSlots === "number" && Number.isInteger(poolSlots) && poolSlots >= 1 && poolSlots <= 50) {
|
|
622
|
+
result.pool_slots = poolSlots;
|
|
623
|
+
} else if (poolSlots !== void 0) {
|
|
624
|
+
logWarning(`Invalid execution.pool_slots in ${configPath}, expected integer 1-50`);
|
|
625
|
+
}
|
|
615
626
|
return Object.keys(result).length > 0 ? result : void 0;
|
|
616
627
|
}
|
|
617
628
|
function logWarning(message) {
|
|
@@ -2053,6 +2064,7 @@ async function processMessages(options) {
|
|
|
2053
2064
|
repoRootPath,
|
|
2054
2065
|
guidelinePatterns,
|
|
2055
2066
|
guidelinePaths,
|
|
2067
|
+
treatFileSegmentsAsGuidelines,
|
|
2056
2068
|
textParts,
|
|
2057
2069
|
messageType,
|
|
2058
2070
|
verbose
|
|
@@ -2100,16 +2112,20 @@ async function processMessages(options) {
|
|
|
2100
2112
|
}
|
|
2101
2113
|
try {
|
|
2102
2114
|
const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
2103
|
-
|
|
2104
|
-
|
|
2105
|
-
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2115
|
+
const classifyAsGuideline = shouldTreatAsGuideline({
|
|
2116
|
+
messageType,
|
|
2117
|
+
resolvedPath,
|
|
2118
|
+
repoRootPath,
|
|
2119
|
+
guidelinePatterns,
|
|
2120
|
+
treatFileSegmentsAsGuidelines
|
|
2121
|
+
});
|
|
2122
|
+
if (classifyAsGuideline && guidelinePaths) {
|
|
2123
|
+
guidelinePaths.push(path5.resolve(resolvedPath));
|
|
2124
|
+
if (verbose) {
|
|
2125
|
+
console.log(` [Guideline] Found: ${displayPath}`);
|
|
2126
|
+
console.log(` Resolved to: ${resolvedPath}`);
|
|
2112
2127
|
}
|
|
2128
|
+
continue;
|
|
2113
2129
|
}
|
|
2114
2130
|
segments.push({
|
|
2115
2131
|
type: "file",
|
|
@@ -2138,6 +2154,26 @@ async function processMessages(options) {
|
|
|
2138
2154
|
}
|
|
2139
2155
|
return segments;
|
|
2140
2156
|
}
|
|
2157
|
+
function shouldTreatAsGuideline(options) {
|
|
2158
|
+
const {
|
|
2159
|
+
messageType,
|
|
2160
|
+
resolvedPath,
|
|
2161
|
+
repoRootPath,
|
|
2162
|
+
guidelinePatterns,
|
|
2163
|
+
treatFileSegmentsAsGuidelines
|
|
2164
|
+
} = options;
|
|
2165
|
+
if (messageType !== "input") {
|
|
2166
|
+
return false;
|
|
2167
|
+
}
|
|
2168
|
+
if (treatFileSegmentsAsGuidelines) {
|
|
2169
|
+
return true;
|
|
2170
|
+
}
|
|
2171
|
+
if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
2172
|
+
return false;
|
|
2173
|
+
}
|
|
2174
|
+
const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
|
|
2175
|
+
return isGuidelineFile(relativeToRepo, guidelinePatterns);
|
|
2176
|
+
}
|
|
2141
2177
|
function asString3(value) {
|
|
2142
2178
|
return typeof value === "string" ? value : void 0;
|
|
2143
2179
|
}
|
|
@@ -2476,6 +2512,8 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2476
2512
|
for (const guidelinePath of testCase.guideline_paths) {
|
|
2477
2513
|
console.log(` - ${guidelinePath}`);
|
|
2478
2514
|
}
|
|
2515
|
+
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
2516
|
+
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
2479
2517
|
} else {
|
|
2480
2518
|
console.log(" No guidelines found");
|
|
2481
2519
|
}
|
|
@@ -2845,7 +2883,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2845
2883
|
} else {
|
|
2846
2884
|
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
|
|
2847
2885
|
}
|
|
2848
|
-
const suiteWorkspace =
|
|
2886
|
+
const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
|
|
2849
2887
|
const suiteInputMessages = expandInputShorthand(suite.input);
|
|
2850
2888
|
const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
2851
2889
|
const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
|
|
@@ -2881,12 +2919,24 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2881
2919
|
}
|
|
2882
2920
|
const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
|
|
2883
2921
|
const skipDefaults = caseExecution?.skip_defaults === true;
|
|
2884
|
-
const
|
|
2922
|
+
const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
|
|
2923
|
+
const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
|
|
2885
2924
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
2886
2925
|
const guidelinePaths = [];
|
|
2887
2926
|
const inputTextParts = [];
|
|
2888
|
-
const
|
|
2889
|
-
messages:
|
|
2927
|
+
const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
|
|
2928
|
+
messages: effectiveSuiteInputMessages,
|
|
2929
|
+
searchRoots,
|
|
2930
|
+
repoRootPath,
|
|
2931
|
+
guidelinePatterns,
|
|
2932
|
+
guidelinePaths,
|
|
2933
|
+
treatFileSegmentsAsGuidelines: true,
|
|
2934
|
+
textParts: inputTextParts,
|
|
2935
|
+
messageType: "input",
|
|
2936
|
+
verbose
|
|
2937
|
+
}) : [];
|
|
2938
|
+
const testInputSegments = await processMessages({
|
|
2939
|
+
messages: testInputMessages,
|
|
2890
2940
|
searchRoots,
|
|
2891
2941
|
repoRootPath,
|
|
2892
2942
|
guidelinePatterns,
|
|
@@ -2895,6 +2945,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2895
2945
|
messageType: "input",
|
|
2896
2946
|
verbose
|
|
2897
2947
|
});
|
|
2948
|
+
const inputSegments = [...suiteInputSegments, ...testInputSegments];
|
|
2898
2949
|
const outputSegments = hasExpectedMessages ? await processExpectedMessages({
|
|
2899
2950
|
messages: expectedMessages,
|
|
2900
2951
|
searchRoots,
|
|
@@ -2942,7 +2993,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2942
2993
|
...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
|
|
2943
2994
|
...userFilePaths
|
|
2944
2995
|
];
|
|
2945
|
-
const caseWorkspace =
|
|
2996
|
+
const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
|
|
2946
2997
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
2947
2998
|
const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
|
|
2948
2999
|
const caseTargets = extractTargetsFromTestCase(evalcase);
|
|
@@ -2973,6 +3024,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2973
3024
|
for (const guidelinePath of testCase.guideline_paths) {
|
|
2974
3025
|
console.log(` - ${guidelinePath}`);
|
|
2975
3026
|
}
|
|
3027
|
+
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
3028
|
+
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
2976
3029
|
} else {
|
|
2977
3030
|
console.log(" No guidelines found");
|
|
2978
3031
|
}
|
|
@@ -3072,6 +3125,26 @@ function parseResetConfig(raw) {
|
|
|
3072
3125
|
...afterEach !== void 0 && { after_each: afterEach }
|
|
3073
3126
|
};
|
|
3074
3127
|
}
|
|
3128
|
+
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
3129
|
+
if (typeof raw === "string") {
|
|
3130
|
+
const workspaceFilePath = path8.resolve(evalFileDir, raw);
|
|
3131
|
+
let content;
|
|
3132
|
+
try {
|
|
3133
|
+
content = await readFile7(workspaceFilePath, "utf8");
|
|
3134
|
+
} catch {
|
|
3135
|
+
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
3136
|
+
}
|
|
3137
|
+
const parsed = parse2(content);
|
|
3138
|
+
if (!isJsonObject(parsed)) {
|
|
3139
|
+
throw new Error(
|
|
3140
|
+
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
3141
|
+
);
|
|
3142
|
+
}
|
|
3143
|
+
const workspaceFileDir = path8.dirname(workspaceFilePath);
|
|
3144
|
+
return parseWorkspaceConfig(parsed, workspaceFileDir);
|
|
3145
|
+
}
|
|
3146
|
+
return parseWorkspaceConfig(raw, evalFileDir);
|
|
3147
|
+
}
|
|
3075
3148
|
function parseWorkspaceConfig(raw, evalFileDir) {
|
|
3076
3149
|
if (!isJsonObject(raw)) return void 0;
|
|
3077
3150
|
const obj = raw;
|
|
@@ -5049,7 +5122,7 @@ import { arch, platform } from "node:os";
|
|
|
5049
5122
|
import path13 from "node:path";
|
|
5050
5123
|
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
5051
5124
|
function resolvePlatformCliPath() {
|
|
5052
|
-
const
|
|
5125
|
+
const os3 = platform();
|
|
5053
5126
|
const cpu = arch();
|
|
5054
5127
|
const platformMap = {
|
|
5055
5128
|
linux: "linux",
|
|
@@ -5060,13 +5133,13 @@ function resolvePlatformCliPath() {
|
|
|
5060
5133
|
x64: "x64",
|
|
5061
5134
|
arm64: "arm64"
|
|
5062
5135
|
};
|
|
5063
|
-
const osPart = platformMap[
|
|
5136
|
+
const osPart = platformMap[os3];
|
|
5064
5137
|
const archPart = archMap[cpu];
|
|
5065
5138
|
if (!osPart || !archPart) {
|
|
5066
5139
|
return void 0;
|
|
5067
5140
|
}
|
|
5068
5141
|
const packageName = `@github/copilot-${osPart}-${archPart}`;
|
|
5069
|
-
const binaryName =
|
|
5142
|
+
const binaryName = os3 === "win32" ? "copilot.exe" : "copilot";
|
|
5070
5143
|
try {
|
|
5071
5144
|
const resolved = import.meta.resolve(`${packageName}/package.json`);
|
|
5072
5145
|
const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath2(resolved) : resolved;
|
|
@@ -6868,12 +6941,12 @@ var ProviderRegistry = class {
|
|
|
6868
6941
|
// src/evaluation/providers/vscode-provider.ts
|
|
6869
6942
|
import { exec as exec2 } from "node:child_process";
|
|
6870
6943
|
import { constants as constants3, access as access3, stat as stat4 } from "node:fs/promises";
|
|
6871
|
-
import
|
|
6944
|
+
import path28 from "node:path";
|
|
6872
6945
|
import { promisify as promisify3 } from "node:util";
|
|
6873
6946
|
|
|
6874
6947
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
6875
6948
|
import { stat as stat3, writeFile as writeFile4 } from "node:fs/promises";
|
|
6876
|
-
import
|
|
6949
|
+
import path26 from "node:path";
|
|
6877
6950
|
|
|
6878
6951
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
6879
6952
|
import { constants as constants2 } from "node:fs";
|
|
@@ -7141,17 +7214,49 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
7141
7214
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
7142
7215
|
import { exec, spawn as spawn3 } from "node:child_process";
|
|
7143
7216
|
import { mkdir as mkdir7, writeFile as writeFile2 } from "node:fs/promises";
|
|
7144
|
-
import
|
|
7217
|
+
import path23 from "node:path";
|
|
7145
7218
|
import { promisify as promisify2 } from "node:util";
|
|
7146
7219
|
|
|
7147
7220
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
7221
|
+
import path22 from "node:path";
|
|
7222
|
+
|
|
7223
|
+
// src/paths.ts
|
|
7148
7224
|
import os2 from "node:os";
|
|
7149
7225
|
import path21 from "node:path";
|
|
7226
|
+
var logged = false;
|
|
7227
|
+
function getAgentvHome() {
|
|
7228
|
+
const envHome = process.env.AGENTV_HOME;
|
|
7229
|
+
if (envHome) {
|
|
7230
|
+
if (!logged) {
|
|
7231
|
+
logged = true;
|
|
7232
|
+
console.warn(`Using AGENTV_HOME: ${envHome}`);
|
|
7233
|
+
}
|
|
7234
|
+
return envHome;
|
|
7235
|
+
}
|
|
7236
|
+
return path21.join(os2.homedir(), ".agentv");
|
|
7237
|
+
}
|
|
7238
|
+
function getWorkspacesRoot() {
|
|
7239
|
+
return path21.join(getAgentvHome(), "workspaces");
|
|
7240
|
+
}
|
|
7241
|
+
function getGitCacheRoot() {
|
|
7242
|
+
return path21.join(getAgentvHome(), "git-cache");
|
|
7243
|
+
}
|
|
7244
|
+
function getSubagentsRoot() {
|
|
7245
|
+
return path21.join(getAgentvHome(), "subagents");
|
|
7246
|
+
}
|
|
7247
|
+
function getTraceStateRoot() {
|
|
7248
|
+
return path21.join(getAgentvHome(), "trace-state");
|
|
7249
|
+
}
|
|
7250
|
+
function getWorkspacePoolRoot() {
|
|
7251
|
+
return path21.join(getAgentvHome(), "workspace-pool");
|
|
7252
|
+
}
|
|
7253
|
+
|
|
7254
|
+
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
7150
7255
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
7151
7256
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
7152
7257
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
7153
7258
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
7154
|
-
return
|
|
7259
|
+
return path22.join(getSubagentsRoot(), folder);
|
|
7155
7260
|
}
|
|
7156
7261
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
7157
7262
|
|
|
@@ -7165,12 +7270,19 @@ description: 'Wake-up Signal'
|
|
|
7165
7270
|
model: Grok Code Fast 1 (copilot)
|
|
7166
7271
|
---`;
|
|
7167
7272
|
function spawnVsCode(vscodeCmd, args, options) {
|
|
7168
|
-
const
|
|
7273
|
+
const useShell = options?.shell ?? true;
|
|
7274
|
+
const command = useShell ? shellQuote(vscodeCmd) : vscodeCmd;
|
|
7275
|
+
const child = spawn3(command, args, {
|
|
7169
7276
|
windowsHide: true,
|
|
7170
|
-
shell:
|
|
7277
|
+
shell: useShell,
|
|
7171
7278
|
detached: false
|
|
7172
7279
|
});
|
|
7173
|
-
child.on("error", () => {
|
|
7280
|
+
child.on("error", (error) => {
|
|
7281
|
+
const label = options?.label ?? "spawn";
|
|
7282
|
+
const renderedArgs = args.map((value) => JSON.stringify(value)).join(" ");
|
|
7283
|
+
console.error(
|
|
7284
|
+
`[vscode] ${label} failed: command=${JSON.stringify(vscodeCmd)} args=${renderedArgs} error=${error.message}`
|
|
7285
|
+
);
|
|
7174
7286
|
});
|
|
7175
7287
|
return child;
|
|
7176
7288
|
}
|
|
@@ -7207,16 +7319,20 @@ async function checkWorkspaceOpened(workspaceName, vscodeCmd) {
|
|
|
7207
7319
|
async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir, vscodeCmd, pollInterval = 1, timeout = 60) {
|
|
7208
7320
|
const alreadyOpen = await checkWorkspaceOpened(workspaceName, vscodeCmd);
|
|
7209
7321
|
if (alreadyOpen) {
|
|
7210
|
-
spawnVsCode(
|
|
7322
|
+
const child = spawnVsCode(vscodeCmd, [workspacePath], { label: "focus-existing-workspace" });
|
|
7323
|
+
await raceSpawnError(child);
|
|
7211
7324
|
return true;
|
|
7212
7325
|
}
|
|
7213
|
-
const aliveFile =
|
|
7326
|
+
const aliveFile = path23.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
7214
7327
|
await removeIfExists(aliveFile);
|
|
7215
|
-
const githubAgentsDir =
|
|
7328
|
+
const githubAgentsDir = path23.join(subagentDir, ".github", "agents");
|
|
7216
7329
|
await mkdir7(githubAgentsDir, { recursive: true });
|
|
7217
|
-
const wakeupDst =
|
|
7330
|
+
const wakeupDst = path23.join(githubAgentsDir, "wakeup.md");
|
|
7218
7331
|
await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
7219
|
-
spawnVsCode(
|
|
7332
|
+
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
7333
|
+
label: "open-workspace"
|
|
7334
|
+
});
|
|
7335
|
+
await raceSpawnError(workspaceChild);
|
|
7220
7336
|
await sleep2(100);
|
|
7221
7337
|
const wakeupChatId = "wakeup";
|
|
7222
7338
|
const chatArgs = [
|
|
@@ -7224,9 +7340,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
7224
7340
|
"chat",
|
|
7225
7341
|
"-m",
|
|
7226
7342
|
wakeupChatId,
|
|
7227
|
-
`create a file named .alive in the ${
|
|
7343
|
+
`create a file named .alive in the ${path23.basename(subagentDir)} folder`
|
|
7228
7344
|
];
|
|
7229
|
-
spawnVsCode(
|
|
7345
|
+
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
7346
|
+
await raceSpawnError(wakeupChild);
|
|
7230
7347
|
const start = Date.now();
|
|
7231
7348
|
while (!await pathExists(aliveFile)) {
|
|
7232
7349
|
if (Date.now() - start > timeout * 1e3) {
|
|
@@ -7238,10 +7355,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
7238
7355
|
return true;
|
|
7239
7356
|
}
|
|
7240
7357
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
7241
|
-
const workspacePath =
|
|
7242
|
-
const messagesDir =
|
|
7358
|
+
const workspacePath = path23.join(subagentDir, `${path23.basename(subagentDir)}.code-workspace`);
|
|
7359
|
+
const messagesDir = path23.join(subagentDir, "messages");
|
|
7243
7360
|
await mkdir7(messagesDir, { recursive: true });
|
|
7244
|
-
const reqFile =
|
|
7361
|
+
const reqFile = path23.join(messagesDir, `${timestamp}_req.md`);
|
|
7245
7362
|
await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
|
|
7246
7363
|
const reqUri = pathToFileUri2(reqFile);
|
|
7247
7364
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
@@ -7249,25 +7366,25 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
7249
7366
|
chatArgs.push("-a", attachment);
|
|
7250
7367
|
}
|
|
7251
7368
|
chatArgs.push("-a", reqFile);
|
|
7252
|
-
chatArgs.push(`Follow instructions in [${
|
|
7369
|
+
chatArgs.push(`Follow instructions in [${path23.basename(reqFile)}](${reqUri})`);
|
|
7253
7370
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
7254
7371
|
workspacePath,
|
|
7255
|
-
|
|
7372
|
+
path23.basename(subagentDir),
|
|
7256
7373
|
subagentDir,
|
|
7257
7374
|
vscodeCmd
|
|
7258
7375
|
);
|
|
7259
7376
|
if (!workspaceReady) {
|
|
7260
7377
|
throw new Error(
|
|
7261
|
-
`VS Code workspace '${
|
|
7378
|
+
`VS Code workspace '${path23.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
7262
7379
|
);
|
|
7263
7380
|
}
|
|
7264
7381
|
await sleep2(500);
|
|
7265
|
-
const child = spawnVsCode(
|
|
7382
|
+
const child = spawnVsCode(vscodeCmd, chatArgs, { label: "send-chat" });
|
|
7266
7383
|
await raceSpawnError(child);
|
|
7267
7384
|
}
|
|
7268
7385
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
7269
|
-
const workspacePath =
|
|
7270
|
-
const messagesDir =
|
|
7386
|
+
const workspacePath = path23.join(subagentDir, `${path23.basename(subagentDir)}.code-workspace`);
|
|
7387
|
+
const messagesDir = path23.join(subagentDir, "messages");
|
|
7271
7388
|
await mkdir7(messagesDir, { recursive: true });
|
|
7272
7389
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
7273
7390
|
for (const attachment of attachmentPaths) {
|
|
@@ -7276,26 +7393,26 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
7276
7393
|
chatArgs.push(chatInstruction);
|
|
7277
7394
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
7278
7395
|
workspacePath,
|
|
7279
|
-
|
|
7396
|
+
path23.basename(subagentDir),
|
|
7280
7397
|
subagentDir,
|
|
7281
7398
|
vscodeCmd
|
|
7282
7399
|
);
|
|
7283
7400
|
if (!workspaceReady) {
|
|
7284
7401
|
throw new Error(
|
|
7285
|
-
`VS Code workspace '${
|
|
7402
|
+
`VS Code workspace '${path23.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
7286
7403
|
);
|
|
7287
7404
|
}
|
|
7288
7405
|
await sleep2(500);
|
|
7289
|
-
const child = spawnVsCode(
|
|
7406
|
+
const child = spawnVsCode(vscodeCmd, chatArgs, { label: "send-batch-chat" });
|
|
7290
7407
|
await raceSpawnError(child);
|
|
7291
7408
|
}
|
|
7292
7409
|
|
|
7293
7410
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
7294
7411
|
import { copyFile, mkdir as mkdir8, readFile as readFile9, readdir as readdir2, stat as stat2, writeFile as writeFile3 } from "node:fs/promises";
|
|
7295
|
-
import
|
|
7412
|
+
import path25 from "node:path";
|
|
7296
7413
|
|
|
7297
7414
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
7298
|
-
import
|
|
7415
|
+
import path24 from "node:path";
|
|
7299
7416
|
import JSON5 from "json5";
|
|
7300
7417
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
7301
7418
|
let workspace;
|
|
@@ -7312,10 +7429,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
7312
7429
|
}
|
|
7313
7430
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
7314
7431
|
const folderPath = folder.path;
|
|
7315
|
-
if (
|
|
7432
|
+
if (path24.isAbsolute(folderPath)) {
|
|
7316
7433
|
return folder;
|
|
7317
7434
|
}
|
|
7318
|
-
const absolutePath =
|
|
7435
|
+
const absolutePath = path24.resolve(templateDir, folderPath);
|
|
7319
7436
|
return {
|
|
7320
7437
|
...folder,
|
|
7321
7438
|
path: absolutePath
|
|
@@ -7337,19 +7454,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
7337
7454
|
if (locationMap && typeof locationMap === "object") {
|
|
7338
7455
|
const transformedMap = {};
|
|
7339
7456
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
7340
|
-
const isAbsolute =
|
|
7457
|
+
const isAbsolute = path24.isAbsolute(locationPath);
|
|
7341
7458
|
if (isAbsolute) {
|
|
7342
7459
|
transformedMap[locationPath] = value;
|
|
7343
7460
|
} else {
|
|
7344
7461
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
7345
7462
|
if (firstGlobIndex === -1) {
|
|
7346
|
-
const resolvedPath =
|
|
7463
|
+
const resolvedPath = path24.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
7347
7464
|
transformedMap[resolvedPath] = value;
|
|
7348
7465
|
} else {
|
|
7349
7466
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
7350
7467
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
7351
7468
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
7352
|
-
const resolvedPath = (
|
|
7469
|
+
const resolvedPath = (path24.resolve(templateDir, basePath) + patternPath).replace(
|
|
7353
7470
|
/\\/g,
|
|
7354
7471
|
"/"
|
|
7355
7472
|
);
|
|
@@ -7390,7 +7507,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
7390
7507
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
7391
7508
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
7392
7509
|
for (const subagent of subagents) {
|
|
7393
|
-
const lockFile =
|
|
7510
|
+
const lockFile = path25.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
7394
7511
|
if (!await pathExists(lockFile)) {
|
|
7395
7512
|
return subagent.absolutePath;
|
|
7396
7513
|
}
|
|
@@ -7400,7 +7517,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
7400
7517
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
7401
7518
|
let workspaceContent;
|
|
7402
7519
|
if (workspaceTemplate) {
|
|
7403
|
-
const workspaceSrc =
|
|
7520
|
+
const workspaceSrc = path25.resolve(workspaceTemplate);
|
|
7404
7521
|
if (!await pathExists(workspaceSrc)) {
|
|
7405
7522
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
7406
7523
|
}
|
|
@@ -7413,13 +7530,13 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
7413
7530
|
} else {
|
|
7414
7531
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
7415
7532
|
}
|
|
7416
|
-
const workspaceName = `${
|
|
7417
|
-
const workspaceDst =
|
|
7418
|
-
const templateDir = workspaceTemplate ?
|
|
7533
|
+
const workspaceName = `${path25.basename(subagentDir)}.code-workspace`;
|
|
7534
|
+
const workspaceDst = path25.join(subagentDir, workspaceName);
|
|
7535
|
+
const templateDir = workspaceTemplate ? path25.dirname(path25.resolve(workspaceTemplate)) : subagentDir;
|
|
7419
7536
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
7420
7537
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
7421
7538
|
if (cwd) {
|
|
7422
|
-
const absCwd =
|
|
7539
|
+
const absCwd = path25.resolve(cwd);
|
|
7423
7540
|
const parsed = JSON.parse(transformedContent);
|
|
7424
7541
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
7425
7542
|
if (!alreadyPresent) {
|
|
@@ -7428,35 +7545,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
7428
7545
|
}
|
|
7429
7546
|
}
|
|
7430
7547
|
await writeFile3(workspaceDst, transformedContent, "utf8");
|
|
7431
|
-
const messagesDir =
|
|
7548
|
+
const messagesDir = path25.join(subagentDir, "messages");
|
|
7432
7549
|
await mkdir8(messagesDir, { recursive: true });
|
|
7433
7550
|
return { workspace: workspaceDst, messagesDir };
|
|
7434
7551
|
}
|
|
7435
7552
|
async function createSubagentLock(subagentDir) {
|
|
7436
|
-
const messagesDir =
|
|
7553
|
+
const messagesDir = path25.join(subagentDir, "messages");
|
|
7437
7554
|
if (await pathExists(messagesDir)) {
|
|
7438
7555
|
const files = await readdir2(messagesDir);
|
|
7439
7556
|
await Promise.all(
|
|
7440
7557
|
files.map(async (file) => {
|
|
7441
|
-
const target =
|
|
7558
|
+
const target = path25.join(messagesDir, file);
|
|
7442
7559
|
await removeIfExists(target);
|
|
7443
7560
|
})
|
|
7444
7561
|
);
|
|
7445
7562
|
}
|
|
7446
|
-
const githubAgentsDir =
|
|
7563
|
+
const githubAgentsDir = path25.join(subagentDir, ".github", "agents");
|
|
7447
7564
|
if (await pathExists(githubAgentsDir)) {
|
|
7448
7565
|
const agentFiles = await readdir2(githubAgentsDir);
|
|
7449
7566
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
7450
7567
|
await Promise.all(
|
|
7451
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
7568
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path25.join(githubAgentsDir, file)))
|
|
7452
7569
|
);
|
|
7453
7570
|
}
|
|
7454
|
-
const lockFile =
|
|
7571
|
+
const lockFile = path25.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
7455
7572
|
await writeFile3(lockFile, "", { encoding: "utf8" });
|
|
7456
7573
|
return lockFile;
|
|
7457
7574
|
}
|
|
7458
7575
|
async function removeSubagentLock(subagentDir) {
|
|
7459
|
-
const lockFile =
|
|
7576
|
+
const lockFile = path25.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
7460
7577
|
await removeIfExists(lockFile);
|
|
7461
7578
|
}
|
|
7462
7579
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -7476,9 +7593,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
7476
7593
|
return 1;
|
|
7477
7594
|
}
|
|
7478
7595
|
if (promptFile) {
|
|
7479
|
-
const githubAgentsDir =
|
|
7596
|
+
const githubAgentsDir = path25.join(subagentDir, ".github", "agents");
|
|
7480
7597
|
await mkdir8(githubAgentsDir, { recursive: true });
|
|
7481
|
-
const agentFile =
|
|
7598
|
+
const agentFile = path25.join(githubAgentsDir, `${chatId}.md`);
|
|
7482
7599
|
try {
|
|
7483
7600
|
await copyFile(promptFile, agentFile);
|
|
7484
7601
|
} catch (error) {
|
|
@@ -7497,7 +7614,7 @@ async function resolvePromptFile(promptFile) {
|
|
|
7497
7614
|
if (!promptFile) {
|
|
7498
7615
|
return void 0;
|
|
7499
7616
|
}
|
|
7500
|
-
const resolvedPrompt =
|
|
7617
|
+
const resolvedPrompt = path26.resolve(promptFile);
|
|
7501
7618
|
if (!await pathExists(resolvedPrompt)) {
|
|
7502
7619
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
7503
7620
|
}
|
|
@@ -7513,7 +7630,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
7513
7630
|
}
|
|
7514
7631
|
const resolved = [];
|
|
7515
7632
|
for (const attachment of extraAttachments) {
|
|
7516
|
-
const resolvedPath =
|
|
7633
|
+
const resolvedPath = path26.resolve(attachment);
|
|
7517
7634
|
if (!await pathExists(resolvedPath)) {
|
|
7518
7635
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
7519
7636
|
}
|
|
@@ -7555,7 +7672,7 @@ async function dispatchAgentSession(options) {
|
|
|
7555
7672
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
7556
7673
|
};
|
|
7557
7674
|
}
|
|
7558
|
-
const subagentName =
|
|
7675
|
+
const subagentName = path26.basename(subagentDir);
|
|
7559
7676
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
7560
7677
|
const preparationResult = await prepareSubagentDirectory(
|
|
7561
7678
|
subagentDir,
|
|
@@ -7583,9 +7700,9 @@ async function dispatchAgentSession(options) {
|
|
|
7583
7700
|
};
|
|
7584
7701
|
}
|
|
7585
7702
|
const timestamp = generateTimestamp();
|
|
7586
|
-
const messagesDir =
|
|
7587
|
-
const responseFileTmp =
|
|
7588
|
-
const responseFileFinal =
|
|
7703
|
+
const messagesDir = path26.join(subagentDir, "messages");
|
|
7704
|
+
const responseFileTmp = path26.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
7705
|
+
const responseFileFinal = path26.join(messagesDir, `${timestamp}_res.md`);
|
|
7589
7706
|
const requestInstructions = createRequestPrompt(
|
|
7590
7707
|
userQuery,
|
|
7591
7708
|
responseFileTmp,
|
|
@@ -7690,7 +7807,7 @@ async function dispatchBatchAgent(options) {
|
|
|
7690
7807
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
7691
7808
|
};
|
|
7692
7809
|
}
|
|
7693
|
-
subagentName =
|
|
7810
|
+
subagentName = path26.basename(subagentDir);
|
|
7694
7811
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
7695
7812
|
const preparationResult = await prepareSubagentDirectory(
|
|
7696
7813
|
subagentDir,
|
|
@@ -7721,17 +7838,17 @@ async function dispatchBatchAgent(options) {
|
|
|
7721
7838
|
};
|
|
7722
7839
|
}
|
|
7723
7840
|
const timestamp = generateTimestamp();
|
|
7724
|
-
const messagesDir =
|
|
7841
|
+
const messagesDir = path26.join(subagentDir, "messages");
|
|
7725
7842
|
requestFiles = userQueries.map(
|
|
7726
|
-
(_, index) =>
|
|
7843
|
+
(_, index) => path26.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
7727
7844
|
);
|
|
7728
7845
|
const responseTmpFiles = userQueries.map(
|
|
7729
|
-
(_, index) =>
|
|
7846
|
+
(_, index) => path26.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
7730
7847
|
);
|
|
7731
7848
|
responseFilesFinal = userQueries.map(
|
|
7732
|
-
(_, index) =>
|
|
7849
|
+
(_, index) => path26.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
7733
7850
|
);
|
|
7734
|
-
const orchestratorFile =
|
|
7851
|
+
const orchestratorFile = path26.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
7735
7852
|
if (!dryRun) {
|
|
7736
7853
|
await Promise.all(
|
|
7737
7854
|
userQueries.map((query, index) => {
|
|
@@ -7817,7 +7934,7 @@ async function dispatchBatchAgent(options) {
|
|
|
7817
7934
|
|
|
7818
7935
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
7819
7936
|
import { writeFile as writeFile5 } from "node:fs/promises";
|
|
7820
|
-
import
|
|
7937
|
+
import path27 from "node:path";
|
|
7821
7938
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
7822
7939
|
folders: [
|
|
7823
7940
|
{
|
|
@@ -7848,7 +7965,7 @@ async function provisionSubagents(options) {
|
|
|
7848
7965
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
7849
7966
|
throw new Error("subagents must be a positive integer");
|
|
7850
7967
|
}
|
|
7851
|
-
const targetPath =
|
|
7968
|
+
const targetPath = path27.resolve(targetRoot);
|
|
7852
7969
|
if (!dryRun) {
|
|
7853
7970
|
await ensureDir(targetPath);
|
|
7854
7971
|
}
|
|
@@ -7868,7 +7985,7 @@ async function provisionSubagents(options) {
|
|
|
7868
7985
|
continue;
|
|
7869
7986
|
}
|
|
7870
7987
|
highestNumber = Math.max(highestNumber, parsed);
|
|
7871
|
-
const lockFile =
|
|
7988
|
+
const lockFile = path27.join(entry.absolutePath, lockName);
|
|
7872
7989
|
const locked = await pathExists(lockFile);
|
|
7873
7990
|
if (locked) {
|
|
7874
7991
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -7885,10 +8002,10 @@ async function provisionSubagents(options) {
|
|
|
7885
8002
|
break;
|
|
7886
8003
|
}
|
|
7887
8004
|
const subagentDir = subagent.absolutePath;
|
|
7888
|
-
const githubAgentsDir =
|
|
7889
|
-
const lockFile =
|
|
7890
|
-
const workspaceDst =
|
|
7891
|
-
const wakeupDst =
|
|
8005
|
+
const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
|
|
8006
|
+
const lockFile = path27.join(subagentDir, lockName);
|
|
8007
|
+
const workspaceDst = path27.join(subagentDir, `${path27.basename(subagentDir)}.code-workspace`);
|
|
8008
|
+
const wakeupDst = path27.join(githubAgentsDir, "wakeup.md");
|
|
7892
8009
|
const isLocked = await pathExists(lockFile);
|
|
7893
8010
|
if (isLocked && !force) {
|
|
7894
8011
|
continue;
|
|
@@ -7926,10 +8043,10 @@ async function provisionSubagents(options) {
|
|
|
7926
8043
|
let nextIndex = highestNumber;
|
|
7927
8044
|
while (subagentsProvisioned < subagents) {
|
|
7928
8045
|
nextIndex += 1;
|
|
7929
|
-
const subagentDir =
|
|
7930
|
-
const githubAgentsDir =
|
|
7931
|
-
const workspaceDst =
|
|
7932
|
-
const wakeupDst =
|
|
8046
|
+
const subagentDir = path27.join(targetPath, `subagent-${nextIndex}`);
|
|
8047
|
+
const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
|
|
8048
|
+
const workspaceDst = path27.join(subagentDir, `${path27.basename(subagentDir)}.code-workspace`);
|
|
8049
|
+
const wakeupDst = path27.join(githubAgentsDir, "wakeup.md");
|
|
7933
8050
|
if (!dryRun) {
|
|
7934
8051
|
await ensureDir(subagentDir);
|
|
7935
8052
|
await ensureDir(githubAgentsDir);
|
|
@@ -7955,8 +8072,6 @@ var AGENTV_REQUEST_TEMPLATE = `[[ ## task ## ]]
|
|
|
7955
8072
|
|
|
7956
8073
|
**IMPORTANT**: Follow these exact steps:
|
|
7957
8074
|
1. Create and write your complete response to: {{responseFileTmp}}
|
|
7958
|
-
- All intended file outputs/changes MUST be written in your response file.
|
|
7959
|
-
- For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
|
|
7960
8075
|
2. When completely finished, run these PowerShell commands to signal completion:
|
|
7961
8076
|
\`\`\`
|
|
7962
8077
|
Move-Item -LiteralPath '{{responseFileTmp}}' -Destination '{{responseFileFinal}}'
|
|
@@ -7973,8 +8088,6 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
|
|
|
7973
8088
|
|
|
7974
8089
|
**IMPORTANT**: Follow these exact steps:
|
|
7975
8090
|
1. Create and write your complete response to: {{responseFileTmp}}
|
|
7976
|
-
- All intended file outputs/changes MUST be written in your response file.
|
|
7977
|
-
- For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
|
|
7978
8091
|
2. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
|
|
7979
8092
|
3. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
|
|
7980
8093
|
`;
|
|
@@ -8123,7 +8236,7 @@ var VSCodeProvider = class {
|
|
|
8123
8236
|
async function locateVSCodeExecutable(candidate) {
|
|
8124
8237
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
8125
8238
|
if (includesPathSeparator) {
|
|
8126
|
-
const resolved =
|
|
8239
|
+
const resolved = path28.isAbsolute(candidate) ? candidate : path28.resolve(candidate);
|
|
8127
8240
|
try {
|
|
8128
8241
|
await access3(resolved, constants3.F_OK);
|
|
8129
8242
|
return resolved;
|
|
@@ -8152,7 +8265,7 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
8152
8265
|
return void 0;
|
|
8153
8266
|
}
|
|
8154
8267
|
try {
|
|
8155
|
-
const stats = await stat4(
|
|
8268
|
+
const stats = await stat4(path28.resolve(template));
|
|
8156
8269
|
return stats.isFile() ? template : void 0;
|
|
8157
8270
|
} catch {
|
|
8158
8271
|
return template;
|
|
@@ -8178,7 +8291,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
|
|
|
8178
8291
|
return "";
|
|
8179
8292
|
}
|
|
8180
8293
|
const buildList = (files) => files.map((absolutePath) => {
|
|
8181
|
-
const fileName =
|
|
8294
|
+
const fileName = path28.basename(absolutePath);
|
|
8182
8295
|
const fileUri = pathToFileUri3(absolutePath);
|
|
8183
8296
|
return `* [${fileName}](${fileUri})`;
|
|
8184
8297
|
});
|
|
@@ -8203,8 +8316,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
|
|
|
8203
8316
|
}
|
|
8204
8317
|
const unique = /* @__PURE__ */ new Map();
|
|
8205
8318
|
for (const attachment of attachments) {
|
|
8206
|
-
const absolutePath =
|
|
8207
|
-
const normalized = absolutePath.split(
|
|
8319
|
+
const absolutePath = path28.resolve(attachment);
|
|
8320
|
+
const normalized = absolutePath.split(path28.sep).join("/");
|
|
8208
8321
|
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
8209
8322
|
if (!unique.has(absolutePath)) {
|
|
8210
8323
|
unique.set(absolutePath, absolutePath);
|
|
@@ -8219,7 +8332,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
8219
8332
|
}
|
|
8220
8333
|
const unique = /* @__PURE__ */ new Map();
|
|
8221
8334
|
for (const attachment of attachments) {
|
|
8222
|
-
const absolutePath =
|
|
8335
|
+
const absolutePath = path28.resolve(attachment);
|
|
8223
8336
|
if (!unique.has(absolutePath)) {
|
|
8224
8337
|
unique.set(absolutePath, absolutePath);
|
|
8225
8338
|
}
|
|
@@ -8227,7 +8340,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
8227
8340
|
return Array.from(unique.values());
|
|
8228
8341
|
}
|
|
8229
8342
|
function pathToFileUri3(filePath) {
|
|
8230
|
-
const absolutePath =
|
|
8343
|
+
const absolutePath = path28.isAbsolute(filePath) ? filePath : path28.resolve(filePath);
|
|
8231
8344
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
8232
8345
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
8233
8346
|
return `file:///${normalizedPath}`;
|
|
@@ -8240,7 +8353,7 @@ function normalizeAttachments(attachments) {
|
|
|
8240
8353
|
}
|
|
8241
8354
|
const deduped = /* @__PURE__ */ new Set();
|
|
8242
8355
|
for (const attachment of attachments) {
|
|
8243
|
-
deduped.add(
|
|
8356
|
+
deduped.add(path28.resolve(attachment));
|
|
8244
8357
|
}
|
|
8245
8358
|
return Array.from(deduped);
|
|
8246
8359
|
}
|
|
@@ -8249,7 +8362,7 @@ function mergeAttachments(all) {
|
|
|
8249
8362
|
for (const list of all) {
|
|
8250
8363
|
if (!list) continue;
|
|
8251
8364
|
for (const inputFile of list) {
|
|
8252
|
-
deduped.add(
|
|
8365
|
+
deduped.add(path28.resolve(inputFile));
|
|
8253
8366
|
}
|
|
8254
8367
|
}
|
|
8255
8368
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -8298,7 +8411,7 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
8298
8411
|
// src/evaluation/providers/targets-file.ts
|
|
8299
8412
|
import { constants as constants4 } from "node:fs";
|
|
8300
8413
|
import { access as access4, readFile as readFile10 } from "node:fs/promises";
|
|
8301
|
-
import
|
|
8414
|
+
import path29 from "node:path";
|
|
8302
8415
|
import { parse as parse3 } from "yaml";
|
|
8303
8416
|
function isRecord(value) {
|
|
8304
8417
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -8335,7 +8448,7 @@ async function fileExists3(filePath) {
|
|
|
8335
8448
|
}
|
|
8336
8449
|
}
|
|
8337
8450
|
async function readTargetDefinitions(filePath) {
|
|
8338
|
-
const absolutePath =
|
|
8451
|
+
const absolutePath = path29.resolve(filePath);
|
|
8339
8452
|
if (!await fileExists3(absolutePath)) {
|
|
8340
8453
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
8341
8454
|
}
|
|
@@ -8355,16 +8468,16 @@ function listTargetNames(definitions) {
|
|
|
8355
8468
|
}
|
|
8356
8469
|
|
|
8357
8470
|
// src/evaluation/providers/provider-discovery.ts
|
|
8358
|
-
import
|
|
8471
|
+
import path30 from "node:path";
|
|
8359
8472
|
import fg2 from "fast-glob";
|
|
8360
8473
|
async function discoverProviders(registry, baseDir) {
|
|
8361
8474
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
8362
8475
|
const candidateDirs = [];
|
|
8363
|
-
let dir =
|
|
8364
|
-
const root =
|
|
8476
|
+
let dir = path30.resolve(baseDir);
|
|
8477
|
+
const root = path30.parse(dir).root;
|
|
8365
8478
|
while (dir !== root) {
|
|
8366
|
-
candidateDirs.push(
|
|
8367
|
-
dir =
|
|
8479
|
+
candidateDirs.push(path30.join(dir, ".agentv", "providers"));
|
|
8480
|
+
dir = path30.dirname(dir);
|
|
8368
8481
|
}
|
|
8369
8482
|
let files = [];
|
|
8370
8483
|
for (const providersDir of candidateDirs) {
|
|
@@ -8380,7 +8493,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
8380
8493
|
}
|
|
8381
8494
|
const discoveredKinds = [];
|
|
8382
8495
|
for (const filePath of files) {
|
|
8383
|
-
const basename =
|
|
8496
|
+
const basename = path30.basename(filePath);
|
|
8384
8497
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
8385
8498
|
if (registry.has(kindName)) {
|
|
8386
8499
|
continue;
|
|
@@ -8587,16 +8700,16 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
8587
8700
|
});
|
|
8588
8701
|
}
|
|
8589
8702
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
8590
|
-
const { mkdir:
|
|
8703
|
+
const { mkdir: mkdir15, readFile: readFile13, rm: rm7, writeFile: writeFile10 } = await import("node:fs/promises");
|
|
8591
8704
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
8592
|
-
const
|
|
8705
|
+
const path42 = await import("node:path");
|
|
8593
8706
|
const { randomUUID: randomUUID8 } = await import("node:crypto");
|
|
8594
|
-
const dir =
|
|
8595
|
-
await
|
|
8596
|
-
const stdinPath =
|
|
8597
|
-
const stdoutPath =
|
|
8598
|
-
const stderrPath =
|
|
8599
|
-
await
|
|
8707
|
+
const dir = path42.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
|
|
8708
|
+
await mkdir15(dir, { recursive: true });
|
|
8709
|
+
const stdinPath = path42.join(dir, "stdin.txt");
|
|
8710
|
+
const stdoutPath = path42.join(dir, "stdout.txt");
|
|
8711
|
+
const stderrPath = path42.join(dir, "stderr.txt");
|
|
8712
|
+
await writeFile10(stdinPath, stdinPayload, "utf8");
|
|
8600
8713
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
8601
8714
|
const { spawn: spawn4 } = await import("node:child_process");
|
|
8602
8715
|
try {
|
|
@@ -8625,11 +8738,11 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
8625
8738
|
resolve(code ?? 0);
|
|
8626
8739
|
});
|
|
8627
8740
|
});
|
|
8628
|
-
const stdout = (await
|
|
8629
|
-
const stderr = (await
|
|
8741
|
+
const stdout = (await readFile13(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
8742
|
+
const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
8630
8743
|
return { stdout, stderr, exitCode };
|
|
8631
8744
|
} finally {
|
|
8632
|
-
await
|
|
8745
|
+
await rm7(dir, { recursive: true, force: true });
|
|
8633
8746
|
}
|
|
8634
8747
|
}
|
|
8635
8748
|
|
|
@@ -8947,7 +9060,7 @@ var CodeEvaluator = class {
|
|
|
8947
9060
|
outputPath,
|
|
8948
9061
|
guidelineFiles: context.evalCase.guideline_paths,
|
|
8949
9062
|
inputFiles: context.evalCase.file_paths.filter(
|
|
8950
|
-
(
|
|
9063
|
+
(path42) => !context.evalCase.guideline_paths.includes(path42)
|
|
8951
9064
|
),
|
|
8952
9065
|
input: context.evalCase.input,
|
|
8953
9066
|
trace: context.trace ?? null,
|
|
@@ -9197,6 +9310,8 @@ ${context.fileChanges}`;
|
|
|
9197
9310
|
};
|
|
9198
9311
|
} catch (e) {
|
|
9199
9312
|
const message = e instanceof Error ? e.message : String(e);
|
|
9313
|
+
const evalName = context.evaluator?.name ?? "llm-judge";
|
|
9314
|
+
console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
|
|
9200
9315
|
return {
|
|
9201
9316
|
score: 0,
|
|
9202
9317
|
verdict: "skip",
|
|
@@ -9225,24 +9340,39 @@ ${context.fileChanges}`;
|
|
|
9225
9340
|
systemPrompt,
|
|
9226
9341
|
target: judgeProvider.targetName
|
|
9227
9342
|
};
|
|
9228
|
-
|
|
9229
|
-
|
|
9230
|
-
|
|
9231
|
-
|
|
9232
|
-
|
|
9233
|
-
|
|
9234
|
-
|
|
9235
|
-
|
|
9236
|
-
|
|
9237
|
-
|
|
9238
|
-
|
|
9239
|
-
|
|
9240
|
-
|
|
9241
|
-
|
|
9242
|
-
|
|
9243
|
-
|
|
9244
|
-
|
|
9245
|
-
|
|
9343
|
+
try {
|
|
9344
|
+
const { data, tokenUsage } = await this.runWithRetry({
|
|
9345
|
+
context,
|
|
9346
|
+
judgeProvider,
|
|
9347
|
+
systemPrompt,
|
|
9348
|
+
userPrompt: prompt,
|
|
9349
|
+
schema: rubricEvaluationSchema
|
|
9350
|
+
});
|
|
9351
|
+
const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
|
|
9352
|
+
return {
|
|
9353
|
+
score,
|
|
9354
|
+
verdict,
|
|
9355
|
+
hits,
|
|
9356
|
+
misses,
|
|
9357
|
+
expectedAspectCount: rubrics.length,
|
|
9358
|
+
reasoning: data.overall_reasoning,
|
|
9359
|
+
evaluatorRawRequest,
|
|
9360
|
+
tokenUsage
|
|
9361
|
+
};
|
|
9362
|
+
} catch (e) {
|
|
9363
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
9364
|
+
const evalName = context.evaluator?.name ?? "llm-judge";
|
|
9365
|
+
console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
|
|
9366
|
+
return {
|
|
9367
|
+
score: 0,
|
|
9368
|
+
verdict: "skip",
|
|
9369
|
+
hits: [],
|
|
9370
|
+
misses: [`Judge parse failure after 3 attempts: ${message}`],
|
|
9371
|
+
expectedAspectCount: rubrics.length,
|
|
9372
|
+
reasoning: `Judge parse failure after 3 attempts: ${message}`,
|
|
9373
|
+
evaluatorRawRequest
|
|
9374
|
+
};
|
|
9375
|
+
}
|
|
9246
9376
|
}
|
|
9247
9377
|
/**
|
|
9248
9378
|
* Evaluate using score-range rubrics (analytic rubric scoring).
|
|
@@ -9256,25 +9386,40 @@ ${context.fileChanges}`;
|
|
|
9256
9386
|
systemPrompt,
|
|
9257
9387
|
target: judgeProvider.targetName
|
|
9258
9388
|
};
|
|
9259
|
-
|
|
9260
|
-
|
|
9261
|
-
|
|
9262
|
-
|
|
9263
|
-
|
|
9264
|
-
|
|
9265
|
-
|
|
9266
|
-
|
|
9267
|
-
|
|
9268
|
-
|
|
9269
|
-
|
|
9270
|
-
|
|
9271
|
-
|
|
9272
|
-
|
|
9273
|
-
|
|
9274
|
-
|
|
9275
|
-
|
|
9276
|
-
|
|
9277
|
-
|
|
9389
|
+
try {
|
|
9390
|
+
const { data, tokenUsage } = await this.runWithRetry({
|
|
9391
|
+
context,
|
|
9392
|
+
judgeProvider,
|
|
9393
|
+
systemPrompt,
|
|
9394
|
+
userPrompt: prompt,
|
|
9395
|
+
schema: scoreRangeEvaluationSchema
|
|
9396
|
+
});
|
|
9397
|
+
const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
|
|
9398
|
+
return {
|
|
9399
|
+
score,
|
|
9400
|
+
verdict,
|
|
9401
|
+
hits,
|
|
9402
|
+
misses,
|
|
9403
|
+
expectedAspectCount: rubrics.length,
|
|
9404
|
+
reasoning: data.overall_reasoning,
|
|
9405
|
+
evaluatorRawRequest,
|
|
9406
|
+
details,
|
|
9407
|
+
tokenUsage
|
|
9408
|
+
};
|
|
9409
|
+
} catch (e) {
|
|
9410
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
9411
|
+
const evalName = context.evaluator?.name ?? "llm-judge";
|
|
9412
|
+
console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
|
|
9413
|
+
return {
|
|
9414
|
+
score: 0,
|
|
9415
|
+
verdict: "skip",
|
|
9416
|
+
hits: [],
|
|
9417
|
+
misses: [`Judge parse failure after 3 attempts: ${message}`],
|
|
9418
|
+
expectedAspectCount: rubrics.length,
|
|
9419
|
+
reasoning: `Judge parse failure after 3 attempts: ${message}`,
|
|
9420
|
+
evaluatorRawRequest
|
|
9421
|
+
};
|
|
9422
|
+
}
|
|
9278
9423
|
}
|
|
9279
9424
|
/**
|
|
9280
9425
|
* Build prompt for score-range rubric evaluation.
|
|
@@ -9560,19 +9705,13 @@ var CompositeEvaluator = class {
|
|
|
9560
9705
|
runWeightedAverage(results, weights) {
|
|
9561
9706
|
let totalWeight = 0;
|
|
9562
9707
|
let weightedSum = 0;
|
|
9708
|
+
let evaluatedCount = 0;
|
|
9563
9709
|
const allHits = [];
|
|
9564
9710
|
const allMisses = [];
|
|
9565
9711
|
const reasoningParts = [];
|
|
9566
9712
|
const scores = [];
|
|
9567
9713
|
for (const member of results) {
|
|
9568
9714
|
const weight = weights?.[member.id] ?? 1;
|
|
9569
|
-
totalWeight += weight;
|
|
9570
|
-
weightedSum += member.result.score * weight;
|
|
9571
|
-
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
9572
|
-
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
9573
|
-
if (member.result.reasoning) {
|
|
9574
|
-
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
9575
|
-
}
|
|
9576
9715
|
scores.push({
|
|
9577
9716
|
name: member.id,
|
|
9578
9717
|
type: member.type,
|
|
@@ -9587,6 +9726,32 @@ var CompositeEvaluator = class {
|
|
|
9587
9726
|
details: member.result.details,
|
|
9588
9727
|
tokenUsage: member.result.tokenUsage
|
|
9589
9728
|
});
|
|
9729
|
+
if (member.result.verdict === "skip") {
|
|
9730
|
+
continue;
|
|
9731
|
+
}
|
|
9732
|
+
evaluatedCount++;
|
|
9733
|
+
totalWeight += weight;
|
|
9734
|
+
weightedSum += member.result.score * weight;
|
|
9735
|
+
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
9736
|
+
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
9737
|
+
if (member.result.reasoning) {
|
|
9738
|
+
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
9739
|
+
}
|
|
9740
|
+
}
|
|
9741
|
+
if (evaluatedCount === 0 && results.length > 0) {
|
|
9742
|
+
return {
|
|
9743
|
+
score: 0,
|
|
9744
|
+
verdict: "skip",
|
|
9745
|
+
hits: [],
|
|
9746
|
+
misses: [],
|
|
9747
|
+
expectedAspectCount: 1,
|
|
9748
|
+
reasoning: "All evaluators skipped (infrastructure failure)",
|
|
9749
|
+
evaluatorRawRequest: {
|
|
9750
|
+
aggregator: "weighted_average",
|
|
9751
|
+
...weights ? { weights } : {}
|
|
9752
|
+
},
|
|
9753
|
+
scores
|
|
9754
|
+
};
|
|
9590
9755
|
}
|
|
9591
9756
|
const finalScore = totalWeight > 0 ? weightedSum / totalWeight : 0;
|
|
9592
9757
|
return {
|
|
@@ -9610,19 +9775,8 @@ var CompositeEvaluator = class {
|
|
|
9610
9775
|
const reasoningParts = [];
|
|
9611
9776
|
let passingCount = 0;
|
|
9612
9777
|
let borderlineCount = 0;
|
|
9778
|
+
let evaluatedCount = 0;
|
|
9613
9779
|
for (const member of results) {
|
|
9614
|
-
const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
|
|
9615
|
-
if (isPassing) {
|
|
9616
|
-
passingCount++;
|
|
9617
|
-
if (member.result.verdict === "borderline") {
|
|
9618
|
-
borderlineCount++;
|
|
9619
|
-
}
|
|
9620
|
-
}
|
|
9621
|
-
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
9622
|
-
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
9623
|
-
if (member.result.reasoning) {
|
|
9624
|
-
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
9625
|
-
}
|
|
9626
9780
|
scores.push({
|
|
9627
9781
|
name: member.id,
|
|
9628
9782
|
type: member.type,
|
|
@@ -9636,8 +9790,39 @@ var CompositeEvaluator = class {
|
|
|
9636
9790
|
details: member.result.details,
|
|
9637
9791
|
tokenUsage: member.result.tokenUsage
|
|
9638
9792
|
});
|
|
9793
|
+
if (member.result.verdict === "skip") {
|
|
9794
|
+
continue;
|
|
9795
|
+
}
|
|
9796
|
+
evaluatedCount++;
|
|
9797
|
+
const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
|
|
9798
|
+
if (isPassing) {
|
|
9799
|
+
passingCount++;
|
|
9800
|
+
if (member.result.verdict === "borderline") {
|
|
9801
|
+
borderlineCount++;
|
|
9802
|
+
}
|
|
9803
|
+
}
|
|
9804
|
+
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
9805
|
+
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
9806
|
+
if (member.result.reasoning) {
|
|
9807
|
+
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
9808
|
+
}
|
|
9809
|
+
}
|
|
9810
|
+
if (evaluatedCount === 0 && results.length > 0) {
|
|
9811
|
+
return {
|
|
9812
|
+
score: 0,
|
|
9813
|
+
verdict: "skip",
|
|
9814
|
+
hits: [],
|
|
9815
|
+
misses: [],
|
|
9816
|
+
expectedAspectCount: 1,
|
|
9817
|
+
reasoning: "All evaluators skipped (infrastructure failure)",
|
|
9818
|
+
evaluatorRawRequest: {
|
|
9819
|
+
aggregator: "threshold",
|
|
9820
|
+
threshold
|
|
9821
|
+
},
|
|
9822
|
+
scores
|
|
9823
|
+
};
|
|
9639
9824
|
}
|
|
9640
|
-
const totalCount =
|
|
9825
|
+
const totalCount = evaluatedCount;
|
|
9641
9826
|
const score = totalCount > 0 ? passingCount / totalCount : 0;
|
|
9642
9827
|
const pass = score >= threshold;
|
|
9643
9828
|
if (pass && borderlineCount > 0) {
|
|
@@ -10145,115 +10330,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
10145
10330
|
* Evaluate a single field against the expected value.
|
|
10146
10331
|
*/
|
|
10147
10332
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
10148
|
-
const { path:
|
|
10149
|
-
const candidateValue = resolvePath(candidateData,
|
|
10150
|
-
const expectedValue = resolvePath(expectedData,
|
|
10333
|
+
const { path: path42, match, required = true, weight = 1 } = fieldConfig;
|
|
10334
|
+
const candidateValue = resolvePath(candidateData, path42);
|
|
10335
|
+
const expectedValue = resolvePath(expectedData, path42);
|
|
10151
10336
|
if (expectedValue === void 0) {
|
|
10152
10337
|
return {
|
|
10153
|
-
path:
|
|
10338
|
+
path: path42,
|
|
10154
10339
|
score: 1,
|
|
10155
10340
|
// No expected value means no comparison needed
|
|
10156
10341
|
weight,
|
|
10157
10342
|
hit: true,
|
|
10158
|
-
message: `${
|
|
10343
|
+
message: `${path42}: no expected value`
|
|
10159
10344
|
};
|
|
10160
10345
|
}
|
|
10161
10346
|
if (candidateValue === void 0) {
|
|
10162
10347
|
if (required) {
|
|
10163
10348
|
return {
|
|
10164
|
-
path:
|
|
10349
|
+
path: path42,
|
|
10165
10350
|
score: 0,
|
|
10166
10351
|
weight,
|
|
10167
10352
|
hit: false,
|
|
10168
|
-
message: `${
|
|
10353
|
+
message: `${path42} (required, missing)`
|
|
10169
10354
|
};
|
|
10170
10355
|
}
|
|
10171
10356
|
return {
|
|
10172
|
-
path:
|
|
10357
|
+
path: path42,
|
|
10173
10358
|
score: 1,
|
|
10174
10359
|
// Don't penalize missing optional fields
|
|
10175
10360
|
weight: 0,
|
|
10176
10361
|
// Zero weight means it won't affect the score
|
|
10177
10362
|
hit: true,
|
|
10178
|
-
message: `${
|
|
10363
|
+
message: `${path42}: optional field missing`
|
|
10179
10364
|
};
|
|
10180
10365
|
}
|
|
10181
10366
|
switch (match) {
|
|
10182
10367
|
case "exact":
|
|
10183
|
-
return this.compareExact(
|
|
10368
|
+
return this.compareExact(path42, candidateValue, expectedValue, weight);
|
|
10184
10369
|
case "numeric_tolerance":
|
|
10185
10370
|
return this.compareNumericTolerance(
|
|
10186
|
-
|
|
10371
|
+
path42,
|
|
10187
10372
|
candidateValue,
|
|
10188
10373
|
expectedValue,
|
|
10189
10374
|
fieldConfig,
|
|
10190
10375
|
weight
|
|
10191
10376
|
);
|
|
10192
10377
|
case "date":
|
|
10193
|
-
return this.compareDate(
|
|
10378
|
+
return this.compareDate(path42, candidateValue, expectedValue, fieldConfig, weight);
|
|
10194
10379
|
default:
|
|
10195
10380
|
return {
|
|
10196
|
-
path:
|
|
10381
|
+
path: path42,
|
|
10197
10382
|
score: 0,
|
|
10198
10383
|
weight,
|
|
10199
10384
|
hit: false,
|
|
10200
|
-
message: `${
|
|
10385
|
+
message: `${path42}: unknown match type "${match}"`
|
|
10201
10386
|
};
|
|
10202
10387
|
}
|
|
10203
10388
|
}
|
|
10204
10389
|
/**
|
|
10205
10390
|
* Exact equality comparison.
|
|
10206
10391
|
*/
|
|
10207
|
-
compareExact(
|
|
10392
|
+
compareExact(path42, candidateValue, expectedValue, weight) {
|
|
10208
10393
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
10209
10394
|
return {
|
|
10210
|
-
path:
|
|
10395
|
+
path: path42,
|
|
10211
10396
|
score: 1,
|
|
10212
10397
|
weight,
|
|
10213
10398
|
hit: true,
|
|
10214
|
-
message:
|
|
10399
|
+
message: path42
|
|
10215
10400
|
};
|
|
10216
10401
|
}
|
|
10217
10402
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
10218
10403
|
return {
|
|
10219
|
-
path:
|
|
10404
|
+
path: path42,
|
|
10220
10405
|
score: 0,
|
|
10221
10406
|
weight,
|
|
10222
10407
|
hit: false,
|
|
10223
|
-
message: `${
|
|
10408
|
+
message: `${path42} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
10224
10409
|
};
|
|
10225
10410
|
}
|
|
10226
10411
|
return {
|
|
10227
|
-
path:
|
|
10412
|
+
path: path42,
|
|
10228
10413
|
score: 0,
|
|
10229
10414
|
weight,
|
|
10230
10415
|
hit: false,
|
|
10231
|
-
message: `${
|
|
10416
|
+
message: `${path42} (value mismatch)`
|
|
10232
10417
|
};
|
|
10233
10418
|
}
|
|
10234
10419
|
/**
|
|
10235
10420
|
* Numeric comparison with absolute or relative tolerance.
|
|
10236
10421
|
*/
|
|
10237
|
-
compareNumericTolerance(
|
|
10422
|
+
compareNumericTolerance(path42, candidateValue, expectedValue, fieldConfig, weight) {
|
|
10238
10423
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
10239
10424
|
const candidateNum = toNumber2(candidateValue);
|
|
10240
10425
|
const expectedNum = toNumber2(expectedValue);
|
|
10241
10426
|
if (candidateNum === null || expectedNum === null) {
|
|
10242
10427
|
return {
|
|
10243
|
-
path:
|
|
10428
|
+
path: path42,
|
|
10244
10429
|
score: 0,
|
|
10245
10430
|
weight,
|
|
10246
10431
|
hit: false,
|
|
10247
|
-
message: `${
|
|
10432
|
+
message: `${path42} (non-numeric value)`
|
|
10248
10433
|
};
|
|
10249
10434
|
}
|
|
10250
10435
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
10251
10436
|
return {
|
|
10252
|
-
path:
|
|
10437
|
+
path: path42,
|
|
10253
10438
|
score: 0,
|
|
10254
10439
|
weight,
|
|
10255
10440
|
hit: false,
|
|
10256
|
-
message: `${
|
|
10441
|
+
message: `${path42} (invalid numeric value)`
|
|
10257
10442
|
};
|
|
10258
10443
|
}
|
|
10259
10444
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -10266,61 +10451,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
10266
10451
|
}
|
|
10267
10452
|
if (withinTolerance) {
|
|
10268
10453
|
return {
|
|
10269
|
-
path:
|
|
10454
|
+
path: path42,
|
|
10270
10455
|
score: 1,
|
|
10271
10456
|
weight,
|
|
10272
10457
|
hit: true,
|
|
10273
|
-
message: `${
|
|
10458
|
+
message: `${path42} (within tolerance: diff=${diff.toFixed(2)})`
|
|
10274
10459
|
};
|
|
10275
10460
|
}
|
|
10276
10461
|
return {
|
|
10277
|
-
path:
|
|
10462
|
+
path: path42,
|
|
10278
10463
|
score: 0,
|
|
10279
10464
|
weight,
|
|
10280
10465
|
hit: false,
|
|
10281
|
-
message: `${
|
|
10466
|
+
message: `${path42} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
10282
10467
|
};
|
|
10283
10468
|
}
|
|
10284
10469
|
/**
|
|
10285
10470
|
* Date comparison with format normalization.
|
|
10286
10471
|
*/
|
|
10287
|
-
compareDate(
|
|
10472
|
+
compareDate(path42, candidateValue, expectedValue, fieldConfig, weight) {
|
|
10288
10473
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
10289
10474
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
10290
10475
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
10291
10476
|
if (candidateDate === null) {
|
|
10292
10477
|
return {
|
|
10293
|
-
path:
|
|
10478
|
+
path: path42,
|
|
10294
10479
|
score: 0,
|
|
10295
10480
|
weight,
|
|
10296
10481
|
hit: false,
|
|
10297
|
-
message: `${
|
|
10482
|
+
message: `${path42} (unparseable candidate date)`
|
|
10298
10483
|
};
|
|
10299
10484
|
}
|
|
10300
10485
|
if (expectedDate === null) {
|
|
10301
10486
|
return {
|
|
10302
|
-
path:
|
|
10487
|
+
path: path42,
|
|
10303
10488
|
score: 0,
|
|
10304
10489
|
weight,
|
|
10305
10490
|
hit: false,
|
|
10306
|
-
message: `${
|
|
10491
|
+
message: `${path42} (unparseable expected date)`
|
|
10307
10492
|
};
|
|
10308
10493
|
}
|
|
10309
10494
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
10310
10495
|
return {
|
|
10311
|
-
path:
|
|
10496
|
+
path: path42,
|
|
10312
10497
|
score: 1,
|
|
10313
10498
|
weight,
|
|
10314
10499
|
hit: true,
|
|
10315
|
-
message:
|
|
10500
|
+
message: path42
|
|
10316
10501
|
};
|
|
10317
10502
|
}
|
|
10318
10503
|
return {
|
|
10319
|
-
path:
|
|
10504
|
+
path: path42,
|
|
10320
10505
|
score: 0,
|
|
10321
10506
|
weight,
|
|
10322
10507
|
hit: false,
|
|
10323
|
-
message: `${
|
|
10508
|
+
message: `${path42} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
10324
10509
|
};
|
|
10325
10510
|
}
|
|
10326
10511
|
/**
|
|
@@ -10361,11 +10546,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
10361
10546
|
};
|
|
10362
10547
|
}
|
|
10363
10548
|
};
|
|
10364
|
-
function resolvePath(obj,
|
|
10365
|
-
if (!
|
|
10549
|
+
function resolvePath(obj, path42) {
|
|
10550
|
+
if (!path42 || !obj) {
|
|
10366
10551
|
return void 0;
|
|
10367
10552
|
}
|
|
10368
|
-
const parts =
|
|
10553
|
+
const parts = path42.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
10369
10554
|
let current = obj;
|
|
10370
10555
|
for (const part of parts) {
|
|
10371
10556
|
if (current === null || current === void 0) {
|
|
@@ -10497,7 +10682,7 @@ var LatencyEvaluator = class {
|
|
|
10497
10682
|
|
|
10498
10683
|
// src/evaluation/evaluators/agent-judge.ts
|
|
10499
10684
|
import fs2 from "node:fs/promises";
|
|
10500
|
-
import
|
|
10685
|
+
import path31 from "node:path";
|
|
10501
10686
|
import { generateText as generateText4, stepCountIs, tool } from "ai";
|
|
10502
10687
|
import { z as z4 } from "zod";
|
|
10503
10688
|
var DEFAULT_MAX_STEPS = 10;
|
|
@@ -10846,8 +11031,8 @@ ${outputSchema}`;
|
|
|
10846
11031
|
}
|
|
10847
11032
|
};
|
|
10848
11033
|
function resolveSandboxed(basePath, relativePath) {
|
|
10849
|
-
const resolved =
|
|
10850
|
-
if (!resolved.startsWith(basePath +
|
|
11034
|
+
const resolved = path31.resolve(basePath, relativePath);
|
|
11035
|
+
if (!resolved.startsWith(basePath + path31.sep) && resolved !== basePath) {
|
|
10851
11036
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
10852
11037
|
}
|
|
10853
11038
|
return resolved;
|
|
@@ -10930,11 +11115,11 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
10930
11115
|
for (const entry of entries) {
|
|
10931
11116
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
10932
11117
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
10933
|
-
const fullPath =
|
|
11118
|
+
const fullPath = path31.join(dirPath, entry.name);
|
|
10934
11119
|
if (entry.isDirectory()) {
|
|
10935
11120
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
10936
11121
|
} else if (entry.isFile()) {
|
|
10937
|
-
const ext =
|
|
11122
|
+
const ext = path31.extname(entry.name).toLowerCase();
|
|
10938
11123
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
10939
11124
|
try {
|
|
10940
11125
|
const stat8 = await fs2.stat(fullPath);
|
|
@@ -10946,7 +11131,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
10946
11131
|
regex.lastIndex = 0;
|
|
10947
11132
|
if (regex.test(lines[i])) {
|
|
10948
11133
|
matches.push({
|
|
10949
|
-
file:
|
|
11134
|
+
file: path31.relative(workspacePath, fullPath),
|
|
10950
11135
|
line: i + 1,
|
|
10951
11136
|
text: lines[i].substring(0, 200)
|
|
10952
11137
|
});
|
|
@@ -11183,8 +11368,8 @@ var TokenUsageEvaluator = class {
|
|
|
11183
11368
|
};
|
|
11184
11369
|
|
|
11185
11370
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
11186
|
-
function getNestedValue(obj,
|
|
11187
|
-
const parts =
|
|
11371
|
+
function getNestedValue(obj, path42) {
|
|
11372
|
+
const parts = path42.split(".");
|
|
11188
11373
|
let current = obj;
|
|
11189
11374
|
for (const part of parts) {
|
|
11190
11375
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -11745,9 +11930,9 @@ function runEqualsAssertion(output, value) {
|
|
|
11745
11930
|
}
|
|
11746
11931
|
|
|
11747
11932
|
// src/evaluation/orchestrator.ts
|
|
11748
|
-
import { createHash as
|
|
11749
|
-
import { mkdir as
|
|
11750
|
-
import
|
|
11933
|
+
import { createHash as createHash3, randomUUID as randomUUID7 } from "node:crypto";
|
|
11934
|
+
import { mkdir as mkdir13, stat as stat7 } from "node:fs/promises";
|
|
11935
|
+
import path39 from "node:path";
|
|
11751
11936
|
import micromatch4 from "micromatch";
|
|
11752
11937
|
|
|
11753
11938
|
// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
|
|
@@ -11938,7 +12123,7 @@ var DeterministicAssertionEvaluator = class {
|
|
|
11938
12123
|
import { readFileSync } from "node:fs";
|
|
11939
12124
|
|
|
11940
12125
|
// src/evaluation/evaluators/prompt-resolution.ts
|
|
11941
|
-
import
|
|
12126
|
+
import path32 from "node:path";
|
|
11942
12127
|
async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
|
|
11943
12128
|
if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
|
|
11944
12129
|
if (!context) {
|
|
@@ -11987,7 +12172,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
|
11987
12172
|
};
|
|
11988
12173
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
11989
12174
|
const scriptPath = script[script.length - 1];
|
|
11990
|
-
const cwd =
|
|
12175
|
+
const cwd = path32.dirname(scriptPath);
|
|
11991
12176
|
try {
|
|
11992
12177
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
11993
12178
|
const prompt = stdout.trim();
|
|
@@ -12280,16 +12465,16 @@ function createBuiltinRegistry() {
|
|
|
12280
12465
|
}
|
|
12281
12466
|
|
|
12282
12467
|
// src/evaluation/registry/assertion-discovery.ts
|
|
12283
|
-
import
|
|
12468
|
+
import path33 from "node:path";
|
|
12284
12469
|
import fg3 from "fast-glob";
|
|
12285
12470
|
async function discoverAssertions(registry, baseDir) {
|
|
12286
12471
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
12287
12472
|
const candidateDirs = [];
|
|
12288
|
-
let dir =
|
|
12289
|
-
const root =
|
|
12473
|
+
let dir = path33.resolve(baseDir);
|
|
12474
|
+
const root = path33.parse(dir).root;
|
|
12290
12475
|
while (dir !== root) {
|
|
12291
|
-
candidateDirs.push(
|
|
12292
|
-
dir =
|
|
12476
|
+
candidateDirs.push(path33.join(dir, ".agentv", "assertions"));
|
|
12477
|
+
dir = path33.dirname(dir);
|
|
12293
12478
|
}
|
|
12294
12479
|
let files = [];
|
|
12295
12480
|
for (const assertionsDir of candidateDirs) {
|
|
@@ -12305,7 +12490,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
12305
12490
|
}
|
|
12306
12491
|
const discoveredTypes = [];
|
|
12307
12492
|
for (const filePath of files) {
|
|
12308
|
-
const basename =
|
|
12493
|
+
const basename = path33.basename(filePath);
|
|
12309
12494
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
12310
12495
|
if (registry.has(typeName)) {
|
|
12311
12496
|
continue;
|
|
@@ -12465,7 +12650,7 @@ function getTCritical(df) {
|
|
|
12465
12650
|
// src/evaluation/workspace/file-changes.ts
|
|
12466
12651
|
import { exec as execCallback } from "node:child_process";
|
|
12467
12652
|
import { readdirSync as readdirSync2, statSync } from "node:fs";
|
|
12468
|
-
import
|
|
12653
|
+
import path34 from "node:path";
|
|
12469
12654
|
import { promisify as promisify4 } from "node:util";
|
|
12470
12655
|
var execAsync4 = promisify4(execCallback);
|
|
12471
12656
|
function gitExecOpts(workspacePath) {
|
|
@@ -12499,10 +12684,10 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
12499
12684
|
}
|
|
12500
12685
|
for (const entry of entries) {
|
|
12501
12686
|
if (entry === ".git" || entry === "node_modules") continue;
|
|
12502
|
-
const childPath =
|
|
12687
|
+
const childPath = path34.join(workspacePath, entry);
|
|
12503
12688
|
try {
|
|
12504
12689
|
if (!statSync(childPath).isDirectory()) continue;
|
|
12505
|
-
if (!statSync(
|
|
12690
|
+
if (!statSync(path34.join(childPath, ".git")).isDirectory()) continue;
|
|
12506
12691
|
} catch {
|
|
12507
12692
|
continue;
|
|
12508
12693
|
}
|
|
@@ -12513,9 +12698,7 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
12513
12698
|
|
|
12514
12699
|
// src/evaluation/workspace/manager.ts
|
|
12515
12700
|
import { cp, mkdir as mkdir10, readdir as readdir3, rm as rm4, stat as stat5 } from "node:fs/promises";
|
|
12516
|
-
import
|
|
12517
|
-
import path34 from "node:path";
|
|
12518
|
-
var DEFAULT_WORKSPACE_ROOT = path34.join(os3.homedir(), ".agentv", "workspaces");
|
|
12701
|
+
import path35 from "node:path";
|
|
12519
12702
|
var TemplateNotFoundError = class extends Error {
|
|
12520
12703
|
constructor(templatePath) {
|
|
12521
12704
|
super(`Workspace template not found: ${templatePath}`);
|
|
@@ -12544,15 +12727,15 @@ async function isDirectory(filePath) {
|
|
|
12544
12727
|
}
|
|
12545
12728
|
}
|
|
12546
12729
|
/**
 * Build the directory path used for one eval case's workspace.
 *
 * @param {string} evalRunId - Eval run identifier; first path segment below the root.
 * @param {string} caseId - Case identifier; last path segment.
 * @param {string} [workspaceRoot] - Root directory; when null/undefined the value of
 *   getWorkspacesRoot() is used instead.
 * @returns {string} `<root>/<evalRunId>/<caseId>` joined with the platform separator.
 */
function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
  return path35.join(workspaceRoot ?? getWorkspacesRoot(), evalRunId, caseId);
}
|
|
12550
12733
|
async function copyDirectoryRecursive(src, dest) {
|
|
12551
12734
|
await mkdir10(dest, { recursive: true });
|
|
12552
12735
|
const entries = await readdir3(src, { withFileTypes: true });
|
|
12553
12736
|
for (const entry of entries) {
|
|
12554
|
-
const srcPath =
|
|
12555
|
-
const destPath =
|
|
12737
|
+
const srcPath = path35.join(src, entry.name);
|
|
12738
|
+
const destPath = path35.join(dest, entry.name);
|
|
12556
12739
|
if (entry.name === ".git") {
|
|
12557
12740
|
continue;
|
|
12558
12741
|
}
|
|
@@ -12564,7 +12747,7 @@ async function copyDirectoryRecursive(src, dest) {
|
|
|
12564
12747
|
}
|
|
12565
12748
|
}
|
|
12566
12749
|
async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
|
|
12567
|
-
const resolvedTemplatePath =
|
|
12750
|
+
const resolvedTemplatePath = path35.resolve(templatePath);
|
|
12568
12751
|
if (!await fileExists(resolvedTemplatePath)) {
|
|
12569
12752
|
throw new TemplateNotFoundError(resolvedTemplatePath);
|
|
12570
12753
|
}
|
|
@@ -12612,25 +12795,21 @@ async function cleanupWorkspace(workspacePath) {
|
|
|
12612
12795
|
}
|
|
12613
12796
|
}
|
|
12614
12797
|
/**
 * Delete the directory that holds every workspace of one eval run.
 *
 * @param {string} evalRunId - Eval run identifier; names the directory below the root.
 * @param {string} [workspaceRoot] - Root directory; when null/undefined the value of
 *   getWorkspacesRoot() is used instead.
 * @returns {Promise<void>} Resolves once the directory is removed, or immediately when
 *   fileExists() reports that it is not there.
 */
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
  const runDir = path35.join(workspaceRoot ?? getWorkspacesRoot(), evalRunId);
  const present = await fileExists(runDir);
  if (!present) {
    return;
  }
  await rm4(runDir, { recursive: true, force: true });
}
|
|
12621
12804
|
|
|
12622
|
-
// src/evaluation/workspace/
|
|
12805
|
+
// src/evaluation/workspace/pool-manager.ts
|
|
12623
12806
|
import { execFile } from "node:child_process";
|
|
12624
12807
|
import { createHash } from "node:crypto";
|
|
12625
12808
|
import { existsSync as existsSync2 } from "node:fs";
|
|
12626
|
-
import { mkdir as mkdir11, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
12627
|
-
import
|
|
12628
|
-
import path35 from "node:path";
|
|
12809
|
+
import { cp as cp2, mkdir as mkdir11, readFile as readFile11, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
12810
|
+
import path36 from "node:path";
|
|
12629
12811
|
import { promisify as promisify5 } from "node:util";
|
|
12630
12812
|
var execFileAsync = promisify5(execFile);
|
|
12631
|
-
var DEFAULT_CACHE_DIR = path35.join(os4.homedir(), ".agentv", "git-cache");
|
|
12632
|
-
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
12633
|
-
var LOCK_TIMEOUT_MS = 6e4;
|
|
12634
12813
|
function gitEnv() {
|
|
12635
12814
|
const env = { ...process.env };
|
|
12636
12815
|
for (const key of Object.keys(env)) {
|
|
@@ -12645,49 +12824,340 @@ function gitEnv() {
|
|
|
12645
12824
|
GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
|
|
12646
12825
|
};
|
|
12647
12826
|
}
|
|
12648
|
-
function cacheKey(source) {
|
|
12649
|
-
const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
|
|
12650
|
-
return createHash("sha256").update(raw).digest("hex");
|
|
12651
|
-
}
|
|
12652
|
-
function getSourceUrl(source) {
|
|
12653
|
-
return source.type === "git" ? source.url : source.path;
|
|
12654
|
-
}
|
|
12655
12827
|
/**
 * Run the `git` executable and return its trimmed standard output.
 *
 * @param {string[]} args - Arguments passed to `git`.
 * @param {{ cwd?: string, timeout?: number }} [opts] - Working directory and timeout in
 *   milliseconds; the timeout falls back to 3e5 (five minutes) when not given.
 * @returns {Promise<string>} stdout with surrounding whitespace removed.
 */
async function git(args, opts) {
  const execOptions = {
    cwd: opts?.cwd,
    timeout: opts?.timeout ?? 3e5,
    env: gitEnv(),
    // Allow up to 50 MiB of output before execFile gives up.
    maxBuffer: 50 * 1024 * 1024
  };
  const result = await execFileAsync("git", args, execOptions);
  return result.stdout.trim();
}
|
|
12665
|
-
|
|
12666
|
-
const
|
|
12667
|
-
|
|
12668
|
-
|
|
12669
|
-
|
|
12670
|
-
|
|
12671
|
-
|
|
12672
|
-
|
|
12673
|
-
|
|
12836
|
+
/**
 * Reduce a repo config to the canonical fields that feed the workspace fingerprint.
 *
 * Git URLs are lower-cased and a trailing ".git" is dropped so equivalent spellings hash
 * the same. Optional clone settings are only included when present, and the sparse list
 * is sorted on a copy so its order does not matter.
 *
 * `checkout.ancestor` is included when it is greater than zero: it changes which commit
 * ends up checked out, so two configs that differ only in ancestor must not share pooled
 * slots. Configs without an ancestor produce exactly the same object as before.
 *
 * @param {object} repo - Repo config with `path`, `source`, and optional `checkout` / `clone`.
 * @returns {object} Plain object with `path`, `source`, `ref` and, when set, `ancestor`,
 *   `depth`, `filter`, `sparse`.
 */
function normalizeRepoForFingerprint(repo) {
  const source =
    repo.source.type === "git"
      ? { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") }
      : { type: "local", path: repo.source.path };
  const result = {
    path: repo.path,
    source,
    ref: repo.checkout?.ref ?? "HEAD"
  };
  const ancestor = repo.checkout?.ancestor ?? 0;
  if (ancestor > 0) {
    result.ancestor = ancestor;
  }
  if (repo.clone?.depth !== void 0) {
    result.depth = repo.clone.depth;
  }
  if (repo.clone?.filter !== void 0) {
    result.filter = repo.clone.filter;
  }
  if (repo.clone?.sparse?.length) {
    // Sort a copy: the caller's array must stay untouched.
    result.sparse = [...repo.clone.sparse].sort();
  }
  return result;
}
|
|
12854
|
+
/**
 * Compute the SHA-256 fingerprint (hex) that identifies a pooled workspace configuration.
 *
 * The hash covers the template path (null when absent) and every repo after
 * normalizeRepoForFingerprint(), with repos ordered by `path` so the order in the config
 * file does not matter.
 *
 * Ordering uses plain code-unit comparison rather than localeCompare(): the result of
 * localeCompare() depends on the runtime's locale, which would let the same config hash
 * differently under a different locale.
 *
 * @param {string | undefined | null} templatePath - Workspace template directory, if any.
 * @param {object[]} repos - Repo configs; the array is not mutated.
 * @returns {string} 64-character lowercase hex digest.
 */
function computeWorkspaceFingerprint(templatePath, repos) {
  const byPath = (a, b) => {
    if (a.path < b.path) return -1;
    if (a.path > b.path) return 1;
    return 0;
  };
  const canonical = {
    templatePath: templatePath ?? null,
    repos: [...repos].sort(byPath).map((repo) => normalizeRepoForFingerprint(repo))
  };
  return createHash("sha256").update(JSON.stringify(canonical)).digest("hex");
}
|
|
12861
|
+
/**
 * Recursively copy `src` into `dest`, creating `dest` when needed.
 *
 * Entries named ".git" are skipped at every depth. Files are copied with timestamps
 * preserved and overwrite existing files.
 *
 * `skipDirs` names directories directly inside `src` that must not be copied. It is
 * deliberately NOT forwarded to nested calls: the names describe top-level directories,
 * so a deeper directory that merely shares a name (e.g. `docs/repo`) is still copied.
 *
 * @param {string} src - Directory to copy from.
 * @param {string} dest - Directory to copy into.
 * @param {Set<string>} [skipDirs] - Names of immediate child directories of `src` to skip.
 * @returns {Promise<void>}
 */
async function copyDirectoryRecursive2(src, dest, skipDirs) {
  await mkdir11(dest, { recursive: true });
  const entries = await readdir4(src, { withFileTypes: true });
  for (const entry of entries) {
    if (entry.name === ".git") {
      continue;
    }
    const srcPath = path36.join(src, entry.name);
    const destPath = path36.join(dest, entry.name);
    if (!entry.isDirectory()) {
      await cp2(srcPath, destPath, { preserveTimestamps: true, force: true });
      continue;
    }
    if (skipDirs?.has(entry.name)) {
      continue;
    }
    await copyDirectoryRecursive2(srcPath, destPath);
  }
}
|
|
12681
|
-
|
|
12682
|
-
|
|
12683
|
-
|
|
12880
|
+
/**
 * Pool of reusable workspace directories stored on disk.
 *
 * Layout below `poolRoot`:
 *   <fingerprint>/slot-N        workspace directory
 *   <fingerprint>/slot-N.lock   lock file containing the PID of the holder
 *   <fingerprint>/metadata.json fingerprint, inputs and creation time
 */
var WorkspacePoolManager = class {
  poolRoot;
  /**
   * @param {string} [poolRoot] - Pool root directory; defaults to getWorkspacePoolRoot().
   */
  constructor(poolRoot) {
    this.poolRoot = poolRoot ?? getWorkspacePoolRoot();
  }
  /**
   * Acquire a workspace slot from the pool.
   *
   * 1. Compute fingerprint from template + repos
   * 2. Check drift (compare stored metadata.json fingerprint vs computed)
   * 3. If drift: warn, remove all slots, rematerialize
   * 4. Acquire a slot (try-lock slot-0, slot-1, ..., up to maxSlots)
   * 5. If slot exists: reset repos, re-copy template files (skip repo directories)
   * 6. If new slot: copy template, materialize all repos, write metadata.json
   * 7. Return the slot (with path, index, isExisting)
   *
   * When preparing the slot fails, the lock is released before the error is rethrown so
   * the slot does not stay locked for the rest of this process's lifetime. A slot that
   * was being created for the first time is also removed, so a half-materialized
   * directory is never mistaken for a reusable slot later.
   *
   * @param {{ templatePath?: string, repos: object[], maxSlots: number, repoManager: object }} options
   * @returns {Promise<{ index: number, path: string, isExisting: boolean, lockPath: string, fingerprint: string, poolDir: string }>}
   * @throws {Error} When every slot up to `maxSlots` is locked, or when slot preparation fails.
   */
  async acquireWorkspace(options) {
    const { templatePath, repos, maxSlots, repoManager } = options;
    const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
    const poolDir = path36.join(this.poolRoot, fingerprint);
    await mkdir11(poolDir, { recursive: true });
    const drifted = await this.checkDrift(poolDir, fingerprint);
    if (drifted) {
      console.warn(
        `[workspace-pool] Drift detected for fingerprint ${fingerprint.slice(0, 12)}... Removing stale slots.`
      );
      await this.removeAllSlots(poolDir);
    }
    for (let i = 0; i < maxSlots; i++) {
      const slotPath = path36.join(poolDir, `slot-${i}`);
      const lockPath = `${slotPath}.lock`;
      const locked = await this.tryLock(lockPath);
      if (!locked) {
        continue;
      }
      const slotExists = existsSync2(slotPath);
      try {
        if (slotExists) {
          await this.resetSlot(slotPath, templatePath, repos);
        } else {
          await mkdir11(slotPath, { recursive: true });
          if (templatePath) {
            await copyDirectoryRecursive2(templatePath, slotPath);
          }
          if (repos.length > 0) {
            await repoManager.materializeAll(repos, slotPath);
          }
          await this.writeMetadata(poolDir, fingerprint, templatePath ?? null, repos);
        }
      } catch (err) {
        if (!slotExists) {
          // Best effort: the original error is what the caller needs to see.
          await rm5(slotPath, { recursive: true, force: true }).catch(() => {
          });
        }
        await this.releaseSlot({ lockPath });
        throw err;
      }
      return {
        index: i,
        path: slotPath,
        isExisting: slotExists,
        lockPath,
        fingerprint,
        poolDir
      };
    }
    throw new Error(
      `All ${maxSlots} pool slots are locked for fingerprint ${fingerprint.slice(0, 12)}...`
    );
  }
  /**
   * Remove lock file to release a slot.
   * Failures (for example the lock is already gone) are ignored on purpose.
   *
   * @param {{ lockPath: string }} slot - Slot returned by acquireWorkspace().
   */
  async releaseSlot(slot) {
    try {
      await unlink(slot.lockPath);
    } catch {
    }
  }
  /**
   * Report whether a process with the given PID currently exists.
   *
   * Signal 0 performs the existence/permission check without delivering a signal.
   * EPERM means the process exists but belongs to someone we may not signal, so it
   * still counts as alive; any other failure is treated as "not running".
   *
   * @param {number} pid
   * @returns {boolean}
   */
  isProcessAlive(pid) {
    try {
      process.kill(pid, 0);
      return true;
    } catch (err) {
      return err?.code === "EPERM";
    }
  }
  /**
   * Read the PID stored in a lock file.
   *
   * @param {string} lockPath
   * @returns {Promise<number | null>} The PID, or null when the file cannot be read or
   *   does not contain a number.
   */
  async readLockPid(lockPath) {
    try {
      const pidStr = await readFile11(lockPath, "utf-8");
      const pid = Number.parseInt(pidStr.trim(), 10);
      return Number.isNaN(pid) ? null : pid;
    } catch {
      return null;
    }
  }
  /**
   * Try to acquire a PID-based lock file.
   * On EEXIST, read PID and check if process is alive. If dead, stale lock — remove and retry.
   * Returns true if lock acquired, false if slot is actively locked.
   * Uses a bounded loop (max 3 attempts) to avoid unbounded recursion.
   *
   * @param {string} lockPath
   * @returns {Promise<boolean>}
   * @throws Any error from creating the lock file other than EEXIST.
   */
  async tryLock(lockPath) {
    for (let attempt = 0; attempt < 3; attempt++) {
      try {
        await writeFile7(lockPath, String(process.pid), { flag: "wx" });
        return true;
      } catch (err) {
        if (err.code !== "EEXIST") {
          throw err;
        }
      }
      const pid = await this.readLockPid(lockPath);
      if (pid === null) {
        if (!existsSync2(lockPath)) {
          // The holder released the lock between our write attempt and the read.
          continue;
        }
        // Unreadable or not-yet-written lock: be conservative and treat it as held.
        return false;
      }
      if (this.isProcessAlive(pid)) {
        return false;
      }
      // Stale lock left by a process that no longer exists.
      await unlink(lockPath).catch(() => {
      });
    }
    return false;
  }
  /**
   * Check if the stored fingerprint in metadata.json differs from the computed one.
   * Returns true if drifted, false otherwise.
   * Returns false (no drift) if metadata.json doesn't exist (first use).
   * An unreadable or unparsable metadata.json is also reported as "no drift".
   *
   * @param {string} poolDir
   * @param {string} fingerprint
   * @returns {Promise<boolean>}
   */
  async checkDrift(poolDir, fingerprint) {
    const metadataPath = path36.join(poolDir, "metadata.json");
    try {
      const raw = await readFile11(metadataPath, "utf-8");
      const metadata = JSON.parse(raw);
      return metadata.fingerprint !== fingerprint;
    } catch {
      return false;
    }
  }
  /** Write metadata.json with fingerprint, inputs, and timestamp. */
  async writeMetadata(poolDir, fingerprint, templatePath, repos) {
    const metadata = {
      fingerprint,
      templatePath,
      repos,
      createdAt: (/* @__PURE__ */ new Date()).toISOString()
    };
    await writeFile7(path36.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
  }
  /**
   * Remove all slot directories and their lock files from a pool directory.
   * Slots whose lock names a live process are left alone (with a warning).
   * metadata.json is removed afterwards.
   *
   * @param {string} poolDir
   */
  async removeAllSlots(poolDir) {
    const entries = await readdir4(poolDir);
    for (const entry of entries) {
      if (!entry.startsWith("slot-") || entry.endsWith(".lock")) {
        continue;
      }
      const lockPath = path36.join(poolDir, `${entry}.lock`);
      const pid = await this.readLockPid(lockPath);
      if (pid !== null && this.isProcessAlive(pid)) {
        console.warn(`[workspace-pool] Skipping slot ${entry}: locked by PID ${pid}`);
        continue;
      }
      await rm5(path36.join(poolDir, entry), { recursive: true, force: true });
      await rm5(lockPath, { force: true }).catch(() => {
      });
    }
    await rm5(path36.join(poolDir, "metadata.json"), { force: true }).catch(() => {
    });
  }
  /**
   * Reset an existing slot for reuse:
   * 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
   * 2. Re-copy template files (skip repo directories)
   *
   * Repos whose directory is missing from the slot are skipped.
   * NOTE(review): the reset targets `checkout.ref` only; `checkout.ancestor` is not
   * applied here — confirm this matches what materialization leaves checked out.
   *
   * @param {string} slotPath
   * @param {string | undefined} templatePath
   * @param {object[]} repos
   */
  async resetSlot(slotPath, templatePath, repos) {
    for (const repo of repos) {
      const repoDir = path36.join(slotPath, repo.path);
      if (!existsSync2(repoDir)) {
        continue;
      }
      const ref = repo.checkout?.ref ?? "HEAD";
      await git(["reset", "--hard", ref], { cwd: repoDir });
      await git(["clean", "-fd"], { cwd: repoDir });
    }
    if (templatePath) {
      // First path segment of each repo path, with a leading "./" removed.
      const repoDirNames = new Set(
        repos.map((r) => {
          const normalized = r.path.replace(/^\.\//, "");
          return normalized.split("/")[0];
        })
      );
      await copyDirectoryRecursive2(templatePath, slotPath, repoDirNames);
    }
  }
};
|
|
13070
|
+
|
|
13071
|
+
// src/evaluation/workspace/repo-manager.ts
|
|
13072
|
+
import { execFile as execFile2 } from "node:child_process";
|
|
13073
|
+
import { createHash as createHash2 } from "node:crypto";
|
|
13074
|
+
import { existsSync as existsSync3 } from "node:fs";
|
|
13075
|
+
import { mkdir as mkdir12, rm as rm6, unlink as unlink2, writeFile as writeFile8 } from "node:fs/promises";
|
|
13076
|
+
import path37 from "node:path";
|
|
13077
|
+
import { promisify as promisify6 } from "node:util";
|
|
13078
|
+
var execFileAsync2 = promisify6(execFile2);
|
|
13079
|
+
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
13080
|
+
var LOCK_TIMEOUT_MS = 6e4;
|
|
13081
|
+
/**
 * Build the environment used for spawned git commands.
 *
 * Starts from a copy of process.env with every `GIT_*` variable removed (except
 * GIT_SSH_COMMAND), then forces non-interactive settings: no terminal prompt, no
 * askpass helper, and ssh in batch mode. process.env itself is not modified.
 *
 * @returns {Record<string, string | undefined>} A fresh environment object.
 */
function gitEnv2() {
  const inherited = Object.fromEntries(
    Object.entries(process.env).filter(
      ([name]) => !name.startsWith("GIT_") || name === "GIT_SSH_COMMAND"
    )
  );
  return Object.assign(inherited, {
    GIT_TERMINAL_PROMPT: "0",
    GIT_ASKPASS: "",
    GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
  });
}
|
|
13095
|
+
/**
 * Derive the cache directory key for a repo source.
 *
 * Git sources are identified by their URL, lower-cased and without a trailing ".git";
 * any other source is identified by its `path` as given.
 *
 * @param {{ type: string, url?: string, path?: string }} source
 * @returns {string} SHA-256 of the identity string, as lowercase hex.
 */
function cacheKey(source) {
  let identity;
  if (source.type === "git") {
    identity = source.url.toLowerCase().replace(/\.git$/, "");
  } else {
    identity = source.path;
  }
  const hasher = createHash2("sha256");
  hasher.update(identity);
  return hasher.digest("hex");
}
|
|
13099
|
+
/**
 * Return the location string of a repo source: `url` for git sources, `path` otherwise.
 *
 * @param {{ type: string, url?: string, path?: string }} source
 * @returns {string}
 */
function getSourceUrl(source) {
  if (source.type === "git") {
    return source.url;
  }
  return source.path;
}
|
|
13102
|
+
/**
 * Run the `git` executable and return its trimmed standard output.
 *
 * @param {string[]} args - Arguments passed to `git`.
 * @param {{ cwd?: string, timeout?: number }} [opts] - Working directory and timeout in
 *   milliseconds; the timeout falls back to DEFAULT_TIMEOUT_MS2 when not given.
 * @returns {Promise<string>} stdout with surrounding whitespace removed.
 */
async function git2(args, opts) {
  const execOptions = {
    cwd: opts?.cwd,
    timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
    env: gitEnv2(),
    // 50MB
    maxBuffer: 50 * 1024 * 1024
  };
  const result = await execFileAsync2("git", args, execOptions);
  return result.stdout.trim();
}
|
|
13112
|
+
/**
 * Acquire an exclusive lock by creating `lockPath` (containing this process's PID).
 *
 * While the file already exists the call polls every 200 ms until LOCK_TIMEOUT_MS has
 * elapsed. A lock whose recorded PID no longer refers to a running process is treated
 * as stale and removed immediately, so a crashed holder does not block every later
 * caller for the full timeout. Stale-lock takeover is best effort: it is not atomic
 * with respect to other processes doing the same thing at the same moment.
 *
 * @param {string} lockPath - Path of the lock file.
 * @returns {Promise<void>} Resolves once the lock file has been created by this call.
 * @throws {Error} "Timed out waiting for lock" when the lock stays held, or any
 *   file-system error other than EEXIST from creating the file.
 */
async function acquireLock(lockPath) {
  // True only when the lock file names a PID that definitely no longer exists.
  const holderIsGone = async () => {
    let pid;
    try {
      pid = Number.parseInt((await readFile11(lockPath, "utf-8")).trim(), 10);
    } catch {
      return false;
    }
    if (!Number.isInteger(pid) || pid <= 0) {
      // Empty or malformed content (e.g. the holder has not written its PID yet).
      return false;
    }
    try {
      process.kill(pid, 0);
      return false;
    } catch (err) {
      // ESRCH: no such process. EPERM and anything else: assume it is still running.
      return err?.code === "ESRCH";
    }
  };
  const start = Date.now();
  while (Date.now() - start < LOCK_TIMEOUT_MS) {
    try {
      await writeFile8(lockPath, String(process.pid), { flag: "wx" });
      return;
    } catch (err) {
      if (err.code !== "EEXIST") {
        throw err;
      }
    }
    if (await holderIsGone()) {
      const removed = await unlink2(lockPath).then(
        () => true,
        (err) => err?.code === "ENOENT"
      );
      if (removed) {
        continue;
      }
    }
    await new Promise((r) => setTimeout(r, 200));
  }
  throw new Error(`Timed out waiting for lock: ${lockPath}`);
}
|
|
13128
|
+
/**
 * Release a lock by deleting its lock file.
 * Any failure (for example the file is already gone) is ignored.
 *
 * @param {string} lockPath - Path of the lock file.
 * @returns {Promise<void>}
 */
async function releaseLock(lockPath) {
  await unlink2(lockPath).catch(() => {
  });
}
|
|
12687
13134
|
var RepoManager = class {
|
|
12688
13135
|
cacheDir;
|
|
12689
|
-
|
|
12690
|
-
|
|
13136
|
+
verbose;
|
|
13137
|
+
constructor(cacheDir, verbose = false) {
|
|
13138
|
+
this.cacheDir = cacheDir ?? getGitCacheRoot();
|
|
13139
|
+
this.verbose = verbose;
|
|
13140
|
+
}
|
|
13141
|
+
async runGit(args, opts) {
|
|
13142
|
+
const startedAt = Date.now();
|
|
13143
|
+
if (this.verbose) {
|
|
13144
|
+
console.log(`[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`);
|
|
13145
|
+
}
|
|
13146
|
+
try {
|
|
13147
|
+
const output = await git2(args, opts);
|
|
13148
|
+
if (this.verbose) {
|
|
13149
|
+
console.log(`[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`);
|
|
13150
|
+
}
|
|
13151
|
+
return output;
|
|
13152
|
+
} catch (error) {
|
|
13153
|
+
if (this.verbose) {
|
|
13154
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
13155
|
+
console.log(
|
|
13156
|
+
`[repo] git fail durationMs=${Date.now() - startedAt} args=${args.join(" ")} error=${message}`
|
|
13157
|
+
);
|
|
13158
|
+
}
|
|
13159
|
+
throw error;
|
|
13160
|
+
}
|
|
12691
13161
|
}
|
|
12692
13162
|
/**
|
|
12693
13163
|
* Ensure a bare mirror cache exists for the given source.
|
|
@@ -12696,11 +13166,19 @@ var RepoManager = class {
|
|
|
12696
13166
|
*/
|
|
12697
13167
|
async ensureCache(source, depth, resolve) {
|
|
12698
13168
|
const key = cacheKey(source);
|
|
12699
|
-
const cachePath =
|
|
13169
|
+
const cachePath = path37.join(this.cacheDir, key);
|
|
12700
13170
|
const lockPath = `${cachePath}.lock`;
|
|
12701
|
-
const cacheExists =
|
|
13171
|
+
const cacheExists = existsSync3(path37.join(cachePath, "HEAD"));
|
|
13172
|
+
if (this.verbose) {
|
|
13173
|
+
console.log(
|
|
13174
|
+
`[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve ?? "remote"} cache=${cachePath}`
|
|
13175
|
+
);
|
|
13176
|
+
}
|
|
12702
13177
|
if (resolve === "local") {
|
|
12703
13178
|
if (cacheExists) {
|
|
13179
|
+
if (this.verbose) {
|
|
13180
|
+
console.log(`[repo] using existing local cache ${cachePath}`);
|
|
13181
|
+
}
|
|
12704
13182
|
return cachePath;
|
|
12705
13183
|
}
|
|
12706
13184
|
const url = getSourceUrl(source);
|
|
@@ -12708,16 +13186,26 @@ var RepoManager = class {
|
|
|
12708
13186
|
`No cache found for \`${url}\`. Run \`agentv cache add --url ${url} --from <local-path>\` to seed it.`
|
|
12709
13187
|
);
|
|
12710
13188
|
}
|
|
12711
|
-
await
|
|
13189
|
+
await mkdir12(this.cacheDir, { recursive: true });
|
|
13190
|
+
const lockStartedAt = Date.now();
|
|
12712
13191
|
await acquireLock(lockPath);
|
|
13192
|
+
if (this.verbose) {
|
|
13193
|
+
console.log(`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`);
|
|
13194
|
+
}
|
|
12713
13195
|
try {
|
|
12714
13196
|
if (cacheExists) {
|
|
13197
|
+
if (this.verbose) {
|
|
13198
|
+
console.log(`[repo] refreshing existing cache ${cachePath}`);
|
|
13199
|
+
}
|
|
12715
13200
|
const fetchArgs = ["fetch", "--prune"];
|
|
12716
13201
|
if (depth) {
|
|
12717
13202
|
fetchArgs.push("--depth", String(depth));
|
|
12718
13203
|
}
|
|
12719
|
-
await
|
|
13204
|
+
await this.runGit(fetchArgs, { cwd: cachePath });
|
|
12720
13205
|
} else {
|
|
13206
|
+
if (this.verbose) {
|
|
13207
|
+
console.log(`[repo] creating new cache ${cachePath}`);
|
|
13208
|
+
}
|
|
12721
13209
|
const cloneArgs = ["clone", "--mirror", "--bare"];
|
|
12722
13210
|
if (depth) {
|
|
12723
13211
|
cloneArgs.push("--depth", String(depth));
|
|
@@ -12725,10 +13213,13 @@ var RepoManager = class {
|
|
|
12725
13213
|
const sourceUrl = getSourceUrl(source);
|
|
12726
13214
|
const cloneUrl = depth && source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
|
|
12727
13215
|
cloneArgs.push(cloneUrl, cachePath);
|
|
12728
|
-
await
|
|
13216
|
+
await this.runGit(cloneArgs);
|
|
12729
13217
|
}
|
|
12730
13218
|
} finally {
|
|
12731
13219
|
await releaseLock(lockPath);
|
|
13220
|
+
if (this.verbose) {
|
|
13221
|
+
console.log(`[repo] lock released path=${lockPath}`);
|
|
13222
|
+
}
|
|
12732
13223
|
}
|
|
12733
13224
|
return cachePath;
|
|
12734
13225
|
}
|
|
@@ -12737,7 +13228,13 @@ var RepoManager = class {
|
|
|
12737
13228
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
12738
13229
|
*/
|
|
12739
13230
|
async materialize(repo, workspacePath) {
|
|
12740
|
-
const targetDir =
|
|
13231
|
+
const targetDir = path37.join(workspacePath, repo.path);
|
|
13232
|
+
const startedAt = Date.now();
|
|
13233
|
+
if (this.verbose) {
|
|
13234
|
+
console.log(
|
|
13235
|
+
`[repo] materialize start path=${repo.path} source=${getSourceUrl(repo.source)} workspace=${workspacePath}`
|
|
13236
|
+
);
|
|
13237
|
+
}
|
|
12741
13238
|
const cachePath = await this.ensureCache(
|
|
12742
13239
|
repo.source,
|
|
12743
13240
|
repo.clone?.depth,
|
|
@@ -12753,10 +13250,10 @@ var RepoManager = class {
|
|
|
12753
13250
|
cloneArgs.push("--no-checkout");
|
|
12754
13251
|
const cloneUrl = repo.clone?.depth || repo.clone?.filter ? `file://${cachePath}` : cachePath;
|
|
12755
13252
|
cloneArgs.push(cloneUrl, targetDir);
|
|
12756
|
-
await
|
|
13253
|
+
await this.runGit(cloneArgs);
|
|
12757
13254
|
if (repo.clone?.sparse?.length) {
|
|
12758
|
-
await
|
|
12759
|
-
await
|
|
13255
|
+
await this.runGit(["sparse-checkout", "init", "--cone"], { cwd: targetDir });
|
|
13256
|
+
await this.runGit(["sparse-checkout", "set", ...repo.clone.sparse], { cwd: targetDir });
|
|
12760
13257
|
}
|
|
12761
13258
|
const ref = repo.checkout?.ref ?? "HEAD";
|
|
12762
13259
|
const resolve = repo.checkout?.resolve ?? "remote";
|
|
@@ -12764,7 +13261,7 @@ var RepoManager = class {
|
|
|
12764
13261
|
if (resolve === "remote" && repo.source.type === "git") {
|
|
12765
13262
|
const url = getSourceUrl(repo.source);
|
|
12766
13263
|
try {
|
|
12767
|
-
const lsOutput = await
|
|
13264
|
+
const lsOutput = await this.runGit(["ls-remote", url, ref]);
|
|
12768
13265
|
const match = lsOutput.split(" ")[0];
|
|
12769
13266
|
if (!match) {
|
|
12770
13267
|
throw new Error(`Ref '${ref}' not found on remote ${url}`);
|
|
@@ -12777,17 +13274,26 @@ var RepoManager = class {
|
|
|
12777
13274
|
} else {
|
|
12778
13275
|
resolvedSha = ref;
|
|
12779
13276
|
}
|
|
12780
|
-
|
|
13277
|
+
if (this.verbose) {
|
|
13278
|
+
console.log(
|
|
13279
|
+
`[repo] checkout path=${repo.path} ref=${ref} resolved=${resolvedSha} resolve=${resolve}`
|
|
13280
|
+
);
|
|
13281
|
+
}
|
|
13282
|
+
await this.runGit(["checkout", resolvedSha], { cwd: targetDir });
|
|
12781
13283
|
const ancestor = repo.checkout?.ancestor ?? 0;
|
|
12782
13284
|
if (ancestor > 0) {
|
|
12783
13285
|
try {
|
|
12784
|
-
const ancestorSha = await
|
|
12785
|
-
|
|
13286
|
+
const ancestorSha = await this.runGit(["rev-parse", `HEAD~${ancestor}`], {
|
|
13287
|
+
cwd: targetDir
|
|
13288
|
+
});
|
|
13289
|
+
await this.runGit(["checkout", ancestorSha], { cwd: targetDir });
|
|
12786
13290
|
} catch {
|
|
12787
13291
|
if (repo.clone?.depth) {
|
|
12788
|
-
await
|
|
12789
|
-
const ancestorSha = await
|
|
12790
|
-
|
|
13292
|
+
await this.runGit(["fetch", "--deepen", String(ancestor)], { cwd: targetDir });
|
|
13293
|
+
const ancestorSha = await this.runGit(["rev-parse", `HEAD~${ancestor}`], {
|
|
13294
|
+
cwd: targetDir
|
|
13295
|
+
});
|
|
13296
|
+
await this.runGit(["checkout", ancestorSha], { cwd: targetDir });
|
|
12791
13297
|
} else {
|
|
12792
13298
|
throw new Error(
|
|
12793
13299
|
`Cannot resolve ancestor ${ancestor} of ref '${ref}'. If using shallow clone, increase clone.depth to at least ${ancestor + 1}.`
|
|
@@ -12795,27 +13301,38 @@ var RepoManager = class {
|
|
|
12795
13301
|
}
|
|
12796
13302
|
}
|
|
12797
13303
|
}
|
|
13304
|
+
if (this.verbose) {
|
|
13305
|
+
console.log(
|
|
13306
|
+
`[repo] materialize done path=${repo.path} target=${targetDir} durationMs=${Date.now() - startedAt}`
|
|
13307
|
+
);
|
|
13308
|
+
}
|
|
12798
13309
|
}
|
|
12799
13310
|
/** Materialize all repos into the workspace. */
|
|
12800
13311
|
async materializeAll(repos, workspacePath) {
|
|
13312
|
+
if (this.verbose) {
|
|
13313
|
+
console.log(`[repo] materializeAll count=${repos.length} workspace=${workspacePath}`);
|
|
13314
|
+
}
|
|
12801
13315
|
for (const repo of repos) {
|
|
12802
13316
|
await this.materialize(repo, workspacePath);
|
|
12803
13317
|
}
|
|
13318
|
+
if (this.verbose) {
|
|
13319
|
+
console.log("[repo] materializeAll complete");
|
|
13320
|
+
}
|
|
12804
13321
|
}
|
|
12805
13322
|
/** Reset repos in workspace to their checkout state. */
|
|
12806
13323
|
async reset(repos, workspacePath, strategy) {
|
|
12807
13324
|
if (strategy === "recreate") {
|
|
12808
13325
|
for (const repo of repos) {
|
|
12809
|
-
const targetDir =
|
|
12810
|
-
await
|
|
13326
|
+
const targetDir = path37.join(workspacePath, repo.path);
|
|
13327
|
+
await rm6(targetDir, { recursive: true, force: true });
|
|
12811
13328
|
}
|
|
12812
13329
|
await this.materializeAll(repos, workspacePath);
|
|
12813
13330
|
return;
|
|
12814
13331
|
}
|
|
12815
13332
|
for (const repo of repos) {
|
|
12816
|
-
const targetDir =
|
|
12817
|
-
await
|
|
12818
|
-
await
|
|
13333
|
+
const targetDir = path37.join(workspacePath, repo.path);
|
|
13334
|
+
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
13335
|
+
await this.runGit(["clean", "-fd"], { cwd: targetDir });
|
|
12819
13336
|
}
|
|
12820
13337
|
}
|
|
12821
13338
|
/**
|
|
@@ -12825,21 +13342,21 @@ var RepoManager = class {
|
|
|
12825
13342
|
async seedCache(localPath, remoteUrl, opts) {
|
|
12826
13343
|
const source = { type: "git", url: remoteUrl };
|
|
12827
13344
|
const key = cacheKey(source);
|
|
12828
|
-
const cachePath =
|
|
13345
|
+
const cachePath = path37.join(this.cacheDir, key);
|
|
12829
13346
|
const lockPath = `${cachePath}.lock`;
|
|
12830
|
-
await
|
|
13347
|
+
await mkdir12(this.cacheDir, { recursive: true });
|
|
12831
13348
|
await acquireLock(lockPath);
|
|
12832
13349
|
try {
|
|
12833
|
-
if (
|
|
13350
|
+
if (existsSync3(path37.join(cachePath, "HEAD"))) {
|
|
12834
13351
|
if (!opts?.force) {
|
|
12835
13352
|
throw new Error(
|
|
12836
13353
|
`Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
|
|
12837
13354
|
);
|
|
12838
13355
|
}
|
|
12839
|
-
await
|
|
13356
|
+
await rm6(cachePath, { recursive: true, force: true });
|
|
12840
13357
|
}
|
|
12841
|
-
await
|
|
12842
|
-
await
|
|
13358
|
+
await git2(["clone", "--mirror", "--bare", localPath, cachePath]);
|
|
13359
|
+
await git2(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
|
|
12843
13360
|
} finally {
|
|
12844
13361
|
await releaseLock(lockPath);
|
|
12845
13362
|
}
|
|
@@ -12847,41 +13364,41 @@ var RepoManager = class {
|
|
|
12847
13364
|
}
|
|
12848
13365
|
/** Remove the entire cache directory. */
|
|
12849
13366
|
async cleanCache() {
|
|
12850
|
-
await
|
|
13367
|
+
await rm6(this.cacheDir, { recursive: true, force: true });
|
|
12851
13368
|
}
|
|
12852
13369
|
};
|
|
12853
13370
|
|
|
12854
13371
|
// src/evaluation/workspace/resolve.ts
|
|
12855
|
-
import { readdir as
|
|
12856
|
-
import
|
|
13372
|
+
import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
|
|
13373
|
+
import path38 from "node:path";
|
|
12857
13374
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
12858
13375
|
if (!templatePath) {
|
|
12859
13376
|
return void 0;
|
|
12860
13377
|
}
|
|
12861
|
-
const resolved =
|
|
13378
|
+
const resolved = path38.resolve(templatePath);
|
|
12862
13379
|
const stats = await stat6(resolved);
|
|
12863
13380
|
if (stats.isFile()) {
|
|
12864
13381
|
return {
|
|
12865
|
-
dir:
|
|
13382
|
+
dir: path38.dirname(resolved),
|
|
12866
13383
|
workspaceFile: resolved
|
|
12867
13384
|
};
|
|
12868
13385
|
}
|
|
12869
13386
|
if (!stats.isDirectory()) {
|
|
12870
13387
|
throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
|
|
12871
13388
|
}
|
|
12872
|
-
const entries = await
|
|
13389
|
+
const entries = await readdir5(resolved);
|
|
12873
13390
|
const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
|
|
12874
13391
|
if (workspaceFiles.length === 1) {
|
|
12875
13392
|
return {
|
|
12876
13393
|
dir: resolved,
|
|
12877
|
-
workspaceFile:
|
|
13394
|
+
workspaceFile: path38.join(resolved, workspaceFiles[0])
|
|
12878
13395
|
};
|
|
12879
13396
|
}
|
|
12880
13397
|
if (workspaceFiles.length > 1) {
|
|
12881
13398
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
12882
13399
|
return {
|
|
12883
13400
|
dir: resolved,
|
|
12884
|
-
workspaceFile: conventionFile ?
|
|
13401
|
+
workspaceFile: conventionFile ? path38.join(resolved, conventionFile) : void 0
|
|
12885
13402
|
};
|
|
12886
13403
|
}
|
|
12887
13404
|
return { dir: resolved };
|
|
@@ -12963,7 +13480,10 @@ async function runEvaluation(options) {
|
|
|
12963
13480
|
trials,
|
|
12964
13481
|
streamCallbacks,
|
|
12965
13482
|
totalBudgetUsd,
|
|
12966
|
-
failOnError
|
|
13483
|
+
failOnError,
|
|
13484
|
+
poolWorkspaces,
|
|
13485
|
+
poolMaxSlots: configPoolMaxSlots,
|
|
13486
|
+
workspace: userWorkspacePath
|
|
12967
13487
|
} = options;
|
|
12968
13488
|
let useCache = options.useCache;
|
|
12969
13489
|
if (trials && trials.count > 1 && useCache) {
|
|
@@ -13037,7 +13557,7 @@ async function runEvaluation(options) {
|
|
|
13037
13557
|
];
|
|
13038
13558
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveJudgeProvider);
|
|
13039
13559
|
const typeRegistry = createBuiltinRegistry();
|
|
13040
|
-
const discoveryBaseDir = evalFilePath ?
|
|
13560
|
+
const discoveryBaseDir = evalFilePath ? path39.dirname(path39.resolve(evalFilePath)) : process.cwd();
|
|
13041
13561
|
const evalDir = discoveryBaseDir;
|
|
13042
13562
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
13043
13563
|
const providerRegistry = createBuiltinProviderRegistry();
|
|
@@ -13093,11 +13613,25 @@ async function runEvaluation(options) {
|
|
|
13093
13613
|
const resolvedTemplate = await resolveWorkspaceTemplate(rawTemplate);
|
|
13094
13614
|
const workspaceTemplate = resolvedTemplate?.dir;
|
|
13095
13615
|
let suiteWorkspaceFile = resolvedTemplate?.workspaceFile;
|
|
13616
|
+
const setupLog = (message) => {
|
|
13617
|
+
if (verbose) {
|
|
13618
|
+
console.log(`[setup] ${message}`);
|
|
13619
|
+
}
|
|
13620
|
+
};
|
|
13096
13621
|
const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
|
|
13097
|
-
|
|
13622
|
+
if (userWorkspacePath && isPerTestIsolation) {
|
|
13623
|
+
throw new Error(
|
|
13624
|
+
"--workspace is incompatible with isolation: per_test. Use isolation: shared (default)."
|
|
13625
|
+
);
|
|
13626
|
+
}
|
|
13627
|
+
const hasSharedWorkspace = !!(userWorkspacePath || workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
|
|
13628
|
+
const usePool = poolWorkspaces === true && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !userWorkspacePath;
|
|
13098
13629
|
const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
|
|
13099
|
-
const workers = hasSharedWorkspace ? 1 : requestedWorkers;
|
|
13100
|
-
|
|
13630
|
+
const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
|
|
13631
|
+
setupLog(
|
|
13632
|
+
`sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
|
|
13633
|
+
);
|
|
13634
|
+
if (hasSharedWorkspace && !usePool && requestedWorkers > 1) {
|
|
13101
13635
|
console.warn(
|
|
13102
13636
|
`Warning: Shared workspace requires sequential execution. Overriding workers from ${requestedWorkers} to 1.`
|
|
13103
13637
|
);
|
|
@@ -13106,285 +13640,383 @@ async function runEvaluation(options) {
|
|
|
13106
13640
|
let sharedWorkspacePath;
|
|
13107
13641
|
let sharedBaselineCommit;
|
|
13108
13642
|
let beforeAllOutput;
|
|
13109
|
-
|
|
13643
|
+
let poolManager;
|
|
13644
|
+
let poolSlot;
|
|
13645
|
+
const poolSlots = [];
|
|
13646
|
+
const availablePoolSlots = [];
|
|
13647
|
+
const poolSlotBaselines = /* @__PURE__ */ new Map();
|
|
13648
|
+
const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
|
|
13649
|
+
if (userWorkspacePath) {
|
|
13650
|
+
sharedWorkspacePath = userWorkspacePath;
|
|
13651
|
+
setupLog(`using user-provided workspace: ${userWorkspacePath}`);
|
|
13652
|
+
} else if (usePool && suiteWorkspace?.repos) {
|
|
13653
|
+
const slotsNeeded = workers;
|
|
13654
|
+
setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
|
|
13655
|
+
poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
|
|
13656
|
+
const poolRepoManager = new RepoManager(void 0, verbose);
|
|
13657
|
+
for (let i = 0; i < slotsNeeded; i++) {
|
|
13658
|
+
const slot = await poolManager.acquireWorkspace({
|
|
13659
|
+
templatePath: workspaceTemplate,
|
|
13660
|
+
repos: suiteWorkspace.repos,
|
|
13661
|
+
maxSlots: poolMaxSlots,
|
|
13662
|
+
repoManager: poolRepoManager
|
|
13663
|
+
});
|
|
13664
|
+
poolSlots.push(slot);
|
|
13665
|
+
setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
|
|
13666
|
+
}
|
|
13667
|
+
if (slotsNeeded === 1) {
|
|
13668
|
+
poolSlot = poolSlots[0];
|
|
13669
|
+
sharedWorkspacePath = poolSlot.path;
|
|
13670
|
+
} else {
|
|
13671
|
+
availablePoolSlots.push(...poolSlots);
|
|
13672
|
+
}
|
|
13673
|
+
} else if (workspaceTemplate) {
|
|
13674
|
+
setupLog(`creating shared workspace from template: ${workspaceTemplate}`);
|
|
13110
13675
|
try {
|
|
13111
13676
|
sharedWorkspacePath = await createTempWorkspace(workspaceTemplate, evalRunId, "shared");
|
|
13677
|
+
setupLog(`shared workspace created at: ${sharedWorkspacePath}`);
|
|
13112
13678
|
} catch (error) {
|
|
13113
13679
|
const message = error instanceof Error ? error.message : String(error);
|
|
13114
13680
|
throw new Error(`Failed to create shared workspace: ${message}`);
|
|
13115
13681
|
}
|
|
13682
|
+
} else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
|
|
13683
|
+
sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
|
|
13684
|
+
await mkdir13(sharedWorkspacePath, { recursive: true });
|
|
13685
|
+
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
13686
|
+
}
|
|
13687
|
+
try {
|
|
13116
13688
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
13117
|
-
const copiedWorkspaceFile =
|
|
13689
|
+
const copiedWorkspaceFile = path39.join(sharedWorkspacePath, path39.basename(suiteWorkspaceFile));
|
|
13118
13690
|
try {
|
|
13119
13691
|
await stat7(copiedWorkspaceFile);
|
|
13120
13692
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
13121
13693
|
} catch {
|
|
13122
13694
|
}
|
|
13123
13695
|
}
|
|
13124
|
-
|
|
13125
|
-
sharedWorkspacePath
|
|
13126
|
-
|
|
13127
|
-
|
|
13128
|
-
|
|
13129
|
-
|
|
13130
|
-
|
|
13131
|
-
|
|
13132
|
-
|
|
13133
|
-
|
|
13134
|
-
|
|
13135
|
-
|
|
13136
|
-
});
|
|
13137
|
-
}
|
|
13138
|
-
throw new Error(`Failed to materialize repos: ${message}`);
|
|
13139
|
-
}
|
|
13140
|
-
}
|
|
13141
|
-
if (sharedWorkspacePath && suiteWorkspace?.before_all) {
|
|
13142
|
-
const scriptContext = {
|
|
13143
|
-
workspacePath: sharedWorkspacePath,
|
|
13144
|
-
testId: "__before_all__",
|
|
13145
|
-
evalRunId,
|
|
13146
|
-
evalDir
|
|
13147
|
-
};
|
|
13148
|
-
try {
|
|
13149
|
-
beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
|
|
13150
|
-
} catch (error) {
|
|
13151
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
13152
|
-
if (sharedWorkspacePath) {
|
|
13153
|
-
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
13154
|
-
});
|
|
13155
|
-
}
|
|
13156
|
-
throw new Error(`before_all script failed: ${message}`);
|
|
13157
|
-
}
|
|
13158
|
-
}
|
|
13159
|
-
if (sharedWorkspacePath) {
|
|
13160
|
-
try {
|
|
13161
|
-
sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
|
|
13162
|
-
} catch {
|
|
13163
|
-
}
|
|
13164
|
-
}
|
|
13165
|
-
let nextWorkerId = 1;
|
|
13166
|
-
const workerIdByEvalId = /* @__PURE__ */ new Map();
|
|
13167
|
-
let beforeAllOutputAttached = false;
|
|
13168
|
-
let cumulativeBudgetCost = 0;
|
|
13169
|
-
let budgetExhausted = false;
|
|
13170
|
-
let failOnErrorTriggered = false;
|
|
13171
|
-
const promises = filteredEvalCases.map(
|
|
13172
|
-
(evalCase) => limit(async () => {
|
|
13173
|
-
const workerId = nextWorkerId++;
|
|
13174
|
-
workerIdByEvalId.set(evalCase.id, workerId);
|
|
13175
|
-
if (totalBudgetUsd !== void 0 && budgetExhausted) {
|
|
13176
|
-
const budgetResult = {
|
|
13177
|
-
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
13178
|
-
testId: evalCase.id,
|
|
13179
|
-
dataset: evalCase.dataset,
|
|
13180
|
-
score: 0,
|
|
13181
|
-
hits: [],
|
|
13182
|
-
misses: [],
|
|
13183
|
-
answer: "",
|
|
13184
|
-
target: target.name,
|
|
13185
|
-
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
13186
|
-
budgetExceeded: true,
|
|
13187
|
-
executionStatus: "execution_error",
|
|
13188
|
-
failureStage: "setup",
|
|
13189
|
-
failureReasonCode: "budget_exceeded",
|
|
13190
|
-
executionError: {
|
|
13191
|
-
message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
13192
|
-
stage: "setup"
|
|
13193
|
-
}
|
|
13194
|
-
};
|
|
13195
|
-
if (onProgress) {
|
|
13196
|
-
await onProgress({
|
|
13197
|
-
workerId,
|
|
13198
|
-
testId: evalCase.id,
|
|
13199
|
-
status: "failed",
|
|
13200
|
-
completedAt: Date.now(),
|
|
13201
|
-
error: budgetResult.error
|
|
13696
|
+
const repoManager = suiteWorkspace?.repos?.length && !usePool && !userWorkspacePath ? new RepoManager(void 0, verbose) : void 0;
|
|
13697
|
+
if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
|
|
13698
|
+
setupLog(
|
|
13699
|
+
`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
|
|
13700
|
+
);
|
|
13701
|
+
try {
|
|
13702
|
+
await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
|
|
13703
|
+
setupLog("shared repo materialization complete");
|
|
13704
|
+
} catch (error) {
|
|
13705
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
13706
|
+
if (sharedWorkspacePath && !userWorkspacePath) {
|
|
13707
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
13202
13708
|
});
|
|
13203
13709
|
}
|
|
13204
|
-
|
|
13205
|
-
await onResult(budgetResult);
|
|
13206
|
-
}
|
|
13207
|
-
return budgetResult;
|
|
13710
|
+
throw new Error(`Failed to materialize repos: ${message}`);
|
|
13208
13711
|
}
|
|
13209
|
-
|
|
13210
|
-
|
|
13211
|
-
|
|
13212
|
-
|
|
13213
|
-
|
|
13214
|
-
|
|
13215
|
-
|
|
13216
|
-
|
|
13217
|
-
|
|
13218
|
-
|
|
13219
|
-
|
|
13220
|
-
|
|
13221
|
-
|
|
13222
|
-
|
|
13223
|
-
|
|
13224
|
-
|
|
13225
|
-
|
|
13226
|
-
if (
|
|
13227
|
-
await
|
|
13228
|
-
workerId,
|
|
13229
|
-
testId: evalCase.id,
|
|
13230
|
-
status: "failed",
|
|
13231
|
-
completedAt: Date.now(),
|
|
13232
|
-
error: haltResult.error
|
|
13712
|
+
}
|
|
13713
|
+
if (sharedWorkspacePath && suiteWorkspace?.before_all) {
|
|
13714
|
+
const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
|
|
13715
|
+
setupLog(
|
|
13716
|
+
`running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
13717
|
+
);
|
|
13718
|
+
const scriptContext = {
|
|
13719
|
+
workspacePath: sharedWorkspacePath,
|
|
13720
|
+
testId: "__before_all__",
|
|
13721
|
+
evalRunId,
|
|
13722
|
+
evalDir
|
|
13723
|
+
};
|
|
13724
|
+
try {
|
|
13725
|
+
beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
|
|
13726
|
+
setupLog("shared before_all completed");
|
|
13727
|
+
} catch (error) {
|
|
13728
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
13729
|
+
if (sharedWorkspacePath && !userWorkspacePath) {
|
|
13730
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
13233
13731
|
});
|
|
13234
13732
|
}
|
|
13235
|
-
|
|
13236
|
-
await onResult(haltResult);
|
|
13237
|
-
}
|
|
13238
|
-
return haltResult;
|
|
13733
|
+
throw new Error(`before_all script failed: ${message}`);
|
|
13239
13734
|
}
|
|
13240
|
-
|
|
13241
|
-
|
|
13242
|
-
|
|
13243
|
-
|
|
13244
|
-
|
|
13245
|
-
|
|
13246
|
-
|
|
13247
|
-
}
|
|
13248
|
-
try {
|
|
13249
|
-
const judgeProvider = await resolveJudgeProvider(target);
|
|
13250
|
-
const runCaseOptions = {
|
|
13251
|
-
evalCase,
|
|
13252
|
-
provider: primaryProvider,
|
|
13253
|
-
target,
|
|
13254
|
-
evaluators: evaluatorRegistry,
|
|
13255
|
-
maxRetries,
|
|
13256
|
-
agentTimeoutMs,
|
|
13257
|
-
cache,
|
|
13258
|
-
useCache,
|
|
13259
|
-
now,
|
|
13260
|
-
judgeProvider,
|
|
13261
|
-
targetResolver,
|
|
13262
|
-
availableTargets,
|
|
13735
|
+
}
|
|
13736
|
+
if (availablePoolSlots.length > 0 && suiteWorkspace?.before_all) {
|
|
13737
|
+
for (const slot of availablePoolSlots) {
|
|
13738
|
+
setupLog(`running before_all on pool slot ${slot.index}`);
|
|
13739
|
+
const scriptContext = {
|
|
13740
|
+
workspacePath: slot.path,
|
|
13741
|
+
testId: "__before_all__",
|
|
13263
13742
|
evalRunId,
|
|
13264
|
-
keepWorkspaces,
|
|
13265
|
-
cleanupWorkspaces,
|
|
13266
|
-
sharedWorkspacePath,
|
|
13267
|
-
sharedBaselineCommit,
|
|
13268
|
-
suiteWorkspaceFile,
|
|
13269
|
-
streamCallbacks,
|
|
13270
|
-
typeRegistry,
|
|
13271
|
-
repoManager,
|
|
13272
13743
|
evalDir
|
|
13273
13744
|
};
|
|
13274
|
-
|
|
13275
|
-
|
|
13276
|
-
|
|
13277
|
-
|
|
13278
|
-
|
|
13279
|
-
|
|
13280
|
-
|
|
13745
|
+
try {
|
|
13746
|
+
const output = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
|
|
13747
|
+
if (!beforeAllOutput) beforeAllOutput = output;
|
|
13748
|
+
setupLog(`before_all completed on pool slot ${slot.index}`);
|
|
13749
|
+
} catch (error) {
|
|
13750
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
13751
|
+
throw new Error(`before_all script failed on pool slot ${slot.index}: ${message}`);
|
|
13752
|
+
}
|
|
13753
|
+
}
|
|
13754
|
+
}
|
|
13755
|
+
if (sharedWorkspacePath) {
|
|
13756
|
+
try {
|
|
13757
|
+
sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
|
|
13758
|
+
setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
|
|
13759
|
+
} catch {
|
|
13760
|
+
setupLog("shared baseline initialization skipped (non-fatal)");
|
|
13761
|
+
}
|
|
13762
|
+
}
|
|
13763
|
+
if (availablePoolSlots.length > 0) {
|
|
13764
|
+
for (const slot of availablePoolSlots) {
|
|
13765
|
+
try {
|
|
13766
|
+
const baseline = await initializeBaseline(slot.path);
|
|
13767
|
+
poolSlotBaselines.set(slot.path, baseline);
|
|
13768
|
+
setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
|
|
13769
|
+
} catch {
|
|
13770
|
+
setupLog(`pool slot ${slot.index} baseline initialization skipped (non-fatal)`);
|
|
13771
|
+
}
|
|
13772
|
+
}
|
|
13773
|
+
}
|
|
13774
|
+
let nextWorkerId = 1;
|
|
13775
|
+
const workerIdByEvalId = /* @__PURE__ */ new Map();
|
|
13776
|
+
let beforeAllOutputAttached = false;
|
|
13777
|
+
let cumulativeBudgetCost = 0;
|
|
13778
|
+
let budgetExhausted = false;
|
|
13779
|
+
let failOnErrorTriggered = false;
|
|
13780
|
+
const promises = filteredEvalCases.map(
|
|
13781
|
+
(evalCase) => limit(async () => {
|
|
13782
|
+
const workerId = nextWorkerId++;
|
|
13783
|
+
workerIdByEvalId.set(evalCase.id, workerId);
|
|
13784
|
+
if (totalBudgetUsd !== void 0 && budgetExhausted) {
|
|
13785
|
+
const budgetResult = {
|
|
13786
|
+
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
13787
|
+
testId: evalCase.id,
|
|
13788
|
+
dataset: evalCase.dataset,
|
|
13789
|
+
score: 0,
|
|
13790
|
+
hits: [],
|
|
13791
|
+
misses: [],
|
|
13792
|
+
answer: "",
|
|
13793
|
+
target: target.name,
|
|
13794
|
+
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
13795
|
+
budgetExceeded: true,
|
|
13796
|
+
executionStatus: "execution_error",
|
|
13797
|
+
failureStage: "setup",
|
|
13798
|
+
failureReasonCode: "budget_exceeded",
|
|
13799
|
+
executionError: {
|
|
13800
|
+
message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
13801
|
+
stage: "setup"
|
|
13281
13802
|
}
|
|
13282
|
-
}
|
|
13283
|
-
|
|
13803
|
+
};
|
|
13804
|
+
if (onProgress) {
|
|
13805
|
+
await onProgress({
|
|
13806
|
+
workerId,
|
|
13807
|
+
testId: evalCase.id,
|
|
13808
|
+
status: "failed",
|
|
13809
|
+
completedAt: Date.now(),
|
|
13810
|
+
error: budgetResult.error
|
|
13811
|
+
});
|
|
13284
13812
|
}
|
|
13285
|
-
if (
|
|
13286
|
-
|
|
13287
|
-
if (cumulativeBudgetCost >= totalBudgetUsd) {
|
|
13288
|
-
budgetExhausted = true;
|
|
13289
|
-
}
|
|
13813
|
+
if (onResult) {
|
|
13814
|
+
await onResult(budgetResult);
|
|
13290
13815
|
}
|
|
13816
|
+
return budgetResult;
|
|
13291
13817
|
}
|
|
13292
|
-
if (failOnError === true &&
|
|
13293
|
-
|
|
13294
|
-
|
|
13295
|
-
|
|
13296
|
-
|
|
13297
|
-
|
|
13818
|
+
if (failOnError === true && failOnErrorTriggered) {
|
|
13819
|
+
const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
|
|
13820
|
+
const haltResult = {
|
|
13821
|
+
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
13822
|
+
testId: evalCase.id,
|
|
13823
|
+
dataset: evalCase.dataset,
|
|
13824
|
+
score: 0,
|
|
13825
|
+
hits: [],
|
|
13826
|
+
misses: [],
|
|
13827
|
+
answer: "",
|
|
13828
|
+
target: target.name,
|
|
13829
|
+
error: errorMsg,
|
|
13830
|
+
executionStatus: "execution_error",
|
|
13831
|
+
failureStage: "setup",
|
|
13832
|
+
failureReasonCode: "error_threshold_exceeded",
|
|
13833
|
+
executionError: { message: errorMsg, stage: "setup" }
|
|
13834
|
+
};
|
|
13835
|
+
if (onProgress) {
|
|
13836
|
+
await onProgress({
|
|
13837
|
+
workerId,
|
|
13838
|
+
testId: evalCase.id,
|
|
13839
|
+
status: "failed",
|
|
13840
|
+
completedAt: Date.now(),
|
|
13841
|
+
error: haltResult.error
|
|
13842
|
+
});
|
|
13843
|
+
}
|
|
13844
|
+
if (onResult) {
|
|
13845
|
+
await onResult(haltResult);
|
|
13846
|
+
}
|
|
13847
|
+
return haltResult;
|
|
13298
13848
|
}
|
|
13299
13849
|
if (onProgress) {
|
|
13300
13850
|
await onProgress({
|
|
13301
13851
|
workerId,
|
|
13302
13852
|
testId: evalCase.id,
|
|
13303
|
-
status:
|
|
13304
|
-
startedAt:
|
|
13305
|
-
// Not used for completed status
|
|
13306
|
-
completedAt: Date.now(),
|
|
13307
|
-
error: result.error
|
|
13853
|
+
status: "running",
|
|
13854
|
+
startedAt: Date.now()
|
|
13308
13855
|
});
|
|
13309
13856
|
}
|
|
13310
|
-
|
|
13311
|
-
|
|
13857
|
+
const testPoolSlot = availablePoolSlots.length > 0 ? availablePoolSlots.pop() : void 0;
|
|
13858
|
+
const testWorkspacePath = testPoolSlot?.path ?? sharedWorkspacePath;
|
|
13859
|
+
const testBaselineCommit = testPoolSlot ? poolSlotBaselines.get(testPoolSlot.path) : sharedBaselineCommit;
|
|
13860
|
+
try {
|
|
13861
|
+
const judgeProvider = await resolveJudgeProvider(target);
|
|
13862
|
+
const runCaseOptions = {
|
|
13863
|
+
evalCase,
|
|
13864
|
+
provider: primaryProvider,
|
|
13865
|
+
target,
|
|
13866
|
+
evaluators: evaluatorRegistry,
|
|
13867
|
+
maxRetries,
|
|
13868
|
+
agentTimeoutMs,
|
|
13869
|
+
cache,
|
|
13870
|
+
useCache,
|
|
13871
|
+
now,
|
|
13872
|
+
judgeProvider,
|
|
13873
|
+
targetResolver,
|
|
13874
|
+
availableTargets,
|
|
13875
|
+
evalRunId,
|
|
13876
|
+
keepWorkspaces,
|
|
13877
|
+
cleanupWorkspaces,
|
|
13878
|
+
sharedWorkspacePath: testWorkspacePath,
|
|
13879
|
+
sharedBaselineCommit: testBaselineCommit,
|
|
13880
|
+
suiteWorkspaceFile,
|
|
13881
|
+
streamCallbacks,
|
|
13882
|
+
typeRegistry,
|
|
13883
|
+
repoManager,
|
|
13884
|
+
evalDir
|
|
13885
|
+
};
|
|
13886
|
+
let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
|
|
13887
|
+
if (totalBudgetUsd !== void 0) {
|
|
13888
|
+
let caseCost;
|
|
13889
|
+
if (result.trials && result.trials.length > 0) {
|
|
13890
|
+
const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
|
|
13891
|
+
if (trialCostSum > 0) {
|
|
13892
|
+
caseCost = trialCostSum;
|
|
13893
|
+
}
|
|
13894
|
+
} else {
|
|
13895
|
+
caseCost = result.costUsd;
|
|
13896
|
+
}
|
|
13897
|
+
if (caseCost !== void 0) {
|
|
13898
|
+
cumulativeBudgetCost += caseCost;
|
|
13899
|
+
if (cumulativeBudgetCost >= totalBudgetUsd) {
|
|
13900
|
+
budgetExhausted = true;
|
|
13901
|
+
}
|
|
13902
|
+
}
|
|
13903
|
+
}
|
|
13904
|
+
if (failOnError === true && result.executionStatus === "execution_error") {
|
|
13905
|
+
failOnErrorTriggered = true;
|
|
13906
|
+
}
|
|
13907
|
+
if (beforeAllOutput && !beforeAllOutputAttached) {
|
|
13908
|
+
result = { ...result, beforeAllOutput };
|
|
13909
|
+
beforeAllOutputAttached = true;
|
|
13910
|
+
}
|
|
13911
|
+
if (onProgress) {
|
|
13912
|
+
await onProgress({
|
|
13913
|
+
workerId,
|
|
13914
|
+
testId: evalCase.id,
|
|
13915
|
+
status: result.error ? "failed" : "completed",
|
|
13916
|
+
startedAt: 0,
|
|
13917
|
+
// Not used for completed status
|
|
13918
|
+
completedAt: Date.now(),
|
|
13919
|
+
error: result.error
|
|
13920
|
+
});
|
|
13921
|
+
}
|
|
13922
|
+
if (onResult) {
|
|
13923
|
+
await onResult(result);
|
|
13924
|
+
}
|
|
13925
|
+
return result;
|
|
13926
|
+
} catch (error) {
|
|
13927
|
+
if (onProgress) {
|
|
13928
|
+
await onProgress({
|
|
13929
|
+
workerId,
|
|
13930
|
+
testId: evalCase.id,
|
|
13931
|
+
status: "failed",
|
|
13932
|
+
completedAt: Date.now(),
|
|
13933
|
+
error: error instanceof Error ? error.message : String(error)
|
|
13934
|
+
});
|
|
13935
|
+
}
|
|
13936
|
+
throw error;
|
|
13937
|
+
} finally {
|
|
13938
|
+
if (testPoolSlot) {
|
|
13939
|
+
availablePoolSlots.push(testPoolSlot);
|
|
13940
|
+
}
|
|
13312
13941
|
}
|
|
13313
|
-
|
|
13314
|
-
|
|
13315
|
-
|
|
13316
|
-
|
|
13317
|
-
|
|
13318
|
-
|
|
13319
|
-
|
|
13320
|
-
|
|
13321
|
-
|
|
13322
|
-
|
|
13942
|
+
})
|
|
13943
|
+
);
|
|
13944
|
+
const settled = await Promise.allSettled(promises);
|
|
13945
|
+
const results = [];
|
|
13946
|
+
for (let i = 0; i < settled.length; i++) {
|
|
13947
|
+
const outcome = settled[i];
|
|
13948
|
+
if (outcome.status === "fulfilled") {
|
|
13949
|
+
results.push(outcome.value);
|
|
13950
|
+
} else {
|
|
13951
|
+
const evalCase = filteredEvalCases[i];
|
|
13952
|
+
const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
|
|
13953
|
+
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
13954
|
+
const errorResult = buildErrorResult(
|
|
13955
|
+
evalCase,
|
|
13956
|
+
target.name,
|
|
13957
|
+
(now ?? (() => /* @__PURE__ */ new Date()))(),
|
|
13958
|
+
outcome.reason,
|
|
13959
|
+
promptInputs,
|
|
13960
|
+
primaryProvider,
|
|
13961
|
+
"agent",
|
|
13962
|
+
"provider_error"
|
|
13963
|
+
);
|
|
13964
|
+
results.push(errorResult);
|
|
13965
|
+
if (onResult) {
|
|
13966
|
+
await onResult(errorResult);
|
|
13323
13967
|
}
|
|
13324
|
-
throw error;
|
|
13325
13968
|
}
|
|
13326
|
-
}
|
|
13327
|
-
|
|
13328
|
-
|
|
13329
|
-
|
|
13330
|
-
|
|
13331
|
-
|
|
13332
|
-
|
|
13333
|
-
|
|
13334
|
-
|
|
13335
|
-
|
|
13336
|
-
|
|
13337
|
-
|
|
13338
|
-
|
|
13339
|
-
|
|
13340
|
-
|
|
13341
|
-
|
|
13342
|
-
|
|
13343
|
-
|
|
13344
|
-
|
|
13345
|
-
|
|
13346
|
-
|
|
13347
|
-
);
|
|
13348
|
-
results.push(errorResult);
|
|
13349
|
-
if (onResult) {
|
|
13350
|
-
await onResult(errorResult);
|
|
13969
|
+
}
|
|
13970
|
+
const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
|
|
13971
|
+
if (afterAllWorkspaces.length > 0 && suiteWorkspace?.after_all) {
|
|
13972
|
+
for (const wsPath of afterAllWorkspaces) {
|
|
13973
|
+
const scriptContext = {
|
|
13974
|
+
workspacePath: wsPath,
|
|
13975
|
+
testId: "__after_all__",
|
|
13976
|
+
evalRunId,
|
|
13977
|
+
evalDir
|
|
13978
|
+
};
|
|
13979
|
+
try {
|
|
13980
|
+
const afterAllOutput = await executeWorkspaceScript(
|
|
13981
|
+
suiteWorkspace.after_all,
|
|
13982
|
+
scriptContext,
|
|
13983
|
+
"warn"
|
|
13984
|
+
);
|
|
13985
|
+
if (afterAllOutput && results.length > 0 && wsPath === afterAllWorkspaces[0]) {
|
|
13986
|
+
results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
|
|
13987
|
+
}
|
|
13988
|
+
} catch {
|
|
13989
|
+
}
|
|
13351
13990
|
}
|
|
13352
13991
|
}
|
|
13353
|
-
|
|
13354
|
-
|
|
13355
|
-
|
|
13356
|
-
|
|
13357
|
-
|
|
13358
|
-
|
|
13359
|
-
|
|
13360
|
-
|
|
13361
|
-
try {
|
|
13362
|
-
const afterAllOutput = await executeWorkspaceScript(
|
|
13363
|
-
suiteWorkspace.after_all,
|
|
13364
|
-
scriptContext,
|
|
13365
|
-
"warn"
|
|
13366
|
-
);
|
|
13367
|
-
if (afterAllOutput && results.length > 0) {
|
|
13368
|
-
results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
|
|
13992
|
+
if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !userWorkspacePath) {
|
|
13993
|
+
const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
|
|
13994
|
+
if (cleanupWorkspaces) {
|
|
13995
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
13996
|
+
});
|
|
13997
|
+
} else if (!hasFailure && !keepWorkspaces) {
|
|
13998
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
13999
|
+
});
|
|
13369
14000
|
}
|
|
13370
|
-
} catch {
|
|
13371
14001
|
}
|
|
13372
|
-
}
|
|
13373
|
-
if (sharedWorkspacePath) {
|
|
13374
|
-
const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
|
|
13375
14002
|
if (cleanupWorkspaces) {
|
|
13376
|
-
await
|
|
13377
|
-
});
|
|
13378
|
-
} else if (!hasFailure && !keepWorkspaces) {
|
|
13379
|
-
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
14003
|
+
await cleanupEvalWorkspaces(evalRunId).catch(() => {
|
|
13380
14004
|
});
|
|
13381
14005
|
}
|
|
14006
|
+
return results;
|
|
14007
|
+
} finally {
|
|
14008
|
+
if (poolManager) {
|
|
14009
|
+
if (poolSlot) {
|
|
14010
|
+
await poolManager.releaseSlot(poolSlot);
|
|
14011
|
+
}
|
|
14012
|
+
for (const slot of poolSlots) {
|
|
14013
|
+
if (slot !== poolSlot) {
|
|
14014
|
+
await poolManager.releaseSlot(slot).catch(() => {
|
|
14015
|
+
});
|
|
14016
|
+
}
|
|
14017
|
+
}
|
|
14018
|
+
}
|
|
13382
14019
|
}
|
|
13383
|
-
if (cleanupWorkspaces) {
|
|
13384
|
-
await cleanupEvalWorkspaces(evalRunId).catch(() => {
|
|
13385
|
-
});
|
|
13386
|
-
}
|
|
13387
|
-
return results;
|
|
13388
14020
|
}
|
|
13389
14021
|
async function runBatchEvaluation(options) {
|
|
13390
14022
|
const {
|
|
@@ -13563,6 +14195,7 @@ async function runEvalCase(options) {
|
|
|
13563
14195
|
repoManager,
|
|
13564
14196
|
evalDir
|
|
13565
14197
|
} = options;
|
|
14198
|
+
const setupDebug = process.env.AGENTV_SETUP_DEBUG === "1";
|
|
13566
14199
|
const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
|
|
13567
14200
|
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
13568
14201
|
const typeRegistry = providedTypeRegistry ?? createBuiltinRegistry();
|
|
@@ -13600,7 +14233,7 @@ async function runEvalCase(options) {
|
|
|
13600
14233
|
);
|
|
13601
14234
|
}
|
|
13602
14235
|
if (caseWorkspaceFile && workspacePath) {
|
|
13603
|
-
const copiedFile =
|
|
14236
|
+
const copiedFile = path39.join(workspacePath, path39.basename(caseWorkspaceFile));
|
|
13604
14237
|
try {
|
|
13605
14238
|
await stat7(copiedFile);
|
|
13606
14239
|
caseWorkspaceFile = copiedFile;
|
|
@@ -13610,12 +14243,20 @@ async function runEvalCase(options) {
|
|
|
13610
14243
|
}
|
|
13611
14244
|
if (!workspacePath && (evalCase.workspace?.before_all || evalCase.workspace?.repos?.length) && evalRunId) {
|
|
13612
14245
|
workspacePath = getWorkspacePath(evalRunId, evalCase.id);
|
|
13613
|
-
await
|
|
14246
|
+
await mkdir13(workspacePath, { recursive: true });
|
|
13614
14247
|
}
|
|
13615
14248
|
if (evalCase.workspace?.repos?.length && workspacePath) {
|
|
13616
|
-
const perCaseRepoManager = new RepoManager();
|
|
14249
|
+
const perCaseRepoManager = new RepoManager(void 0, setupDebug);
|
|
13617
14250
|
try {
|
|
14251
|
+
if (setupDebug) {
|
|
14252
|
+
console.log(
|
|
14253
|
+
`[setup] test=${evalCase.id} materializing ${evalCase.workspace.repos.length} per-test repo(s) into ${workspacePath}`
|
|
14254
|
+
);
|
|
14255
|
+
}
|
|
13618
14256
|
await perCaseRepoManager.materializeAll(evalCase.workspace.repos, workspacePath);
|
|
14257
|
+
if (setupDebug) {
|
|
14258
|
+
console.log(`[setup] test=${evalCase.id} per-test repo materialization complete`);
|
|
14259
|
+
}
|
|
13619
14260
|
} catch (error) {
|
|
13620
14261
|
const message = error instanceof Error ? error.message : String(error);
|
|
13621
14262
|
return buildErrorResult(
|
|
@@ -13631,6 +14272,12 @@ async function runEvalCase(options) {
|
|
|
13631
14272
|
}
|
|
13632
14273
|
}
|
|
13633
14274
|
if (workspacePath && evalCase.workspace?.before_all) {
|
|
14275
|
+
const beforeAllCommand = (evalCase.workspace.before_all.command ?? evalCase.workspace.before_all.script ?? []).join(" ");
|
|
14276
|
+
if (setupDebug) {
|
|
14277
|
+
console.log(
|
|
14278
|
+
`[setup] test=${evalCase.id} running before_all in cwd=${evalCase.workspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
14279
|
+
);
|
|
14280
|
+
}
|
|
13634
14281
|
const scriptContext = {
|
|
13635
14282
|
workspacePath,
|
|
13636
14283
|
testId: evalCase.id,
|
|
@@ -13644,6 +14291,9 @@ async function runEvalCase(options) {
|
|
|
13644
14291
|
evalCase.workspace.before_all,
|
|
13645
14292
|
scriptContext
|
|
13646
14293
|
);
|
|
14294
|
+
if (setupDebug) {
|
|
14295
|
+
console.log(`[setup] test=${evalCase.id} before_all completed`);
|
|
14296
|
+
}
|
|
13647
14297
|
} catch (error) {
|
|
13648
14298
|
const message = error instanceof Error ? error.message : String(error);
|
|
13649
14299
|
if (forceCleanup && workspacePath) {
|
|
@@ -14193,7 +14843,7 @@ async function runEvaluatorList(options) {
|
|
|
14193
14843
|
fileChanges,
|
|
14194
14844
|
workspacePath
|
|
14195
14845
|
};
|
|
14196
|
-
const evalFileDir = evalCase.guideline_paths[0] ?
|
|
14846
|
+
const evalFileDir = evalCase.guideline_paths[0] ? path39.dirname(evalCase.guideline_paths[0]) : process.cwd();
|
|
14197
14847
|
const dispatchContext = {
|
|
14198
14848
|
judgeProvider,
|
|
14199
14849
|
targetResolver,
|
|
@@ -14427,7 +15077,7 @@ function extractProviderError(response) {
|
|
|
14427
15077
|
return trimmed.length > 0 ? trimmed : void 0;
|
|
14428
15078
|
}
|
|
14429
15079
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
14430
|
-
const hash =
|
|
15080
|
+
const hash = createHash3("sha256");
|
|
14431
15081
|
hash.update(provider.id);
|
|
14432
15082
|
hash.update(target.name);
|
|
14433
15083
|
hash.update(evalCase.id);
|
|
@@ -14495,8 +15145,8 @@ function computeWeightedMean(entries) {
|
|
|
14495
15145
|
}
|
|
14496
15146
|
|
|
14497
15147
|
// src/evaluation/evaluate.ts
|
|
14498
|
-
import { existsSync as
|
|
14499
|
-
import
|
|
15148
|
+
import { existsSync as existsSync4 } from "node:fs";
|
|
15149
|
+
import path40 from "node:path";
|
|
14500
15150
|
async function evaluate(config) {
|
|
14501
15151
|
const startTime = Date.now();
|
|
14502
15152
|
if (config.tests && config.specFile) {
|
|
@@ -14518,13 +15168,13 @@ async function evaluate(config) {
|
|
|
14518
15168
|
let evalCases;
|
|
14519
15169
|
let testFilePath;
|
|
14520
15170
|
if (config.specFile) {
|
|
14521
|
-
testFilePath =
|
|
15171
|
+
testFilePath = path40.resolve(config.specFile);
|
|
14522
15172
|
evalCases = await loadTests(testFilePath, repoRoot, {
|
|
14523
15173
|
verbose: config.verbose,
|
|
14524
15174
|
filter: config.filter
|
|
14525
15175
|
});
|
|
14526
15176
|
} else {
|
|
14527
|
-
testFilePath =
|
|
15177
|
+
testFilePath = path40.join(process.cwd(), "__programmatic__.yaml");
|
|
14528
15178
|
evalCases = (config.tests ?? []).map((test) => {
|
|
14529
15179
|
const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
|
|
14530
15180
|
const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
|
|
@@ -14610,11 +15260,11 @@ function computeSummary(results, durationMs) {
|
|
|
14610
15260
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
14611
15261
|
async function discoverDefaultTarget(repoRoot) {
|
|
14612
15262
|
const cwd = process.cwd();
|
|
14613
|
-
const chain = buildDirectoryChain(
|
|
15263
|
+
const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
|
|
14614
15264
|
for (const dir of chain) {
|
|
14615
15265
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
14616
|
-
const targetsPath =
|
|
14617
|
-
if (!
|
|
15266
|
+
const targetsPath = path40.join(dir, candidate);
|
|
15267
|
+
if (!existsSync4(targetsPath)) continue;
|
|
14618
15268
|
try {
|
|
14619
15269
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
14620
15270
|
const defaultTarget = definitions.find((d) => d.name === "default");
|
|
@@ -14628,11 +15278,11 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
14628
15278
|
async function loadEnvHierarchy(repoRoot) {
|
|
14629
15279
|
const { readFileSync: readFileSync2 } = await import("node:fs");
|
|
14630
15280
|
const cwd = process.cwd();
|
|
14631
|
-
const chain = buildDirectoryChain(
|
|
15281
|
+
const chain = buildDirectoryChain(path40.join(cwd, "_placeholder"), repoRoot);
|
|
14632
15282
|
const envFiles = [];
|
|
14633
15283
|
for (const dir of chain) {
|
|
14634
|
-
const envPath =
|
|
14635
|
-
if (
|
|
15284
|
+
const envPath = path40.join(dir, ".env");
|
|
15285
|
+
if (existsSync4(envPath)) envFiles.push(envPath);
|
|
14636
15286
|
}
|
|
14637
15287
|
for (let i = envFiles.length - 1; i >= 0; i--) {
|
|
14638
15288
|
try {
|
|
@@ -14710,12 +15360,12 @@ var CONFIG_FILE_NAMES = [
|
|
|
14710
15360
|
".agentv/config.js"
|
|
14711
15361
|
];
|
|
14712
15362
|
async function loadTsConfig(projectRoot) {
|
|
14713
|
-
const { existsSync:
|
|
15363
|
+
const { existsSync: existsSync5 } = await import("node:fs");
|
|
14714
15364
|
const { pathToFileURL } = await import("node:url");
|
|
14715
15365
|
const { join: join2 } = await import("node:path");
|
|
14716
15366
|
for (const fileName of CONFIG_FILE_NAMES) {
|
|
14717
15367
|
const filePath = join2(projectRoot, fileName);
|
|
14718
|
-
if (!
|
|
15368
|
+
if (!existsSync5(filePath)) {
|
|
14719
15369
|
continue;
|
|
14720
15370
|
}
|
|
14721
15371
|
try {
|
|
@@ -14812,8 +15462,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
14812
15462
|
}
|
|
14813
15463
|
|
|
14814
15464
|
// src/evaluation/cache/response-cache.ts
|
|
14815
|
-
import { mkdir as
|
|
14816
|
-
import
|
|
15465
|
+
import { mkdir as mkdir14, readFile as readFile12, writeFile as writeFile9 } from "node:fs/promises";
|
|
15466
|
+
import path41 from "node:path";
|
|
14817
15467
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
14818
15468
|
var ResponseCache = class {
|
|
14819
15469
|
cachePath;
|
|
@@ -14823,7 +15473,7 @@ var ResponseCache = class {
|
|
|
14823
15473
|
async get(key) {
|
|
14824
15474
|
const filePath = this.keyToPath(key);
|
|
14825
15475
|
try {
|
|
14826
|
-
const data = await
|
|
15476
|
+
const data = await readFile12(filePath, "utf8");
|
|
14827
15477
|
return JSON.parse(data);
|
|
14828
15478
|
} catch {
|
|
14829
15479
|
return void 0;
|
|
@@ -14831,13 +15481,13 @@ var ResponseCache = class {
|
|
|
14831
15481
|
}
|
|
14832
15482
|
async set(key, value) {
|
|
14833
15483
|
const filePath = this.keyToPath(key);
|
|
14834
|
-
const dir =
|
|
14835
|
-
await
|
|
14836
|
-
await
|
|
15484
|
+
const dir = path41.dirname(filePath);
|
|
15485
|
+
await mkdir14(dir, { recursive: true });
|
|
15486
|
+
await writeFile9(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
14837
15487
|
}
|
|
14838
15488
|
keyToPath(key) {
|
|
14839
15489
|
const prefix = key.slice(0, 2);
|
|
14840
|
-
return
|
|
15490
|
+
return path41.join(this.cachePath, prefix, `${key}.json`);
|
|
14841
15491
|
}
|
|
14842
15492
|
};
|
|
14843
15493
|
function shouldEnableCache(params) {
|
|
@@ -15319,6 +15969,7 @@ export {
|
|
|
15319
15969
|
TokenUsageEvaluator,
|
|
15320
15970
|
ToolTrajectoryEvaluator,
|
|
15321
15971
|
WorkspaceCreationError,
|
|
15972
|
+
WorkspacePoolManager,
|
|
15322
15973
|
assembleLlmJudgePrompt,
|
|
15323
15974
|
avgToolDurationMs,
|
|
15324
15975
|
buildDirectoryChain,
|
|
@@ -15333,6 +15984,7 @@ export {
|
|
|
15333
15984
|
cleanupEvalWorkspaces,
|
|
15334
15985
|
cleanupWorkspace,
|
|
15335
15986
|
computeTraceSummary,
|
|
15987
|
+
computeWorkspaceFingerprint,
|
|
15336
15988
|
consumeClaudeLogEntries,
|
|
15337
15989
|
consumeCodexLogEntries,
|
|
15338
15990
|
consumeCopilotCliLogEntries,
|
|
@@ -15364,8 +16016,14 @@ export {
|
|
|
15364
16016
|
findGitRoot,
|
|
15365
16017
|
freeformEvaluationSchema,
|
|
15366
16018
|
generateRubrics,
|
|
16019
|
+
getAgentvHome,
|
|
16020
|
+
getGitCacheRoot,
|
|
15367
16021
|
getHitCount,
|
|
16022
|
+
getSubagentsRoot,
|
|
16023
|
+
getTraceStateRoot,
|
|
15368
16024
|
getWorkspacePath,
|
|
16025
|
+
getWorkspacePoolRoot,
|
|
16026
|
+
getWorkspacesRoot,
|
|
15369
16027
|
initializeBaseline,
|
|
15370
16028
|
isEvaluatorKind,
|
|
15371
16029
|
isGuidelineFile,
|