@agentv/core 2.15.0 → 2.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-N55K52OO.js → chunk-E6AJPAXM.js} +1 -1
- package/dist/chunk-E6AJPAXM.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +8 -7
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +9 -8
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +224 -260
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +56 -35
- package/dist/index.d.ts +56 -35
- package/dist/index.js +208 -243
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-N55K52OO.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -17,7 +17,7 @@ import {
|
|
|
17
17
|
readTextFile,
|
|
18
18
|
resolveFileReference,
|
|
19
19
|
resolveTargetDefinition
|
|
20
|
-
} from "./chunk-N55K52OO.js";
|
|
20
|
+
} from "./chunk-E6AJPAXM.js";
|
|
21
21
|
import {
|
|
22
22
|
OtlpJsonFileExporter
|
|
23
23
|
} from "./chunk-HFSYZHGF.js";
|
|
@@ -3114,16 +3114,37 @@ function parseRepoConfig(raw) {
|
|
|
3114
3114
|
...clone !== void 0 && { clone }
|
|
3115
3115
|
};
|
|
3116
3116
|
}
|
|
3117
|
-
function
|
|
3117
|
+
function parseWorkspaceHookConfig(raw, evalFileDir) {
|
|
3118
3118
|
if (!isJsonObject(raw)) return void 0;
|
|
3119
|
+
const script = parseWorkspaceScriptConfig(raw, evalFileDir);
|
|
3119
3120
|
const obj = raw;
|
|
3120
|
-
const
|
|
3121
|
-
const
|
|
3122
|
-
if (!
|
|
3121
|
+
const reset = obj.reset === "none" || obj.reset === "fast" || obj.reset === "strict" ? obj.reset : void 0;
|
|
3122
|
+
const clean = obj.clean === "always" || obj.clean === "on_success" || obj.clean === "on_failure" || obj.clean === "never" ? obj.clean : void 0;
|
|
3123
|
+
if (!script && !reset && !clean) return void 0;
|
|
3123
3124
|
return {
|
|
3124
|
-
...
|
|
3125
|
-
...
|
|
3125
|
+
...script ?? {},
|
|
3126
|
+
...reset !== void 0 && { reset },
|
|
3127
|
+
...clean !== void 0 && { clean }
|
|
3128
|
+
};
|
|
3129
|
+
}
|
|
3130
|
+
function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
3131
|
+
if (!isJsonObject(raw)) return void 0;
|
|
3132
|
+
const obj = raw;
|
|
3133
|
+
const beforeAllTests = parseWorkspaceHookConfig(obj.before_all_tests, evalFileDir);
|
|
3134
|
+
const beforeEachTest = parseWorkspaceHookConfig(obj.before_each_test, evalFileDir);
|
|
3135
|
+
const afterEachTest = parseWorkspaceHookConfig(obj.after_each_test, evalFileDir);
|
|
3136
|
+
const afterAllTests = parseWorkspaceHookConfig(obj.after_all_tests, evalFileDir);
|
|
3137
|
+
const onReuse = parseWorkspaceHookConfig(obj.on_reuse, evalFileDir);
|
|
3138
|
+
const onFinish = parseWorkspaceHookConfig(obj.on_finish, evalFileDir);
|
|
3139
|
+
const hooks = {
|
|
3140
|
+
...beforeAllTests !== void 0 && { before_all_tests: beforeAllTests },
|
|
3141
|
+
...beforeEachTest !== void 0 && { before_each_test: beforeEachTest },
|
|
3142
|
+
...afterEachTest !== void 0 && { after_each_test: afterEachTest },
|
|
3143
|
+
...afterAllTests !== void 0 && { after_all_tests: afterAllTests },
|
|
3144
|
+
...onReuse !== void 0 && { on_reuse: onReuse },
|
|
3145
|
+
...onFinish !== void 0 && { on_finish: onFinish }
|
|
3126
3146
|
};
|
|
3147
|
+
return Object.keys(hooks).length > 0 ? hooks : void 0;
|
|
3127
3148
|
}
|
|
3128
3149
|
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
3129
3150
|
if (typeof raw === "string") {
|
|
@@ -3154,37 +3175,56 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
3154
3175
|
}
|
|
3155
3176
|
const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
|
|
3156
3177
|
const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
|
|
3157
|
-
const
|
|
3158
|
-
const
|
|
3159
|
-
const
|
|
3160
|
-
const
|
|
3161
|
-
|
|
3162
|
-
if (!template && !isolation && !repos && !reset && !beforeAll && !afterAll && !beforeEach && !afterEach)
|
|
3178
|
+
const hooks = parseWorkspaceHooksConfig(obj.hooks, evalFileDir);
|
|
3179
|
+
const mode = obj.mode === "pooled" || obj.mode === "ephemeral" || obj.mode === "static" ? obj.mode : void 0;
|
|
3180
|
+
const staticPath = typeof obj.static_path === "string" ? obj.static_path : void 0;
|
|
3181
|
+
const pool = typeof obj.pool === "boolean" ? obj.pool : void 0;
|
|
3182
|
+
if (!template && !isolation && !repos && !hooks && !mode && !staticPath && pool === void 0)
|
|
3163
3183
|
return void 0;
|
|
3164
3184
|
return {
|
|
3165
3185
|
...template !== void 0 && { template },
|
|
3166
3186
|
...isolation !== void 0 && { isolation },
|
|
3167
3187
|
...repos !== void 0 && { repos },
|
|
3168
|
-
...
|
|
3169
|
-
...
|
|
3170
|
-
...
|
|
3171
|
-
...
|
|
3172
|
-
...afterEach !== void 0 && { after_each: afterEach }
|
|
3188
|
+
...hooks !== void 0 && { hooks },
|
|
3189
|
+
...mode !== void 0 && { mode },
|
|
3190
|
+
...staticPath !== void 0 && { static_path: staticPath },
|
|
3191
|
+
...pool !== void 0 && { pool }
|
|
3173
3192
|
};
|
|
3174
3193
|
}
|
|
3175
3194
|
function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
3176
3195
|
if (!suiteLevel && !caseLevel) return void 0;
|
|
3177
3196
|
if (!suiteLevel) return caseLevel;
|
|
3178
3197
|
if (!caseLevel) return suiteLevel;
|
|
3198
|
+
const mergeHook = (suiteHook, caseHook) => {
|
|
3199
|
+
if (!suiteHook && !caseHook) return void 0;
|
|
3200
|
+
return {
|
|
3201
|
+
...suiteHook ?? {},
|
|
3202
|
+
...caseHook ?? {}
|
|
3203
|
+
};
|
|
3204
|
+
};
|
|
3205
|
+
const mergedHooks = {
|
|
3206
|
+
before_all_tests: mergeHook(
|
|
3207
|
+
suiteLevel.hooks?.before_all_tests,
|
|
3208
|
+
caseLevel.hooks?.before_all_tests
|
|
3209
|
+
),
|
|
3210
|
+
before_each_test: mergeHook(
|
|
3211
|
+
suiteLevel.hooks?.before_each_test,
|
|
3212
|
+
caseLevel.hooks?.before_each_test
|
|
3213
|
+
),
|
|
3214
|
+
after_each_test: mergeHook(suiteLevel.hooks?.after_each_test, caseLevel.hooks?.after_each_test),
|
|
3215
|
+
after_all_tests: mergeHook(suiteLevel.hooks?.after_all_tests, caseLevel.hooks?.after_all_tests),
|
|
3216
|
+
on_reuse: mergeHook(suiteLevel.hooks?.on_reuse, caseLevel.hooks?.on_reuse),
|
|
3217
|
+
on_finish: mergeHook(suiteLevel.hooks?.on_finish, caseLevel.hooks?.on_finish)
|
|
3218
|
+
};
|
|
3219
|
+
const hasHooks = Object.values(mergedHooks).some((hook) => hook !== void 0);
|
|
3179
3220
|
return {
|
|
3180
3221
|
template: caseLevel.template ?? suiteLevel.template,
|
|
3181
3222
|
isolation: caseLevel.isolation ?? suiteLevel.isolation,
|
|
3182
3223
|
repos: caseLevel.repos ?? suiteLevel.repos,
|
|
3183
|
-
|
|
3184
|
-
|
|
3185
|
-
|
|
3186
|
-
|
|
3187
|
-
after_each: caseLevel.after_each ?? suiteLevel.after_each
|
|
3224
|
+
...hasHooks && { hooks: mergedHooks },
|
|
3225
|
+
mode: caseLevel.mode ?? suiteLevel.mode,
|
|
3226
|
+
static_path: caseLevel.static_path ?? suiteLevel.static_path,
|
|
3227
|
+
pool: caseLevel.pool ?? suiteLevel.pool
|
|
3188
3228
|
};
|
|
3189
3229
|
}
|
|
3190
3230
|
function asString6(value) {
|
|
@@ -7238,9 +7278,6 @@ function getAgentvHome() {
|
|
|
7238
7278
|
function getWorkspacesRoot() {
|
|
7239
7279
|
return path21.join(getAgentvHome(), "workspaces");
|
|
7240
7280
|
}
|
|
7241
|
-
function getGitCacheRoot() {
|
|
7242
|
-
return path21.join(getAgentvHome(), "git-cache");
|
|
7243
|
-
}
|
|
7244
7281
|
function getSubagentsRoot() {
|
|
7245
7282
|
return path21.join(getAgentvHome(), "subagents");
|
|
7246
7283
|
}
|
|
@@ -8700,16 +8737,16 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
8700
8737
|
});
|
|
8701
8738
|
}
|
|
8702
8739
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
8703
|
-
const { mkdir:
|
|
8740
|
+
const { mkdir: mkdir14, readFile: readFile13, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
8704
8741
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
8705
8742
|
const path42 = await import("node:path");
|
|
8706
8743
|
const { randomUUID: randomUUID8 } = await import("node:crypto");
|
|
8707
8744
|
const dir = path42.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
|
|
8708
|
-
await
|
|
8745
|
+
await mkdir14(dir, { recursive: true });
|
|
8709
8746
|
const stdinPath = path42.join(dir, "stdin.txt");
|
|
8710
8747
|
const stdoutPath = path42.join(dir, "stdout.txt");
|
|
8711
8748
|
const stderrPath = path42.join(dir, "stderr.txt");
|
|
8712
|
-
await
|
|
8749
|
+
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
8713
8750
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
8714
8751
|
const { spawn: spawn4 } = await import("node:child_process");
|
|
8715
8752
|
try {
|
|
@@ -8742,7 +8779,7 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
8742
8779
|
const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
8743
8780
|
return { stdout, stderr, exitCode };
|
|
8744
8781
|
} finally {
|
|
8745
|
-
await
|
|
8782
|
+
await rm6(dir, { recursive: true, force: true });
|
|
8746
8783
|
}
|
|
8747
8784
|
}
|
|
8748
8785
|
|
|
@@ -11930,8 +11967,8 @@ function runEqualsAssertion(output, value) {
|
|
|
11930
11967
|
}
|
|
11931
11968
|
|
|
11932
11969
|
// src/evaluation/orchestrator.ts
|
|
11933
|
-
import { createHash as
|
|
11934
|
-
import { mkdir as
|
|
11970
|
+
import { createHash as createHash2, randomUUID as randomUUID7 } from "node:crypto";
|
|
11971
|
+
import { mkdir as mkdir12, stat as stat7 } from "node:fs/promises";
|
|
11935
11972
|
import path39 from "node:path";
|
|
11936
11973
|
import micromatch4 from "micromatch";
|
|
11937
11974
|
|
|
@@ -12894,7 +12931,7 @@ var WorkspacePoolManager = class {
|
|
|
12894
12931
|
* 7. Return the slot (with path, index, isExisting)
|
|
12895
12932
|
*/
|
|
12896
12933
|
async acquireWorkspace(options) {
|
|
12897
|
-
const { templatePath, repos, maxSlots, repoManager } = options;
|
|
12934
|
+
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
12898
12935
|
const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
|
|
12899
12936
|
const poolDir = path36.join(this.poolRoot, fingerprint);
|
|
12900
12937
|
await mkdir11(poolDir, { recursive: true });
|
|
@@ -12914,7 +12951,7 @@ var WorkspacePoolManager = class {
|
|
|
12914
12951
|
}
|
|
12915
12952
|
const slotExists = existsSync2(slotPath);
|
|
12916
12953
|
if (slotExists) {
|
|
12917
|
-
await this.resetSlot(slotPath, templatePath, repos);
|
|
12954
|
+
await this.resetSlot(slotPath, templatePath, repos, poolReset);
|
|
12918
12955
|
return {
|
|
12919
12956
|
index: i,
|
|
12920
12957
|
path: slotPath,
|
|
@@ -13046,15 +13083,19 @@ var WorkspacePoolManager = class {
|
|
|
13046
13083
|
* 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
|
|
13047
13084
|
* 2. Re-copy template files (skip repo directories)
|
|
13048
13085
|
*/
|
|
13049
|
-
async resetSlot(slotPath, templatePath, repos) {
|
|
13086
|
+
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
13050
13087
|
for (const repo of repos) {
|
|
13051
13088
|
const repoDir = path36.join(slotPath, repo.path);
|
|
13052
13089
|
if (!existsSync2(repoDir)) {
|
|
13053
13090
|
continue;
|
|
13054
13091
|
}
|
|
13092
|
+
if (poolReset === "none") {
|
|
13093
|
+
continue;
|
|
13094
|
+
}
|
|
13055
13095
|
const ref = repo.checkout?.ref ?? "HEAD";
|
|
13056
13096
|
await git(["reset", "--hard", ref], { cwd: repoDir });
|
|
13057
|
-
|
|
13097
|
+
const cleanFlag = poolReset === "strict" ? "-fdx" : "-fd";
|
|
13098
|
+
await git(["clean", cleanFlag], { cwd: repoDir });
|
|
13058
13099
|
}
|
|
13059
13100
|
if (templatePath) {
|
|
13060
13101
|
const repoDirNames = new Set(
|
|
@@ -13070,14 +13111,10 @@ var WorkspacePoolManager = class {
|
|
|
13070
13111
|
|
|
13071
13112
|
// src/evaluation/workspace/repo-manager.ts
|
|
13072
13113
|
import { execFile as execFile2 } from "node:child_process";
|
|
13073
|
-
import { createHash as createHash2 } from "node:crypto";
|
|
13074
|
-
import { existsSync as existsSync3 } from "node:fs";
|
|
13075
|
-
import { mkdir as mkdir12, rm as rm6, unlink as unlink2, writeFile as writeFile8 } from "node:fs/promises";
|
|
13076
13114
|
import path37 from "node:path";
|
|
13077
13115
|
import { promisify as promisify6 } from "node:util";
|
|
13078
13116
|
var execFileAsync2 = promisify6(execFile2);
|
|
13079
13117
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
13080
|
-
var LOCK_TIMEOUT_MS = 6e4;
|
|
13081
13118
|
function gitEnv2() {
|
|
13082
13119
|
const env = { ...process.env };
|
|
13083
13120
|
for (const key of Object.keys(env)) {
|
|
@@ -13092,10 +13129,6 @@ function gitEnv2() {
|
|
|
13092
13129
|
GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
|
|
13093
13130
|
};
|
|
13094
13131
|
}
|
|
13095
|
-
function cacheKey(source) {
|
|
13096
|
-
const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
|
|
13097
|
-
return createHash2("sha256").update(raw).digest("hex");
|
|
13098
|
-
}
|
|
13099
13132
|
function getSourceUrl(source) {
|
|
13100
13133
|
return source.type === "git" ? source.url : source.path;
|
|
13101
13134
|
}
|
|
@@ -13109,33 +13142,9 @@ async function git2(args, opts) {
|
|
|
13109
13142
|
});
|
|
13110
13143
|
return stdout.trim();
|
|
13111
13144
|
}
|
|
13112
|
-
async function acquireLock(lockPath) {
|
|
13113
|
-
const start = Date.now();
|
|
13114
|
-
while (Date.now() - start < LOCK_TIMEOUT_MS) {
|
|
13115
|
-
try {
|
|
13116
|
-
await writeFile8(lockPath, String(process.pid), { flag: "wx" });
|
|
13117
|
-
return;
|
|
13118
|
-
} catch (err) {
|
|
13119
|
-
if (err.code === "EEXIST") {
|
|
13120
|
-
await new Promise((r) => setTimeout(r, 200));
|
|
13121
|
-
continue;
|
|
13122
|
-
}
|
|
13123
|
-
throw err;
|
|
13124
|
-
}
|
|
13125
|
-
}
|
|
13126
|
-
throw new Error(`Timed out waiting for lock: ${lockPath}`);
|
|
13127
|
-
}
|
|
13128
|
-
async function releaseLock(lockPath) {
|
|
13129
|
-
try {
|
|
13130
|
-
await unlink2(lockPath);
|
|
13131
|
-
} catch {
|
|
13132
|
-
}
|
|
13133
|
-
}
|
|
13134
13145
|
var RepoManager = class {
|
|
13135
|
-
cacheDir;
|
|
13136
13146
|
verbose;
|
|
13137
|
-
constructor(
|
|
13138
|
-
this.cacheDir = cacheDir ?? getGitCacheRoot();
|
|
13147
|
+
constructor(verbose = false) {
|
|
13139
13148
|
this.verbose = verbose;
|
|
13140
13149
|
}
|
|
13141
13150
|
async runGit(args, opts) {
|
|
@@ -13160,86 +13169,18 @@ var RepoManager = class {
|
|
|
13160
13169
|
}
|
|
13161
13170
|
}
|
|
13162
13171
|
/**
|
|
13163
|
-
*
|
|
13164
|
-
* Creates on first access, fetches updates on subsequent calls.
|
|
13165
|
-
* Returns the absolute path to the cache directory.
|
|
13166
|
-
*/
|
|
13167
|
-
async ensureCache(source, depth, resolve) {
|
|
13168
|
-
const key = cacheKey(source);
|
|
13169
|
-
const cachePath = path37.join(this.cacheDir, key);
|
|
13170
|
-
const lockPath = `${cachePath}.lock`;
|
|
13171
|
-
const cacheExists = existsSync3(path37.join(cachePath, "HEAD"));
|
|
13172
|
-
if (this.verbose) {
|
|
13173
|
-
console.log(
|
|
13174
|
-
`[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve ?? "remote"} cache=${cachePath}`
|
|
13175
|
-
);
|
|
13176
|
-
}
|
|
13177
|
-
if (resolve === "local") {
|
|
13178
|
-
if (cacheExists) {
|
|
13179
|
-
if (this.verbose) {
|
|
13180
|
-
console.log(`[repo] using existing local cache ${cachePath}`);
|
|
13181
|
-
}
|
|
13182
|
-
return cachePath;
|
|
13183
|
-
}
|
|
13184
|
-
const url = getSourceUrl(source);
|
|
13185
|
-
throw new Error(
|
|
13186
|
-
`No cache found for \`${url}\`. Run \`agentv cache add --url ${url} --from <local-path>\` to seed it.`
|
|
13187
|
-
);
|
|
13188
|
-
}
|
|
13189
|
-
await mkdir12(this.cacheDir, { recursive: true });
|
|
13190
|
-
const lockStartedAt = Date.now();
|
|
13191
|
-
await acquireLock(lockPath);
|
|
13192
|
-
if (this.verbose) {
|
|
13193
|
-
console.log(`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`);
|
|
13194
|
-
}
|
|
13195
|
-
try {
|
|
13196
|
-
if (cacheExists) {
|
|
13197
|
-
if (this.verbose) {
|
|
13198
|
-
console.log(`[repo] refreshing existing cache ${cachePath}`);
|
|
13199
|
-
}
|
|
13200
|
-
const fetchArgs = ["fetch", "--prune"];
|
|
13201
|
-
if (depth) {
|
|
13202
|
-
fetchArgs.push("--depth", String(depth));
|
|
13203
|
-
}
|
|
13204
|
-
await this.runGit(fetchArgs, { cwd: cachePath });
|
|
13205
|
-
} else {
|
|
13206
|
-
if (this.verbose) {
|
|
13207
|
-
console.log(`[repo] creating new cache ${cachePath}`);
|
|
13208
|
-
}
|
|
13209
|
-
const cloneArgs = ["clone", "--mirror", "--bare"];
|
|
13210
|
-
if (depth) {
|
|
13211
|
-
cloneArgs.push("--depth", String(depth));
|
|
13212
|
-
}
|
|
13213
|
-
const sourceUrl = getSourceUrl(source);
|
|
13214
|
-
const cloneUrl = depth && source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
|
|
13215
|
-
cloneArgs.push(cloneUrl, cachePath);
|
|
13216
|
-
await this.runGit(cloneArgs);
|
|
13217
|
-
}
|
|
13218
|
-
} finally {
|
|
13219
|
-
await releaseLock(lockPath);
|
|
13220
|
-
if (this.verbose) {
|
|
13221
|
-
console.log(`[repo] lock released path=${lockPath}`);
|
|
13222
|
-
}
|
|
13223
|
-
}
|
|
13224
|
-
return cachePath;
|
|
13225
|
-
}
|
|
13226
|
-
/**
|
|
13227
|
-
* Clone a repo from cache into the workspace at the configured path.
|
|
13172
|
+
* Clone a repo directly from source into the workspace at the configured path.
|
|
13228
13173
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
13229
13174
|
*/
|
|
13230
13175
|
async materialize(repo, workspacePath) {
|
|
13231
13176
|
const targetDir = path37.join(workspacePath, repo.path);
|
|
13177
|
+
const sourceUrl = getSourceUrl(repo.source);
|
|
13232
13178
|
const startedAt = Date.now();
|
|
13233
13179
|
if (this.verbose) {
|
|
13234
13180
|
console.log(
|
|
13235
|
-
`[repo] materialize start path=${repo.path} source=${
|
|
13181
|
+
`[repo] materialize start path=${repo.path} source=${sourceUrl} workspace=${workspacePath}`
|
|
13236
13182
|
);
|
|
13237
13183
|
}
|
|
13238
|
-
const cachePath = await this.ensureCache(
|
|
13239
|
-
repo.source,
|
|
13240
|
-
repo.clone?.depth,
|
|
13241
|
-
repo.checkout?.resolve
|
|
13242
|
-
);
|
|
13243
13184
|
const cloneArgs = ["clone"];
|
|
13244
13185
|
if (repo.clone?.depth) {
|
|
13245
13186
|
cloneArgs.push("--depth", String(repo.clone.depth));
|
|
@@ -13248,7 +13189,7 @@ var RepoManager = class {
|
|
|
13248
13189
|
cloneArgs.push("--filter", repo.clone.filter);
|
|
13249
13190
|
}
|
|
13250
13191
|
cloneArgs.push("--no-checkout");
|
|
13251
|
-
const cloneUrl = repo.clone?.depth || repo.clone?.filter ? `file://${
|
|
13192
|
+
const cloneUrl = (repo.clone?.depth || repo.clone?.filter) && repo.source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
|
|
13252
13193
|
cloneArgs.push(cloneUrl, targetDir);
|
|
13253
13194
|
await this.runGit(cloneArgs);
|
|
13254
13195
|
if (repo.clone?.sparse?.length) {
|
|
@@ -13320,52 +13261,14 @@ var RepoManager = class {
|
|
|
13320
13261
|
}
|
|
13321
13262
|
}
|
|
13322
13263
|
/** Reset repos in workspace to their checkout state. */
|
|
13323
|
-
async reset(repos, workspacePath,
|
|
13324
|
-
|
|
13325
|
-
for (const repo of repos) {
|
|
13326
|
-
const targetDir = path37.join(workspacePath, repo.path);
|
|
13327
|
-
await rm6(targetDir, { recursive: true, force: true });
|
|
13328
|
-
}
|
|
13329
|
-
await this.materializeAll(repos, workspacePath);
|
|
13330
|
-
return;
|
|
13331
|
-
}
|
|
13264
|
+
async reset(repos, workspacePath, reset) {
|
|
13265
|
+
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
13332
13266
|
for (const repo of repos) {
|
|
13333
13267
|
const targetDir = path37.join(workspacePath, repo.path);
|
|
13334
13268
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
13335
|
-
await this.runGit(["clean",
|
|
13269
|
+
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
13336
13270
|
}
|
|
13337
13271
|
}
|
|
13338
|
-
/**
|
|
13339
|
-
* Seed the cache from a local repository, setting the remote to a given URL.
|
|
13340
|
-
* Useful for avoiding slow network clones when a local clone already exists.
|
|
13341
|
-
*/
|
|
13342
|
-
async seedCache(localPath, remoteUrl, opts) {
|
|
13343
|
-
const source = { type: "git", url: remoteUrl };
|
|
13344
|
-
const key = cacheKey(source);
|
|
13345
|
-
const cachePath = path37.join(this.cacheDir, key);
|
|
13346
|
-
const lockPath = `${cachePath}.lock`;
|
|
13347
|
-
await mkdir12(this.cacheDir, { recursive: true });
|
|
13348
|
-
await acquireLock(lockPath);
|
|
13349
|
-
try {
|
|
13350
|
-
if (existsSync3(path37.join(cachePath, "HEAD"))) {
|
|
13351
|
-
if (!opts?.force) {
|
|
13352
|
-
throw new Error(
|
|
13353
|
-
`Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
|
|
13354
|
-
);
|
|
13355
|
-
}
|
|
13356
|
-
await rm6(cachePath, { recursive: true, force: true });
|
|
13357
|
-
}
|
|
13358
|
-
await git2(["clone", "--mirror", "--bare", localPath, cachePath]);
|
|
13359
|
-
await git2(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
|
|
13360
|
-
} finally {
|
|
13361
|
-
await releaseLock(lockPath);
|
|
13362
|
-
}
|
|
13363
|
-
return cachePath;
|
|
13364
|
-
}
|
|
13365
|
-
/** Remove the entire cache directory. */
|
|
13366
|
-
async cleanCache() {
|
|
13367
|
-
await rm6(this.cacheDir, { recursive: true, force: true });
|
|
13368
|
-
}
|
|
13369
13272
|
};
|
|
13370
13273
|
|
|
13371
13274
|
// src/evaluation/workspace/resolve.ts
|
|
@@ -13450,6 +13353,22 @@ function classifyQualityStatus(score) {
|
|
|
13450
13353
|
function usesFileReferencePrompt(provider) {
|
|
13451
13354
|
return isAgentProvider(provider) || provider.kind === "cli";
|
|
13452
13355
|
}
|
|
13356
|
+
function toScriptConfig(hook, hookName, context) {
|
|
13357
|
+
const command = hook.command ?? hook.script;
|
|
13358
|
+
if (!command || command.length === 0) {
|
|
13359
|
+
throw new Error(`${hookName} hook in ${context} requires command or script`);
|
|
13360
|
+
}
|
|
13361
|
+
return {
|
|
13362
|
+
command,
|
|
13363
|
+
...hook.timeout_ms !== void 0 && { timeout_ms: hook.timeout_ms },
|
|
13364
|
+
...hook.timeoutMs !== void 0 && { timeoutMs: hook.timeoutMs },
|
|
13365
|
+
...hook.cwd !== void 0 && { cwd: hook.cwd },
|
|
13366
|
+
...hook.script !== void 0 && { script: hook.script }
|
|
13367
|
+
};
|
|
13368
|
+
}
|
|
13369
|
+
function hasHookCommand(hook) {
|
|
13370
|
+
return !!(hook?.command && hook.command.length > 0 || hook?.script && hook.script.length > 0);
|
|
13371
|
+
}
|
|
13453
13372
|
function getWorkspaceTemplate(target) {
|
|
13454
13373
|
const config = target.config;
|
|
13455
13374
|
if ("workspaceTemplate" in config && typeof config.workspaceTemplate === "string") {
|
|
@@ -13483,7 +13402,12 @@ async function runEvaluation(options) {
|
|
|
13483
13402
|
failOnError,
|
|
13484
13403
|
poolWorkspaces,
|
|
13485
13404
|
poolMaxSlots: configPoolMaxSlots,
|
|
13486
|
-
workspace:
|
|
13405
|
+
workspace: legacyWorkspacePath,
|
|
13406
|
+
workspaceMode,
|
|
13407
|
+
workspacePath,
|
|
13408
|
+
workspaceClean,
|
|
13409
|
+
retainOnSuccess,
|
|
13410
|
+
retainOnFailure
|
|
13487
13411
|
} = options;
|
|
13488
13412
|
let useCache = options.useCache;
|
|
13489
13413
|
if (trials && trials.count > 1 && useCache) {
|
|
@@ -13619,13 +13543,23 @@ async function runEvaluation(options) {
|
|
|
13619
13543
|
}
|
|
13620
13544
|
};
|
|
13621
13545
|
const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
|
|
13622
|
-
|
|
13546
|
+
const configuredMode = suiteWorkspace?.mode ?? workspaceMode;
|
|
13547
|
+
const configuredStaticPath = suiteWorkspace?.static_path ?? workspacePath ?? legacyWorkspacePath;
|
|
13548
|
+
const useStaticWorkspace = configuredMode === "static" || !!configuredStaticPath && !configuredMode;
|
|
13549
|
+
if (useStaticWorkspace && isPerTestIsolation) {
|
|
13623
13550
|
throw new Error(
|
|
13624
|
-
"
|
|
13551
|
+
"static workspace mode is incompatible with isolation: per_test. Use isolation: shared (default)."
|
|
13625
13552
|
);
|
|
13626
13553
|
}
|
|
13627
|
-
|
|
13628
|
-
|
|
13554
|
+
if (configuredMode === "static" && !configuredStaticPath) {
|
|
13555
|
+
throw new Error("workspace.mode=static requires workspace.static_path or --workspace-path");
|
|
13556
|
+
}
|
|
13557
|
+
const hasSharedWorkspace = !!(useStaticWorkspace || workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation);
|
|
13558
|
+
const poolEnabled = configuredMode === "pooled" ? true : configuredMode === "ephemeral" || useStaticWorkspace ? false : suiteWorkspace?.pool ?? poolWorkspaces ?? true;
|
|
13559
|
+
const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
|
|
13560
|
+
const finishCleanPolicy = suiteWorkspace?.hooks?.on_finish?.clean;
|
|
13561
|
+
const resolvedRetainOnSuccess = (finishCleanPolicy === "always" || finishCleanPolicy === "on_success" ? "cleanup" : finishCleanPolicy === "on_failure" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
|
|
13562
|
+
const resolvedRetainOnFailure = (finishCleanPolicy === "always" || finishCleanPolicy === "on_failure" ? "cleanup" : finishCleanPolicy === "on_success" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
|
|
13629
13563
|
const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
|
|
13630
13564
|
const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
|
|
13631
13565
|
setupLog(
|
|
@@ -13646,20 +13580,21 @@ async function runEvaluation(options) {
|
|
|
13646
13580
|
const availablePoolSlots = [];
|
|
13647
13581
|
const poolSlotBaselines = /* @__PURE__ */ new Map();
|
|
13648
13582
|
const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
|
|
13649
|
-
if (
|
|
13650
|
-
sharedWorkspacePath =
|
|
13651
|
-
setupLog(`using
|
|
13583
|
+
if (useStaticWorkspace && configuredStaticPath) {
|
|
13584
|
+
sharedWorkspacePath = configuredStaticPath;
|
|
13585
|
+
setupLog(`using static workspace: ${configuredStaticPath}`);
|
|
13652
13586
|
} else if (usePool && suiteWorkspace?.repos) {
|
|
13653
13587
|
const slotsNeeded = workers;
|
|
13654
13588
|
setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
|
|
13655
13589
|
poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
|
|
13656
|
-
const poolRepoManager = new RepoManager(
|
|
13590
|
+
const poolRepoManager = new RepoManager(verbose);
|
|
13657
13591
|
for (let i = 0; i < slotsNeeded; i++) {
|
|
13658
13592
|
const slot = await poolManager.acquireWorkspace({
|
|
13659
13593
|
templatePath: workspaceTemplate,
|
|
13660
13594
|
repos: suiteWorkspace.repos,
|
|
13661
13595
|
maxSlots: poolMaxSlots,
|
|
13662
|
-
repoManager: poolRepoManager
|
|
13596
|
+
repoManager: poolRepoManager,
|
|
13597
|
+
poolReset: (workspaceClean === "full" ? "strict" : workspaceClean === "standard" ? "fast" : null) ?? suiteWorkspace.hooks?.on_reuse?.reset ?? "fast"
|
|
13663
13598
|
});
|
|
13664
13599
|
poolSlots.push(slot);
|
|
13665
13600
|
setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
|
|
@@ -13679,9 +13614,9 @@ async function runEvaluation(options) {
|
|
|
13679
13614
|
const message = error instanceof Error ? error.message : String(error);
|
|
13680
13615
|
throw new Error(`Failed to create shared workspace: ${message}`);
|
|
13681
13616
|
}
|
|
13682
|
-
} else if (suiteWorkspace?.
|
|
13617
|
+
} else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
|
|
13683
13618
|
sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
|
|
13684
|
-
await
|
|
13619
|
+
await mkdir12(sharedWorkspacePath, { recursive: true });
|
|
13685
13620
|
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
13686
13621
|
}
|
|
13687
13622
|
try {
|
|
@@ -13693,7 +13628,7 @@ async function runEvaluation(options) {
|
|
|
13693
13628
|
} catch {
|
|
13694
13629
|
}
|
|
13695
13630
|
}
|
|
13696
|
-
const repoManager = suiteWorkspace?.repos?.length && !usePool && !
|
|
13631
|
+
const repoManager = suiteWorkspace?.repos?.length && !usePool && !useStaticWorkspace ? new RepoManager(verbose) : void 0;
|
|
13697
13632
|
if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
|
|
13698
13633
|
setupLog(
|
|
13699
13634
|
`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
|
|
@@ -13703,17 +13638,19 @@ async function runEvaluation(options) {
|
|
|
13703
13638
|
setupLog("shared repo materialization complete");
|
|
13704
13639
|
} catch (error) {
|
|
13705
13640
|
const message = error instanceof Error ? error.message : String(error);
|
|
13706
|
-
if (sharedWorkspacePath && !
|
|
13641
|
+
if (sharedWorkspacePath && !useStaticWorkspace) {
|
|
13707
13642
|
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
13708
13643
|
});
|
|
13709
13644
|
}
|
|
13710
13645
|
throw new Error(`Failed to materialize repos: ${message}`);
|
|
13711
13646
|
}
|
|
13712
13647
|
}
|
|
13713
|
-
|
|
13714
|
-
|
|
13648
|
+
const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all_tests;
|
|
13649
|
+
if (sharedWorkspacePath && hasHookCommand(suiteBeforeAllHook)) {
|
|
13650
|
+
const beforeAllHook = suiteBeforeAllHook;
|
|
13651
|
+
const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
|
|
13715
13652
|
setupLog(
|
|
13716
|
-
`running shared before_all in cwd=${
|
|
13653
|
+
`running shared before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
13717
13654
|
);
|
|
13718
13655
|
const scriptContext = {
|
|
13719
13656
|
workspacePath: sharedWorkspacePath,
|
|
@@ -13722,18 +13659,22 @@ async function runEvaluation(options) {
|
|
|
13722
13659
|
evalDir
|
|
13723
13660
|
};
|
|
13724
13661
|
try {
|
|
13725
|
-
beforeAllOutput = await executeWorkspaceScript(
|
|
13662
|
+
beforeAllOutput = await executeWorkspaceScript(
|
|
13663
|
+
toScriptConfig(beforeAllHook, "before_all_tests", "suite workspace"),
|
|
13664
|
+
scriptContext
|
|
13665
|
+
);
|
|
13726
13666
|
setupLog("shared before_all completed");
|
|
13727
13667
|
} catch (error) {
|
|
13728
13668
|
const message = error instanceof Error ? error.message : String(error);
|
|
13729
|
-
if (sharedWorkspacePath && !
|
|
13669
|
+
if (sharedWorkspacePath && !useStaticWorkspace) {
|
|
13730
13670
|
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
13731
13671
|
});
|
|
13732
13672
|
}
|
|
13733
13673
|
throw new Error(`before_all script failed: ${message}`);
|
|
13734
13674
|
}
|
|
13735
13675
|
}
|
|
13736
|
-
if (availablePoolSlots.length > 0 &&
|
|
13676
|
+
if (availablePoolSlots.length > 0 && hasHookCommand(suiteBeforeAllHook)) {
|
|
13677
|
+
const beforeAllHook = suiteBeforeAllHook;
|
|
13737
13678
|
for (const slot of availablePoolSlots) {
|
|
13738
13679
|
setupLog(`running before_all on pool slot ${slot.index}`);
|
|
13739
13680
|
const scriptContext = {
|
|
@@ -13743,7 +13684,10 @@ async function runEvaluation(options) {
|
|
|
13743
13684
|
evalDir
|
|
13744
13685
|
};
|
|
13745
13686
|
try {
|
|
13746
|
-
const output = await executeWorkspaceScript(
|
|
13687
|
+
const output = await executeWorkspaceScript(
|
|
13688
|
+
toScriptConfig(beforeAllHook, "before_all_tests", "suite workspace"),
|
|
13689
|
+
scriptContext
|
|
13690
|
+
);
|
|
13747
13691
|
if (!beforeAllOutput) beforeAllOutput = output;
|
|
13748
13692
|
setupLog(`before_all completed on pool slot ${slot.index}`);
|
|
13749
13693
|
} catch (error) {
|
|
@@ -13875,6 +13819,8 @@ async function runEvaluation(options) {
|
|
|
13875
13819
|
evalRunId,
|
|
13876
13820
|
keepWorkspaces,
|
|
13877
13821
|
cleanupWorkspaces,
|
|
13822
|
+
retainOnSuccess: resolvedRetainOnSuccess,
|
|
13823
|
+
retainOnFailure: resolvedRetainOnFailure,
|
|
13878
13824
|
sharedWorkspacePath: testWorkspacePath,
|
|
13879
13825
|
sharedBaselineCommit: testBaselineCommit,
|
|
13880
13826
|
suiteWorkspaceFile,
|
|
@@ -13968,7 +13914,9 @@ async function runEvaluation(options) {
|
|
|
13968
13914
|
}
|
|
13969
13915
|
}
|
|
13970
13916
|
const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
|
|
13971
|
-
|
|
13917
|
+
const suiteAfterAllHook = suiteWorkspace?.hooks?.after_all_tests;
|
|
13918
|
+
if (afterAllWorkspaces.length > 0 && hasHookCommand(suiteAfterAllHook)) {
|
|
13919
|
+
const afterAllHook = suiteAfterAllHook;
|
|
13972
13920
|
for (const wsPath of afterAllWorkspaces) {
|
|
13973
13921
|
const scriptContext = {
|
|
13974
13922
|
workspacePath: wsPath,
|
|
@@ -13978,7 +13926,7 @@ async function runEvaluation(options) {
|
|
|
13978
13926
|
};
|
|
13979
13927
|
try {
|
|
13980
13928
|
const afterAllOutput = await executeWorkspaceScript(
|
|
13981
|
-
|
|
13929
|
+
toScriptConfig(afterAllHook, "after_all_tests", "suite workspace"),
|
|
13982
13930
|
scriptContext,
|
|
13983
13931
|
"warn"
|
|
13984
13932
|
);
|
|
@@ -13989,12 +13937,14 @@ async function runEvaluation(options) {
|
|
|
13989
13937
|
}
|
|
13990
13938
|
}
|
|
13991
13939
|
}
|
|
13992
|
-
if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !
|
|
13940
|
+
if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !useStaticWorkspace) {
|
|
13993
13941
|
const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
|
|
13994
|
-
if (
|
|
13995
|
-
|
|
13996
|
-
|
|
13997
|
-
|
|
13942
|
+
if (hasFailure) {
|
|
13943
|
+
if (resolvedRetainOnFailure === "cleanup") {
|
|
13944
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
13945
|
+
});
|
|
13946
|
+
}
|
|
13947
|
+
} else if (resolvedRetainOnSuccess === "cleanup") {
|
|
13998
13948
|
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
13999
13949
|
});
|
|
14000
13950
|
}
|
|
@@ -14188,6 +14138,8 @@ async function runEvalCase(options) {
|
|
|
14188
14138
|
evalRunId,
|
|
14189
14139
|
keepWorkspaces,
|
|
14190
14140
|
cleanupWorkspaces: forceCleanup,
|
|
14141
|
+
retainOnSuccess,
|
|
14142
|
+
retainOnFailure,
|
|
14191
14143
|
sharedWorkspacePath,
|
|
14192
14144
|
sharedBaselineCommit,
|
|
14193
14145
|
suiteWorkspaceFile,
|
|
@@ -14199,10 +14151,10 @@ async function runEvalCase(options) {
|
|
|
14199
14151
|
const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
|
|
14200
14152
|
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
14201
14153
|
const typeRegistry = providedTypeRegistry ?? createBuiltinRegistry();
|
|
14202
|
-
const
|
|
14154
|
+
const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
|
|
14203
14155
|
let cachedResponse;
|
|
14204
|
-
if (
|
|
14205
|
-
cachedResponse = await cache.get(
|
|
14156
|
+
if (cacheKey && cache) {
|
|
14157
|
+
cachedResponse = await cache.get(cacheKey);
|
|
14206
14158
|
}
|
|
14207
14159
|
const nowFn = now ?? (() => /* @__PURE__ */ new Date());
|
|
14208
14160
|
let workspacePath = sharedWorkspacePath;
|
|
@@ -14241,12 +14193,12 @@ async function runEvalCase(options) {
|
|
|
14241
14193
|
}
|
|
14242
14194
|
}
|
|
14243
14195
|
}
|
|
14244
|
-
if (!workspacePath && (evalCase.workspace?.
|
|
14196
|
+
if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
|
|
14245
14197
|
workspacePath = getWorkspacePath(evalRunId, evalCase.id);
|
|
14246
|
-
await
|
|
14198
|
+
await mkdir12(workspacePath, { recursive: true });
|
|
14247
14199
|
}
|
|
14248
14200
|
if (evalCase.workspace?.repos?.length && workspacePath) {
|
|
14249
|
-
const perCaseRepoManager = new RepoManager(
|
|
14201
|
+
const perCaseRepoManager = new RepoManager(setupDebug);
|
|
14250
14202
|
try {
|
|
14251
14203
|
if (setupDebug) {
|
|
14252
14204
|
console.log(
|
|
@@ -14271,11 +14223,13 @@ async function runEvalCase(options) {
|
|
|
14271
14223
|
);
|
|
14272
14224
|
}
|
|
14273
14225
|
}
|
|
14274
|
-
|
|
14275
|
-
|
|
14226
|
+
const caseBeforeAllHook = evalCase.workspace?.hooks?.before_all_tests;
|
|
14227
|
+
if (workspacePath && hasHookCommand(caseBeforeAllHook)) {
|
|
14228
|
+
const beforeAllHook = caseBeforeAllHook;
|
|
14229
|
+
const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
|
|
14276
14230
|
if (setupDebug) {
|
|
14277
14231
|
console.log(
|
|
14278
|
-
`[setup] test=${evalCase.id} running before_all in cwd=${
|
|
14232
|
+
`[setup] test=${evalCase.id} running before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
14279
14233
|
);
|
|
14280
14234
|
}
|
|
14281
14235
|
const scriptContext = {
|
|
@@ -14288,7 +14242,7 @@ async function runEvalCase(options) {
|
|
|
14288
14242
|
};
|
|
14289
14243
|
try {
|
|
14290
14244
|
beforeAllOutput = await executeWorkspaceScript(
|
|
14291
|
-
evalCase.
|
|
14245
|
+
toScriptConfig(beforeAllHook, "before_all_tests", `test '${evalCase.id}'`),
|
|
14292
14246
|
scriptContext
|
|
14293
14247
|
);
|
|
14294
14248
|
if (setupDebug) {
|
|
@@ -14313,7 +14267,9 @@ async function runEvalCase(options) {
|
|
|
14313
14267
|
}
|
|
14314
14268
|
}
|
|
14315
14269
|
}
|
|
14316
|
-
|
|
14270
|
+
const caseBeforeEachHook = evalCase.workspace?.hooks?.before_each_test;
|
|
14271
|
+
if (workspacePath && hasHookCommand(caseBeforeEachHook)) {
|
|
14272
|
+
const beforeEachHook = caseBeforeEachHook;
|
|
14317
14273
|
const scriptContext = {
|
|
14318
14274
|
workspacePath,
|
|
14319
14275
|
testId: evalCase.id,
|
|
@@ -14324,7 +14280,7 @@ async function runEvalCase(options) {
|
|
|
14324
14280
|
};
|
|
14325
14281
|
try {
|
|
14326
14282
|
beforeEachOutput = await executeWorkspaceScript(
|
|
14327
|
-
evalCase.
|
|
14283
|
+
toScriptConfig(beforeEachHook, "before_each_test", `test '${evalCase.id}'`),
|
|
14328
14284
|
scriptContext
|
|
14329
14285
|
);
|
|
14330
14286
|
} catch (error) {
|
|
@@ -14412,8 +14368,8 @@ async function runEvalCase(options) {
|
|
|
14412
14368
|
}
|
|
14413
14369
|
return errorResult;
|
|
14414
14370
|
}
|
|
14415
|
-
if (
|
|
14416
|
-
await cache.set(
|
|
14371
|
+
if (cacheKey && cache && !cachedResponse) {
|
|
14372
|
+
await cache.set(cacheKey, providerResponse);
|
|
14417
14373
|
}
|
|
14418
14374
|
const output = providerResponse.output;
|
|
14419
14375
|
const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
|
|
@@ -14441,17 +14397,19 @@ async function runEvalCase(options) {
|
|
|
14441
14397
|
}
|
|
14442
14398
|
}
|
|
14443
14399
|
const providerError = extractProviderError(providerResponse);
|
|
14444
|
-
if (repoManager && workspacePath && evalCase.workspace?.reset
|
|
14400
|
+
if (repoManager && workspacePath && evalCase.workspace?.hooks?.after_each_test?.reset && evalCase.workspace.hooks.after_each_test.reset !== "none" && evalCase.workspace.repos) {
|
|
14445
14401
|
try {
|
|
14446
14402
|
await repoManager.reset(
|
|
14447
14403
|
evalCase.workspace.repos,
|
|
14448
14404
|
workspacePath,
|
|
14449
|
-
evalCase.workspace.reset
|
|
14405
|
+
evalCase.workspace.hooks.after_each_test.reset
|
|
14450
14406
|
);
|
|
14451
14407
|
} catch {
|
|
14452
14408
|
}
|
|
14453
14409
|
}
|
|
14454
|
-
|
|
14410
|
+
const caseAfterEachHook = evalCase.workspace?.hooks?.after_each_test;
|
|
14411
|
+
if (workspacePath && hasHookCommand(caseAfterEachHook)) {
|
|
14412
|
+
const afterEachHook = caseAfterEachHook;
|
|
14455
14413
|
const scriptContext = {
|
|
14456
14414
|
workspacePath,
|
|
14457
14415
|
testId: evalCase.id,
|
|
@@ -14462,7 +14420,7 @@ async function runEvalCase(options) {
|
|
|
14462
14420
|
};
|
|
14463
14421
|
try {
|
|
14464
14422
|
afterEachOutput = await executeWorkspaceScript(
|
|
14465
|
-
evalCase.
|
|
14423
|
+
toScriptConfig(afterEachHook, "after_each_test", `test '${evalCase.id}'`),
|
|
14466
14424
|
scriptContext,
|
|
14467
14425
|
"warn"
|
|
14468
14426
|
);
|
|
@@ -14512,8 +14470,13 @@ async function runEvalCase(options) {
|
|
|
14512
14470
|
await cleanupWorkspace(workspacePath).catch(() => {
|
|
14513
14471
|
});
|
|
14514
14472
|
} else if (isFailure) {
|
|
14515
|
-
|
|
14516
|
-
|
|
14473
|
+
if ((retainOnFailure ?? "keep") === "cleanup") {
|
|
14474
|
+
await cleanupWorkspace(workspacePath).catch(() => {
|
|
14475
|
+
});
|
|
14476
|
+
} else {
|
|
14477
|
+
return { ...finalResult, workspacePath };
|
|
14478
|
+
}
|
|
14479
|
+
} else if ((retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup")) !== "keep") {
|
|
14517
14480
|
await cleanupWorkspace(workspacePath).catch(() => {
|
|
14518
14481
|
});
|
|
14519
14482
|
}
|
|
@@ -14531,11 +14494,12 @@ async function runEvalCase(options) {
|
|
|
14531
14494
|
"evaluator_error"
|
|
14532
14495
|
);
|
|
14533
14496
|
if (workspacePath && !isSharedWorkspace) {
|
|
14534
|
-
if (forceCleanup) {
|
|
14497
|
+
if (forceCleanup || (retainOnFailure ?? "keep") === "cleanup") {
|
|
14535
14498
|
await cleanupWorkspace(workspacePath).catch(() => {
|
|
14536
14499
|
});
|
|
14500
|
+
} else {
|
|
14501
|
+
return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
|
|
14537
14502
|
}
|
|
14538
|
-
return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
|
|
14539
14503
|
}
|
|
14540
14504
|
return { ...errorResult, beforeEachOutput, afterEachOutput };
|
|
14541
14505
|
}
|
|
@@ -14554,7 +14518,9 @@ async function runEvalCaseWithTrials(options, trialsConfig) {
|
|
|
14554
14518
|
useCache: false,
|
|
14555
14519
|
// Force cleanup for intermediate trials
|
|
14556
14520
|
cleanupWorkspaces: isLastDeclaredTrial ? options.cleanupWorkspaces : true,
|
|
14557
|
-
keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false
|
|
14521
|
+
keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false,
|
|
14522
|
+
retainOnSuccess: isLastDeclaredTrial ? options.retainOnSuccess : "cleanup",
|
|
14523
|
+
retainOnFailure: isLastDeclaredTrial ? options.retainOnFailure : "cleanup"
|
|
14558
14524
|
};
|
|
14559
14525
|
const result = await runEvalCase(trialOptions);
|
|
14560
14526
|
allResults.push(result);
|
|
@@ -15077,7 +15043,7 @@ function extractProviderError(response) {
|
|
|
15077
15043
|
return trimmed.length > 0 ? trimmed : void 0;
|
|
15078
15044
|
}
|
|
15079
15045
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
15080
|
-
const hash =
|
|
15046
|
+
const hash = createHash2("sha256");
|
|
15081
15047
|
hash.update(provider.id);
|
|
15082
15048
|
hash.update(target.name);
|
|
15083
15049
|
hash.update(evalCase.id);
|
|
@@ -15145,7 +15111,7 @@ function computeWeightedMean(entries) {
|
|
|
15145
15111
|
}
|
|
15146
15112
|
|
|
15147
15113
|
// src/evaluation/evaluate.ts
|
|
15148
|
-
import { existsSync as
|
|
15114
|
+
import { existsSync as existsSync3 } from "node:fs";
|
|
15149
15115
|
import path40 from "node:path";
|
|
15150
15116
|
async function evaluate(config) {
|
|
15151
15117
|
const startTime = Date.now();
|
|
@@ -15264,7 +15230,7 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
15264
15230
|
for (const dir of chain) {
|
|
15265
15231
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
15266
15232
|
const targetsPath = path40.join(dir, candidate);
|
|
15267
|
-
if (!
|
|
15233
|
+
if (!existsSync3(targetsPath)) continue;
|
|
15268
15234
|
try {
|
|
15269
15235
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
15270
15236
|
const defaultTarget = definitions.find((d) => d.name === "default");
|
|
@@ -15282,7 +15248,7 @@ async function loadEnvHierarchy(repoRoot) {
|
|
|
15282
15248
|
const envFiles = [];
|
|
15283
15249
|
for (const dir of chain) {
|
|
15284
15250
|
const envPath = path40.join(dir, ".env");
|
|
15285
|
-
if (
|
|
15251
|
+
if (existsSync3(envPath)) envFiles.push(envPath);
|
|
15286
15252
|
}
|
|
15287
15253
|
for (let i = envFiles.length - 1; i >= 0; i--) {
|
|
15288
15254
|
try {
|
|
@@ -15360,12 +15326,12 @@ var CONFIG_FILE_NAMES = [
|
|
|
15360
15326
|
".agentv/config.js"
|
|
15361
15327
|
];
|
|
15362
15328
|
async function loadTsConfig(projectRoot) {
|
|
15363
|
-
const { existsSync:
|
|
15329
|
+
const { existsSync: existsSync4 } = await import("node:fs");
|
|
15364
15330
|
const { pathToFileURL } = await import("node:url");
|
|
15365
15331
|
const { join: join2 } = await import("node:path");
|
|
15366
15332
|
for (const fileName of CONFIG_FILE_NAMES) {
|
|
15367
15333
|
const filePath = join2(projectRoot, fileName);
|
|
15368
|
-
if (!
|
|
15334
|
+
if (!existsSync4(filePath)) {
|
|
15369
15335
|
continue;
|
|
15370
15336
|
}
|
|
15371
15337
|
try {
|
|
@@ -15462,7 +15428,7 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
15462
15428
|
}
|
|
15463
15429
|
|
|
15464
15430
|
// src/evaluation/cache/response-cache.ts
|
|
15465
|
-
import { mkdir as
|
|
15431
|
+
import { mkdir as mkdir13, readFile as readFile12, writeFile as writeFile8 } from "node:fs/promises";
|
|
15466
15432
|
import path41 from "node:path";
|
|
15467
15433
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
15468
15434
|
var ResponseCache = class {
|
|
@@ -15482,8 +15448,8 @@ var ResponseCache = class {
|
|
|
15482
15448
|
async set(key, value) {
|
|
15483
15449
|
const filePath = this.keyToPath(key);
|
|
15484
15450
|
const dir = path41.dirname(filePath);
|
|
15485
|
-
await
|
|
15486
|
-
await
|
|
15451
|
+
await mkdir13(dir, { recursive: true });
|
|
15452
|
+
await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
15487
15453
|
}
|
|
15488
15454
|
keyToPath(key) {
|
|
15489
15455
|
const prefix = key.slice(0, 2);
|
|
@@ -16017,7 +15983,6 @@ export {
|
|
|
16017
15983
|
freeformEvaluationSchema,
|
|
16018
15984
|
generateRubrics,
|
|
16019
15985
|
getAgentvHome,
|
|
16020
|
-
getGitCacheRoot,
|
|
16021
15986
|
getHitCount,
|
|
16022
15987
|
getSubagentsRoot,
|
|
16023
15988
|
getTraceStateRoot,
|