agentv 2.15.0 → 2.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-IKGJTJSU.js → chunk-JZ62HLUC.js} +68 -120
- package/dist/chunk-JZ62HLUC.js.map +1 -0
- package/dist/{chunk-VBK7BJLE.js → chunk-LZ5MPQFM.js} +210 -245
- package/dist/chunk-LZ5MPQFM.js.map +1 -0
- package/dist/{chunk-ZDSLKUCM.js → chunk-QLCVA3ZS.js} +38 -17
- package/dist/chunk-QLCVA3ZS.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-XZPR3AGM.js → dist-BMNEH377.js} +2 -4
- package/dist/index.js +3 -3
- package/dist/{interactive-SSGXAAKA.js → interactive-ZXYNPRCT.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-IKGJTJSU.js.map +0 -1
- package/dist/chunk-VBK7BJLE.js.map +0 -1
- package/dist/chunk-ZDSLKUCM.js.map +0 -1
- /package/dist/{dist-XZPR3AGM.js.map → dist-BMNEH377.js.map} +0 -0
- /package/dist/{interactive-SSGXAAKA.js.map → interactive-ZXYNPRCT.js.map} +0 -0
|
@@ -148,7 +148,7 @@ var require_dist = __commonJS({
|
|
|
148
148
|
}
|
|
149
149
|
});
|
|
150
150
|
|
|
151
|
-
// ../../packages/core/dist/chunk-
|
|
151
|
+
// ../../packages/core/dist/chunk-E6AJPAXM.js
|
|
152
152
|
import { constants } from "node:fs";
|
|
153
153
|
import { access, readFile } from "node:fs/promises";
|
|
154
154
|
import path from "node:path";
|
|
@@ -4195,7 +4195,7 @@ var coerce = {
|
|
|
4195
4195
|
};
|
|
4196
4196
|
var NEVER = INVALID;
|
|
4197
4197
|
|
|
4198
|
-
// ../../packages/core/dist/chunk-
|
|
4198
|
+
// ../../packages/core/dist/chunk-E6AJPAXM.js
|
|
4199
4199
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
4200
4200
|
var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
|
|
4201
4201
|
var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
|
|
@@ -33960,8 +33960,8 @@ import { randomBytes } from "node:crypto";
|
|
|
33960
33960
|
import { createServer } from "node:http";
|
|
33961
33961
|
import fs2 from "node:fs/promises";
|
|
33962
33962
|
import path31 from "node:path";
|
|
33963
|
-
import { createHash as
|
|
33964
|
-
import { mkdir as
|
|
33963
|
+
import { createHash as createHash2, randomUUID as randomUUID7 } from "node:crypto";
|
|
33964
|
+
import { mkdir as mkdir12, stat as stat7 } from "node:fs/promises";
|
|
33965
33965
|
import path39 from "node:path";
|
|
33966
33966
|
import micromatch4 from "micromatch";
|
|
33967
33967
|
import { readFileSync } from "node:fs";
|
|
@@ -33981,16 +33981,13 @@ import { cp as cp2, mkdir as mkdir11, readFile as readFile11, readdir as readdir
|
|
|
33981
33981
|
import path36 from "node:path";
|
|
33982
33982
|
import { promisify as promisify5 } from "node:util";
|
|
33983
33983
|
import { execFile as execFile2 } from "node:child_process";
|
|
33984
|
-
import { createHash as createHash2 } from "node:crypto";
|
|
33985
|
-
import { existsSync as existsSync3 } from "node:fs";
|
|
33986
|
-
import { mkdir as mkdir12, rm as rm6, unlink as unlink2, writeFile as writeFile8 } from "node:fs/promises";
|
|
33987
33984
|
import path37 from "node:path";
|
|
33988
33985
|
import { promisify as promisify6 } from "node:util";
|
|
33989
33986
|
import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
|
|
33990
33987
|
import path38 from "node:path";
|
|
33991
|
-
import { existsSync as
|
|
33988
|
+
import { existsSync as existsSync3 } from "node:fs";
|
|
33992
33989
|
import path40 from "node:path";
|
|
33993
|
-
import { mkdir as
|
|
33990
|
+
import { mkdir as mkdir13, readFile as readFile12, writeFile as writeFile8 } from "node:fs/promises";
|
|
33994
33991
|
import path41 from "node:path";
|
|
33995
33992
|
function computeTraceSummary(messages) {
|
|
33996
33993
|
const toolCallCounts = {};
|
|
@@ -37015,17 +37012,38 @@ function parseRepoConfig(raw) {
|
|
|
37015
37012
|
...clone2 !== void 0 && { clone: clone2 }
|
|
37016
37013
|
};
|
|
37017
37014
|
}
|
|
37018
|
-
function
|
|
37015
|
+
function parseWorkspaceHookConfig(raw, evalFileDir) {
|
|
37019
37016
|
if (!isJsonObject(raw)) return void 0;
|
|
37017
|
+
const script = parseWorkspaceScriptConfig(raw, evalFileDir);
|
|
37020
37018
|
const obj = raw;
|
|
37021
|
-
const
|
|
37022
|
-
const
|
|
37023
|
-
if (!
|
|
37019
|
+
const reset = obj.reset === "none" || obj.reset === "fast" || obj.reset === "strict" ? obj.reset : void 0;
|
|
37020
|
+
const clean = obj.clean === "always" || obj.clean === "on_success" || obj.clean === "on_failure" || obj.clean === "never" ? obj.clean : void 0;
|
|
37021
|
+
if (!script && !reset && !clean) return void 0;
|
|
37024
37022
|
return {
|
|
37025
|
-
...
|
|
37026
|
-
...
|
|
37023
|
+
...script ?? {},
|
|
37024
|
+
...reset !== void 0 && { reset },
|
|
37025
|
+
...clean !== void 0 && { clean }
|
|
37027
37026
|
};
|
|
37028
37027
|
}
|
|
37028
|
+
function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
37029
|
+
if (!isJsonObject(raw)) return void 0;
|
|
37030
|
+
const obj = raw;
|
|
37031
|
+
const beforeAllTests = parseWorkspaceHookConfig(obj.before_all_tests, evalFileDir);
|
|
37032
|
+
const beforeEachTest = parseWorkspaceHookConfig(obj.before_each_test, evalFileDir);
|
|
37033
|
+
const afterEachTest = parseWorkspaceHookConfig(obj.after_each_test, evalFileDir);
|
|
37034
|
+
const afterAllTests = parseWorkspaceHookConfig(obj.after_all_tests, evalFileDir);
|
|
37035
|
+
const onReuse = parseWorkspaceHookConfig(obj.on_reuse, evalFileDir);
|
|
37036
|
+
const onFinish = parseWorkspaceHookConfig(obj.on_finish, evalFileDir);
|
|
37037
|
+
const hooks = {
|
|
37038
|
+
...beforeAllTests !== void 0 && { before_all_tests: beforeAllTests },
|
|
37039
|
+
...beforeEachTest !== void 0 && { before_each_test: beforeEachTest },
|
|
37040
|
+
...afterEachTest !== void 0 && { after_each_test: afterEachTest },
|
|
37041
|
+
...afterAllTests !== void 0 && { after_all_tests: afterAllTests },
|
|
37042
|
+
...onReuse !== void 0 && { on_reuse: onReuse },
|
|
37043
|
+
...onFinish !== void 0 && { on_finish: onFinish }
|
|
37044
|
+
};
|
|
37045
|
+
return Object.keys(hooks).length > 0 ? hooks : void 0;
|
|
37046
|
+
}
|
|
37029
37047
|
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
37030
37048
|
if (typeof raw === "string") {
|
|
37031
37049
|
const workspaceFilePath = path8.resolve(evalFileDir, raw);
|
|
@@ -37055,37 +37073,56 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
37055
37073
|
}
|
|
37056
37074
|
const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
|
|
37057
37075
|
const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
|
|
37058
|
-
const
|
|
37059
|
-
const
|
|
37060
|
-
const
|
|
37061
|
-
const
|
|
37062
|
-
|
|
37063
|
-
if (!template && !isolation && !repos && !reset && !beforeAll && !afterAll && !beforeEach && !afterEach)
|
|
37076
|
+
const hooks = parseWorkspaceHooksConfig(obj.hooks, evalFileDir);
|
|
37077
|
+
const mode = obj.mode === "pooled" || obj.mode === "ephemeral" || obj.mode === "static" ? obj.mode : void 0;
|
|
37078
|
+
const staticPath = typeof obj.static_path === "string" ? obj.static_path : void 0;
|
|
37079
|
+
const pool = typeof obj.pool === "boolean" ? obj.pool : void 0;
|
|
37080
|
+
if (!template && !isolation && !repos && !hooks && !mode && !staticPath && pool === void 0)
|
|
37064
37081
|
return void 0;
|
|
37065
37082
|
return {
|
|
37066
37083
|
...template !== void 0 && { template },
|
|
37067
37084
|
...isolation !== void 0 && { isolation },
|
|
37068
37085
|
...repos !== void 0 && { repos },
|
|
37069
|
-
...
|
|
37070
|
-
...
|
|
37071
|
-
...
|
|
37072
|
-
...
|
|
37073
|
-
...afterEach !== void 0 && { after_each: afterEach }
|
|
37086
|
+
...hooks !== void 0 && { hooks },
|
|
37087
|
+
...mode !== void 0 && { mode },
|
|
37088
|
+
...staticPath !== void 0 && { static_path: staticPath },
|
|
37089
|
+
...pool !== void 0 && { pool }
|
|
37074
37090
|
};
|
|
37075
37091
|
}
|
|
37076
37092
|
function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
37077
37093
|
if (!suiteLevel && !caseLevel) return void 0;
|
|
37078
37094
|
if (!suiteLevel) return caseLevel;
|
|
37079
37095
|
if (!caseLevel) return suiteLevel;
|
|
37096
|
+
const mergeHook = (suiteHook, caseHook) => {
|
|
37097
|
+
if (!suiteHook && !caseHook) return void 0;
|
|
37098
|
+
return {
|
|
37099
|
+
...suiteHook ?? {},
|
|
37100
|
+
...caseHook ?? {}
|
|
37101
|
+
};
|
|
37102
|
+
};
|
|
37103
|
+
const mergedHooks = {
|
|
37104
|
+
before_all_tests: mergeHook(
|
|
37105
|
+
suiteLevel.hooks?.before_all_tests,
|
|
37106
|
+
caseLevel.hooks?.before_all_tests
|
|
37107
|
+
),
|
|
37108
|
+
before_each_test: mergeHook(
|
|
37109
|
+
suiteLevel.hooks?.before_each_test,
|
|
37110
|
+
caseLevel.hooks?.before_each_test
|
|
37111
|
+
),
|
|
37112
|
+
after_each_test: mergeHook(suiteLevel.hooks?.after_each_test, caseLevel.hooks?.after_each_test),
|
|
37113
|
+
after_all_tests: mergeHook(suiteLevel.hooks?.after_all_tests, caseLevel.hooks?.after_all_tests),
|
|
37114
|
+
on_reuse: mergeHook(suiteLevel.hooks?.on_reuse, caseLevel.hooks?.on_reuse),
|
|
37115
|
+
on_finish: mergeHook(suiteLevel.hooks?.on_finish, caseLevel.hooks?.on_finish)
|
|
37116
|
+
};
|
|
37117
|
+
const hasHooks = Object.values(mergedHooks).some((hook) => hook !== void 0);
|
|
37080
37118
|
return {
|
|
37081
37119
|
template: caseLevel.template ?? suiteLevel.template,
|
|
37082
37120
|
isolation: caseLevel.isolation ?? suiteLevel.isolation,
|
|
37083
37121
|
repos: caseLevel.repos ?? suiteLevel.repos,
|
|
37084
|
-
|
|
37085
|
-
|
|
37086
|
-
|
|
37087
|
-
|
|
37088
|
-
after_each: caseLevel.after_each ?? suiteLevel.after_each
|
|
37122
|
+
...hasHooks && { hooks: mergedHooks },
|
|
37123
|
+
mode: caseLevel.mode ?? suiteLevel.mode,
|
|
37124
|
+
static_path: caseLevel.static_path ?? suiteLevel.static_path,
|
|
37125
|
+
pool: caseLevel.pool ?? suiteLevel.pool
|
|
37089
37126
|
};
|
|
37090
37127
|
}
|
|
37091
37128
|
function asString6(value) {
|
|
@@ -41008,9 +41045,6 @@ function getAgentvHome() {
|
|
|
41008
41045
|
function getWorkspacesRoot() {
|
|
41009
41046
|
return path21.join(getAgentvHome(), "workspaces");
|
|
41010
41047
|
}
|
|
41011
|
-
function getGitCacheRoot() {
|
|
41012
|
-
return path21.join(getAgentvHome(), "git-cache");
|
|
41013
|
-
}
|
|
41014
41048
|
function getSubagentsRoot() {
|
|
41015
41049
|
return path21.join(getAgentvHome(), "subagents");
|
|
41016
41050
|
}
|
|
@@ -42425,16 +42459,16 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
42425
42459
|
});
|
|
42426
42460
|
}
|
|
42427
42461
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
42428
|
-
const { mkdir:
|
|
42462
|
+
const { mkdir: mkdir14, readFile: readFile13, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
42429
42463
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
42430
42464
|
const path42 = await import("node:path");
|
|
42431
42465
|
const { randomUUID: randomUUID8 } = await import("node:crypto");
|
|
42432
42466
|
const dir = path42.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
|
|
42433
|
-
await
|
|
42467
|
+
await mkdir14(dir, { recursive: true });
|
|
42434
42468
|
const stdinPath = path42.join(dir, "stdin.txt");
|
|
42435
42469
|
const stdoutPath = path42.join(dir, "stdout.txt");
|
|
42436
42470
|
const stderrPath = path42.join(dir, "stderr.txt");
|
|
42437
|
-
await
|
|
42471
|
+
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
42438
42472
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
42439
42473
|
const { spawn: spawn4 } = await import("node:child_process");
|
|
42440
42474
|
try {
|
|
@@ -42467,7 +42501,7 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
42467
42501
|
const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
42468
42502
|
return { stdout, stderr, exitCode };
|
|
42469
42503
|
} finally {
|
|
42470
|
-
await
|
|
42504
|
+
await rm6(dir, { recursive: true, force: true });
|
|
42471
42505
|
}
|
|
42472
42506
|
}
|
|
42473
42507
|
var DEFAULT_MAX_CALLS = 50;
|
|
@@ -46536,7 +46570,7 @@ var WorkspacePoolManager = class {
|
|
|
46536
46570
|
* 7. Return the slot (with path, index, isExisting)
|
|
46537
46571
|
*/
|
|
46538
46572
|
async acquireWorkspace(options) {
|
|
46539
|
-
const { templatePath, repos, maxSlots, repoManager } = options;
|
|
46573
|
+
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
46540
46574
|
const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
|
|
46541
46575
|
const poolDir = path36.join(this.poolRoot, fingerprint);
|
|
46542
46576
|
await mkdir11(poolDir, { recursive: true });
|
|
@@ -46556,7 +46590,7 @@ var WorkspacePoolManager = class {
|
|
|
46556
46590
|
}
|
|
46557
46591
|
const slotExists = existsSync2(slotPath);
|
|
46558
46592
|
if (slotExists) {
|
|
46559
|
-
await this.resetSlot(slotPath, templatePath, repos);
|
|
46593
|
+
await this.resetSlot(slotPath, templatePath, repos, poolReset);
|
|
46560
46594
|
return {
|
|
46561
46595
|
index: i,
|
|
46562
46596
|
path: slotPath,
|
|
@@ -46688,15 +46722,19 @@ var WorkspacePoolManager = class {
|
|
|
46688
46722
|
* 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
|
|
46689
46723
|
* 2. Re-copy template files (skip repo directories)
|
|
46690
46724
|
*/
|
|
46691
|
-
async resetSlot(slotPath, templatePath, repos) {
|
|
46725
|
+
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
46692
46726
|
for (const repo of repos) {
|
|
46693
46727
|
const repoDir = path36.join(slotPath, repo.path);
|
|
46694
46728
|
if (!existsSync2(repoDir)) {
|
|
46695
46729
|
continue;
|
|
46696
46730
|
}
|
|
46731
|
+
if (poolReset === "none") {
|
|
46732
|
+
continue;
|
|
46733
|
+
}
|
|
46697
46734
|
const ref = repo.checkout?.ref ?? "HEAD";
|
|
46698
46735
|
await git(["reset", "--hard", ref], { cwd: repoDir });
|
|
46699
|
-
|
|
46736
|
+
const cleanFlag = poolReset === "strict" ? "-fdx" : "-fd";
|
|
46737
|
+
await git(["clean", cleanFlag], { cwd: repoDir });
|
|
46700
46738
|
}
|
|
46701
46739
|
if (templatePath) {
|
|
46702
46740
|
const repoDirNames = new Set(
|
|
@@ -46711,7 +46749,6 @@ var WorkspacePoolManager = class {
|
|
|
46711
46749
|
};
|
|
46712
46750
|
var execFileAsync2 = promisify6(execFile2);
|
|
46713
46751
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
46714
|
-
var LOCK_TIMEOUT_MS = 6e4;
|
|
46715
46752
|
function gitEnv2() {
|
|
46716
46753
|
const env = { ...process.env };
|
|
46717
46754
|
for (const key of Object.keys(env)) {
|
|
@@ -46726,10 +46763,6 @@ function gitEnv2() {
|
|
|
46726
46763
|
GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
|
|
46727
46764
|
};
|
|
46728
46765
|
}
|
|
46729
|
-
function cacheKey(source) {
|
|
46730
|
-
const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
|
|
46731
|
-
return createHash2("sha256").update(raw).digest("hex");
|
|
46732
|
-
}
|
|
46733
46766
|
function getSourceUrl(source) {
|
|
46734
46767
|
return source.type === "git" ? source.url : source.path;
|
|
46735
46768
|
}
|
|
@@ -46743,33 +46776,9 @@ async function git2(args, opts) {
|
|
|
46743
46776
|
});
|
|
46744
46777
|
return stdout.trim();
|
|
46745
46778
|
}
|
|
46746
|
-
async function acquireLock(lockPath) {
|
|
46747
|
-
const start = Date.now();
|
|
46748
|
-
while (Date.now() - start < LOCK_TIMEOUT_MS) {
|
|
46749
|
-
try {
|
|
46750
|
-
await writeFile8(lockPath, String(process.pid), { flag: "wx" });
|
|
46751
|
-
return;
|
|
46752
|
-
} catch (err) {
|
|
46753
|
-
if (err.code === "EEXIST") {
|
|
46754
|
-
await new Promise((r) => setTimeout(r, 200));
|
|
46755
|
-
continue;
|
|
46756
|
-
}
|
|
46757
|
-
throw err;
|
|
46758
|
-
}
|
|
46759
|
-
}
|
|
46760
|
-
throw new Error(`Timed out waiting for lock: ${lockPath}`);
|
|
46761
|
-
}
|
|
46762
|
-
async function releaseLock(lockPath) {
|
|
46763
|
-
try {
|
|
46764
|
-
await unlink2(lockPath);
|
|
46765
|
-
} catch {
|
|
46766
|
-
}
|
|
46767
|
-
}
|
|
46768
46779
|
var RepoManager = class {
|
|
46769
|
-
cacheDir;
|
|
46770
46780
|
verbose;
|
|
46771
|
-
constructor(
|
|
46772
|
-
this.cacheDir = cacheDir ?? getGitCacheRoot();
|
|
46781
|
+
constructor(verbose = false) {
|
|
46773
46782
|
this.verbose = verbose;
|
|
46774
46783
|
}
|
|
46775
46784
|
async runGit(args, opts) {
|
|
@@ -46794,86 +46803,18 @@ var RepoManager = class {
|
|
|
46794
46803
|
}
|
|
46795
46804
|
}
|
|
46796
46805
|
/**
|
|
46797
|
-
*
|
|
46798
|
-
* Creates on first access, fetches updates on subsequent calls.
|
|
46799
|
-
* Returns the absolute path to the cache directory.
|
|
46800
|
-
*/
|
|
46801
|
-
async ensureCache(source, depth, resolve2) {
|
|
46802
|
-
const key = cacheKey(source);
|
|
46803
|
-
const cachePath = path37.join(this.cacheDir, key);
|
|
46804
|
-
const lockPath = `${cachePath}.lock`;
|
|
46805
|
-
const cacheExists = existsSync3(path37.join(cachePath, "HEAD"));
|
|
46806
|
-
if (this.verbose) {
|
|
46807
|
-
console.log(
|
|
46808
|
-
`[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve2 ?? "remote"} cache=${cachePath}`
|
|
46809
|
-
);
|
|
46810
|
-
}
|
|
46811
|
-
if (resolve2 === "local") {
|
|
46812
|
-
if (cacheExists) {
|
|
46813
|
-
if (this.verbose) {
|
|
46814
|
-
console.log(`[repo] using existing local cache ${cachePath}`);
|
|
46815
|
-
}
|
|
46816
|
-
return cachePath;
|
|
46817
|
-
}
|
|
46818
|
-
const url2 = getSourceUrl(source);
|
|
46819
|
-
throw new Error(
|
|
46820
|
-
`No cache found for \`${url2}\`. Run \`agentv cache add --url ${url2} --from <local-path>\` to seed it.`
|
|
46821
|
-
);
|
|
46822
|
-
}
|
|
46823
|
-
await mkdir12(this.cacheDir, { recursive: true });
|
|
46824
|
-
const lockStartedAt = Date.now();
|
|
46825
|
-
await acquireLock(lockPath);
|
|
46826
|
-
if (this.verbose) {
|
|
46827
|
-
console.log(`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`);
|
|
46828
|
-
}
|
|
46829
|
-
try {
|
|
46830
|
-
if (cacheExists) {
|
|
46831
|
-
if (this.verbose) {
|
|
46832
|
-
console.log(`[repo] refreshing existing cache ${cachePath}`);
|
|
46833
|
-
}
|
|
46834
|
-
const fetchArgs = ["fetch", "--prune"];
|
|
46835
|
-
if (depth) {
|
|
46836
|
-
fetchArgs.push("--depth", String(depth));
|
|
46837
|
-
}
|
|
46838
|
-
await this.runGit(fetchArgs, { cwd: cachePath });
|
|
46839
|
-
} else {
|
|
46840
|
-
if (this.verbose) {
|
|
46841
|
-
console.log(`[repo] creating new cache ${cachePath}`);
|
|
46842
|
-
}
|
|
46843
|
-
const cloneArgs = ["clone", "--mirror", "--bare"];
|
|
46844
|
-
if (depth) {
|
|
46845
|
-
cloneArgs.push("--depth", String(depth));
|
|
46846
|
-
}
|
|
46847
|
-
const sourceUrl = getSourceUrl(source);
|
|
46848
|
-
const cloneUrl = depth && source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
|
|
46849
|
-
cloneArgs.push(cloneUrl, cachePath);
|
|
46850
|
-
await this.runGit(cloneArgs);
|
|
46851
|
-
}
|
|
46852
|
-
} finally {
|
|
46853
|
-
await releaseLock(lockPath);
|
|
46854
|
-
if (this.verbose) {
|
|
46855
|
-
console.log(`[repo] lock released path=${lockPath}`);
|
|
46856
|
-
}
|
|
46857
|
-
}
|
|
46858
|
-
return cachePath;
|
|
46859
|
-
}
|
|
46860
|
-
/**
|
|
46861
|
-
* Clone a repo from cache into the workspace at the configured path.
|
|
46806
|
+
* Clone a repo directly from source into the workspace at the configured path.
|
|
46862
46807
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
46863
46808
|
*/
|
|
46864
46809
|
async materialize(repo, workspacePath) {
|
|
46865
46810
|
const targetDir = path37.join(workspacePath, repo.path);
|
|
46811
|
+
const sourceUrl = getSourceUrl(repo.source);
|
|
46866
46812
|
const startedAt = Date.now();
|
|
46867
46813
|
if (this.verbose) {
|
|
46868
46814
|
console.log(
|
|
46869
|
-
`[repo] materialize start path=${repo.path} source=${
|
|
46815
|
+
`[repo] materialize start path=${repo.path} source=${sourceUrl} workspace=${workspacePath}`
|
|
46870
46816
|
);
|
|
46871
46817
|
}
|
|
46872
|
-
const cachePath = await this.ensureCache(
|
|
46873
|
-
repo.source,
|
|
46874
|
-
repo.clone?.depth,
|
|
46875
|
-
repo.checkout?.resolve
|
|
46876
|
-
);
|
|
46877
46818
|
const cloneArgs = ["clone"];
|
|
46878
46819
|
if (repo.clone?.depth) {
|
|
46879
46820
|
cloneArgs.push("--depth", String(repo.clone.depth));
|
|
@@ -46882,7 +46823,7 @@ var RepoManager = class {
|
|
|
46882
46823
|
cloneArgs.push("--filter", repo.clone.filter);
|
|
46883
46824
|
}
|
|
46884
46825
|
cloneArgs.push("--no-checkout");
|
|
46885
|
-
const cloneUrl = repo.clone?.depth || repo.clone?.filter ? `file://${
|
|
46826
|
+
const cloneUrl = (repo.clone?.depth || repo.clone?.filter) && repo.source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
|
|
46886
46827
|
cloneArgs.push(cloneUrl, targetDir);
|
|
46887
46828
|
await this.runGit(cloneArgs);
|
|
46888
46829
|
if (repo.clone?.sparse?.length) {
|
|
@@ -46954,52 +46895,14 @@ var RepoManager = class {
|
|
|
46954
46895
|
}
|
|
46955
46896
|
}
|
|
46956
46897
|
/** Reset repos in workspace to their checkout state. */
|
|
46957
|
-
async reset(repos, workspacePath,
|
|
46958
|
-
|
|
46959
|
-
for (const repo of repos) {
|
|
46960
|
-
const targetDir = path37.join(workspacePath, repo.path);
|
|
46961
|
-
await rm6(targetDir, { recursive: true, force: true });
|
|
46962
|
-
}
|
|
46963
|
-
await this.materializeAll(repos, workspacePath);
|
|
46964
|
-
return;
|
|
46965
|
-
}
|
|
46898
|
+
async reset(repos, workspacePath, reset) {
|
|
46899
|
+
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
46966
46900
|
for (const repo of repos) {
|
|
46967
46901
|
const targetDir = path37.join(workspacePath, repo.path);
|
|
46968
46902
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
46969
|
-
await this.runGit(["clean",
|
|
46903
|
+
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
46970
46904
|
}
|
|
46971
46905
|
}
|
|
46972
|
-
/**
|
|
46973
|
-
* Seed the cache from a local repository, setting the remote to a given URL.
|
|
46974
|
-
* Useful for avoiding slow network clones when a local clone already exists.
|
|
46975
|
-
*/
|
|
46976
|
-
async seedCache(localPath, remoteUrl, opts) {
|
|
46977
|
-
const source = { type: "git", url: remoteUrl };
|
|
46978
|
-
const key = cacheKey(source);
|
|
46979
|
-
const cachePath = path37.join(this.cacheDir, key);
|
|
46980
|
-
const lockPath = `${cachePath}.lock`;
|
|
46981
|
-
await mkdir12(this.cacheDir, { recursive: true });
|
|
46982
|
-
await acquireLock(lockPath);
|
|
46983
|
-
try {
|
|
46984
|
-
if (existsSync3(path37.join(cachePath, "HEAD"))) {
|
|
46985
|
-
if (!opts?.force) {
|
|
46986
|
-
throw new Error(
|
|
46987
|
-
`Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
|
|
46988
|
-
);
|
|
46989
|
-
}
|
|
46990
|
-
await rm6(cachePath, { recursive: true, force: true });
|
|
46991
|
-
}
|
|
46992
|
-
await git2(["clone", "--mirror", "--bare", localPath, cachePath]);
|
|
46993
|
-
await git2(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
|
|
46994
|
-
} finally {
|
|
46995
|
-
await releaseLock(lockPath);
|
|
46996
|
-
}
|
|
46997
|
-
return cachePath;
|
|
46998
|
-
}
|
|
46999
|
-
/** Remove the entire cache directory. */
|
|
47000
|
-
async cleanCache() {
|
|
47001
|
-
await rm6(this.cacheDir, { recursive: true, force: true });
|
|
47002
|
-
}
|
|
47003
46906
|
};
|
|
47004
46907
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
47005
46908
|
if (!templatePath) {
|
|
@@ -47076,6 +46979,22 @@ function classifyQualityStatus(score) {
|
|
|
47076
46979
|
function usesFileReferencePrompt(provider) {
|
|
47077
46980
|
return isAgentProvider(provider) || provider.kind === "cli";
|
|
47078
46981
|
}
|
|
46982
|
+
function toScriptConfig(hook, hookName, context) {
|
|
46983
|
+
const command = hook.command ?? hook.script;
|
|
46984
|
+
if (!command || command.length === 0) {
|
|
46985
|
+
throw new Error(`${hookName} hook in ${context} requires command or script`);
|
|
46986
|
+
}
|
|
46987
|
+
return {
|
|
46988
|
+
command,
|
|
46989
|
+
...hook.timeout_ms !== void 0 && { timeout_ms: hook.timeout_ms },
|
|
46990
|
+
...hook.timeoutMs !== void 0 && { timeoutMs: hook.timeoutMs },
|
|
46991
|
+
...hook.cwd !== void 0 && { cwd: hook.cwd },
|
|
46992
|
+
...hook.script !== void 0 && { script: hook.script }
|
|
46993
|
+
};
|
|
46994
|
+
}
|
|
46995
|
+
function hasHookCommand(hook) {
|
|
46996
|
+
return !!(hook?.command && hook.command.length > 0 || hook?.script && hook.script.length > 0);
|
|
46997
|
+
}
|
|
47079
46998
|
function getWorkspaceTemplate(target) {
|
|
47080
46999
|
const config2 = target.config;
|
|
47081
47000
|
if ("workspaceTemplate" in config2 && typeof config2.workspaceTemplate === "string") {
|
|
@@ -47109,7 +47028,12 @@ async function runEvaluation(options) {
|
|
|
47109
47028
|
failOnError,
|
|
47110
47029
|
poolWorkspaces,
|
|
47111
47030
|
poolMaxSlots: configPoolMaxSlots,
|
|
47112
|
-
workspace:
|
|
47031
|
+
workspace: legacyWorkspacePath,
|
|
47032
|
+
workspaceMode,
|
|
47033
|
+
workspacePath,
|
|
47034
|
+
workspaceClean,
|
|
47035
|
+
retainOnSuccess,
|
|
47036
|
+
retainOnFailure
|
|
47113
47037
|
} = options;
|
|
47114
47038
|
let useCache = options.useCache;
|
|
47115
47039
|
if (trials && trials.count > 1 && useCache) {
|
|
@@ -47245,13 +47169,23 @@ async function runEvaluation(options) {
|
|
|
47245
47169
|
}
|
|
47246
47170
|
};
|
|
47247
47171
|
const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
|
|
47248
|
-
|
|
47172
|
+
const configuredMode = suiteWorkspace?.mode ?? workspaceMode;
|
|
47173
|
+
const configuredStaticPath = suiteWorkspace?.static_path ?? workspacePath ?? legacyWorkspacePath;
|
|
47174
|
+
const useStaticWorkspace = configuredMode === "static" || !!configuredStaticPath && !configuredMode;
|
|
47175
|
+
if (useStaticWorkspace && isPerTestIsolation) {
|
|
47249
47176
|
throw new Error(
|
|
47250
|
-
"
|
|
47177
|
+
"static workspace mode is incompatible with isolation: per_test. Use isolation: shared (default)."
|
|
47251
47178
|
);
|
|
47252
47179
|
}
|
|
47253
|
-
|
|
47254
|
-
|
|
47180
|
+
if (configuredMode === "static" && !configuredStaticPath) {
|
|
47181
|
+
throw new Error("workspace.mode=static requires workspace.static_path or --workspace-path");
|
|
47182
|
+
}
|
|
47183
|
+
const hasSharedWorkspace = !!(useStaticWorkspace || workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation);
|
|
47184
|
+
const poolEnabled = configuredMode === "pooled" ? true : configuredMode === "ephemeral" || useStaticWorkspace ? false : suiteWorkspace?.pool ?? poolWorkspaces ?? true;
|
|
47185
|
+
const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
|
|
47186
|
+
const finishCleanPolicy = suiteWorkspace?.hooks?.on_finish?.clean;
|
|
47187
|
+
const resolvedRetainOnSuccess = (finishCleanPolicy === "always" || finishCleanPolicy === "on_success" ? "cleanup" : finishCleanPolicy === "on_failure" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
|
|
47188
|
+
const resolvedRetainOnFailure = (finishCleanPolicy === "always" || finishCleanPolicy === "on_failure" ? "cleanup" : finishCleanPolicy === "on_success" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
|
|
47255
47189
|
const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
|
|
47256
47190
|
const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
|
|
47257
47191
|
setupLog(
|
|
@@ -47272,20 +47206,21 @@ async function runEvaluation(options) {
|
|
|
47272
47206
|
const availablePoolSlots = [];
|
|
47273
47207
|
const poolSlotBaselines = /* @__PURE__ */ new Map();
|
|
47274
47208
|
const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
|
|
47275
|
-
if (
|
|
47276
|
-
sharedWorkspacePath =
|
|
47277
|
-
setupLog(`using
|
|
47209
|
+
if (useStaticWorkspace && configuredStaticPath) {
|
|
47210
|
+
sharedWorkspacePath = configuredStaticPath;
|
|
47211
|
+
setupLog(`using static workspace: ${configuredStaticPath}`);
|
|
47278
47212
|
} else if (usePool && suiteWorkspace?.repos) {
|
|
47279
47213
|
const slotsNeeded = workers;
|
|
47280
47214
|
setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
|
|
47281
47215
|
poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
|
|
47282
|
-
const poolRepoManager = new RepoManager(
|
|
47216
|
+
const poolRepoManager = new RepoManager(verbose);
|
|
47283
47217
|
for (let i = 0; i < slotsNeeded; i++) {
|
|
47284
47218
|
const slot = await poolManager.acquireWorkspace({
|
|
47285
47219
|
templatePath: workspaceTemplate,
|
|
47286
47220
|
repos: suiteWorkspace.repos,
|
|
47287
47221
|
maxSlots: poolMaxSlots,
|
|
47288
|
-
repoManager: poolRepoManager
|
|
47222
|
+
repoManager: poolRepoManager,
|
|
47223
|
+
poolReset: (workspaceClean === "full" ? "strict" : workspaceClean === "standard" ? "fast" : null) ?? suiteWorkspace.hooks?.on_reuse?.reset ?? "fast"
|
|
47289
47224
|
});
|
|
47290
47225
|
poolSlots.push(slot);
|
|
47291
47226
|
setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
|
|
@@ -47305,9 +47240,9 @@ async function runEvaluation(options) {
|
|
|
47305
47240
|
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
47306
47241
|
throw new Error(`Failed to create shared workspace: ${message}`);
|
|
47307
47242
|
}
|
|
47308
|
-
} else if (suiteWorkspace?.
|
|
47243
|
+
} else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
|
|
47309
47244
|
sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
|
|
47310
|
-
await
|
|
47245
|
+
await mkdir12(sharedWorkspacePath, { recursive: true });
|
|
47311
47246
|
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
47312
47247
|
}
|
|
47313
47248
|
try {
|
|
@@ -47319,7 +47254,7 @@ async function runEvaluation(options) {
|
|
|
47319
47254
|
} catch {
|
|
47320
47255
|
}
|
|
47321
47256
|
}
|
|
47322
|
-
const repoManager = suiteWorkspace?.repos?.length && !usePool && !
|
|
47257
|
+
const repoManager = suiteWorkspace?.repos?.length && !usePool && !useStaticWorkspace ? new RepoManager(verbose) : void 0;
|
|
47323
47258
|
if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
|
|
47324
47259
|
setupLog(
|
|
47325
47260
|
`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
|
|
@@ -47329,17 +47264,19 @@ async function runEvaluation(options) {
|
|
|
47329
47264
|
setupLog("shared repo materialization complete");
|
|
47330
47265
|
} catch (error40) {
|
|
47331
47266
|
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
47332
|
-
if (sharedWorkspacePath && !
|
|
47267
|
+
if (sharedWorkspacePath && !useStaticWorkspace) {
|
|
47333
47268
|
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
47334
47269
|
});
|
|
47335
47270
|
}
|
|
47336
47271
|
throw new Error(`Failed to materialize repos: ${message}`);
|
|
47337
47272
|
}
|
|
47338
47273
|
}
|
|
47339
|
-
|
|
47340
|
-
|
|
47274
|
+
const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all_tests;
|
|
47275
|
+
if (sharedWorkspacePath && hasHookCommand(suiteBeforeAllHook)) {
|
|
47276
|
+
const beforeAllHook = suiteBeforeAllHook;
|
|
47277
|
+
const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
|
|
47341
47278
|
setupLog(
|
|
47342
|
-
`running shared before_all in cwd=${
|
|
47279
|
+
`running shared before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
47343
47280
|
);
|
|
47344
47281
|
const scriptContext = {
|
|
47345
47282
|
workspacePath: sharedWorkspacePath,
|
|
@@ -47348,18 +47285,22 @@ async function runEvaluation(options) {
|
|
|
47348
47285
|
evalDir
|
|
47349
47286
|
};
|
|
47350
47287
|
try {
|
|
47351
|
-
beforeAllOutput = await executeWorkspaceScript(
|
|
47288
|
+
beforeAllOutput = await executeWorkspaceScript(
|
|
47289
|
+
toScriptConfig(beforeAllHook, "before_all_tests", "suite workspace"),
|
|
47290
|
+
scriptContext
|
|
47291
|
+
);
|
|
47352
47292
|
setupLog("shared before_all completed");
|
|
47353
47293
|
} catch (error40) {
|
|
47354
47294
|
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
47355
|
-
if (sharedWorkspacePath && !
|
|
47295
|
+
if (sharedWorkspacePath && !useStaticWorkspace) {
|
|
47356
47296
|
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
47357
47297
|
});
|
|
47358
47298
|
}
|
|
47359
47299
|
throw new Error(`before_all script failed: ${message}`);
|
|
47360
47300
|
}
|
|
47361
47301
|
}
|
|
47362
|
-
if (availablePoolSlots.length > 0 &&
|
|
47302
|
+
if (availablePoolSlots.length > 0 && hasHookCommand(suiteBeforeAllHook)) {
|
|
47303
|
+
const beforeAllHook = suiteBeforeAllHook;
|
|
47363
47304
|
for (const slot of availablePoolSlots) {
|
|
47364
47305
|
setupLog(`running before_all on pool slot ${slot.index}`);
|
|
47365
47306
|
const scriptContext = {
|
|
@@ -47369,7 +47310,10 @@ async function runEvaluation(options) {
|
|
|
47369
47310
|
evalDir
|
|
47370
47311
|
};
|
|
47371
47312
|
try {
|
|
47372
|
-
const output = await executeWorkspaceScript(
|
|
47313
|
+
const output = await executeWorkspaceScript(
|
|
47314
|
+
toScriptConfig(beforeAllHook, "before_all_tests", "suite workspace"),
|
|
47315
|
+
scriptContext
|
|
47316
|
+
);
|
|
47373
47317
|
if (!beforeAllOutput) beforeAllOutput = output;
|
|
47374
47318
|
setupLog(`before_all completed on pool slot ${slot.index}`);
|
|
47375
47319
|
} catch (error40) {
|
|
@@ -47501,6 +47445,8 @@ async function runEvaluation(options) {
|
|
|
47501
47445
|
evalRunId,
|
|
47502
47446
|
keepWorkspaces,
|
|
47503
47447
|
cleanupWorkspaces,
|
|
47448
|
+
retainOnSuccess: resolvedRetainOnSuccess,
|
|
47449
|
+
retainOnFailure: resolvedRetainOnFailure,
|
|
47504
47450
|
sharedWorkspacePath: testWorkspacePath,
|
|
47505
47451
|
sharedBaselineCommit: testBaselineCommit,
|
|
47506
47452
|
suiteWorkspaceFile,
|
|
@@ -47594,7 +47540,9 @@ async function runEvaluation(options) {
|
|
|
47594
47540
|
}
|
|
47595
47541
|
}
|
|
47596
47542
|
const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
|
|
47597
|
-
|
|
47543
|
+
const suiteAfterAllHook = suiteWorkspace?.hooks?.after_all_tests;
|
|
47544
|
+
if (afterAllWorkspaces.length > 0 && hasHookCommand(suiteAfterAllHook)) {
|
|
47545
|
+
const afterAllHook = suiteAfterAllHook;
|
|
47598
47546
|
for (const wsPath of afterAllWorkspaces) {
|
|
47599
47547
|
const scriptContext = {
|
|
47600
47548
|
workspacePath: wsPath,
|
|
@@ -47604,7 +47552,7 @@ async function runEvaluation(options) {
|
|
|
47604
47552
|
};
|
|
47605
47553
|
try {
|
|
47606
47554
|
const afterAllOutput = await executeWorkspaceScript(
|
|
47607
|
-
|
|
47555
|
+
toScriptConfig(afterAllHook, "after_all_tests", "suite workspace"),
|
|
47608
47556
|
scriptContext,
|
|
47609
47557
|
"warn"
|
|
47610
47558
|
);
|
|
@@ -47615,12 +47563,14 @@ async function runEvaluation(options) {
|
|
|
47615
47563
|
}
|
|
47616
47564
|
}
|
|
47617
47565
|
}
|
|
47618
|
-
if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !
|
|
47566
|
+
if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !useStaticWorkspace) {
|
|
47619
47567
|
const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
|
|
47620
|
-
if (
|
|
47621
|
-
|
|
47622
|
-
|
|
47623
|
-
|
|
47568
|
+
if (hasFailure) {
|
|
47569
|
+
if (resolvedRetainOnFailure === "cleanup") {
|
|
47570
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
47571
|
+
});
|
|
47572
|
+
}
|
|
47573
|
+
} else if (resolvedRetainOnSuccess === "cleanup") {
|
|
47624
47574
|
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
47625
47575
|
});
|
|
47626
47576
|
}
|
|
@@ -47814,6 +47764,8 @@ async function runEvalCase(options) {
|
|
|
47814
47764
|
evalRunId,
|
|
47815
47765
|
keepWorkspaces,
|
|
47816
47766
|
cleanupWorkspaces: forceCleanup,
|
|
47767
|
+
retainOnSuccess,
|
|
47768
|
+
retainOnFailure,
|
|
47817
47769
|
sharedWorkspacePath,
|
|
47818
47770
|
sharedBaselineCommit,
|
|
47819
47771
|
suiteWorkspaceFile,
|
|
@@ -47825,10 +47777,10 @@ async function runEvalCase(options) {
|
|
|
47825
47777
|
const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
|
|
47826
47778
|
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
47827
47779
|
const typeRegistry = providedTypeRegistry ?? createBuiltinRegistry();
|
|
47828
|
-
const
|
|
47780
|
+
const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
|
|
47829
47781
|
let cachedResponse;
|
|
47830
|
-
if (
|
|
47831
|
-
cachedResponse = await cache.get(
|
|
47782
|
+
if (cacheKey && cache) {
|
|
47783
|
+
cachedResponse = await cache.get(cacheKey);
|
|
47832
47784
|
}
|
|
47833
47785
|
const nowFn = now ?? (() => /* @__PURE__ */ new Date());
|
|
47834
47786
|
let workspacePath = sharedWorkspacePath;
|
|
@@ -47867,12 +47819,12 @@ async function runEvalCase(options) {
|
|
|
47867
47819
|
}
|
|
47868
47820
|
}
|
|
47869
47821
|
}
|
|
47870
|
-
if (!workspacePath && (evalCase.workspace?.
|
|
47822
|
+
if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
|
|
47871
47823
|
workspacePath = getWorkspacePath(evalRunId, evalCase.id);
|
|
47872
|
-
await
|
|
47824
|
+
await mkdir12(workspacePath, { recursive: true });
|
|
47873
47825
|
}
|
|
47874
47826
|
if (evalCase.workspace?.repos?.length && workspacePath) {
|
|
47875
|
-
const perCaseRepoManager = new RepoManager(
|
|
47827
|
+
const perCaseRepoManager = new RepoManager(setupDebug);
|
|
47876
47828
|
try {
|
|
47877
47829
|
if (setupDebug) {
|
|
47878
47830
|
console.log(
|
|
@@ -47897,11 +47849,13 @@ async function runEvalCase(options) {
|
|
|
47897
47849
|
);
|
|
47898
47850
|
}
|
|
47899
47851
|
}
|
|
47900
|
-
|
|
47901
|
-
|
|
47852
|
+
const caseBeforeAllHook = evalCase.workspace?.hooks?.before_all_tests;
|
|
47853
|
+
if (workspacePath && hasHookCommand(caseBeforeAllHook)) {
|
|
47854
|
+
const beforeAllHook = caseBeforeAllHook;
|
|
47855
|
+
const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
|
|
47902
47856
|
if (setupDebug) {
|
|
47903
47857
|
console.log(
|
|
47904
|
-
`[setup] test=${evalCase.id} running before_all in cwd=${
|
|
47858
|
+
`[setup] test=${evalCase.id} running before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
47905
47859
|
);
|
|
47906
47860
|
}
|
|
47907
47861
|
const scriptContext = {
|
|
@@ -47914,7 +47868,7 @@ async function runEvalCase(options) {
|
|
|
47914
47868
|
};
|
|
47915
47869
|
try {
|
|
47916
47870
|
beforeAllOutput = await executeWorkspaceScript(
|
|
47917
|
-
evalCase.
|
|
47871
|
+
toScriptConfig(beforeAllHook, "before_all_tests", `test '${evalCase.id}'`),
|
|
47918
47872
|
scriptContext
|
|
47919
47873
|
);
|
|
47920
47874
|
if (setupDebug) {
|
|
@@ -47939,7 +47893,9 @@ async function runEvalCase(options) {
|
|
|
47939
47893
|
}
|
|
47940
47894
|
}
|
|
47941
47895
|
}
|
|
47942
|
-
|
|
47896
|
+
const caseBeforeEachHook = evalCase.workspace?.hooks?.before_each_test;
|
|
47897
|
+
if (workspacePath && hasHookCommand(caseBeforeEachHook)) {
|
|
47898
|
+
const beforeEachHook = caseBeforeEachHook;
|
|
47943
47899
|
const scriptContext = {
|
|
47944
47900
|
workspacePath,
|
|
47945
47901
|
testId: evalCase.id,
|
|
@@ -47950,7 +47906,7 @@ async function runEvalCase(options) {
|
|
|
47950
47906
|
};
|
|
47951
47907
|
try {
|
|
47952
47908
|
beforeEachOutput = await executeWorkspaceScript(
|
|
47953
|
-
evalCase.
|
|
47909
|
+
toScriptConfig(beforeEachHook, "before_each_test", `test '${evalCase.id}'`),
|
|
47954
47910
|
scriptContext
|
|
47955
47911
|
);
|
|
47956
47912
|
} catch (error40) {
|
|
@@ -48038,8 +47994,8 @@ async function runEvalCase(options) {
|
|
|
48038
47994
|
}
|
|
48039
47995
|
return errorResult;
|
|
48040
47996
|
}
|
|
48041
|
-
if (
|
|
48042
|
-
await cache.set(
|
|
47997
|
+
if (cacheKey && cache && !cachedResponse) {
|
|
47998
|
+
await cache.set(cacheKey, providerResponse);
|
|
48043
47999
|
}
|
|
48044
48000
|
const output = providerResponse.output;
|
|
48045
48001
|
const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
|
|
@@ -48067,17 +48023,19 @@ async function runEvalCase(options) {
|
|
|
48067
48023
|
}
|
|
48068
48024
|
}
|
|
48069
48025
|
const providerError = extractProviderError(providerResponse);
|
|
48070
|
-
if (repoManager && workspacePath && evalCase.workspace?.reset
|
|
48026
|
+
if (repoManager && workspacePath && evalCase.workspace?.hooks?.after_each_test?.reset && evalCase.workspace.hooks.after_each_test.reset !== "none" && evalCase.workspace.repos) {
|
|
48071
48027
|
try {
|
|
48072
48028
|
await repoManager.reset(
|
|
48073
48029
|
evalCase.workspace.repos,
|
|
48074
48030
|
workspacePath,
|
|
48075
|
-
evalCase.workspace.reset
|
|
48031
|
+
evalCase.workspace.hooks.after_each_test.reset
|
|
48076
48032
|
);
|
|
48077
48033
|
} catch {
|
|
48078
48034
|
}
|
|
48079
48035
|
}
|
|
48080
|
-
|
|
48036
|
+
const caseAfterEachHook = evalCase.workspace?.hooks?.after_each_test;
|
|
48037
|
+
if (workspacePath && hasHookCommand(caseAfterEachHook)) {
|
|
48038
|
+
const afterEachHook = caseAfterEachHook;
|
|
48081
48039
|
const scriptContext = {
|
|
48082
48040
|
workspacePath,
|
|
48083
48041
|
testId: evalCase.id,
|
|
@@ -48088,7 +48046,7 @@ async function runEvalCase(options) {
|
|
|
48088
48046
|
};
|
|
48089
48047
|
try {
|
|
48090
48048
|
afterEachOutput = await executeWorkspaceScript(
|
|
48091
|
-
evalCase.
|
|
48049
|
+
toScriptConfig(afterEachHook, "after_each_test", `test '${evalCase.id}'`),
|
|
48092
48050
|
scriptContext,
|
|
48093
48051
|
"warn"
|
|
48094
48052
|
);
|
|
@@ -48138,8 +48096,13 @@ async function runEvalCase(options) {
|
|
|
48138
48096
|
await cleanupWorkspace(workspacePath).catch(() => {
|
|
48139
48097
|
});
|
|
48140
48098
|
} else if (isFailure) {
|
|
48141
|
-
|
|
48142
|
-
|
|
48099
|
+
if ((retainOnFailure ?? "keep") === "cleanup") {
|
|
48100
|
+
await cleanupWorkspace(workspacePath).catch(() => {
|
|
48101
|
+
});
|
|
48102
|
+
} else {
|
|
48103
|
+
return { ...finalResult, workspacePath };
|
|
48104
|
+
}
|
|
48105
|
+
} else if ((retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup")) !== "keep") {
|
|
48143
48106
|
await cleanupWorkspace(workspacePath).catch(() => {
|
|
48144
48107
|
});
|
|
48145
48108
|
}
|
|
@@ -48157,11 +48120,12 @@ async function runEvalCase(options) {
|
|
|
48157
48120
|
"evaluator_error"
|
|
48158
48121
|
);
|
|
48159
48122
|
if (workspacePath && !isSharedWorkspace) {
|
|
48160
|
-
if (forceCleanup) {
|
|
48123
|
+
if (forceCleanup || (retainOnFailure ?? "keep") === "cleanup") {
|
|
48161
48124
|
await cleanupWorkspace(workspacePath).catch(() => {
|
|
48162
48125
|
});
|
|
48126
|
+
} else {
|
|
48127
|
+
return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
|
|
48163
48128
|
}
|
|
48164
|
-
return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
|
|
48165
48129
|
}
|
|
48166
48130
|
return { ...errorResult, beforeEachOutput, afterEachOutput };
|
|
48167
48131
|
}
|
|
@@ -48180,7 +48144,9 @@ async function runEvalCaseWithTrials(options, trialsConfig) {
|
|
|
48180
48144
|
useCache: false,
|
|
48181
48145
|
// Force cleanup for intermediate trials
|
|
48182
48146
|
cleanupWorkspaces: isLastDeclaredTrial ? options.cleanupWorkspaces : true,
|
|
48183
|
-
keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false
|
|
48147
|
+
keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false,
|
|
48148
|
+
retainOnSuccess: isLastDeclaredTrial ? options.retainOnSuccess : "cleanup",
|
|
48149
|
+
retainOnFailure: isLastDeclaredTrial ? options.retainOnFailure : "cleanup"
|
|
48184
48150
|
};
|
|
48185
48151
|
const result = await runEvalCase(trialOptions);
|
|
48186
48152
|
allResults.push(result);
|
|
@@ -48703,7 +48669,7 @@ function extractProviderError(response) {
|
|
|
48703
48669
|
return trimmed.length > 0 ? trimmed : void 0;
|
|
48704
48670
|
}
|
|
48705
48671
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
48706
|
-
const hash =
|
|
48672
|
+
const hash = createHash2("sha256");
|
|
48707
48673
|
hash.update(provider.id);
|
|
48708
48674
|
hash.update(target.name);
|
|
48709
48675
|
hash.update(evalCase.id);
|
|
@@ -48886,7 +48852,7 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
48886
48852
|
for (const dir of chain) {
|
|
48887
48853
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
48888
48854
|
const targetsPath = path40.join(dir, candidate);
|
|
48889
|
-
if (!
|
|
48855
|
+
if (!existsSync3(targetsPath)) continue;
|
|
48890
48856
|
try {
|
|
48891
48857
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
48892
48858
|
const defaultTarget = definitions.find((d) => d.name === "default");
|
|
@@ -48904,7 +48870,7 @@ async function loadEnvHierarchy(repoRoot) {
|
|
|
48904
48870
|
const envFiles = [];
|
|
48905
48871
|
for (const dir of chain) {
|
|
48906
48872
|
const envPath = path40.join(dir, ".env");
|
|
48907
|
-
if (
|
|
48873
|
+
if (existsSync3(envPath)) envFiles.push(envPath);
|
|
48908
48874
|
}
|
|
48909
48875
|
for (let i = envFiles.length - 1; i >= 0; i--) {
|
|
48910
48876
|
try {
|
|
@@ -48979,12 +48945,12 @@ var CONFIG_FILE_NAMES = [
|
|
|
48979
48945
|
".agentv/config.js"
|
|
48980
48946
|
];
|
|
48981
48947
|
async function loadTsConfig(projectRoot) {
|
|
48982
|
-
const { existsSync:
|
|
48948
|
+
const { existsSync: existsSync4 } = await import("node:fs");
|
|
48983
48949
|
const { pathToFileURL } = await import("node:url");
|
|
48984
48950
|
const { join: join2 } = await import("node:path");
|
|
48985
48951
|
for (const fileName of CONFIG_FILE_NAMES) {
|
|
48986
48952
|
const filePath = join2(projectRoot, fileName);
|
|
48987
|
-
if (!
|
|
48953
|
+
if (!existsSync4(filePath)) {
|
|
48988
48954
|
continue;
|
|
48989
48955
|
}
|
|
48990
48956
|
try {
|
|
@@ -49093,8 +49059,8 @@ var ResponseCache = class {
|
|
|
49093
49059
|
async set(key, value) {
|
|
49094
49060
|
const filePath = this.keyToPath(key);
|
|
49095
49061
|
const dir = path41.dirname(filePath);
|
|
49096
|
-
await
|
|
49097
|
-
await
|
|
49062
|
+
await mkdir13(dir, { recursive: true });
|
|
49063
|
+
await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
49098
49064
|
}
|
|
49099
49065
|
keyToPath(key) {
|
|
49100
49066
|
const prefix = key.slice(0, 2);
|
|
@@ -49604,7 +49570,6 @@ export {
|
|
|
49604
49570
|
ProviderRegistry,
|
|
49605
49571
|
getAgentvHome,
|
|
49606
49572
|
getWorkspacesRoot,
|
|
49607
|
-
getGitCacheRoot,
|
|
49608
49573
|
getSubagentsRoot,
|
|
49609
49574
|
getTraceStateRoot,
|
|
49610
49575
|
getWorkspacePoolRoot,
|
|
@@ -49689,4 +49654,4 @@ export {
|
|
|
49689
49654
|
OtelStreamingObserver,
|
|
49690
49655
|
createAgentKernel
|
|
49691
49656
|
};
|
|
49692
|
-
//# sourceMappingURL=chunk-
|
|
49657
|
+
//# sourceMappingURL=chunk-LZ5MPQFM.js.map
|