@agentv/core 2.15.0 → 2.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-N55K52OO.js → chunk-E6AJPAXM.js} +1 -1
- package/dist/chunk-E6AJPAXM.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +8 -7
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +9 -8
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +224 -260
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +56 -35
- package/dist/index.d.ts +56 -35
- package/dist/index.js +208 -243
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-N55K52OO.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -1244,11 +1244,11 @@ function serializeAttributeValue(value) {
|
|
|
1244
1244
|
if (Array.isArray(value)) return { arrayValue: { values: value.map(serializeAttributeValue) } };
|
|
1245
1245
|
return { stringValue: String(value) };
|
|
1246
1246
|
}
|
|
1247
|
-
var
|
|
1247
|
+
var import_promises31, import_node_path45, OtlpJsonFileExporter;
|
|
1248
1248
|
var init_otlp_json_file_exporter = __esm({
|
|
1249
1249
|
"src/observability/otlp-json-file-exporter.ts"() {
|
|
1250
1250
|
"use strict";
|
|
1251
|
-
|
|
1251
|
+
import_promises31 = require("fs/promises");
|
|
1252
1252
|
import_node_path45 = require("path");
|
|
1253
1253
|
OtlpJsonFileExporter = class {
|
|
1254
1254
|
// biome-ignore lint/suspicious/noExplicitAny: serialized span data
|
|
@@ -1288,7 +1288,7 @@ var init_otlp_json_file_exporter = __esm({
|
|
|
1288
1288
|
}
|
|
1289
1289
|
async flush() {
|
|
1290
1290
|
if (this.spans.length === 0) return;
|
|
1291
|
-
await (0,
|
|
1291
|
+
await (0, import_promises31.mkdir)((0, import_node_path45.dirname)(this.filePath), { recursive: true });
|
|
1292
1292
|
const otlpJson = {
|
|
1293
1293
|
resourceSpans: [
|
|
1294
1294
|
{
|
|
@@ -1302,8 +1302,8 @@ var init_otlp_json_file_exporter = __esm({
|
|
|
1302
1302
|
}
|
|
1303
1303
|
]
|
|
1304
1304
|
};
|
|
1305
|
-
const { writeFile:
|
|
1306
|
-
await
|
|
1305
|
+
const { writeFile: writeFile9 } = await import("fs/promises");
|
|
1306
|
+
await writeFile9(this.filePath, JSON.stringify(otlpJson, null, 2));
|
|
1307
1307
|
}
|
|
1308
1308
|
};
|
|
1309
1309
|
}
|
|
@@ -1319,12 +1319,12 @@ function hrTimeDiffMs(start, end) {
|
|
|
1319
1319
|
const diffNano = end[1] - start[1];
|
|
1320
1320
|
return Math.round(diffSec * 1e3 + diffNano / 1e6);
|
|
1321
1321
|
}
|
|
1322
|
-
var
|
|
1322
|
+
var import_node_fs13, import_promises32, import_node_path46, SimpleTraceFileExporter;
|
|
1323
1323
|
var init_simple_trace_file_exporter = __esm({
|
|
1324
1324
|
"src/observability/simple-trace-file-exporter.ts"() {
|
|
1325
1325
|
"use strict";
|
|
1326
|
-
|
|
1327
|
-
|
|
1326
|
+
import_node_fs13 = require("fs");
|
|
1327
|
+
import_promises32 = require("fs/promises");
|
|
1328
1328
|
import_node_path46 = require("path");
|
|
1329
1329
|
SimpleTraceFileExporter = class {
|
|
1330
1330
|
stream = null;
|
|
@@ -1338,8 +1338,8 @@ var init_simple_trace_file_exporter = __esm({
|
|
|
1338
1338
|
async ensureStream() {
|
|
1339
1339
|
if (!this.streamReady) {
|
|
1340
1340
|
this.streamReady = (async () => {
|
|
1341
|
-
await (0,
|
|
1342
|
-
this.stream = (0,
|
|
1341
|
+
await (0, import_promises32.mkdir)((0, import_node_path46.dirname)(this.filePath), { recursive: true });
|
|
1342
|
+
this.stream = (0, import_node_fs13.createWriteStream)(this.filePath, { flags: "w" });
|
|
1343
1343
|
return this.stream;
|
|
1344
1344
|
})();
|
|
1345
1345
|
}
|
|
@@ -1505,7 +1505,6 @@ __export(index_exports, {
|
|
|
1505
1505
|
freeformEvaluationSchema: () => freeformEvaluationSchema,
|
|
1506
1506
|
generateRubrics: () => generateRubrics,
|
|
1507
1507
|
getAgentvHome: () => getAgentvHome,
|
|
1508
|
-
getGitCacheRoot: () => getGitCacheRoot,
|
|
1509
1508
|
getHitCount: () => getHitCount,
|
|
1510
1509
|
getSubagentsRoot: () => getSubagentsRoot,
|
|
1511
1510
|
getTraceStateRoot: () => getTraceStateRoot,
|
|
@@ -4741,16 +4740,37 @@ function parseRepoConfig(raw) {
|
|
|
4741
4740
|
...clone !== void 0 && { clone }
|
|
4742
4741
|
};
|
|
4743
4742
|
}
|
|
4744
|
-
function
|
|
4743
|
+
function parseWorkspaceHookConfig(raw, evalFileDir) {
|
|
4745
4744
|
if (!isJsonObject(raw)) return void 0;
|
|
4745
|
+
const script = parseWorkspaceScriptConfig(raw, evalFileDir);
|
|
4746
4746
|
const obj = raw;
|
|
4747
|
-
const
|
|
4748
|
-
const
|
|
4749
|
-
if (!
|
|
4747
|
+
const reset = obj.reset === "none" || obj.reset === "fast" || obj.reset === "strict" ? obj.reset : void 0;
|
|
4748
|
+
const clean = obj.clean === "always" || obj.clean === "on_success" || obj.clean === "on_failure" || obj.clean === "never" ? obj.clean : void 0;
|
|
4749
|
+
if (!script && !reset && !clean) return void 0;
|
|
4750
4750
|
return {
|
|
4751
|
-
...
|
|
4752
|
-
...
|
|
4751
|
+
...script ?? {},
|
|
4752
|
+
...reset !== void 0 && { reset },
|
|
4753
|
+
...clean !== void 0 && { clean }
|
|
4754
|
+
};
|
|
4755
|
+
}
|
|
4756
|
+
function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
4757
|
+
if (!isJsonObject(raw)) return void 0;
|
|
4758
|
+
const obj = raw;
|
|
4759
|
+
const beforeAllTests = parseWorkspaceHookConfig(obj.before_all_tests, evalFileDir);
|
|
4760
|
+
const beforeEachTest = parseWorkspaceHookConfig(obj.before_each_test, evalFileDir);
|
|
4761
|
+
const afterEachTest = parseWorkspaceHookConfig(obj.after_each_test, evalFileDir);
|
|
4762
|
+
const afterAllTests = parseWorkspaceHookConfig(obj.after_all_tests, evalFileDir);
|
|
4763
|
+
const onReuse = parseWorkspaceHookConfig(obj.on_reuse, evalFileDir);
|
|
4764
|
+
const onFinish = parseWorkspaceHookConfig(obj.on_finish, evalFileDir);
|
|
4765
|
+
const hooks = {
|
|
4766
|
+
...beforeAllTests !== void 0 && { before_all_tests: beforeAllTests },
|
|
4767
|
+
...beforeEachTest !== void 0 && { before_each_test: beforeEachTest },
|
|
4768
|
+
...afterEachTest !== void 0 && { after_each_test: afterEachTest },
|
|
4769
|
+
...afterAllTests !== void 0 && { after_all_tests: afterAllTests },
|
|
4770
|
+
...onReuse !== void 0 && { on_reuse: onReuse },
|
|
4771
|
+
...onFinish !== void 0 && { on_finish: onFinish }
|
|
4753
4772
|
};
|
|
4773
|
+
return Object.keys(hooks).length > 0 ? hooks : void 0;
|
|
4754
4774
|
}
|
|
4755
4775
|
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
4756
4776
|
if (typeof raw === "string") {
|
|
@@ -4781,37 +4801,56 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
4781
4801
|
}
|
|
4782
4802
|
const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
|
|
4783
4803
|
const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
|
|
4784
|
-
const
|
|
4785
|
-
const
|
|
4786
|
-
const
|
|
4787
|
-
const
|
|
4788
|
-
|
|
4789
|
-
if (!template && !isolation && !repos && !reset && !beforeAll && !afterAll && !beforeEach && !afterEach)
|
|
4804
|
+
const hooks = parseWorkspaceHooksConfig(obj.hooks, evalFileDir);
|
|
4805
|
+
const mode = obj.mode === "pooled" || obj.mode === "ephemeral" || obj.mode === "static" ? obj.mode : void 0;
|
|
4806
|
+
const staticPath = typeof obj.static_path === "string" ? obj.static_path : void 0;
|
|
4807
|
+
const pool = typeof obj.pool === "boolean" ? obj.pool : void 0;
|
|
4808
|
+
if (!template && !isolation && !repos && !hooks && !mode && !staticPath && pool === void 0)
|
|
4790
4809
|
return void 0;
|
|
4791
4810
|
return {
|
|
4792
4811
|
...template !== void 0 && { template },
|
|
4793
4812
|
...isolation !== void 0 && { isolation },
|
|
4794
4813
|
...repos !== void 0 && { repos },
|
|
4795
|
-
...
|
|
4796
|
-
...
|
|
4797
|
-
...
|
|
4798
|
-
...
|
|
4799
|
-
...afterEach !== void 0 && { after_each: afterEach }
|
|
4814
|
+
...hooks !== void 0 && { hooks },
|
|
4815
|
+
...mode !== void 0 && { mode },
|
|
4816
|
+
...staticPath !== void 0 && { static_path: staticPath },
|
|
4817
|
+
...pool !== void 0 && { pool }
|
|
4800
4818
|
};
|
|
4801
4819
|
}
|
|
4802
4820
|
function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
4803
4821
|
if (!suiteLevel && !caseLevel) return void 0;
|
|
4804
4822
|
if (!suiteLevel) return caseLevel;
|
|
4805
4823
|
if (!caseLevel) return suiteLevel;
|
|
4824
|
+
const mergeHook = (suiteHook, caseHook) => {
|
|
4825
|
+
if (!suiteHook && !caseHook) return void 0;
|
|
4826
|
+
return {
|
|
4827
|
+
...suiteHook ?? {},
|
|
4828
|
+
...caseHook ?? {}
|
|
4829
|
+
};
|
|
4830
|
+
};
|
|
4831
|
+
const mergedHooks = {
|
|
4832
|
+
before_all_tests: mergeHook(
|
|
4833
|
+
suiteLevel.hooks?.before_all_tests,
|
|
4834
|
+
caseLevel.hooks?.before_all_tests
|
|
4835
|
+
),
|
|
4836
|
+
before_each_test: mergeHook(
|
|
4837
|
+
suiteLevel.hooks?.before_each_test,
|
|
4838
|
+
caseLevel.hooks?.before_each_test
|
|
4839
|
+
),
|
|
4840
|
+
after_each_test: mergeHook(suiteLevel.hooks?.after_each_test, caseLevel.hooks?.after_each_test),
|
|
4841
|
+
after_all_tests: mergeHook(suiteLevel.hooks?.after_all_tests, caseLevel.hooks?.after_all_tests),
|
|
4842
|
+
on_reuse: mergeHook(suiteLevel.hooks?.on_reuse, caseLevel.hooks?.on_reuse),
|
|
4843
|
+
on_finish: mergeHook(suiteLevel.hooks?.on_finish, caseLevel.hooks?.on_finish)
|
|
4844
|
+
};
|
|
4845
|
+
const hasHooks = Object.values(mergedHooks).some((hook) => hook !== void 0);
|
|
4806
4846
|
return {
|
|
4807
4847
|
template: caseLevel.template ?? suiteLevel.template,
|
|
4808
4848
|
isolation: caseLevel.isolation ?? suiteLevel.isolation,
|
|
4809
4849
|
repos: caseLevel.repos ?? suiteLevel.repos,
|
|
4810
|
-
|
|
4811
|
-
|
|
4812
|
-
|
|
4813
|
-
|
|
4814
|
-
after_each: caseLevel.after_each ?? suiteLevel.after_each
|
|
4850
|
+
...hasHooks && { hooks: mergedHooks },
|
|
4851
|
+
mode: caseLevel.mode ?? suiteLevel.mode,
|
|
4852
|
+
static_path: caseLevel.static_path ?? suiteLevel.static_path,
|
|
4853
|
+
pool: caseLevel.pool ?? suiteLevel.pool
|
|
4815
4854
|
};
|
|
4816
4855
|
}
|
|
4817
4856
|
function asString6(value) {
|
|
@@ -10077,9 +10116,6 @@ function getAgentvHome() {
|
|
|
10077
10116
|
function getWorkspacesRoot() {
|
|
10078
10117
|
return import_node_path23.default.join(getAgentvHome(), "workspaces");
|
|
10079
10118
|
}
|
|
10080
|
-
function getGitCacheRoot() {
|
|
10081
|
-
return import_node_path23.default.join(getAgentvHome(), "git-cache");
|
|
10082
|
-
}
|
|
10083
10119
|
function getSubagentsRoot() {
|
|
10084
10120
|
return import_node_path23.default.join(getAgentvHome(), "subagents");
|
|
10085
10121
|
}
|
|
@@ -11539,16 +11575,16 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
11539
11575
|
});
|
|
11540
11576
|
}
|
|
11541
11577
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
11542
|
-
const { mkdir:
|
|
11578
|
+
const { mkdir: mkdir16, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
|
|
11543
11579
|
const { tmpdir: tmpdir3 } = await import("os");
|
|
11544
11580
|
const path44 = await import("path");
|
|
11545
11581
|
const { randomUUID: randomUUID8 } = await import("crypto");
|
|
11546
11582
|
const dir = path44.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
|
|
11547
|
-
await
|
|
11583
|
+
await mkdir16(dir, { recursive: true });
|
|
11548
11584
|
const stdinPath = path44.join(dir, "stdin.txt");
|
|
11549
11585
|
const stdoutPath = path44.join(dir, "stdout.txt");
|
|
11550
11586
|
const stderrPath = path44.join(dir, "stderr.txt");
|
|
11551
|
-
await
|
|
11587
|
+
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
11552
11588
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
11553
11589
|
const { spawn: spawn4 } = await import("child_process");
|
|
11554
11590
|
try {
|
|
@@ -11581,7 +11617,7 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
11581
11617
|
const stderr = (await readFile14(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
11582
11618
|
return { stdout, stderr, exitCode };
|
|
11583
11619
|
} finally {
|
|
11584
|
-
await
|
|
11620
|
+
await rm6(dir, { recursive: true, force: true });
|
|
11585
11621
|
}
|
|
11586
11622
|
}
|
|
11587
11623
|
|
|
@@ -14798,8 +14834,8 @@ function runEqualsAssertion(output, value) {
|
|
|
14798
14834
|
}
|
|
14799
14835
|
|
|
14800
14836
|
// src/evaluation/orchestrator.ts
|
|
14801
|
-
var
|
|
14802
|
-
var
|
|
14837
|
+
var import_node_crypto9 = require("crypto");
|
|
14838
|
+
var import_promises29 = require("fs/promises");
|
|
14803
14839
|
var import_node_path42 = __toESM(require("path"), 1);
|
|
14804
14840
|
var import_micromatch4 = __toESM(require("micromatch"), 1);
|
|
14805
14841
|
|
|
@@ -15762,7 +15798,7 @@ var WorkspacePoolManager = class {
|
|
|
15762
15798
|
* 7. Return the slot (with path, index, isExisting)
|
|
15763
15799
|
*/
|
|
15764
15800
|
async acquireWorkspace(options) {
|
|
15765
|
-
const { templatePath, repos, maxSlots, repoManager } = options;
|
|
15801
|
+
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
15766
15802
|
const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
|
|
15767
15803
|
const poolDir = import_node_path39.default.join(this.poolRoot, fingerprint);
|
|
15768
15804
|
await (0, import_promises27.mkdir)(poolDir, { recursive: true });
|
|
@@ -15782,7 +15818,7 @@ var WorkspacePoolManager = class {
|
|
|
15782
15818
|
}
|
|
15783
15819
|
const slotExists = (0, import_node_fs11.existsSync)(slotPath);
|
|
15784
15820
|
if (slotExists) {
|
|
15785
|
-
await this.resetSlot(slotPath, templatePath, repos);
|
|
15821
|
+
await this.resetSlot(slotPath, templatePath, repos, poolReset);
|
|
15786
15822
|
return {
|
|
15787
15823
|
index: i,
|
|
15788
15824
|
path: slotPath,
|
|
@@ -15914,15 +15950,19 @@ var WorkspacePoolManager = class {
|
|
|
15914
15950
|
* 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
|
|
15915
15951
|
* 2. Re-copy template files (skip repo directories)
|
|
15916
15952
|
*/
|
|
15917
|
-
async resetSlot(slotPath, templatePath, repos) {
|
|
15953
|
+
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
15918
15954
|
for (const repo of repos) {
|
|
15919
15955
|
const repoDir = import_node_path39.default.join(slotPath, repo.path);
|
|
15920
15956
|
if (!(0, import_node_fs11.existsSync)(repoDir)) {
|
|
15921
15957
|
continue;
|
|
15922
15958
|
}
|
|
15959
|
+
if (poolReset === "none") {
|
|
15960
|
+
continue;
|
|
15961
|
+
}
|
|
15923
15962
|
const ref = repo.checkout?.ref ?? "HEAD";
|
|
15924
15963
|
await git(["reset", "--hard", ref], { cwd: repoDir });
|
|
15925
|
-
|
|
15964
|
+
const cleanFlag = poolReset === "strict" ? "-fdx" : "-fd";
|
|
15965
|
+
await git(["clean", cleanFlag], { cwd: repoDir });
|
|
15926
15966
|
}
|
|
15927
15967
|
if (templatePath) {
|
|
15928
15968
|
const repoDirNames = new Set(
|
|
@@ -15938,14 +15978,10 @@ var WorkspacePoolManager = class {
|
|
|
15938
15978
|
|
|
15939
15979
|
// src/evaluation/workspace/repo-manager.ts
|
|
15940
15980
|
var import_node_child_process8 = require("child_process");
|
|
15941
|
-
var import_node_crypto9 = require("crypto");
|
|
15942
|
-
var import_node_fs12 = require("fs");
|
|
15943
|
-
var import_promises28 = require("fs/promises");
|
|
15944
15981
|
var import_node_path40 = __toESM(require("path"), 1);
|
|
15945
15982
|
var import_node_util6 = require("util");
|
|
15946
15983
|
var execFileAsync2 = (0, import_node_util6.promisify)(import_node_child_process8.execFile);
|
|
15947
15984
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
15948
|
-
var LOCK_TIMEOUT_MS = 6e4;
|
|
15949
15985
|
function gitEnv2() {
|
|
15950
15986
|
const env = { ...process.env };
|
|
15951
15987
|
for (const key of Object.keys(env)) {
|
|
@@ -15960,10 +15996,6 @@ function gitEnv2() {
|
|
|
15960
15996
|
GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
|
|
15961
15997
|
};
|
|
15962
15998
|
}
|
|
15963
|
-
function cacheKey(source) {
|
|
15964
|
-
const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
|
|
15965
|
-
return (0, import_node_crypto9.createHash)("sha256").update(raw).digest("hex");
|
|
15966
|
-
}
|
|
15967
15999
|
function getSourceUrl(source) {
|
|
15968
16000
|
return source.type === "git" ? source.url : source.path;
|
|
15969
16001
|
}
|
|
@@ -15977,33 +16009,9 @@ async function git2(args, opts) {
|
|
|
15977
16009
|
});
|
|
15978
16010
|
return stdout.trim();
|
|
15979
16011
|
}
|
|
15980
|
-
async function acquireLock(lockPath) {
|
|
15981
|
-
const start = Date.now();
|
|
15982
|
-
while (Date.now() - start < LOCK_TIMEOUT_MS) {
|
|
15983
|
-
try {
|
|
15984
|
-
await (0, import_promises28.writeFile)(lockPath, String(process.pid), { flag: "wx" });
|
|
15985
|
-
return;
|
|
15986
|
-
} catch (err) {
|
|
15987
|
-
if (err.code === "EEXIST") {
|
|
15988
|
-
await new Promise((r) => setTimeout(r, 200));
|
|
15989
|
-
continue;
|
|
15990
|
-
}
|
|
15991
|
-
throw err;
|
|
15992
|
-
}
|
|
15993
|
-
}
|
|
15994
|
-
throw new Error(`Timed out waiting for lock: ${lockPath}`);
|
|
15995
|
-
}
|
|
15996
|
-
async function releaseLock(lockPath) {
|
|
15997
|
-
try {
|
|
15998
|
-
await (0, import_promises28.unlink)(lockPath);
|
|
15999
|
-
} catch {
|
|
16000
|
-
}
|
|
16001
|
-
}
|
|
16002
16012
|
var RepoManager = class {
|
|
16003
|
-
cacheDir;
|
|
16004
16013
|
verbose;
|
|
16005
|
-
constructor(
|
|
16006
|
-
this.cacheDir = cacheDir ?? getGitCacheRoot();
|
|
16014
|
+
constructor(verbose = false) {
|
|
16007
16015
|
this.verbose = verbose;
|
|
16008
16016
|
}
|
|
16009
16017
|
async runGit(args, opts) {
|
|
@@ -16028,86 +16036,18 @@ var RepoManager = class {
|
|
|
16028
16036
|
}
|
|
16029
16037
|
}
|
|
16030
16038
|
/**
|
|
16031
|
-
*
|
|
16032
|
-
* Creates on first access, fetches updates on subsequent calls.
|
|
16033
|
-
* Returns the absolute path to the cache directory.
|
|
16034
|
-
*/
|
|
16035
|
-
async ensureCache(source, depth, resolve) {
|
|
16036
|
-
const key = cacheKey(source);
|
|
16037
|
-
const cachePath = import_node_path40.default.join(this.cacheDir, key);
|
|
16038
|
-
const lockPath = `${cachePath}.lock`;
|
|
16039
|
-
const cacheExists = (0, import_node_fs12.existsSync)(import_node_path40.default.join(cachePath, "HEAD"));
|
|
16040
|
-
if (this.verbose) {
|
|
16041
|
-
console.log(
|
|
16042
|
-
`[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve ?? "remote"} cache=${cachePath}`
|
|
16043
|
-
);
|
|
16044
|
-
}
|
|
16045
|
-
if (resolve === "local") {
|
|
16046
|
-
if (cacheExists) {
|
|
16047
|
-
if (this.verbose) {
|
|
16048
|
-
console.log(`[repo] using existing local cache ${cachePath}`);
|
|
16049
|
-
}
|
|
16050
|
-
return cachePath;
|
|
16051
|
-
}
|
|
16052
|
-
const url = getSourceUrl(source);
|
|
16053
|
-
throw new Error(
|
|
16054
|
-
`No cache found for \`${url}\`. Run \`agentv cache add --url ${url} --from <local-path>\` to seed it.`
|
|
16055
|
-
);
|
|
16056
|
-
}
|
|
16057
|
-
await (0, import_promises28.mkdir)(this.cacheDir, { recursive: true });
|
|
16058
|
-
const lockStartedAt = Date.now();
|
|
16059
|
-
await acquireLock(lockPath);
|
|
16060
|
-
if (this.verbose) {
|
|
16061
|
-
console.log(`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`);
|
|
16062
|
-
}
|
|
16063
|
-
try {
|
|
16064
|
-
if (cacheExists) {
|
|
16065
|
-
if (this.verbose) {
|
|
16066
|
-
console.log(`[repo] refreshing existing cache ${cachePath}`);
|
|
16067
|
-
}
|
|
16068
|
-
const fetchArgs = ["fetch", "--prune"];
|
|
16069
|
-
if (depth) {
|
|
16070
|
-
fetchArgs.push("--depth", String(depth));
|
|
16071
|
-
}
|
|
16072
|
-
await this.runGit(fetchArgs, { cwd: cachePath });
|
|
16073
|
-
} else {
|
|
16074
|
-
if (this.verbose) {
|
|
16075
|
-
console.log(`[repo] creating new cache ${cachePath}`);
|
|
16076
|
-
}
|
|
16077
|
-
const cloneArgs = ["clone", "--mirror", "--bare"];
|
|
16078
|
-
if (depth) {
|
|
16079
|
-
cloneArgs.push("--depth", String(depth));
|
|
16080
|
-
}
|
|
16081
|
-
const sourceUrl = getSourceUrl(source);
|
|
16082
|
-
const cloneUrl = depth && source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
|
|
16083
|
-
cloneArgs.push(cloneUrl, cachePath);
|
|
16084
|
-
await this.runGit(cloneArgs);
|
|
16085
|
-
}
|
|
16086
|
-
} finally {
|
|
16087
|
-
await releaseLock(lockPath);
|
|
16088
|
-
if (this.verbose) {
|
|
16089
|
-
console.log(`[repo] lock released path=${lockPath}`);
|
|
16090
|
-
}
|
|
16091
|
-
}
|
|
16092
|
-
return cachePath;
|
|
16093
|
-
}
|
|
16094
|
-
/**
|
|
16095
|
-
* Clone a repo from cache into the workspace at the configured path.
|
|
16039
|
+
* Clone a repo directly from source into the workspace at the configured path.
|
|
16096
16040
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
16097
16041
|
*/
|
|
16098
16042
|
async materialize(repo, workspacePath) {
|
|
16099
16043
|
const targetDir = import_node_path40.default.join(workspacePath, repo.path);
|
|
16044
|
+
const sourceUrl = getSourceUrl(repo.source);
|
|
16100
16045
|
const startedAt = Date.now();
|
|
16101
16046
|
if (this.verbose) {
|
|
16102
16047
|
console.log(
|
|
16103
|
-
`[repo] materialize start path=${repo.path} source=${
|
|
16048
|
+
`[repo] materialize start path=${repo.path} source=${sourceUrl} workspace=${workspacePath}`
|
|
16104
16049
|
);
|
|
16105
16050
|
}
|
|
16106
|
-
const cachePath = await this.ensureCache(
|
|
16107
|
-
repo.source,
|
|
16108
|
-
repo.clone?.depth,
|
|
16109
|
-
repo.checkout?.resolve
|
|
16110
|
-
);
|
|
16111
16051
|
const cloneArgs = ["clone"];
|
|
16112
16052
|
if (repo.clone?.depth) {
|
|
16113
16053
|
cloneArgs.push("--depth", String(repo.clone.depth));
|
|
@@ -16116,7 +16056,7 @@ var RepoManager = class {
|
|
|
16116
16056
|
cloneArgs.push("--filter", repo.clone.filter);
|
|
16117
16057
|
}
|
|
16118
16058
|
cloneArgs.push("--no-checkout");
|
|
16119
|
-
const cloneUrl = repo.clone?.depth || repo.clone?.filter ? `file://${
|
|
16059
|
+
const cloneUrl = (repo.clone?.depth || repo.clone?.filter) && repo.source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
|
|
16120
16060
|
cloneArgs.push(cloneUrl, targetDir);
|
|
16121
16061
|
await this.runGit(cloneArgs);
|
|
16122
16062
|
if (repo.clone?.sparse?.length) {
|
|
@@ -16188,63 +16128,25 @@ var RepoManager = class {
|
|
|
16188
16128
|
}
|
|
16189
16129
|
}
|
|
16190
16130
|
/** Reset repos in workspace to their checkout state. */
|
|
16191
|
-
async reset(repos, workspacePath,
|
|
16192
|
-
|
|
16193
|
-
for (const repo of repos) {
|
|
16194
|
-
const targetDir = import_node_path40.default.join(workspacePath, repo.path);
|
|
16195
|
-
await (0, import_promises28.rm)(targetDir, { recursive: true, force: true });
|
|
16196
|
-
}
|
|
16197
|
-
await this.materializeAll(repos, workspacePath);
|
|
16198
|
-
return;
|
|
16199
|
-
}
|
|
16131
|
+
async reset(repos, workspacePath, reset) {
|
|
16132
|
+
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
16200
16133
|
for (const repo of repos) {
|
|
16201
16134
|
const targetDir = import_node_path40.default.join(workspacePath, repo.path);
|
|
16202
16135
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
16203
|
-
await this.runGit(["clean",
|
|
16136
|
+
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
16204
16137
|
}
|
|
16205
16138
|
}
|
|
16206
|
-
/**
|
|
16207
|
-
* Seed the cache from a local repository, setting the remote to a given URL.
|
|
16208
|
-
* Useful for avoiding slow network clones when a local clone already exists.
|
|
16209
|
-
*/
|
|
16210
|
-
async seedCache(localPath, remoteUrl, opts) {
|
|
16211
|
-
const source = { type: "git", url: remoteUrl };
|
|
16212
|
-
const key = cacheKey(source);
|
|
16213
|
-
const cachePath = import_node_path40.default.join(this.cacheDir, key);
|
|
16214
|
-
const lockPath = `${cachePath}.lock`;
|
|
16215
|
-
await (0, import_promises28.mkdir)(this.cacheDir, { recursive: true });
|
|
16216
|
-
await acquireLock(lockPath);
|
|
16217
|
-
try {
|
|
16218
|
-
if ((0, import_node_fs12.existsSync)(import_node_path40.default.join(cachePath, "HEAD"))) {
|
|
16219
|
-
if (!opts?.force) {
|
|
16220
|
-
throw new Error(
|
|
16221
|
-
`Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
|
|
16222
|
-
);
|
|
16223
|
-
}
|
|
16224
|
-
await (0, import_promises28.rm)(cachePath, { recursive: true, force: true });
|
|
16225
|
-
}
|
|
16226
|
-
await git2(["clone", "--mirror", "--bare", localPath, cachePath]);
|
|
16227
|
-
await git2(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
|
|
16228
|
-
} finally {
|
|
16229
|
-
await releaseLock(lockPath);
|
|
16230
|
-
}
|
|
16231
|
-
return cachePath;
|
|
16232
|
-
}
|
|
16233
|
-
/** Remove the entire cache directory. */
|
|
16234
|
-
async cleanCache() {
|
|
16235
|
-
await (0, import_promises28.rm)(this.cacheDir, { recursive: true, force: true });
|
|
16236
|
-
}
|
|
16237
16139
|
};
|
|
16238
16140
|
|
|
16239
16141
|
// src/evaluation/workspace/resolve.ts
|
|
16240
|
-
var
|
|
16142
|
+
var import_promises28 = require("fs/promises");
|
|
16241
16143
|
var import_node_path41 = __toESM(require("path"), 1);
|
|
16242
16144
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
16243
16145
|
if (!templatePath) {
|
|
16244
16146
|
return void 0;
|
|
16245
16147
|
}
|
|
16246
16148
|
const resolved = import_node_path41.default.resolve(templatePath);
|
|
16247
|
-
const stats = await (0,
|
|
16149
|
+
const stats = await (0, import_promises28.stat)(resolved);
|
|
16248
16150
|
if (stats.isFile()) {
|
|
16249
16151
|
return {
|
|
16250
16152
|
dir: import_node_path41.default.dirname(resolved),
|
|
@@ -16254,7 +16156,7 @@ async function resolveWorkspaceTemplate(templatePath) {
|
|
|
16254
16156
|
if (!stats.isDirectory()) {
|
|
16255
16157
|
throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
|
|
16256
16158
|
}
|
|
16257
|
-
const entries = await (0,
|
|
16159
|
+
const entries = await (0, import_promises28.readdir)(resolved);
|
|
16258
16160
|
const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
|
|
16259
16161
|
if (workspaceFiles.length === 1) {
|
|
16260
16162
|
return {
|
|
@@ -16318,6 +16220,22 @@ function classifyQualityStatus(score) {
|
|
|
16318
16220
|
function usesFileReferencePrompt(provider) {
|
|
16319
16221
|
return isAgentProvider(provider) || provider.kind === "cli";
|
|
16320
16222
|
}
|
|
16223
|
+
function toScriptConfig(hook, hookName, context2) {
|
|
16224
|
+
const command = hook.command ?? hook.script;
|
|
16225
|
+
if (!command || command.length === 0) {
|
|
16226
|
+
throw new Error(`${hookName} hook in ${context2} requires command or script`);
|
|
16227
|
+
}
|
|
16228
|
+
return {
|
|
16229
|
+
command,
|
|
16230
|
+
...hook.timeout_ms !== void 0 && { timeout_ms: hook.timeout_ms },
|
|
16231
|
+
...hook.timeoutMs !== void 0 && { timeoutMs: hook.timeoutMs },
|
|
16232
|
+
...hook.cwd !== void 0 && { cwd: hook.cwd },
|
|
16233
|
+
...hook.script !== void 0 && { script: hook.script }
|
|
16234
|
+
};
|
|
16235
|
+
}
|
|
16236
|
+
function hasHookCommand(hook) {
|
|
16237
|
+
return !!(hook?.command && hook.command.length > 0 || hook?.script && hook.script.length > 0);
|
|
16238
|
+
}
|
|
16321
16239
|
function getWorkspaceTemplate(target) {
|
|
16322
16240
|
const config = target.config;
|
|
16323
16241
|
if ("workspaceTemplate" in config && typeof config.workspaceTemplate === "string") {
|
|
@@ -16351,7 +16269,12 @@ async function runEvaluation(options) {
|
|
|
16351
16269
|
failOnError,
|
|
16352
16270
|
poolWorkspaces,
|
|
16353
16271
|
poolMaxSlots: configPoolMaxSlots,
|
|
16354
|
-
workspace:
|
|
16272
|
+
workspace: legacyWorkspacePath,
|
|
16273
|
+
workspaceMode,
|
|
16274
|
+
workspacePath,
|
|
16275
|
+
workspaceClean,
|
|
16276
|
+
retainOnSuccess,
|
|
16277
|
+
retainOnFailure
|
|
16355
16278
|
} = options;
|
|
16356
16279
|
let useCache = options.useCache;
|
|
16357
16280
|
if (trials && trials.count > 1 && useCache) {
|
|
@@ -16360,7 +16283,7 @@ async function runEvaluation(options) {
|
|
|
16360
16283
|
);
|
|
16361
16284
|
useCache = false;
|
|
16362
16285
|
}
|
|
16363
|
-
const evalRunId = (0,
|
|
16286
|
+
const evalRunId = (0, import_node_crypto9.randomUUID)();
|
|
16364
16287
|
const evalCases = preloadedEvalCases ?? await loadTests(evalFilePath, repoRoot, { verbose, filter });
|
|
16365
16288
|
const filteredEvalCases = filterEvalCases(evalCases, filter);
|
|
16366
16289
|
if (filteredEvalCases.length === 0) {
|
|
@@ -16487,13 +16410,23 @@ async function runEvaluation(options) {
|
|
|
16487
16410
|
}
|
|
16488
16411
|
};
|
|
16489
16412
|
const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
|
|
16490
|
-
|
|
16413
|
+
const configuredMode = suiteWorkspace?.mode ?? workspaceMode;
|
|
16414
|
+
const configuredStaticPath = suiteWorkspace?.static_path ?? workspacePath ?? legacyWorkspacePath;
|
|
16415
|
+
const useStaticWorkspace = configuredMode === "static" || !!configuredStaticPath && !configuredMode;
|
|
16416
|
+
if (useStaticWorkspace && isPerTestIsolation) {
|
|
16491
16417
|
throw new Error(
|
|
16492
|
-
"
|
|
16418
|
+
"static workspace mode is incompatible with isolation: per_test. Use isolation: shared (default)."
|
|
16493
16419
|
);
|
|
16494
16420
|
}
|
|
16495
|
-
|
|
16496
|
-
|
|
16421
|
+
if (configuredMode === "static" && !configuredStaticPath) {
|
|
16422
|
+
throw new Error("workspace.mode=static requires workspace.static_path or --workspace-path");
|
|
16423
|
+
}
|
|
16424
|
+
const hasSharedWorkspace = !!(useStaticWorkspace || workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation);
|
|
16425
|
+
const poolEnabled = configuredMode === "pooled" ? true : configuredMode === "ephemeral" || useStaticWorkspace ? false : suiteWorkspace?.pool ?? poolWorkspaces ?? true;
|
|
16426
|
+
const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
|
|
16427
|
+
const finishCleanPolicy = suiteWorkspace?.hooks?.on_finish?.clean;
|
|
16428
|
+
const resolvedRetainOnSuccess = (finishCleanPolicy === "always" || finishCleanPolicy === "on_success" ? "cleanup" : finishCleanPolicy === "on_failure" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
|
|
16429
|
+
const resolvedRetainOnFailure = (finishCleanPolicy === "always" || finishCleanPolicy === "on_failure" ? "cleanup" : finishCleanPolicy === "on_success" || finishCleanPolicy === "never" ? "keep" : void 0) ?? retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
|
|
16497
16430
|
const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
|
|
16498
16431
|
const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
|
|
16499
16432
|
setupLog(
|
|
@@ -16514,20 +16447,21 @@ async function runEvaluation(options) {
|
|
|
16514
16447
|
const availablePoolSlots = [];
|
|
16515
16448
|
const poolSlotBaselines = /* @__PURE__ */ new Map();
|
|
16516
16449
|
const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
|
|
16517
|
-
if (
|
|
16518
|
-
sharedWorkspacePath =
|
|
16519
|
-
setupLog(`using
|
|
16450
|
+
if (useStaticWorkspace && configuredStaticPath) {
|
|
16451
|
+
sharedWorkspacePath = configuredStaticPath;
|
|
16452
|
+
setupLog(`using static workspace: ${configuredStaticPath}`);
|
|
16520
16453
|
} else if (usePool && suiteWorkspace?.repos) {
|
|
16521
16454
|
const slotsNeeded = workers;
|
|
16522
16455
|
setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
|
|
16523
16456
|
poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
|
|
16524
|
-
const poolRepoManager = new RepoManager(
|
|
16457
|
+
const poolRepoManager = new RepoManager(verbose);
|
|
16525
16458
|
for (let i = 0; i < slotsNeeded; i++) {
|
|
16526
16459
|
const slot = await poolManager.acquireWorkspace({
|
|
16527
16460
|
templatePath: workspaceTemplate,
|
|
16528
16461
|
repos: suiteWorkspace.repos,
|
|
16529
16462
|
maxSlots: poolMaxSlots,
|
|
16530
|
-
repoManager: poolRepoManager
|
|
16463
|
+
repoManager: poolRepoManager,
|
|
16464
|
+
poolReset: (workspaceClean === "full" ? "strict" : workspaceClean === "standard" ? "fast" : null) ?? suiteWorkspace.hooks?.on_reuse?.reset ?? "fast"
|
|
16531
16465
|
});
|
|
16532
16466
|
poolSlots.push(slot);
|
|
16533
16467
|
setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
|
|
@@ -16547,21 +16481,21 @@ async function runEvaluation(options) {
|
|
|
16547
16481
|
const message = error instanceof Error ? error.message : String(error);
|
|
16548
16482
|
throw new Error(`Failed to create shared workspace: ${message}`);
|
|
16549
16483
|
}
|
|
16550
|
-
} else if (suiteWorkspace?.
|
|
16484
|
+
} else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
|
|
16551
16485
|
sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
|
|
16552
|
-
await (0,
|
|
16486
|
+
await (0, import_promises29.mkdir)(sharedWorkspacePath, { recursive: true });
|
|
16553
16487
|
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
16554
16488
|
}
|
|
16555
16489
|
try {
|
|
16556
16490
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
16557
16491
|
const copiedWorkspaceFile = import_node_path42.default.join(sharedWorkspacePath, import_node_path42.default.basename(suiteWorkspaceFile));
|
|
16558
16492
|
try {
|
|
16559
|
-
await (0,
|
|
16493
|
+
await (0, import_promises29.stat)(copiedWorkspaceFile);
|
|
16560
16494
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
16561
16495
|
} catch {
|
|
16562
16496
|
}
|
|
16563
16497
|
}
|
|
16564
|
-
const repoManager = suiteWorkspace?.repos?.length && !usePool && !
|
|
16498
|
+
const repoManager = suiteWorkspace?.repos?.length && !usePool && !useStaticWorkspace ? new RepoManager(verbose) : void 0;
|
|
16565
16499
|
if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
|
|
16566
16500
|
setupLog(
|
|
16567
16501
|
`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
|
|
@@ -16571,17 +16505,19 @@ async function runEvaluation(options) {
|
|
|
16571
16505
|
setupLog("shared repo materialization complete");
|
|
16572
16506
|
} catch (error) {
|
|
16573
16507
|
const message = error instanceof Error ? error.message : String(error);
|
|
16574
|
-
if (sharedWorkspacePath && !
|
|
16508
|
+
if (sharedWorkspacePath && !useStaticWorkspace) {
|
|
16575
16509
|
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
16576
16510
|
});
|
|
16577
16511
|
}
|
|
16578
16512
|
throw new Error(`Failed to materialize repos: ${message}`);
|
|
16579
16513
|
}
|
|
16580
16514
|
}
|
|
16581
|
-
|
|
16582
|
-
|
|
16515
|
+
const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all_tests;
|
|
16516
|
+
if (sharedWorkspacePath && hasHookCommand(suiteBeforeAllHook)) {
|
|
16517
|
+
const beforeAllHook = suiteBeforeAllHook;
|
|
16518
|
+
const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
|
|
16583
16519
|
setupLog(
|
|
16584
|
-
`running shared before_all in cwd=${
|
|
16520
|
+
`running shared before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
16585
16521
|
);
|
|
16586
16522
|
const scriptContext = {
|
|
16587
16523
|
workspacePath: sharedWorkspacePath,
|
|
@@ -16590,18 +16526,22 @@ async function runEvaluation(options) {
|
|
|
16590
16526
|
evalDir
|
|
16591
16527
|
};
|
|
16592
16528
|
try {
|
|
16593
|
-
beforeAllOutput = await executeWorkspaceScript(
|
|
16529
|
+
beforeAllOutput = await executeWorkspaceScript(
|
|
16530
|
+
toScriptConfig(beforeAllHook, "before_all_tests", "suite workspace"),
|
|
16531
|
+
scriptContext
|
|
16532
|
+
);
|
|
16594
16533
|
setupLog("shared before_all completed");
|
|
16595
16534
|
} catch (error) {
|
|
16596
16535
|
const message = error instanceof Error ? error.message : String(error);
|
|
16597
|
-
if (sharedWorkspacePath && !
|
|
16536
|
+
if (sharedWorkspacePath && !useStaticWorkspace) {
|
|
16598
16537
|
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
16599
16538
|
});
|
|
16600
16539
|
}
|
|
16601
16540
|
throw new Error(`before_all script failed: ${message}`);
|
|
16602
16541
|
}
|
|
16603
16542
|
}
|
|
16604
|
-
if (availablePoolSlots.length > 0 &&
|
|
16543
|
+
if (availablePoolSlots.length > 0 && hasHookCommand(suiteBeforeAllHook)) {
|
|
16544
|
+
const beforeAllHook = suiteBeforeAllHook;
|
|
16605
16545
|
for (const slot of availablePoolSlots) {
|
|
16606
16546
|
setupLog(`running before_all on pool slot ${slot.index}`);
|
|
16607
16547
|
const scriptContext = {
|
|
@@ -16611,7 +16551,10 @@ async function runEvaluation(options) {
|
|
|
16611
16551
|
evalDir
|
|
16612
16552
|
};
|
|
16613
16553
|
try {
|
|
16614
|
-
const output = await executeWorkspaceScript(
|
|
16554
|
+
const output = await executeWorkspaceScript(
|
|
16555
|
+
toScriptConfig(beforeAllHook, "before_all_tests", "suite workspace"),
|
|
16556
|
+
scriptContext
|
|
16557
|
+
);
|
|
16615
16558
|
if (!beforeAllOutput) beforeAllOutput = output;
|
|
16616
16559
|
setupLog(`before_all completed on pool slot ${slot.index}`);
|
|
16617
16560
|
} catch (error) {
|
|
@@ -16743,6 +16686,8 @@ async function runEvaluation(options) {
|
|
|
16743
16686
|
evalRunId,
|
|
16744
16687
|
keepWorkspaces,
|
|
16745
16688
|
cleanupWorkspaces,
|
|
16689
|
+
retainOnSuccess: resolvedRetainOnSuccess,
|
|
16690
|
+
retainOnFailure: resolvedRetainOnFailure,
|
|
16746
16691
|
sharedWorkspacePath: testWorkspacePath,
|
|
16747
16692
|
sharedBaselineCommit: testBaselineCommit,
|
|
16748
16693
|
suiteWorkspaceFile,
|
|
@@ -16836,7 +16781,9 @@ async function runEvaluation(options) {
|
|
|
16836
16781
|
}
|
|
16837
16782
|
}
|
|
16838
16783
|
const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
|
|
16839
|
-
|
|
16784
|
+
const suiteAfterAllHook = suiteWorkspace?.hooks?.after_all_tests;
|
|
16785
|
+
if (afterAllWorkspaces.length > 0 && hasHookCommand(suiteAfterAllHook)) {
|
|
16786
|
+
const afterAllHook = suiteAfterAllHook;
|
|
16840
16787
|
for (const wsPath of afterAllWorkspaces) {
|
|
16841
16788
|
const scriptContext = {
|
|
16842
16789
|
workspacePath: wsPath,
|
|
@@ -16846,7 +16793,7 @@ async function runEvaluation(options) {
|
|
|
16846
16793
|
};
|
|
16847
16794
|
try {
|
|
16848
16795
|
const afterAllOutput = await executeWorkspaceScript(
|
|
16849
|
-
|
|
16796
|
+
toScriptConfig(afterAllHook, "after_all_tests", "suite workspace"),
|
|
16850
16797
|
scriptContext,
|
|
16851
16798
|
"warn"
|
|
16852
16799
|
);
|
|
@@ -16857,12 +16804,14 @@ async function runEvaluation(options) {
|
|
|
16857
16804
|
}
|
|
16858
16805
|
}
|
|
16859
16806
|
}
|
|
16860
|
-
if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !
|
|
16807
|
+
if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !useStaticWorkspace) {
|
|
16861
16808
|
const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
|
|
16862
|
-
if (
|
|
16863
|
-
|
|
16864
|
-
|
|
16865
|
-
|
|
16809
|
+
if (hasFailure) {
|
|
16810
|
+
if (resolvedRetainOnFailure === "cleanup") {
|
|
16811
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
16812
|
+
});
|
|
16813
|
+
}
|
|
16814
|
+
} else if (resolvedRetainOnSuccess === "cleanup") {
|
|
16866
16815
|
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
16867
16816
|
});
|
|
16868
16817
|
}
|
|
@@ -17056,6 +17005,8 @@ async function runEvalCase(options) {
|
|
|
17056
17005
|
evalRunId,
|
|
17057
17006
|
keepWorkspaces,
|
|
17058
17007
|
cleanupWorkspaces: forceCleanup,
|
|
17008
|
+
retainOnSuccess,
|
|
17009
|
+
retainOnFailure,
|
|
17059
17010
|
sharedWorkspacePath,
|
|
17060
17011
|
sharedBaselineCommit,
|
|
17061
17012
|
suiteWorkspaceFile,
|
|
@@ -17067,10 +17018,10 @@ async function runEvalCase(options) {
|
|
|
17067
17018
|
const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
|
|
17068
17019
|
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
17069
17020
|
const typeRegistry = providedTypeRegistry ?? createBuiltinRegistry();
|
|
17070
|
-
const
|
|
17021
|
+
const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
|
|
17071
17022
|
let cachedResponse;
|
|
17072
|
-
if (
|
|
17073
|
-
cachedResponse = await cache.get(
|
|
17023
|
+
if (cacheKey && cache) {
|
|
17024
|
+
cachedResponse = await cache.get(cacheKey);
|
|
17074
17025
|
}
|
|
17075
17026
|
const nowFn = now ?? (() => /* @__PURE__ */ new Date());
|
|
17076
17027
|
let workspacePath = sharedWorkspacePath;
|
|
@@ -17103,18 +17054,18 @@ async function runEvalCase(options) {
|
|
|
17103
17054
|
if (caseWorkspaceFile && workspacePath) {
|
|
17104
17055
|
const copiedFile = import_node_path42.default.join(workspacePath, import_node_path42.default.basename(caseWorkspaceFile));
|
|
17105
17056
|
try {
|
|
17106
|
-
await (0,
|
|
17057
|
+
await (0, import_promises29.stat)(copiedFile);
|
|
17107
17058
|
caseWorkspaceFile = copiedFile;
|
|
17108
17059
|
} catch {
|
|
17109
17060
|
}
|
|
17110
17061
|
}
|
|
17111
17062
|
}
|
|
17112
|
-
if (!workspacePath && (evalCase.workspace?.
|
|
17063
|
+
if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
|
|
17113
17064
|
workspacePath = getWorkspacePath(evalRunId, evalCase.id);
|
|
17114
|
-
await (0,
|
|
17065
|
+
await (0, import_promises29.mkdir)(workspacePath, { recursive: true });
|
|
17115
17066
|
}
|
|
17116
17067
|
if (evalCase.workspace?.repos?.length && workspacePath) {
|
|
17117
|
-
const perCaseRepoManager = new RepoManager(
|
|
17068
|
+
const perCaseRepoManager = new RepoManager(setupDebug);
|
|
17118
17069
|
try {
|
|
17119
17070
|
if (setupDebug) {
|
|
17120
17071
|
console.log(
|
|
@@ -17139,11 +17090,13 @@ async function runEvalCase(options) {
|
|
|
17139
17090
|
);
|
|
17140
17091
|
}
|
|
17141
17092
|
}
|
|
17142
|
-
|
|
17143
|
-
|
|
17093
|
+
const caseBeforeAllHook = evalCase.workspace?.hooks?.before_all_tests;
|
|
17094
|
+
if (workspacePath && hasHookCommand(caseBeforeAllHook)) {
|
|
17095
|
+
const beforeAllHook = caseBeforeAllHook;
|
|
17096
|
+
const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
|
|
17144
17097
|
if (setupDebug) {
|
|
17145
17098
|
console.log(
|
|
17146
|
-
`[setup] test=${evalCase.id} running before_all in cwd=${
|
|
17099
|
+
`[setup] test=${evalCase.id} running before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
17147
17100
|
);
|
|
17148
17101
|
}
|
|
17149
17102
|
const scriptContext = {
|
|
@@ -17156,7 +17109,7 @@ async function runEvalCase(options) {
|
|
|
17156
17109
|
};
|
|
17157
17110
|
try {
|
|
17158
17111
|
beforeAllOutput = await executeWorkspaceScript(
|
|
17159
|
-
evalCase.
|
|
17112
|
+
toScriptConfig(beforeAllHook, "before_all_tests", `test '${evalCase.id}'`),
|
|
17160
17113
|
scriptContext
|
|
17161
17114
|
);
|
|
17162
17115
|
if (setupDebug) {
|
|
@@ -17181,7 +17134,9 @@ async function runEvalCase(options) {
|
|
|
17181
17134
|
}
|
|
17182
17135
|
}
|
|
17183
17136
|
}
|
|
17184
|
-
|
|
17137
|
+
const caseBeforeEachHook = evalCase.workspace?.hooks?.before_each_test;
|
|
17138
|
+
if (workspacePath && hasHookCommand(caseBeforeEachHook)) {
|
|
17139
|
+
const beforeEachHook = caseBeforeEachHook;
|
|
17185
17140
|
const scriptContext = {
|
|
17186
17141
|
workspacePath,
|
|
17187
17142
|
testId: evalCase.id,
|
|
@@ -17192,7 +17147,7 @@ async function runEvalCase(options) {
|
|
|
17192
17147
|
};
|
|
17193
17148
|
try {
|
|
17194
17149
|
beforeEachOutput = await executeWorkspaceScript(
|
|
17195
|
-
evalCase.
|
|
17150
|
+
toScriptConfig(beforeEachHook, "before_each_test", `test '${evalCase.id}'`),
|
|
17196
17151
|
scriptContext
|
|
17197
17152
|
);
|
|
17198
17153
|
} catch (error) {
|
|
@@ -17280,8 +17235,8 @@ async function runEvalCase(options) {
|
|
|
17280
17235
|
}
|
|
17281
17236
|
return errorResult;
|
|
17282
17237
|
}
|
|
17283
|
-
if (
|
|
17284
|
-
await cache.set(
|
|
17238
|
+
if (cacheKey && cache && !cachedResponse) {
|
|
17239
|
+
await cache.set(cacheKey, providerResponse);
|
|
17285
17240
|
}
|
|
17286
17241
|
const output = providerResponse.output;
|
|
17287
17242
|
const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
|
|
@@ -17309,17 +17264,19 @@ async function runEvalCase(options) {
|
|
|
17309
17264
|
}
|
|
17310
17265
|
}
|
|
17311
17266
|
const providerError = extractProviderError(providerResponse);
|
|
17312
|
-
if (repoManager && workspacePath && evalCase.workspace?.reset
|
|
17267
|
+
if (repoManager && workspacePath && evalCase.workspace?.hooks?.after_each_test?.reset && evalCase.workspace.hooks.after_each_test.reset !== "none" && evalCase.workspace.repos) {
|
|
17313
17268
|
try {
|
|
17314
17269
|
await repoManager.reset(
|
|
17315
17270
|
evalCase.workspace.repos,
|
|
17316
17271
|
workspacePath,
|
|
17317
|
-
evalCase.workspace.reset
|
|
17272
|
+
evalCase.workspace.hooks.after_each_test.reset
|
|
17318
17273
|
);
|
|
17319
17274
|
} catch {
|
|
17320
17275
|
}
|
|
17321
17276
|
}
|
|
17322
|
-
|
|
17277
|
+
const caseAfterEachHook = evalCase.workspace?.hooks?.after_each_test;
|
|
17278
|
+
if (workspacePath && hasHookCommand(caseAfterEachHook)) {
|
|
17279
|
+
const afterEachHook = caseAfterEachHook;
|
|
17323
17280
|
const scriptContext = {
|
|
17324
17281
|
workspacePath,
|
|
17325
17282
|
testId: evalCase.id,
|
|
@@ -17330,7 +17287,7 @@ async function runEvalCase(options) {
|
|
|
17330
17287
|
};
|
|
17331
17288
|
try {
|
|
17332
17289
|
afterEachOutput = await executeWorkspaceScript(
|
|
17333
|
-
evalCase.
|
|
17290
|
+
toScriptConfig(afterEachHook, "after_each_test", `test '${evalCase.id}'`),
|
|
17334
17291
|
scriptContext,
|
|
17335
17292
|
"warn"
|
|
17336
17293
|
);
|
|
@@ -17380,8 +17337,13 @@ async function runEvalCase(options) {
|
|
|
17380
17337
|
await cleanupWorkspace(workspacePath).catch(() => {
|
|
17381
17338
|
});
|
|
17382
17339
|
} else if (isFailure) {
|
|
17383
|
-
|
|
17384
|
-
|
|
17340
|
+
if ((retainOnFailure ?? "keep") === "cleanup") {
|
|
17341
|
+
await cleanupWorkspace(workspacePath).catch(() => {
|
|
17342
|
+
});
|
|
17343
|
+
} else {
|
|
17344
|
+
return { ...finalResult, workspacePath };
|
|
17345
|
+
}
|
|
17346
|
+
} else if ((retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup")) !== "keep") {
|
|
17385
17347
|
await cleanupWorkspace(workspacePath).catch(() => {
|
|
17386
17348
|
});
|
|
17387
17349
|
}
|
|
@@ -17399,11 +17361,12 @@ async function runEvalCase(options) {
|
|
|
17399
17361
|
"evaluator_error"
|
|
17400
17362
|
);
|
|
17401
17363
|
if (workspacePath && !isSharedWorkspace) {
|
|
17402
|
-
if (forceCleanup) {
|
|
17364
|
+
if (forceCleanup || (retainOnFailure ?? "keep") === "cleanup") {
|
|
17403
17365
|
await cleanupWorkspace(workspacePath).catch(() => {
|
|
17404
17366
|
});
|
|
17367
|
+
} else {
|
|
17368
|
+
return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
|
|
17405
17369
|
}
|
|
17406
|
-
return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
|
|
17407
17370
|
}
|
|
17408
17371
|
return { ...errorResult, beforeEachOutput, afterEachOutput };
|
|
17409
17372
|
}
|
|
@@ -17422,7 +17385,9 @@ async function runEvalCaseWithTrials(options, trialsConfig) {
|
|
|
17422
17385
|
useCache: false,
|
|
17423
17386
|
// Force cleanup for intermediate trials
|
|
17424
17387
|
cleanupWorkspaces: isLastDeclaredTrial ? options.cleanupWorkspaces : true,
|
|
17425
|
-
keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false
|
|
17388
|
+
keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false,
|
|
17389
|
+
retainOnSuccess: isLastDeclaredTrial ? options.retainOnSuccess : "cleanup",
|
|
17390
|
+
retainOnFailure: isLastDeclaredTrial ? options.retainOnFailure : "cleanup"
|
|
17426
17391
|
};
|
|
17427
17392
|
const result = await runEvalCase(trialOptions);
|
|
17428
17393
|
allResults.push(result);
|
|
@@ -17945,7 +17910,7 @@ function extractProviderError(response) {
|
|
|
17945
17910
|
return trimmed.length > 0 ? trimmed : void 0;
|
|
17946
17911
|
}
|
|
17947
17912
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
17948
|
-
const hash = (0,
|
|
17913
|
+
const hash = (0, import_node_crypto9.createHash)("sha256");
|
|
17949
17914
|
hash.update(provider.id);
|
|
17950
17915
|
hash.update(target.name);
|
|
17951
17916
|
hash.update(evalCase.id);
|
|
@@ -18013,7 +17978,7 @@ function computeWeightedMean(entries) {
|
|
|
18013
17978
|
}
|
|
18014
17979
|
|
|
18015
17980
|
// src/evaluation/evaluate.ts
|
|
18016
|
-
var
|
|
17981
|
+
var import_node_fs12 = require("fs");
|
|
18017
17982
|
var import_node_path43 = __toESM(require("path"), 1);
|
|
18018
17983
|
async function evaluate(config) {
|
|
18019
17984
|
const startTime = Date.now();
|
|
@@ -18132,7 +18097,7 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
18132
18097
|
for (const dir of chain) {
|
|
18133
18098
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
18134
18099
|
const targetsPath = import_node_path43.default.join(dir, candidate);
|
|
18135
|
-
if (!(0,
|
|
18100
|
+
if (!(0, import_node_fs12.existsSync)(targetsPath)) continue;
|
|
18136
18101
|
try {
|
|
18137
18102
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
18138
18103
|
const defaultTarget = definitions.find((d) => d.name === "default");
|
|
@@ -18150,7 +18115,7 @@ async function loadEnvHierarchy(repoRoot) {
|
|
|
18150
18115
|
const envFiles = [];
|
|
18151
18116
|
for (const dir of chain) {
|
|
18152
18117
|
const envPath = import_node_path43.default.join(dir, ".env");
|
|
18153
|
-
if ((0,
|
|
18118
|
+
if ((0, import_node_fs12.existsSync)(envPath)) envFiles.push(envPath);
|
|
18154
18119
|
}
|
|
18155
18120
|
for (let i = envFiles.length - 1; i >= 0; i--) {
|
|
18156
18121
|
try {
|
|
@@ -18228,12 +18193,12 @@ var CONFIG_FILE_NAMES = [
|
|
|
18228
18193
|
".agentv/config.js"
|
|
18229
18194
|
];
|
|
18230
18195
|
async function loadTsConfig(projectRoot) {
|
|
18231
|
-
const { existsSync:
|
|
18196
|
+
const { existsSync: existsSync4 } = await import("fs");
|
|
18232
18197
|
const { pathToFileURL } = await import("url");
|
|
18233
18198
|
const { join: join2 } = await import("path");
|
|
18234
18199
|
for (const fileName of CONFIG_FILE_NAMES) {
|
|
18235
18200
|
const filePath = join2(projectRoot, fileName);
|
|
18236
|
-
if (!
|
|
18201
|
+
if (!existsSync4(filePath)) {
|
|
18237
18202
|
continue;
|
|
18238
18203
|
}
|
|
18239
18204
|
try {
|
|
@@ -18330,7 +18295,7 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
18330
18295
|
}
|
|
18331
18296
|
|
|
18332
18297
|
// src/evaluation/cache/response-cache.ts
|
|
18333
|
-
var
|
|
18298
|
+
var import_promises30 = require("fs/promises");
|
|
18334
18299
|
var import_node_path44 = __toESM(require("path"), 1);
|
|
18335
18300
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
18336
18301
|
var ResponseCache = class {
|
|
@@ -18341,7 +18306,7 @@ var ResponseCache = class {
|
|
|
18341
18306
|
async get(key) {
|
|
18342
18307
|
const filePath = this.keyToPath(key);
|
|
18343
18308
|
try {
|
|
18344
|
-
const data = await (0,
|
|
18309
|
+
const data = await (0, import_promises30.readFile)(filePath, "utf8");
|
|
18345
18310
|
return JSON.parse(data);
|
|
18346
18311
|
} catch {
|
|
18347
18312
|
return void 0;
|
|
@@ -18350,8 +18315,8 @@ var ResponseCache = class {
|
|
|
18350
18315
|
async set(key, value) {
|
|
18351
18316
|
const filePath = this.keyToPath(key);
|
|
18352
18317
|
const dir = import_node_path44.default.dirname(filePath);
|
|
18353
|
-
await (0,
|
|
18354
|
-
await (0,
|
|
18318
|
+
await (0, import_promises30.mkdir)(dir, { recursive: true });
|
|
18319
|
+
await (0, import_promises30.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
18355
18320
|
}
|
|
18356
18321
|
keyToPath(key) {
|
|
18357
18322
|
const prefix = key.slice(0, 2);
|
|
@@ -18890,7 +18855,6 @@ function createAgentKernel() {
|
|
|
18890
18855
|
freeformEvaluationSchema,
|
|
18891
18856
|
generateRubrics,
|
|
18892
18857
|
getAgentvHome,
|
|
18893
|
-
getGitCacheRoot,
|
|
18894
18858
|
getHitCount,
|
|
18895
18859
|
getSubagentsRoot,
|
|
18896
18860
|
getTraceStateRoot,
|