@agentv/core 2.15.0 → 2.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-N55K52OO.js → chunk-CPPYERD2.js} +1 -1
- package/dist/chunk-CPPYERD2.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +8 -7
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +9 -8
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +251 -260
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +50 -35
- package/dist/index.d.ts +50 -35
- package/dist/index.js +235 -243
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-N55K52OO.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -1244,11 +1244,11 @@ function serializeAttributeValue(value) {
|
|
|
1244
1244
|
if (Array.isArray(value)) return { arrayValue: { values: value.map(serializeAttributeValue) } };
|
|
1245
1245
|
return { stringValue: String(value) };
|
|
1246
1246
|
}
|
|
1247
|
-
var
|
|
1247
|
+
var import_promises31, import_node_path45, OtlpJsonFileExporter;
|
|
1248
1248
|
var init_otlp_json_file_exporter = __esm({
|
|
1249
1249
|
"src/observability/otlp-json-file-exporter.ts"() {
|
|
1250
1250
|
"use strict";
|
|
1251
|
-
|
|
1251
|
+
import_promises31 = require("fs/promises");
|
|
1252
1252
|
import_node_path45 = require("path");
|
|
1253
1253
|
OtlpJsonFileExporter = class {
|
|
1254
1254
|
// biome-ignore lint/suspicious/noExplicitAny: serialized span data
|
|
@@ -1288,7 +1288,7 @@ var init_otlp_json_file_exporter = __esm({
|
|
|
1288
1288
|
}
|
|
1289
1289
|
async flush() {
|
|
1290
1290
|
if (this.spans.length === 0) return;
|
|
1291
|
-
await (0,
|
|
1291
|
+
await (0, import_promises31.mkdir)((0, import_node_path45.dirname)(this.filePath), { recursive: true });
|
|
1292
1292
|
const otlpJson = {
|
|
1293
1293
|
resourceSpans: [
|
|
1294
1294
|
{
|
|
@@ -1302,8 +1302,8 @@ var init_otlp_json_file_exporter = __esm({
|
|
|
1302
1302
|
}
|
|
1303
1303
|
]
|
|
1304
1304
|
};
|
|
1305
|
-
const { writeFile:
|
|
1306
|
-
await
|
|
1305
|
+
const { writeFile: writeFile9 } = await import("fs/promises");
|
|
1306
|
+
await writeFile9(this.filePath, JSON.stringify(otlpJson, null, 2));
|
|
1307
1307
|
}
|
|
1308
1308
|
};
|
|
1309
1309
|
}
|
|
@@ -1319,12 +1319,12 @@ function hrTimeDiffMs(start, end) {
|
|
|
1319
1319
|
const diffNano = end[1] - start[1];
|
|
1320
1320
|
return Math.round(diffSec * 1e3 + diffNano / 1e6);
|
|
1321
1321
|
}
|
|
1322
|
-
var
|
|
1322
|
+
var import_node_fs13, import_promises32, import_node_path46, SimpleTraceFileExporter;
|
|
1323
1323
|
var init_simple_trace_file_exporter = __esm({
|
|
1324
1324
|
"src/observability/simple-trace-file-exporter.ts"() {
|
|
1325
1325
|
"use strict";
|
|
1326
|
-
|
|
1327
|
-
|
|
1326
|
+
import_node_fs13 = require("fs");
|
|
1327
|
+
import_promises32 = require("fs/promises");
|
|
1328
1328
|
import_node_path46 = require("path");
|
|
1329
1329
|
SimpleTraceFileExporter = class {
|
|
1330
1330
|
stream = null;
|
|
@@ -1338,8 +1338,8 @@ var init_simple_trace_file_exporter = __esm({
|
|
|
1338
1338
|
async ensureStream() {
|
|
1339
1339
|
if (!this.streamReady) {
|
|
1340
1340
|
this.streamReady = (async () => {
|
|
1341
|
-
await (0,
|
|
1342
|
-
this.stream = (0,
|
|
1341
|
+
await (0, import_promises32.mkdir)((0, import_node_path46.dirname)(this.filePath), { recursive: true });
|
|
1342
|
+
this.stream = (0, import_node_fs13.createWriteStream)(this.filePath, { flags: "w" });
|
|
1343
1343
|
return this.stream;
|
|
1344
1344
|
})();
|
|
1345
1345
|
}
|
|
@@ -1505,7 +1505,6 @@ __export(index_exports, {
|
|
|
1505
1505
|
freeformEvaluationSchema: () => freeformEvaluationSchema,
|
|
1506
1506
|
generateRubrics: () => generateRubrics,
|
|
1507
1507
|
getAgentvHome: () => getAgentvHome,
|
|
1508
|
-
getGitCacheRoot: () => getGitCacheRoot,
|
|
1509
1508
|
getHitCount: () => getHitCount,
|
|
1510
1509
|
getSubagentsRoot: () => getSubagentsRoot,
|
|
1511
1510
|
getTraceStateRoot: () => getTraceStateRoot,
|
|
@@ -4741,16 +4740,31 @@ function parseRepoConfig(raw) {
|
|
|
4741
4740
|
...clone !== void 0 && { clone }
|
|
4742
4741
|
};
|
|
4743
4742
|
}
|
|
4744
|
-
function
|
|
4743
|
+
function parseWorkspaceHookConfig(raw, evalFileDir) {
|
|
4745
4744
|
if (!isJsonObject(raw)) return void 0;
|
|
4745
|
+
const script = parseWorkspaceScriptConfig(raw, evalFileDir);
|
|
4746
4746
|
const obj = raw;
|
|
4747
|
-
const
|
|
4748
|
-
|
|
4749
|
-
if (!strategy && afterEach === void 0) return void 0;
|
|
4747
|
+
const reset = obj.reset === "none" || obj.reset === "fast" || obj.reset === "strict" ? obj.reset : void 0;
|
|
4748
|
+
if (!script && !reset) return void 0;
|
|
4750
4749
|
return {
|
|
4751
|
-
...
|
|
4752
|
-
...
|
|
4750
|
+
...script ?? {},
|
|
4751
|
+
...reset !== void 0 && { reset }
|
|
4752
|
+
};
|
|
4753
|
+
}
|
|
4754
|
+
function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
4755
|
+
if (!isJsonObject(raw)) return void 0;
|
|
4756
|
+
const obj = raw;
|
|
4757
|
+
const beforeAll = parseWorkspaceHookConfig(obj.before_all, evalFileDir);
|
|
4758
|
+
const beforeEach = parseWorkspaceHookConfig(obj.before_each, evalFileDir);
|
|
4759
|
+
const afterEach = parseWorkspaceHookConfig(obj.after_each, evalFileDir);
|
|
4760
|
+
const afterAll = parseWorkspaceHookConfig(obj.after_all, evalFileDir);
|
|
4761
|
+
const hooks = {
|
|
4762
|
+
...beforeAll !== void 0 && { before_all: beforeAll },
|
|
4763
|
+
...beforeEach !== void 0 && { before_each: beforeEach },
|
|
4764
|
+
...afterEach !== void 0 && { after_each: afterEach },
|
|
4765
|
+
...afterAll !== void 0 && { after_all: afterAll }
|
|
4753
4766
|
};
|
|
4767
|
+
return Object.keys(hooks).length > 0 ? hooks : void 0;
|
|
4754
4768
|
}
|
|
4755
4769
|
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
4756
4770
|
if (typeof raw === "string") {
|
|
@@ -4781,37 +4795,48 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
4781
4795
|
}
|
|
4782
4796
|
const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
|
|
4783
4797
|
const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
|
|
4784
|
-
const
|
|
4785
|
-
const
|
|
4786
|
-
const
|
|
4787
|
-
const
|
|
4788
|
-
|
|
4789
|
-
if (!template && !isolation && !repos && !reset && !beforeAll && !afterAll && !beforeEach && !afterEach)
|
|
4798
|
+
const hooks = parseWorkspaceHooksConfig(obj.hooks, evalFileDir);
|
|
4799
|
+
const mode = obj.mode === "pooled" || obj.mode === "ephemeral" || obj.mode === "static" ? obj.mode : void 0;
|
|
4800
|
+
const staticPath = typeof obj.static_path === "string" ? obj.static_path : void 0;
|
|
4801
|
+
const pool = typeof obj.pool === "boolean" ? obj.pool : void 0;
|
|
4802
|
+
if (!template && !isolation && !repos && !hooks && !mode && !staticPath && pool === void 0)
|
|
4790
4803
|
return void 0;
|
|
4791
4804
|
return {
|
|
4792
4805
|
...template !== void 0 && { template },
|
|
4793
4806
|
...isolation !== void 0 && { isolation },
|
|
4794
4807
|
...repos !== void 0 && { repos },
|
|
4795
|
-
...
|
|
4796
|
-
...
|
|
4797
|
-
...
|
|
4798
|
-
...
|
|
4799
|
-
...afterEach !== void 0 && { after_each: afterEach }
|
|
4808
|
+
...hooks !== void 0 && { hooks },
|
|
4809
|
+
...mode !== void 0 && { mode },
|
|
4810
|
+
...staticPath !== void 0 && { static_path: staticPath },
|
|
4811
|
+
...pool !== void 0 && { pool }
|
|
4800
4812
|
};
|
|
4801
4813
|
}
|
|
4802
4814
|
function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
4803
4815
|
if (!suiteLevel && !caseLevel) return void 0;
|
|
4804
4816
|
if (!suiteLevel) return caseLevel;
|
|
4805
4817
|
if (!caseLevel) return suiteLevel;
|
|
4818
|
+
const mergeHook = (suiteHook, caseHook) => {
|
|
4819
|
+
if (!suiteHook && !caseHook) return void 0;
|
|
4820
|
+
return {
|
|
4821
|
+
...suiteHook ?? {},
|
|
4822
|
+
...caseHook ?? {}
|
|
4823
|
+
};
|
|
4824
|
+
};
|
|
4825
|
+
const mergedHooks = {
|
|
4826
|
+
before_all: mergeHook(suiteLevel.hooks?.before_all, caseLevel.hooks?.before_all),
|
|
4827
|
+
before_each: mergeHook(suiteLevel.hooks?.before_each, caseLevel.hooks?.before_each),
|
|
4828
|
+
after_each: mergeHook(suiteLevel.hooks?.after_each, caseLevel.hooks?.after_each),
|
|
4829
|
+
after_all: mergeHook(suiteLevel.hooks?.after_all, caseLevel.hooks?.after_all)
|
|
4830
|
+
};
|
|
4831
|
+
const hasHooks = Object.values(mergedHooks).some((hook) => hook !== void 0);
|
|
4806
4832
|
return {
|
|
4807
4833
|
template: caseLevel.template ?? suiteLevel.template,
|
|
4808
4834
|
isolation: caseLevel.isolation ?? suiteLevel.isolation,
|
|
4809
4835
|
repos: caseLevel.repos ?? suiteLevel.repos,
|
|
4810
|
-
|
|
4811
|
-
|
|
4812
|
-
|
|
4813
|
-
|
|
4814
|
-
after_each: caseLevel.after_each ?? suiteLevel.after_each
|
|
4836
|
+
...hasHooks && { hooks: mergedHooks },
|
|
4837
|
+
mode: caseLevel.mode ?? suiteLevel.mode,
|
|
4838
|
+
static_path: caseLevel.static_path ?? suiteLevel.static_path,
|
|
4839
|
+
pool: caseLevel.pool ?? suiteLevel.pool
|
|
4815
4840
|
};
|
|
4816
4841
|
}
|
|
4817
4842
|
function asString6(value) {
|
|
@@ -7047,6 +7072,7 @@ var CopilotCliProvider = class {
|
|
|
7047
7072
|
const agentProcess = (0, import_node_child_process2.spawn)(executable, args, {
|
|
7048
7073
|
stdio: ["pipe", "pipe", "inherit"]
|
|
7049
7074
|
});
|
|
7075
|
+
await waitForProcessSpawn(agentProcess, executable, this.targetName);
|
|
7050
7076
|
const toolCallsInProgress = /* @__PURE__ */ new Map();
|
|
7051
7077
|
const completedToolCalls = [];
|
|
7052
7078
|
let finalContent = "";
|
|
@@ -7326,6 +7352,47 @@ var CopilotCliProvider = class {
|
|
|
7326
7352
|
}
|
|
7327
7353
|
}
|
|
7328
7354
|
};
|
|
7355
|
+
async function waitForProcessSpawn(proc, executable, targetName) {
|
|
7356
|
+
if (proc.pid) {
|
|
7357
|
+
return;
|
|
7358
|
+
}
|
|
7359
|
+
await new Promise((resolve, reject) => {
|
|
7360
|
+
const onSpawn = () => {
|
|
7361
|
+
cleanup();
|
|
7362
|
+
resolve();
|
|
7363
|
+
};
|
|
7364
|
+
const onError = (error) => {
|
|
7365
|
+
cleanup();
|
|
7366
|
+
reject(new Error(formatCopilotSpawnError(error, executable, targetName)));
|
|
7367
|
+
};
|
|
7368
|
+
const cleanup = () => {
|
|
7369
|
+
proc.off("spawn", onSpawn);
|
|
7370
|
+
proc.off("error", onError);
|
|
7371
|
+
};
|
|
7372
|
+
proc.once("spawn", onSpawn);
|
|
7373
|
+
proc.once("error", onError);
|
|
7374
|
+
});
|
|
7375
|
+
}
|
|
7376
|
+
function formatCopilotSpawnError(error, executable, targetName) {
|
|
7377
|
+
const code = error.code;
|
|
7378
|
+
const base = `Failed to start Copilot CLI executable '${executable}' for target '${targetName}'. ${error.message}`;
|
|
7379
|
+
if (process.platform !== "win32") {
|
|
7380
|
+
return base;
|
|
7381
|
+
}
|
|
7382
|
+
if (code !== "ENOENT" && code !== "EINVAL") {
|
|
7383
|
+
return base;
|
|
7384
|
+
}
|
|
7385
|
+
return `${base}
|
|
7386
|
+
|
|
7387
|
+
On Windows, shell commands like 'copilot -h' can work via .ps1/.bat shims, but AgentV launches a subprocess that needs a directly spawnable executable path.
|
|
7388
|
+
|
|
7389
|
+
Fix options:
|
|
7390
|
+
1) Install native Copilot binary package:
|
|
7391
|
+
npm install -g @github/copilot-win32-x64
|
|
7392
|
+
2) Set explicit executable for Copilot targets:
|
|
7393
|
+
- In .env: COPILOT_EXE=C:\\Users\\<you>\\AppData\\Roaming\\npm\\node_modules\\@github\\copilot-win32-x64\\copilot.exe
|
|
7394
|
+
- In .agentv/targets.yaml: executable: \${{ COPILOT_EXE }}`;
|
|
7395
|
+
}
|
|
7329
7396
|
function summarizeAcpEvent(eventType, data) {
|
|
7330
7397
|
if (!data || typeof data !== "object") {
|
|
7331
7398
|
return eventType;
|
|
@@ -10077,9 +10144,6 @@ function getAgentvHome() {
|
|
|
10077
10144
|
function getWorkspacesRoot() {
|
|
10078
10145
|
return import_node_path23.default.join(getAgentvHome(), "workspaces");
|
|
10079
10146
|
}
|
|
10080
|
-
function getGitCacheRoot() {
|
|
10081
|
-
return import_node_path23.default.join(getAgentvHome(), "git-cache");
|
|
10082
|
-
}
|
|
10083
10147
|
function getSubagentsRoot() {
|
|
10084
10148
|
return import_node_path23.default.join(getAgentvHome(), "subagents");
|
|
10085
10149
|
}
|
|
@@ -11539,16 +11603,16 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
11539
11603
|
});
|
|
11540
11604
|
}
|
|
11541
11605
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
11542
|
-
const { mkdir:
|
|
11606
|
+
const { mkdir: mkdir16, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
|
|
11543
11607
|
const { tmpdir: tmpdir3 } = await import("os");
|
|
11544
11608
|
const path44 = await import("path");
|
|
11545
11609
|
const { randomUUID: randomUUID8 } = await import("crypto");
|
|
11546
11610
|
const dir = path44.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
|
|
11547
|
-
await
|
|
11611
|
+
await mkdir16(dir, { recursive: true });
|
|
11548
11612
|
const stdinPath = path44.join(dir, "stdin.txt");
|
|
11549
11613
|
const stdoutPath = path44.join(dir, "stdout.txt");
|
|
11550
11614
|
const stderrPath = path44.join(dir, "stderr.txt");
|
|
11551
|
-
await
|
|
11615
|
+
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
11552
11616
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
11553
11617
|
const { spawn: spawn4 } = await import("child_process");
|
|
11554
11618
|
try {
|
|
@@ -11581,7 +11645,7 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
11581
11645
|
const stderr = (await readFile14(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
11582
11646
|
return { stdout, stderr, exitCode };
|
|
11583
11647
|
} finally {
|
|
11584
|
-
await
|
|
11648
|
+
await rm6(dir, { recursive: true, force: true });
|
|
11585
11649
|
}
|
|
11586
11650
|
}
|
|
11587
11651
|
|
|
@@ -14798,8 +14862,8 @@ function runEqualsAssertion(output, value) {
|
|
|
14798
14862
|
}
|
|
14799
14863
|
|
|
14800
14864
|
// src/evaluation/orchestrator.ts
|
|
14801
|
-
var
|
|
14802
|
-
var
|
|
14865
|
+
var import_node_crypto9 = require("crypto");
|
|
14866
|
+
var import_promises29 = require("fs/promises");
|
|
14803
14867
|
var import_node_path42 = __toESM(require("path"), 1);
|
|
14804
14868
|
var import_micromatch4 = __toESM(require("micromatch"), 1);
|
|
14805
14869
|
|
|
@@ -15762,7 +15826,7 @@ var WorkspacePoolManager = class {
|
|
|
15762
15826
|
* 7. Return the slot (with path, index, isExisting)
|
|
15763
15827
|
*/
|
|
15764
15828
|
async acquireWorkspace(options) {
|
|
15765
|
-
const { templatePath, repos, maxSlots, repoManager } = options;
|
|
15829
|
+
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
15766
15830
|
const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
|
|
15767
15831
|
const poolDir = import_node_path39.default.join(this.poolRoot, fingerprint);
|
|
15768
15832
|
await (0, import_promises27.mkdir)(poolDir, { recursive: true });
|
|
@@ -15782,7 +15846,7 @@ var WorkspacePoolManager = class {
|
|
|
15782
15846
|
}
|
|
15783
15847
|
const slotExists = (0, import_node_fs11.existsSync)(slotPath);
|
|
15784
15848
|
if (slotExists) {
|
|
15785
|
-
await this.resetSlot(slotPath, templatePath, repos);
|
|
15849
|
+
await this.resetSlot(slotPath, templatePath, repos, poolReset);
|
|
15786
15850
|
return {
|
|
15787
15851
|
index: i,
|
|
15788
15852
|
path: slotPath,
|
|
@@ -15914,15 +15978,19 @@ var WorkspacePoolManager = class {
|
|
|
15914
15978
|
* 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
|
|
15915
15979
|
* 2. Re-copy template files (skip repo directories)
|
|
15916
15980
|
*/
|
|
15917
|
-
async resetSlot(slotPath, templatePath, repos) {
|
|
15981
|
+
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
15918
15982
|
for (const repo of repos) {
|
|
15919
15983
|
const repoDir = import_node_path39.default.join(slotPath, repo.path);
|
|
15920
15984
|
if (!(0, import_node_fs11.existsSync)(repoDir)) {
|
|
15921
15985
|
continue;
|
|
15922
15986
|
}
|
|
15987
|
+
if (poolReset === "none") {
|
|
15988
|
+
continue;
|
|
15989
|
+
}
|
|
15923
15990
|
const ref = repo.checkout?.ref ?? "HEAD";
|
|
15924
15991
|
await git(["reset", "--hard", ref], { cwd: repoDir });
|
|
15925
|
-
|
|
15992
|
+
const cleanFlag = poolReset === "strict" ? "-fdx" : "-fd";
|
|
15993
|
+
await git(["clean", cleanFlag], { cwd: repoDir });
|
|
15926
15994
|
}
|
|
15927
15995
|
if (templatePath) {
|
|
15928
15996
|
const repoDirNames = new Set(
|
|
@@ -15938,14 +16006,10 @@ var WorkspacePoolManager = class {
|
|
|
15938
16006
|
|
|
15939
16007
|
// src/evaluation/workspace/repo-manager.ts
|
|
15940
16008
|
var import_node_child_process8 = require("child_process");
|
|
15941
|
-
var import_node_crypto9 = require("crypto");
|
|
15942
|
-
var import_node_fs12 = require("fs");
|
|
15943
|
-
var import_promises28 = require("fs/promises");
|
|
15944
16009
|
var import_node_path40 = __toESM(require("path"), 1);
|
|
15945
16010
|
var import_node_util6 = require("util");
|
|
15946
16011
|
var execFileAsync2 = (0, import_node_util6.promisify)(import_node_child_process8.execFile);
|
|
15947
16012
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
15948
|
-
var LOCK_TIMEOUT_MS = 6e4;
|
|
15949
16013
|
function gitEnv2() {
|
|
15950
16014
|
const env = { ...process.env };
|
|
15951
16015
|
for (const key of Object.keys(env)) {
|
|
@@ -15960,10 +16024,6 @@ function gitEnv2() {
|
|
|
15960
16024
|
GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
|
|
15961
16025
|
};
|
|
15962
16026
|
}
|
|
15963
|
-
function cacheKey(source) {
|
|
15964
|
-
const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
|
|
15965
|
-
return (0, import_node_crypto9.createHash)("sha256").update(raw).digest("hex");
|
|
15966
|
-
}
|
|
15967
16027
|
function getSourceUrl(source) {
|
|
15968
16028
|
return source.type === "git" ? source.url : source.path;
|
|
15969
16029
|
}
|
|
@@ -15977,33 +16037,9 @@ async function git2(args, opts) {
|
|
|
15977
16037
|
});
|
|
15978
16038
|
return stdout.trim();
|
|
15979
16039
|
}
|
|
15980
|
-
async function acquireLock(lockPath) {
|
|
15981
|
-
const start = Date.now();
|
|
15982
|
-
while (Date.now() - start < LOCK_TIMEOUT_MS) {
|
|
15983
|
-
try {
|
|
15984
|
-
await (0, import_promises28.writeFile)(lockPath, String(process.pid), { flag: "wx" });
|
|
15985
|
-
return;
|
|
15986
|
-
} catch (err) {
|
|
15987
|
-
if (err.code === "EEXIST") {
|
|
15988
|
-
await new Promise((r) => setTimeout(r, 200));
|
|
15989
|
-
continue;
|
|
15990
|
-
}
|
|
15991
|
-
throw err;
|
|
15992
|
-
}
|
|
15993
|
-
}
|
|
15994
|
-
throw new Error(`Timed out waiting for lock: ${lockPath}`);
|
|
15995
|
-
}
|
|
15996
|
-
async function releaseLock(lockPath) {
|
|
15997
|
-
try {
|
|
15998
|
-
await (0, import_promises28.unlink)(lockPath);
|
|
15999
|
-
} catch {
|
|
16000
|
-
}
|
|
16001
|
-
}
|
|
16002
16040
|
var RepoManager = class {
|
|
16003
|
-
cacheDir;
|
|
16004
16041
|
verbose;
|
|
16005
|
-
constructor(
|
|
16006
|
-
this.cacheDir = cacheDir ?? getGitCacheRoot();
|
|
16042
|
+
constructor(verbose = false) {
|
|
16007
16043
|
this.verbose = verbose;
|
|
16008
16044
|
}
|
|
16009
16045
|
async runGit(args, opts) {
|
|
@@ -16028,86 +16064,18 @@ var RepoManager = class {
|
|
|
16028
16064
|
}
|
|
16029
16065
|
}
|
|
16030
16066
|
/**
|
|
16031
|
-
*
|
|
16032
|
-
* Creates on first access, fetches updates on subsequent calls.
|
|
16033
|
-
* Returns the absolute path to the cache directory.
|
|
16034
|
-
*/
|
|
16035
|
-
async ensureCache(source, depth, resolve) {
|
|
16036
|
-
const key = cacheKey(source);
|
|
16037
|
-
const cachePath = import_node_path40.default.join(this.cacheDir, key);
|
|
16038
|
-
const lockPath = `${cachePath}.lock`;
|
|
16039
|
-
const cacheExists = (0, import_node_fs12.existsSync)(import_node_path40.default.join(cachePath, "HEAD"));
|
|
16040
|
-
if (this.verbose) {
|
|
16041
|
-
console.log(
|
|
16042
|
-
`[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve ?? "remote"} cache=${cachePath}`
|
|
16043
|
-
);
|
|
16044
|
-
}
|
|
16045
|
-
if (resolve === "local") {
|
|
16046
|
-
if (cacheExists) {
|
|
16047
|
-
if (this.verbose) {
|
|
16048
|
-
console.log(`[repo] using existing local cache ${cachePath}`);
|
|
16049
|
-
}
|
|
16050
|
-
return cachePath;
|
|
16051
|
-
}
|
|
16052
|
-
const url = getSourceUrl(source);
|
|
16053
|
-
throw new Error(
|
|
16054
|
-
`No cache found for \`${url}\`. Run \`agentv cache add --url ${url} --from <local-path>\` to seed it.`
|
|
16055
|
-
);
|
|
16056
|
-
}
|
|
16057
|
-
await (0, import_promises28.mkdir)(this.cacheDir, { recursive: true });
|
|
16058
|
-
const lockStartedAt = Date.now();
|
|
16059
|
-
await acquireLock(lockPath);
|
|
16060
|
-
if (this.verbose) {
|
|
16061
|
-
console.log(`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`);
|
|
16062
|
-
}
|
|
16063
|
-
try {
|
|
16064
|
-
if (cacheExists) {
|
|
16065
|
-
if (this.verbose) {
|
|
16066
|
-
console.log(`[repo] refreshing existing cache ${cachePath}`);
|
|
16067
|
-
}
|
|
16068
|
-
const fetchArgs = ["fetch", "--prune"];
|
|
16069
|
-
if (depth) {
|
|
16070
|
-
fetchArgs.push("--depth", String(depth));
|
|
16071
|
-
}
|
|
16072
|
-
await this.runGit(fetchArgs, { cwd: cachePath });
|
|
16073
|
-
} else {
|
|
16074
|
-
if (this.verbose) {
|
|
16075
|
-
console.log(`[repo] creating new cache ${cachePath}`);
|
|
16076
|
-
}
|
|
16077
|
-
const cloneArgs = ["clone", "--mirror", "--bare"];
|
|
16078
|
-
if (depth) {
|
|
16079
|
-
cloneArgs.push("--depth", String(depth));
|
|
16080
|
-
}
|
|
16081
|
-
const sourceUrl = getSourceUrl(source);
|
|
16082
|
-
const cloneUrl = depth && source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
|
|
16083
|
-
cloneArgs.push(cloneUrl, cachePath);
|
|
16084
|
-
await this.runGit(cloneArgs);
|
|
16085
|
-
}
|
|
16086
|
-
} finally {
|
|
16087
|
-
await releaseLock(lockPath);
|
|
16088
|
-
if (this.verbose) {
|
|
16089
|
-
console.log(`[repo] lock released path=${lockPath}`);
|
|
16090
|
-
}
|
|
16091
|
-
}
|
|
16092
|
-
return cachePath;
|
|
16093
|
-
}
|
|
16094
|
-
/**
|
|
16095
|
-
* Clone a repo from cache into the workspace at the configured path.
|
|
16067
|
+
* Clone a repo directly from source into the workspace at the configured path.
|
|
16096
16068
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
16097
16069
|
*/
|
|
16098
16070
|
async materialize(repo, workspacePath) {
|
|
16099
16071
|
const targetDir = import_node_path40.default.join(workspacePath, repo.path);
|
|
16072
|
+
const sourceUrl = getSourceUrl(repo.source);
|
|
16100
16073
|
const startedAt = Date.now();
|
|
16101
16074
|
if (this.verbose) {
|
|
16102
16075
|
console.log(
|
|
16103
|
-
`[repo] materialize start path=${repo.path} source=${
|
|
16076
|
+
`[repo] materialize start path=${repo.path} source=${sourceUrl} workspace=${workspacePath}`
|
|
16104
16077
|
);
|
|
16105
16078
|
}
|
|
16106
|
-
const cachePath = await this.ensureCache(
|
|
16107
|
-
repo.source,
|
|
16108
|
-
repo.clone?.depth,
|
|
16109
|
-
repo.checkout?.resolve
|
|
16110
|
-
);
|
|
16111
16079
|
const cloneArgs = ["clone"];
|
|
16112
16080
|
if (repo.clone?.depth) {
|
|
16113
16081
|
cloneArgs.push("--depth", String(repo.clone.depth));
|
|
@@ -16116,7 +16084,7 @@ var RepoManager = class {
|
|
|
16116
16084
|
cloneArgs.push("--filter", repo.clone.filter);
|
|
16117
16085
|
}
|
|
16118
16086
|
cloneArgs.push("--no-checkout");
|
|
16119
|
-
const cloneUrl = repo.clone?.depth || repo.clone?.filter ? `file://${
|
|
16087
|
+
const cloneUrl = (repo.clone?.depth || repo.clone?.filter) && repo.source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
|
|
16120
16088
|
cloneArgs.push(cloneUrl, targetDir);
|
|
16121
16089
|
await this.runGit(cloneArgs);
|
|
16122
16090
|
if (repo.clone?.sparse?.length) {
|
|
@@ -16188,63 +16156,25 @@ var RepoManager = class {
|
|
|
16188
16156
|
}
|
|
16189
16157
|
}
|
|
16190
16158
|
/** Reset repos in workspace to their checkout state. */
|
|
16191
|
-
async reset(repos, workspacePath,
|
|
16192
|
-
|
|
16193
|
-
for (const repo of repos) {
|
|
16194
|
-
const targetDir = import_node_path40.default.join(workspacePath, repo.path);
|
|
16195
|
-
await (0, import_promises28.rm)(targetDir, { recursive: true, force: true });
|
|
16196
|
-
}
|
|
16197
|
-
await this.materializeAll(repos, workspacePath);
|
|
16198
|
-
return;
|
|
16199
|
-
}
|
|
16159
|
+
async reset(repos, workspacePath, reset) {
|
|
16160
|
+
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
16200
16161
|
for (const repo of repos) {
|
|
16201
16162
|
const targetDir = import_node_path40.default.join(workspacePath, repo.path);
|
|
16202
16163
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
16203
|
-
await this.runGit(["clean",
|
|
16204
|
-
}
|
|
16205
|
-
}
|
|
16206
|
-
/**
|
|
16207
|
-
* Seed the cache from a local repository, setting the remote to a given URL.
|
|
16208
|
-
* Useful for avoiding slow network clones when a local clone already exists.
|
|
16209
|
-
*/
|
|
16210
|
-
async seedCache(localPath, remoteUrl, opts) {
|
|
16211
|
-
const source = { type: "git", url: remoteUrl };
|
|
16212
|
-
const key = cacheKey(source);
|
|
16213
|
-
const cachePath = import_node_path40.default.join(this.cacheDir, key);
|
|
16214
|
-
const lockPath = `${cachePath}.lock`;
|
|
16215
|
-
await (0, import_promises28.mkdir)(this.cacheDir, { recursive: true });
|
|
16216
|
-
await acquireLock(lockPath);
|
|
16217
|
-
try {
|
|
16218
|
-
if ((0, import_node_fs12.existsSync)(import_node_path40.default.join(cachePath, "HEAD"))) {
|
|
16219
|
-
if (!opts?.force) {
|
|
16220
|
-
throw new Error(
|
|
16221
|
-
`Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
|
|
16222
|
-
);
|
|
16223
|
-
}
|
|
16224
|
-
await (0, import_promises28.rm)(cachePath, { recursive: true, force: true });
|
|
16225
|
-
}
|
|
16226
|
-
await git2(["clone", "--mirror", "--bare", localPath, cachePath]);
|
|
16227
|
-
await git2(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
|
|
16228
|
-
} finally {
|
|
16229
|
-
await releaseLock(lockPath);
|
|
16164
|
+
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
16230
16165
|
}
|
|
16231
|
-
return cachePath;
|
|
16232
|
-
}
|
|
16233
|
-
/** Remove the entire cache directory. */
|
|
16234
|
-
async cleanCache() {
|
|
16235
|
-
await (0, import_promises28.rm)(this.cacheDir, { recursive: true, force: true });
|
|
16236
16166
|
}
|
|
16237
16167
|
};
|
|
16238
16168
|
|
|
16239
16169
|
// src/evaluation/workspace/resolve.ts
|
|
16240
|
-
var
|
|
16170
|
+
var import_promises28 = require("fs/promises");
|
|
16241
16171
|
var import_node_path41 = __toESM(require("path"), 1);
|
|
16242
16172
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
16243
16173
|
if (!templatePath) {
|
|
16244
16174
|
return void 0;
|
|
16245
16175
|
}
|
|
16246
16176
|
const resolved = import_node_path41.default.resolve(templatePath);
|
|
16247
|
-
const stats = await (0,
|
|
16177
|
+
const stats = await (0, import_promises28.stat)(resolved);
|
|
16248
16178
|
if (stats.isFile()) {
|
|
16249
16179
|
return {
|
|
16250
16180
|
dir: import_node_path41.default.dirname(resolved),
|
|
@@ -16254,7 +16184,7 @@ async function resolveWorkspaceTemplate(templatePath) {
|
|
|
16254
16184
|
if (!stats.isDirectory()) {
|
|
16255
16185
|
throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
|
|
16256
16186
|
}
|
|
16257
|
-
const entries = await (0,
|
|
16187
|
+
const entries = await (0, import_promises28.readdir)(resolved);
|
|
16258
16188
|
const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
|
|
16259
16189
|
if (workspaceFiles.length === 1) {
|
|
16260
16190
|
return {
|
|
@@ -16318,6 +16248,22 @@ function classifyQualityStatus(score) {
|
|
|
16318
16248
|
function usesFileReferencePrompt(provider) {
|
|
16319
16249
|
return isAgentProvider(provider) || provider.kind === "cli";
|
|
16320
16250
|
}
|
|
16251
|
+
function toScriptConfig(hook, hookName, context2) {
|
|
16252
|
+
const command = hook.command ?? hook.script;
|
|
16253
|
+
if (!command || command.length === 0) {
|
|
16254
|
+
throw new Error(`${hookName} hook in ${context2} requires command or script`);
|
|
16255
|
+
}
|
|
16256
|
+
return {
|
|
16257
|
+
command,
|
|
16258
|
+
...hook.timeout_ms !== void 0 && { timeout_ms: hook.timeout_ms },
|
|
16259
|
+
...hook.timeoutMs !== void 0 && { timeoutMs: hook.timeoutMs },
|
|
16260
|
+
...hook.cwd !== void 0 && { cwd: hook.cwd },
|
|
16261
|
+
...hook.script !== void 0 && { script: hook.script }
|
|
16262
|
+
};
|
|
16263
|
+
}
|
|
16264
|
+
function hasHookCommand(hook) {
|
|
16265
|
+
return !!(hook?.command && hook.command.length > 0 || hook?.script && hook.script.length > 0);
|
|
16266
|
+
}
|
|
16321
16267
|
function getWorkspaceTemplate(target) {
|
|
16322
16268
|
const config = target.config;
|
|
16323
16269
|
if ("workspaceTemplate" in config && typeof config.workspaceTemplate === "string") {
|
|
@@ -16351,7 +16297,12 @@ async function runEvaluation(options) {
|
|
|
16351
16297
|
failOnError,
|
|
16352
16298
|
poolWorkspaces,
|
|
16353
16299
|
poolMaxSlots: configPoolMaxSlots,
|
|
16354
|
-
workspace:
|
|
16300
|
+
workspace: legacyWorkspacePath,
|
|
16301
|
+
workspaceMode,
|
|
16302
|
+
workspacePath,
|
|
16303
|
+
workspaceClean,
|
|
16304
|
+
retainOnSuccess,
|
|
16305
|
+
retainOnFailure
|
|
16355
16306
|
} = options;
|
|
16356
16307
|
let useCache = options.useCache;
|
|
16357
16308
|
if (trials && trials.count > 1 && useCache) {
|
|
@@ -16360,7 +16311,7 @@ async function runEvaluation(options) {
|
|
|
16360
16311
|
);
|
|
16361
16312
|
useCache = false;
|
|
16362
16313
|
}
|
|
16363
|
-
const evalRunId = (0,
|
|
16314
|
+
const evalRunId = (0, import_node_crypto9.randomUUID)();
|
|
16364
16315
|
const evalCases = preloadedEvalCases ?? await loadTests(evalFilePath, repoRoot, { verbose, filter });
|
|
16365
16316
|
const filteredEvalCases = filterEvalCases(evalCases, filter);
|
|
16366
16317
|
if (filteredEvalCases.length === 0) {
|
|
@@ -16487,13 +16438,22 @@ async function runEvaluation(options) {
|
|
|
16487
16438
|
}
|
|
16488
16439
|
};
|
|
16489
16440
|
const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
|
|
16490
|
-
|
|
16441
|
+
const configuredMode = suiteWorkspace?.mode ?? workspaceMode;
|
|
16442
|
+
const configuredStaticPath = suiteWorkspace?.static_path ?? workspacePath ?? legacyWorkspacePath;
|
|
16443
|
+
const useStaticWorkspace = configuredMode === "static" || !!configuredStaticPath && !configuredMode;
|
|
16444
|
+
if (useStaticWorkspace && isPerTestIsolation) {
|
|
16491
16445
|
throw new Error(
|
|
16492
|
-
"
|
|
16446
|
+
"static workspace mode is incompatible with isolation: per_test. Use isolation: shared (default)."
|
|
16493
16447
|
);
|
|
16494
16448
|
}
|
|
16495
|
-
|
|
16496
|
-
|
|
16449
|
+
if (configuredMode === "static" && !configuredStaticPath) {
|
|
16450
|
+
throw new Error("workspace.mode=static requires workspace.static_path or --workspace-path");
|
|
16451
|
+
}
|
|
16452
|
+
const hasSharedWorkspace = !!(useStaticWorkspace || workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation);
|
|
16453
|
+
const poolEnabled = configuredMode === "pooled" ? true : configuredMode === "ephemeral" || useStaticWorkspace ? false : suiteWorkspace?.pool ?? poolWorkspaces ?? true;
|
|
16454
|
+
const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
|
|
16455
|
+
const resolvedRetainOnSuccess = retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
|
|
16456
|
+
const resolvedRetainOnFailure = retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
|
|
16497
16457
|
const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
|
|
16498
16458
|
const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
|
|
16499
16459
|
setupLog(
|
|
@@ -16514,20 +16474,21 @@ async function runEvaluation(options) {
|
|
|
16514
16474
|
const availablePoolSlots = [];
|
|
16515
16475
|
const poolSlotBaselines = /* @__PURE__ */ new Map();
|
|
16516
16476
|
const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
|
|
16517
|
-
if (
|
|
16518
|
-
sharedWorkspacePath =
|
|
16519
|
-
setupLog(`using
|
|
16477
|
+
if (useStaticWorkspace && configuredStaticPath) {
|
|
16478
|
+
sharedWorkspacePath = configuredStaticPath;
|
|
16479
|
+
setupLog(`using static workspace: ${configuredStaticPath}`);
|
|
16520
16480
|
} else if (usePool && suiteWorkspace?.repos) {
|
|
16521
16481
|
const slotsNeeded = workers;
|
|
16522
16482
|
setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
|
|
16523
16483
|
poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
|
|
16524
|
-
const poolRepoManager = new RepoManager(
|
|
16484
|
+
const poolRepoManager = new RepoManager(verbose);
|
|
16525
16485
|
for (let i = 0; i < slotsNeeded; i++) {
|
|
16526
16486
|
const slot = await poolManager.acquireWorkspace({
|
|
16527
16487
|
templatePath: workspaceTemplate,
|
|
16528
16488
|
repos: suiteWorkspace.repos,
|
|
16529
16489
|
maxSlots: poolMaxSlots,
|
|
16530
|
-
repoManager: poolRepoManager
|
|
16490
|
+
repoManager: poolRepoManager,
|
|
16491
|
+
poolReset: (workspaceClean === "full" ? "strict" : workspaceClean === "standard" ? "fast" : null) ?? "fast"
|
|
16531
16492
|
});
|
|
16532
16493
|
poolSlots.push(slot);
|
|
16533
16494
|
setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
|
|
@@ -16547,21 +16508,21 @@ async function runEvaluation(options) {
|
|
|
16547
16508
|
const message = error instanceof Error ? error.message : String(error);
|
|
16548
16509
|
throw new Error(`Failed to create shared workspace: ${message}`);
|
|
16549
16510
|
}
|
|
16550
|
-
} else if (suiteWorkspace?.
|
|
16511
|
+
} else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
|
|
16551
16512
|
sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
|
|
16552
|
-
await (0,
|
|
16513
|
+
await (0, import_promises29.mkdir)(sharedWorkspacePath, { recursive: true });
|
|
16553
16514
|
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
16554
16515
|
}
|
|
16555
16516
|
try {
|
|
16556
16517
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
16557
16518
|
const copiedWorkspaceFile = import_node_path42.default.join(sharedWorkspacePath, import_node_path42.default.basename(suiteWorkspaceFile));
|
|
16558
16519
|
try {
|
|
16559
|
-
await (0,
|
|
16520
|
+
await (0, import_promises29.stat)(copiedWorkspaceFile);
|
|
16560
16521
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
16561
16522
|
} catch {
|
|
16562
16523
|
}
|
|
16563
16524
|
}
|
|
16564
|
-
const repoManager = suiteWorkspace?.repos?.length && !usePool && !
|
|
16525
|
+
const repoManager = suiteWorkspace?.repos?.length && !usePool && !useStaticWorkspace ? new RepoManager(verbose) : void 0;
|
|
16565
16526
|
if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
|
|
16566
16527
|
setupLog(
|
|
16567
16528
|
`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
|
|
@@ -16571,17 +16532,19 @@ async function runEvaluation(options) {
|
|
|
16571
16532
|
setupLog("shared repo materialization complete");
|
|
16572
16533
|
} catch (error) {
|
|
16573
16534
|
const message = error instanceof Error ? error.message : String(error);
|
|
16574
|
-
if (sharedWorkspacePath && !
|
|
16535
|
+
if (sharedWorkspacePath && !useStaticWorkspace) {
|
|
16575
16536
|
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
16576
16537
|
});
|
|
16577
16538
|
}
|
|
16578
16539
|
throw new Error(`Failed to materialize repos: ${message}`);
|
|
16579
16540
|
}
|
|
16580
16541
|
}
|
|
16581
|
-
|
|
16582
|
-
|
|
16542
|
+
const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all;
|
|
16543
|
+
if (sharedWorkspacePath && hasHookCommand(suiteBeforeAllHook)) {
|
|
16544
|
+
const beforeAllHook = suiteBeforeAllHook;
|
|
16545
|
+
const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
|
|
16583
16546
|
setupLog(
|
|
16584
|
-
`running shared before_all in cwd=${
|
|
16547
|
+
`running shared before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
16585
16548
|
);
|
|
16586
16549
|
const scriptContext = {
|
|
16587
16550
|
workspacePath: sharedWorkspacePath,
|
|
@@ -16590,18 +16553,22 @@ async function runEvaluation(options) {
|
|
|
16590
16553
|
evalDir
|
|
16591
16554
|
};
|
|
16592
16555
|
try {
|
|
16593
|
-
beforeAllOutput = await executeWorkspaceScript(
|
|
16556
|
+
beforeAllOutput = await executeWorkspaceScript(
|
|
16557
|
+
toScriptConfig(beforeAllHook, "before_all", "suite workspace"),
|
|
16558
|
+
scriptContext
|
|
16559
|
+
);
|
|
16594
16560
|
setupLog("shared before_all completed");
|
|
16595
16561
|
} catch (error) {
|
|
16596
16562
|
const message = error instanceof Error ? error.message : String(error);
|
|
16597
|
-
if (sharedWorkspacePath && !
|
|
16563
|
+
if (sharedWorkspacePath && !useStaticWorkspace) {
|
|
16598
16564
|
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
16599
16565
|
});
|
|
16600
16566
|
}
|
|
16601
16567
|
throw new Error(`before_all script failed: ${message}`);
|
|
16602
16568
|
}
|
|
16603
16569
|
}
|
|
16604
|
-
if (availablePoolSlots.length > 0 &&
|
|
16570
|
+
if (availablePoolSlots.length > 0 && hasHookCommand(suiteBeforeAllHook)) {
|
|
16571
|
+
const beforeAllHook = suiteBeforeAllHook;
|
|
16605
16572
|
for (const slot of availablePoolSlots) {
|
|
16606
16573
|
setupLog(`running before_all on pool slot ${slot.index}`);
|
|
16607
16574
|
const scriptContext = {
|
|
@@ -16611,7 +16578,10 @@ async function runEvaluation(options) {
|
|
|
16611
16578
|
evalDir
|
|
16612
16579
|
};
|
|
16613
16580
|
try {
|
|
16614
|
-
const output = await executeWorkspaceScript(
|
|
16581
|
+
const output = await executeWorkspaceScript(
|
|
16582
|
+
toScriptConfig(beforeAllHook, "before_all", "suite workspace"),
|
|
16583
|
+
scriptContext
|
|
16584
|
+
);
|
|
16615
16585
|
if (!beforeAllOutput) beforeAllOutput = output;
|
|
16616
16586
|
setupLog(`before_all completed on pool slot ${slot.index}`);
|
|
16617
16587
|
} catch (error) {
|
|
@@ -16743,6 +16713,8 @@ async function runEvaluation(options) {
|
|
|
16743
16713
|
evalRunId,
|
|
16744
16714
|
keepWorkspaces,
|
|
16745
16715
|
cleanupWorkspaces,
|
|
16716
|
+
retainOnSuccess: resolvedRetainOnSuccess,
|
|
16717
|
+
retainOnFailure: resolvedRetainOnFailure,
|
|
16746
16718
|
sharedWorkspacePath: testWorkspacePath,
|
|
16747
16719
|
sharedBaselineCommit: testBaselineCommit,
|
|
16748
16720
|
suiteWorkspaceFile,
|
|
@@ -16836,7 +16808,9 @@ async function runEvaluation(options) {
|
|
|
16836
16808
|
}
|
|
16837
16809
|
}
|
|
16838
16810
|
const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
|
|
16839
|
-
|
|
16811
|
+
const suiteAfterAllHook = suiteWorkspace?.hooks?.after_all;
|
|
16812
|
+
if (afterAllWorkspaces.length > 0 && hasHookCommand(suiteAfterAllHook)) {
|
|
16813
|
+
const afterAllHook = suiteAfterAllHook;
|
|
16840
16814
|
for (const wsPath of afterAllWorkspaces) {
|
|
16841
16815
|
const scriptContext = {
|
|
16842
16816
|
workspacePath: wsPath,
|
|
@@ -16846,7 +16820,7 @@ async function runEvaluation(options) {
|
|
|
16846
16820
|
};
|
|
16847
16821
|
try {
|
|
16848
16822
|
const afterAllOutput = await executeWorkspaceScript(
|
|
16849
|
-
|
|
16823
|
+
toScriptConfig(afterAllHook, "after_all", "suite workspace"),
|
|
16850
16824
|
scriptContext,
|
|
16851
16825
|
"warn"
|
|
16852
16826
|
);
|
|
@@ -16857,12 +16831,14 @@ async function runEvaluation(options) {
|
|
|
16857
16831
|
}
|
|
16858
16832
|
}
|
|
16859
16833
|
}
|
|
16860
|
-
if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !
|
|
16834
|
+
if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !useStaticWorkspace) {
|
|
16861
16835
|
const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
|
|
16862
|
-
if (
|
|
16863
|
-
|
|
16864
|
-
|
|
16865
|
-
|
|
16836
|
+
if (hasFailure) {
|
|
16837
|
+
if (resolvedRetainOnFailure === "cleanup") {
|
|
16838
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
16839
|
+
});
|
|
16840
|
+
}
|
|
16841
|
+
} else if (resolvedRetainOnSuccess === "cleanup") {
|
|
16866
16842
|
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
16867
16843
|
});
|
|
16868
16844
|
}
|
|
@@ -17056,6 +17032,8 @@ async function runEvalCase(options) {
|
|
|
17056
17032
|
evalRunId,
|
|
17057
17033
|
keepWorkspaces,
|
|
17058
17034
|
cleanupWorkspaces: forceCleanup,
|
|
17035
|
+
retainOnSuccess,
|
|
17036
|
+
retainOnFailure,
|
|
17059
17037
|
sharedWorkspacePath,
|
|
17060
17038
|
sharedBaselineCommit,
|
|
17061
17039
|
suiteWorkspaceFile,
|
|
@@ -17067,10 +17045,10 @@ async function runEvalCase(options) {
|
|
|
17067
17045
|
const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
|
|
17068
17046
|
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
17069
17047
|
const typeRegistry = providedTypeRegistry ?? createBuiltinRegistry();
|
|
17070
|
-
const
|
|
17048
|
+
const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
|
|
17071
17049
|
let cachedResponse;
|
|
17072
|
-
if (
|
|
17073
|
-
cachedResponse = await cache.get(
|
|
17050
|
+
if (cacheKey && cache) {
|
|
17051
|
+
cachedResponse = await cache.get(cacheKey);
|
|
17074
17052
|
}
|
|
17075
17053
|
const nowFn = now ?? (() => /* @__PURE__ */ new Date());
|
|
17076
17054
|
let workspacePath = sharedWorkspacePath;
|
|
@@ -17103,18 +17081,18 @@ async function runEvalCase(options) {
|
|
|
17103
17081
|
if (caseWorkspaceFile && workspacePath) {
|
|
17104
17082
|
const copiedFile = import_node_path42.default.join(workspacePath, import_node_path42.default.basename(caseWorkspaceFile));
|
|
17105
17083
|
try {
|
|
17106
|
-
await (0,
|
|
17084
|
+
await (0, import_promises29.stat)(copiedFile);
|
|
17107
17085
|
caseWorkspaceFile = copiedFile;
|
|
17108
17086
|
} catch {
|
|
17109
17087
|
}
|
|
17110
17088
|
}
|
|
17111
17089
|
}
|
|
17112
|
-
if (!workspacePath && (evalCase.workspace?.
|
|
17090
|
+
if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
|
|
17113
17091
|
workspacePath = getWorkspacePath(evalRunId, evalCase.id);
|
|
17114
|
-
await (0,
|
|
17092
|
+
await (0, import_promises29.mkdir)(workspacePath, { recursive: true });
|
|
17115
17093
|
}
|
|
17116
17094
|
if (evalCase.workspace?.repos?.length && workspacePath) {
|
|
17117
|
-
const perCaseRepoManager = new RepoManager(
|
|
17095
|
+
const perCaseRepoManager = new RepoManager(setupDebug);
|
|
17118
17096
|
try {
|
|
17119
17097
|
if (setupDebug) {
|
|
17120
17098
|
console.log(
|
|
@@ -17139,11 +17117,13 @@ async function runEvalCase(options) {
|
|
|
17139
17117
|
);
|
|
17140
17118
|
}
|
|
17141
17119
|
}
|
|
17142
|
-
|
|
17143
|
-
|
|
17120
|
+
const caseBeforeAllHook = evalCase.workspace?.hooks?.before_all;
|
|
17121
|
+
if (workspacePath && hasHookCommand(caseBeforeAllHook)) {
|
|
17122
|
+
const beforeAllHook = caseBeforeAllHook;
|
|
17123
|
+
const beforeAllCommand = (beforeAllHook.command ?? beforeAllHook.script ?? []).join(" ");
|
|
17144
17124
|
if (setupDebug) {
|
|
17145
17125
|
console.log(
|
|
17146
|
-
`[setup] test=${evalCase.id} running before_all in cwd=${
|
|
17126
|
+
`[setup] test=${evalCase.id} running before_all in cwd=${beforeAllHook.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
17147
17127
|
);
|
|
17148
17128
|
}
|
|
17149
17129
|
const scriptContext = {
|
|
@@ -17156,7 +17136,7 @@ async function runEvalCase(options) {
|
|
|
17156
17136
|
};
|
|
17157
17137
|
try {
|
|
17158
17138
|
beforeAllOutput = await executeWorkspaceScript(
|
|
17159
|
-
evalCase.
|
|
17139
|
+
toScriptConfig(beforeAllHook, "before_all", `test '${evalCase.id}'`),
|
|
17160
17140
|
scriptContext
|
|
17161
17141
|
);
|
|
17162
17142
|
if (setupDebug) {
|
|
@@ -17181,7 +17161,9 @@ async function runEvalCase(options) {
|
|
|
17181
17161
|
}
|
|
17182
17162
|
}
|
|
17183
17163
|
}
|
|
17184
|
-
|
|
17164
|
+
const caseBeforeEachHook = evalCase.workspace?.hooks?.before_each;
|
|
17165
|
+
if (workspacePath && hasHookCommand(caseBeforeEachHook)) {
|
|
17166
|
+
const beforeEachHook = caseBeforeEachHook;
|
|
17185
17167
|
const scriptContext = {
|
|
17186
17168
|
workspacePath,
|
|
17187
17169
|
testId: evalCase.id,
|
|
@@ -17192,7 +17174,7 @@ async function runEvalCase(options) {
|
|
|
17192
17174
|
};
|
|
17193
17175
|
try {
|
|
17194
17176
|
beforeEachOutput = await executeWorkspaceScript(
|
|
17195
|
-
evalCase.
|
|
17177
|
+
toScriptConfig(beforeEachHook, "before_each", `test '${evalCase.id}'`),
|
|
17196
17178
|
scriptContext
|
|
17197
17179
|
);
|
|
17198
17180
|
} catch (error) {
|
|
@@ -17280,8 +17262,8 @@ async function runEvalCase(options) {
|
|
|
17280
17262
|
}
|
|
17281
17263
|
return errorResult;
|
|
17282
17264
|
}
|
|
17283
|
-
if (
|
|
17284
|
-
await cache.set(
|
|
17265
|
+
if (cacheKey && cache && !cachedResponse) {
|
|
17266
|
+
await cache.set(cacheKey, providerResponse);
|
|
17285
17267
|
}
|
|
17286
17268
|
const output = providerResponse.output;
|
|
17287
17269
|
const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
|
|
@@ -17309,17 +17291,19 @@ async function runEvalCase(options) {
|
|
|
17309
17291
|
}
|
|
17310
17292
|
}
|
|
17311
17293
|
const providerError = extractProviderError(providerResponse);
|
|
17312
|
-
if (repoManager && workspacePath && evalCase.workspace?.
|
|
17294
|
+
if (repoManager && workspacePath && evalCase.workspace?.hooks?.after_each?.reset && evalCase.workspace.hooks.after_each.reset !== "none" && evalCase.workspace.repos) {
|
|
17313
17295
|
try {
|
|
17314
17296
|
await repoManager.reset(
|
|
17315
17297
|
evalCase.workspace.repos,
|
|
17316
17298
|
workspacePath,
|
|
17317
|
-
evalCase.workspace.reset
|
|
17299
|
+
evalCase.workspace.hooks.after_each.reset
|
|
17318
17300
|
);
|
|
17319
17301
|
} catch {
|
|
17320
17302
|
}
|
|
17321
17303
|
}
|
|
17322
|
-
|
|
17304
|
+
const caseAfterEachHook = evalCase.workspace?.hooks?.after_each;
|
|
17305
|
+
if (workspacePath && hasHookCommand(caseAfterEachHook)) {
|
|
17306
|
+
const afterEachHook = caseAfterEachHook;
|
|
17323
17307
|
const scriptContext = {
|
|
17324
17308
|
workspacePath,
|
|
17325
17309
|
testId: evalCase.id,
|
|
@@ -17330,7 +17314,7 @@ async function runEvalCase(options) {
|
|
|
17330
17314
|
};
|
|
17331
17315
|
try {
|
|
17332
17316
|
afterEachOutput = await executeWorkspaceScript(
|
|
17333
|
-
evalCase.
|
|
17317
|
+
toScriptConfig(afterEachHook, "after_each", `test '${evalCase.id}'`),
|
|
17334
17318
|
scriptContext,
|
|
17335
17319
|
"warn"
|
|
17336
17320
|
);
|
|
@@ -17380,8 +17364,13 @@ async function runEvalCase(options) {
|
|
|
17380
17364
|
await cleanupWorkspace(workspacePath).catch(() => {
|
|
17381
17365
|
});
|
|
17382
17366
|
} else if (isFailure) {
|
|
17383
|
-
|
|
17384
|
-
|
|
17367
|
+
if ((retainOnFailure ?? "keep") === "cleanup") {
|
|
17368
|
+
await cleanupWorkspace(workspacePath).catch(() => {
|
|
17369
|
+
});
|
|
17370
|
+
} else {
|
|
17371
|
+
return { ...finalResult, workspacePath };
|
|
17372
|
+
}
|
|
17373
|
+
} else if ((retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup")) !== "keep") {
|
|
17385
17374
|
await cleanupWorkspace(workspacePath).catch(() => {
|
|
17386
17375
|
});
|
|
17387
17376
|
}
|
|
@@ -17399,11 +17388,12 @@ async function runEvalCase(options) {
|
|
|
17399
17388
|
"evaluator_error"
|
|
17400
17389
|
);
|
|
17401
17390
|
if (workspacePath && !isSharedWorkspace) {
|
|
17402
|
-
if (forceCleanup) {
|
|
17391
|
+
if (forceCleanup || (retainOnFailure ?? "keep") === "cleanup") {
|
|
17403
17392
|
await cleanupWorkspace(workspacePath).catch(() => {
|
|
17404
17393
|
});
|
|
17394
|
+
} else {
|
|
17395
|
+
return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
|
|
17405
17396
|
}
|
|
17406
|
-
return { ...errorResult, workspacePath, beforeEachOutput, afterEachOutput };
|
|
17407
17397
|
}
|
|
17408
17398
|
return { ...errorResult, beforeEachOutput, afterEachOutput };
|
|
17409
17399
|
}
|
|
@@ -17422,7 +17412,9 @@ async function runEvalCaseWithTrials(options, trialsConfig) {
|
|
|
17422
17412
|
useCache: false,
|
|
17423
17413
|
// Force cleanup for intermediate trials
|
|
17424
17414
|
cleanupWorkspaces: isLastDeclaredTrial ? options.cleanupWorkspaces : true,
|
|
17425
|
-
keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false
|
|
17415
|
+
keepWorkspaces: isLastDeclaredTrial ? options.keepWorkspaces : false,
|
|
17416
|
+
retainOnSuccess: isLastDeclaredTrial ? options.retainOnSuccess : "cleanup",
|
|
17417
|
+
retainOnFailure: isLastDeclaredTrial ? options.retainOnFailure : "cleanup"
|
|
17426
17418
|
};
|
|
17427
17419
|
const result = await runEvalCase(trialOptions);
|
|
17428
17420
|
allResults.push(result);
|
|
@@ -17945,7 +17937,7 @@ function extractProviderError(response) {
|
|
|
17945
17937
|
return trimmed.length > 0 ? trimmed : void 0;
|
|
17946
17938
|
}
|
|
17947
17939
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
17948
|
-
const hash = (0,
|
|
17940
|
+
const hash = (0, import_node_crypto9.createHash)("sha256");
|
|
17949
17941
|
hash.update(provider.id);
|
|
17950
17942
|
hash.update(target.name);
|
|
17951
17943
|
hash.update(evalCase.id);
|
|
@@ -18013,7 +18005,7 @@ function computeWeightedMean(entries) {
|
|
|
18013
18005
|
}
|
|
18014
18006
|
|
|
18015
18007
|
// src/evaluation/evaluate.ts
|
|
18016
|
-
var
|
|
18008
|
+
var import_node_fs12 = require("fs");
|
|
18017
18009
|
var import_node_path43 = __toESM(require("path"), 1);
|
|
18018
18010
|
async function evaluate(config) {
|
|
18019
18011
|
const startTime = Date.now();
|
|
@@ -18132,7 +18124,7 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
18132
18124
|
for (const dir of chain) {
|
|
18133
18125
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
18134
18126
|
const targetsPath = import_node_path43.default.join(dir, candidate);
|
|
18135
|
-
if (!(0,
|
|
18127
|
+
if (!(0, import_node_fs12.existsSync)(targetsPath)) continue;
|
|
18136
18128
|
try {
|
|
18137
18129
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
18138
18130
|
const defaultTarget = definitions.find((d) => d.name === "default");
|
|
@@ -18150,7 +18142,7 @@ async function loadEnvHierarchy(repoRoot) {
|
|
|
18150
18142
|
const envFiles = [];
|
|
18151
18143
|
for (const dir of chain) {
|
|
18152
18144
|
const envPath = import_node_path43.default.join(dir, ".env");
|
|
18153
|
-
if ((0,
|
|
18145
|
+
if ((0, import_node_fs12.existsSync)(envPath)) envFiles.push(envPath);
|
|
18154
18146
|
}
|
|
18155
18147
|
for (let i = envFiles.length - 1; i >= 0; i--) {
|
|
18156
18148
|
try {
|
|
@@ -18228,12 +18220,12 @@ var CONFIG_FILE_NAMES = [
|
|
|
18228
18220
|
".agentv/config.js"
|
|
18229
18221
|
];
|
|
18230
18222
|
async function loadTsConfig(projectRoot) {
|
|
18231
|
-
const { existsSync:
|
|
18223
|
+
const { existsSync: existsSync4 } = await import("fs");
|
|
18232
18224
|
const { pathToFileURL } = await import("url");
|
|
18233
18225
|
const { join: join2 } = await import("path");
|
|
18234
18226
|
for (const fileName of CONFIG_FILE_NAMES) {
|
|
18235
18227
|
const filePath = join2(projectRoot, fileName);
|
|
18236
|
-
if (!
|
|
18228
|
+
if (!existsSync4(filePath)) {
|
|
18237
18229
|
continue;
|
|
18238
18230
|
}
|
|
18239
18231
|
try {
|
|
@@ -18330,7 +18322,7 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
18330
18322
|
}
|
|
18331
18323
|
|
|
18332
18324
|
// src/evaluation/cache/response-cache.ts
|
|
18333
|
-
var
|
|
18325
|
+
var import_promises30 = require("fs/promises");
|
|
18334
18326
|
var import_node_path44 = __toESM(require("path"), 1);
|
|
18335
18327
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
18336
18328
|
var ResponseCache = class {
|
|
@@ -18341,7 +18333,7 @@ var ResponseCache = class {
|
|
|
18341
18333
|
async get(key) {
|
|
18342
18334
|
const filePath = this.keyToPath(key);
|
|
18343
18335
|
try {
|
|
18344
|
-
const data = await (0,
|
|
18336
|
+
const data = await (0, import_promises30.readFile)(filePath, "utf8");
|
|
18345
18337
|
return JSON.parse(data);
|
|
18346
18338
|
} catch {
|
|
18347
18339
|
return void 0;
|
|
@@ -18350,8 +18342,8 @@ var ResponseCache = class {
|
|
|
18350
18342
|
async set(key, value) {
|
|
18351
18343
|
const filePath = this.keyToPath(key);
|
|
18352
18344
|
const dir = import_node_path44.default.dirname(filePath);
|
|
18353
|
-
await (0,
|
|
18354
|
-
await (0,
|
|
18345
|
+
await (0, import_promises30.mkdir)(dir, { recursive: true });
|
|
18346
|
+
await (0, import_promises30.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
18355
18347
|
}
|
|
18356
18348
|
keyToPath(key) {
|
|
18357
18349
|
const prefix = key.slice(0, 2);
|
|
@@ -18890,7 +18882,6 @@ function createAgentKernel() {
|
|
|
18890
18882
|
freeformEvaluationSchema,
|
|
18891
18883
|
generateRubrics,
|
|
18892
18884
|
getAgentvHome,
|
|
18893
|
-
getGitCacheRoot,
|
|
18894
18885
|
getHitCount,
|
|
18895
18886
|
getSubagentsRoot,
|
|
18896
18887
|
getTraceStateRoot,
|