@agentv/core 4.30.0 → 4.31.1-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-Z2BBOGE4.js → chunk-A27NE3R7.js} +28 -27
- package/dist/chunk-A27NE3R7.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +42 -33
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +42 -33
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +297 -76
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +34 -19
- package/dist/index.d.ts +34 -19
- package/dist/index.js +277 -51
- package/dist/index.js.map +1 -1
- package/dist/{ts-eval-loader-JL5DGTJL.js → ts-eval-loader-XR6DNOZ3.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-Z2BBOGE4.js.map +0 -1
- /package/dist/{ts-eval-loader-JL5DGTJL.js.map → ts-eval-loader-XR6DNOZ3.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -132,7 +132,7 @@ import {
|
|
|
132
132
|
tokensPerTool,
|
|
133
133
|
trackChild,
|
|
134
134
|
trackedChildCount
|
|
135
|
-
} from "./chunk-Z2BBOGE4.js";
|
|
135
|
+
} from "./chunk-A27NE3R7.js";
|
|
136
136
|
import {
|
|
137
137
|
COMMON_TARGET_SETTINGS,
|
|
138
138
|
TEST_MESSAGE_ROLES,
|
|
@@ -720,8 +720,16 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
720
720
|
}
|
|
721
721
|
|
|
722
722
|
// src/evaluation/results-repo.ts
|
|
723
|
-
import { execFile } from "node:child_process";
|
|
724
|
-
import {
|
|
723
|
+
import { execFile, spawn } from "node:child_process";
|
|
724
|
+
import {
|
|
725
|
+
existsSync,
|
|
726
|
+
mkdirSync,
|
|
727
|
+
mkdtempSync,
|
|
728
|
+
readFileSync as readFileSync2,
|
|
729
|
+
renameSync,
|
|
730
|
+
rmSync,
|
|
731
|
+
writeFileSync
|
|
732
|
+
} from "node:fs";
|
|
725
733
|
import { cp, mkdtemp, readdir, rm, stat } from "node:fs/promises";
|
|
726
734
|
import os from "node:os";
|
|
727
735
|
import path4 from "node:path";
|
|
@@ -738,10 +746,19 @@ function withFriendlyGitHubAuthError(error) {
|
|
|
738
746
|
}
|
|
739
747
|
return new Error(message);
|
|
740
748
|
}
|
|
741
|
-
function
|
|
749
|
+
function expandHome(p) {
|
|
750
|
+
if (p === "~" || p.startsWith("~/") || p.startsWith("~\\")) {
|
|
751
|
+
return path4.join(os.homedir(), p.slice(1));
|
|
752
|
+
}
|
|
753
|
+
return p;
|
|
754
|
+
}
|
|
755
|
+
function normalizeResultsConfig(config) {
|
|
756
|
+
const repo = config.repo.trim();
|
|
757
|
+
const resolvedPath = config.path ? expandHome(config.path.trim()) : path4.join(getAgentvHome(), "results", sanitizeRepoSlug(repo));
|
|
742
758
|
return {
|
|
743
|
-
|
|
744
|
-
|
|
759
|
+
mode: "github",
|
|
760
|
+
repo,
|
|
761
|
+
path: resolvedPath,
|
|
745
762
|
auto_push: config.auto_push === true,
|
|
746
763
|
branch_prefix: config.branch_prefix?.trim() || "eval-results"
|
|
747
764
|
};
|
|
@@ -752,7 +769,7 @@ function resolveResultsRepoUrl(repo) {
|
|
|
752
769
|
}
|
|
753
770
|
return `https://github.com/${repo}.git`;
|
|
754
771
|
}
|
|
755
|
-
function getResultsRepoCachePaths(repo) {
|
|
772
|
+
function getResultsRepoLocalPaths(repo) {
|
|
756
773
|
const rootDir = path4.join(getAgentvHome(), "cache", "results-repo", sanitizeRepoSlug(repo));
|
|
757
774
|
return {
|
|
758
775
|
rootDir,
|
|
@@ -779,7 +796,7 @@ async function runCommand(executable, args, options) {
|
|
|
779
796
|
try {
|
|
780
797
|
const { stdout, stderr } = await execFileAsync(executable, [...args], {
|
|
781
798
|
cwd: options?.cwd,
|
|
782
|
-
env: process.env
|
|
799
|
+
env: options?.env ?? process.env
|
|
783
800
|
});
|
|
784
801
|
return { stdout, stderr };
|
|
785
802
|
} catch (error) {
|
|
@@ -793,8 +810,17 @@ async function runCommand(executable, args, options) {
|
|
|
793
810
|
throw withFriendlyGitHubAuthError(error);
|
|
794
811
|
}
|
|
795
812
|
}
|
|
813
|
+
function getGitEnv() {
|
|
814
|
+
const env = {};
|
|
815
|
+
for (const [key, value] of Object.entries(process.env)) {
|
|
816
|
+
if (value !== void 0 && !(key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND")) {
|
|
817
|
+
env[key] = value;
|
|
818
|
+
}
|
|
819
|
+
}
|
|
820
|
+
return env;
|
|
821
|
+
}
|
|
796
822
|
async function runGit(args, options) {
|
|
797
|
-
return runCommand("git", args, options);
|
|
823
|
+
return runCommand("git", args, { ...options, env: getGitEnv() });
|
|
798
824
|
}
|
|
799
825
|
async function runGh(args, options) {
|
|
800
826
|
return runCommand("gh", args, options);
|
|
@@ -818,13 +844,11 @@ async function resolveDefaultBranch(repoDir) {
|
|
|
818
844
|
}
|
|
819
845
|
return "main";
|
|
820
846
|
}
|
|
821
|
-
async function updateCacheRepo(repoDir, baseBranch) {
|
|
847
|
+
async function fetchResultsRepo(repoDir) {
|
|
822
848
|
await runGit(["fetch", "origin", "--prune"], { cwd: repoDir });
|
|
823
|
-
await runGit(["checkout", baseBranch], { cwd: repoDir });
|
|
824
|
-
await runGit(["pull", "--ff-only", "origin", baseBranch], { cwd: repoDir });
|
|
825
849
|
}
|
|
826
850
|
function updateStatusFile(config, patch) {
|
|
827
|
-
const cachePaths = getResultsRepoCachePaths(config.repo);
|
|
851
|
+
const cachePaths = getResultsRepoLocalPaths(config.repo);
|
|
828
852
|
const current = readPersistedStatus(cachePaths.statusFile);
|
|
829
853
|
writePersistedStatus(cachePaths.statusFile, {
|
|
830
854
|
...current,
|
|
@@ -832,27 +856,32 @@ function updateStatusFile(config, patch) {
|
|
|
832
856
|
});
|
|
833
857
|
}
|
|
834
858
|
async function ensureResultsRepoClone(config) {
|
|
835
|
-
const normalized =
|
|
836
|
-
const cachePaths = getResultsRepoCachePaths(normalized.repo);
|
|
859
|
+
const normalized = normalizeResultsConfig(config);
|
|
860
|
+
const cachePaths = getResultsRepoLocalPaths(normalized.repo);
|
|
861
|
+
const cloneDir = normalized.path;
|
|
837
862
|
mkdirSync(cachePaths.rootDir, { recursive: true });
|
|
838
|
-
|
|
863
|
+
mkdirSync(path4.dirname(cloneDir), { recursive: true });
|
|
864
|
+
const cloneMissing = !existsSync(cloneDir);
|
|
865
|
+
const gitDir = path4.join(cloneDir, ".git");
|
|
866
|
+
const cloneEmpty = !cloneMissing && !existsSync(gitDir) && (await readdir(cloneDir)).length === 0;
|
|
867
|
+
if (cloneMissing || cloneEmpty) {
|
|
839
868
|
try {
|
|
840
869
|
await runGit([
|
|
841
870
|
"clone",
|
|
842
871
|
"--filter=blob:none",
|
|
843
872
|
resolveResultsRepoUrl(normalized.repo),
|
|
844
|
-
|
|
873
|
+
cloneDir
|
|
845
874
|
]);
|
|
846
|
-
return
|
|
875
|
+
return cloneDir;
|
|
847
876
|
} catch (error) {
|
|
848
877
|
updateStatusFile(normalized, { last_error: withFriendlyGitHubAuthError(error).message });
|
|
849
878
|
throw withFriendlyGitHubAuthError(error);
|
|
850
879
|
}
|
|
851
880
|
}
|
|
852
|
-
if (!existsSync(
|
|
853
|
-
throw new Error(`Results repo
|
|
881
|
+
if (!existsSync(gitDir)) {
|
|
882
|
+
throw new Error(`Results repo clone path is not a git repository: ${cloneDir}`);
|
|
854
883
|
}
|
|
855
|
-
return
|
|
884
|
+
return cloneDir;
|
|
856
885
|
}
|
|
857
886
|
function getResultsRepoStatus(config) {
|
|
858
887
|
if (!config) {
|
|
@@ -860,30 +889,29 @@ function getResultsRepoStatus(config) {
|
|
|
860
889
|
configured: false,
|
|
861
890
|
available: false,
|
|
862
891
|
repo: "",
|
|
863
|
-
|
|
892
|
+
local_dir: ""
|
|
864
893
|
};
|
|
865
894
|
}
|
|
866
|
-
const normalized =
|
|
867
|
-
const
|
|
868
|
-
const persisted = readPersistedStatus(
|
|
895
|
+
const normalized = normalizeResultsConfig(config);
|
|
896
|
+
const localPaths = getResultsRepoLocalPaths(normalized.repo);
|
|
897
|
+
const persisted = readPersistedStatus(localPaths.statusFile);
|
|
869
898
|
return {
|
|
870
899
|
configured: true,
|
|
871
|
-
available: existsSync(
|
|
900
|
+
available: existsSync(normalized.path),
|
|
872
901
|
repo: normalized.repo,
|
|
873
902
|
path: normalized.path,
|
|
874
903
|
auto_push: normalized.auto_push,
|
|
875
904
|
branch_prefix: normalized.branch_prefix,
|
|
876
|
-
|
|
905
|
+
local_dir: normalized.path,
|
|
877
906
|
last_synced_at: persisted.last_synced_at,
|
|
878
907
|
last_error: persisted.last_error
|
|
879
908
|
};
|
|
880
909
|
}
|
|
881
910
|
async function syncResultsRepo(config) {
|
|
882
|
-
const normalized =
|
|
911
|
+
const normalized = normalizeResultsConfig(config);
|
|
883
912
|
try {
|
|
884
913
|
const repoDir = await ensureResultsRepoClone(normalized);
|
|
885
|
-
|
|
886
|
-
await updateCacheRepo(repoDir, baseBranch);
|
|
914
|
+
await fetchResultsRepo(repoDir);
|
|
887
915
|
updateStatusFile(normalized, {
|
|
888
916
|
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
889
917
|
last_error: void 0
|
|
@@ -897,10 +925,10 @@ async function syncResultsRepo(config) {
|
|
|
897
925
|
return getResultsRepoStatus(normalized);
|
|
898
926
|
}
|
|
899
927
|
async function checkoutResultsRepoBranch(config, branchName) {
|
|
900
|
-
const normalized =
|
|
928
|
+
const normalized = normalizeResultsConfig(config);
|
|
901
929
|
const repoDir = await ensureResultsRepoClone(normalized);
|
|
902
930
|
const baseBranch = await resolveDefaultBranch(repoDir);
|
|
903
|
-
await updateCacheRepo(repoDir, baseBranch);
|
|
931
|
+
await fetchResultsRepo(repoDir);
|
|
904
932
|
await runGit(["checkout", "-B", branchName, `origin/${baseBranch}`], { cwd: repoDir });
|
|
905
933
|
updateStatusFile(normalized, { last_error: void 0 });
|
|
906
934
|
return {
|
|
@@ -910,10 +938,10 @@ async function checkoutResultsRepoBranch(config, branchName) {
|
|
|
910
938
|
};
|
|
911
939
|
}
|
|
912
940
|
async function prepareResultsRepoBranch(config, branchName) {
|
|
913
|
-
const normalized =
|
|
941
|
+
const normalized = normalizeResultsConfig(config);
|
|
914
942
|
const cloneDir = await ensureResultsRepoClone(normalized);
|
|
915
943
|
const baseBranch = await resolveDefaultBranch(cloneDir);
|
|
916
|
-
await updateCacheRepo(cloneDir, baseBranch);
|
|
944
|
+
await fetchResultsRepo(cloneDir);
|
|
917
945
|
const worktreeRoot = await mkdtemp(path4.join(os.tmpdir(), "agentv-results-repo-"));
|
|
918
946
|
const worktreeDir = path4.join(worktreeRoot, "repo");
|
|
919
947
|
await runGit(["worktree", "add", "-B", branchName, worktreeDir, `origin/${baseBranch}`], {
|
|
@@ -938,11 +966,8 @@ async function stageResultsArtifacts(params) {
|
|
|
938
966
|
await cp(params.sourceDir, params.destinationDir, { recursive: true });
|
|
939
967
|
}
|
|
940
968
|
function resolveResultsRepoRunsDir(config) {
|
|
941
|
-
const normalized =
|
|
942
|
-
return path4.join(
|
|
943
|
-
getResultsRepoCachePaths(normalized.repo).repoDir,
|
|
944
|
-
...normalized.path.split("/")
|
|
945
|
-
);
|
|
969
|
+
const normalized = normalizeResultsConfig(config);
|
|
970
|
+
return path4.join(normalized.path, "runs");
|
|
946
971
|
}
|
|
947
972
|
async function directorySizeBytes(targetPath) {
|
|
948
973
|
const entry = await stat(targetPath);
|
|
@@ -969,9 +994,9 @@ async function commitAndPushResultsBranch(params) {
|
|
|
969
994
|
return true;
|
|
970
995
|
}
|
|
971
996
|
async function pushResultsRepoBranch(config, branchName, cwd) {
|
|
972
|
-
const normalized =
|
|
997
|
+
const normalized = normalizeResultsConfig(config);
|
|
973
998
|
await runGit(["push", "-u", "origin", branchName], {
|
|
974
|
-
cwd: cwd ??
|
|
999
|
+
cwd: cwd ?? normalized.path
|
|
975
1000
|
});
|
|
976
1001
|
updateStatusFile(normalized, {
|
|
977
1002
|
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
@@ -1001,11 +1026,11 @@ async function createDraftResultsPr(params) {
|
|
|
1001
1026
|
}
|
|
1002
1027
|
var DIRECT_PUSH_MAX_RETRIES = 3;
|
|
1003
1028
|
async function directPushResults(params) {
|
|
1004
|
-
const normalized =
|
|
1029
|
+
const normalized = normalizeResultsConfig(params.config);
|
|
1005
1030
|
const repoDir = await ensureResultsRepoClone(normalized);
|
|
1006
1031
|
const baseBranch = await resolveDefaultBranch(repoDir);
|
|
1007
|
-
await updateCacheRepo(repoDir, baseBranch);
|
|
1008
|
-
const destinationDir = path4.join(repoDir,
|
|
1032
|
+
await fetchResultsRepo(repoDir);
|
|
1033
|
+
const destinationDir = path4.join(repoDir, "runs", params.destinationPath);
|
|
1009
1034
|
await stageResultsArtifacts({
|
|
1010
1035
|
repoDir,
|
|
1011
1036
|
sourceDir: params.sourceDir,
|
|
@@ -1019,10 +1044,19 @@ async function directPushResults(params) {
|
|
|
1019
1044
|
if (status.trim().length === 0) {
|
|
1020
1045
|
return false;
|
|
1021
1046
|
}
|
|
1022
|
-
await runGit(
|
|
1047
|
+
await runGit(
|
|
1048
|
+
[
|
|
1049
|
+
"commit",
|
|
1050
|
+
"-m",
|
|
1051
|
+
params.commitMessage,
|
|
1052
|
+
"-m",
|
|
1053
|
+
`Agentv-Run: ${buildGitRunId(params.destinationPath)}`
|
|
1054
|
+
],
|
|
1055
|
+
{ cwd: repoDir }
|
|
1056
|
+
);
|
|
1023
1057
|
for (let attempt = 1; attempt <= DIRECT_PUSH_MAX_RETRIES; attempt++) {
|
|
1024
1058
|
try {
|
|
1025
|
-
await runGit(["push", "origin", baseBranch], { cwd: repoDir });
|
|
1059
|
+
await runGit(["push", "origin", `HEAD:${baseBranch}`], { cwd: repoDir });
|
|
1026
1060
|
updateStatusFile(normalized, {
|
|
1027
1061
|
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1028
1062
|
last_error: void 0
|
|
@@ -1031,7 +1065,8 @@ async function directPushResults(params) {
|
|
|
1031
1065
|
} catch (error) {
|
|
1032
1066
|
const message = error instanceof Error ? error.message : String(error);
|
|
1033
1067
|
if (attempt < DIRECT_PUSH_MAX_RETRIES && message.includes("non-fast-forward")) {
|
|
1034
|
-
await
|
|
1068
|
+
await fetchResultsRepo(repoDir);
|
|
1069
|
+
await runGit(["rebase", `origin/${baseBranch}`], { cwd: repoDir });
|
|
1035
1070
|
} else {
|
|
1036
1071
|
throw error;
|
|
1037
1072
|
}
|
|
@@ -1039,6 +1074,195 @@ async function directPushResults(params) {
|
|
|
1039
1074
|
}
|
|
1040
1075
|
return false;
|
|
1041
1076
|
}
|
|
1077
|
+
function buildGitRunId(relativeRunPath) {
|
|
1078
|
+
const normalized = relativeRunPath.split(path4.sep).join("/");
|
|
1079
|
+
const segments = normalized.split("/").filter(Boolean);
|
|
1080
|
+
if (segments.length >= 2) {
|
|
1081
|
+
const experiment = segments.slice(0, -1).join("/");
|
|
1082
|
+
const timestamp = segments.at(-1);
|
|
1083
|
+
if (experiment === "default") {
|
|
1084
|
+
return timestamp ?? normalized;
|
|
1085
|
+
}
|
|
1086
|
+
return `${experiment}::${timestamp}`;
|
|
1087
|
+
}
|
|
1088
|
+
return segments[0] ?? relativeRunPath;
|
|
1089
|
+
}
|
|
1090
|
+
function getRunExperiment(runId, benchmark) {
|
|
1091
|
+
const experiment = benchmark.metadata?.experiment?.trim();
|
|
1092
|
+
if (experiment) {
|
|
1093
|
+
return experiment;
|
|
1094
|
+
}
|
|
1095
|
+
const separatorIndex = runId.lastIndexOf("::");
|
|
1096
|
+
return separatorIndex === -1 ? "default" : runId.slice(0, separatorIndex);
|
|
1097
|
+
}
|
|
1098
|
+
function computeAveragePassRate(runSummary) {
|
|
1099
|
+
if (!runSummary) {
|
|
1100
|
+
return void 0;
|
|
1101
|
+
}
|
|
1102
|
+
const passRates = Object.values(runSummary).map((summary) => summary.pass_rate?.mean).filter((value) => typeof value === "number" && Number.isFinite(value));
|
|
1103
|
+
if (passRates.length === 0) {
|
|
1104
|
+
return void 0;
|
|
1105
|
+
}
|
|
1106
|
+
return passRates.reduce((sum, value) => sum + value, 0) / passRates.length;
|
|
1107
|
+
}
|
|
1108
|
+
async function runGitBatch(repoDir, input) {
|
|
1109
|
+
return new Promise((resolve, reject) => {
|
|
1110
|
+
const child = spawn("git", ["cat-file", "--batch"], {
|
|
1111
|
+
cwd: repoDir,
|
|
1112
|
+
env: getGitEnv(),
|
|
1113
|
+
stdio: ["pipe", "pipe", "pipe"]
|
|
1114
|
+
});
|
|
1115
|
+
const stdoutChunks = [];
|
|
1116
|
+
const stderrChunks = [];
|
|
1117
|
+
child.stdout.on("data", (chunk) => {
|
|
1118
|
+
stdoutChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
|
|
1119
|
+
});
|
|
1120
|
+
child.stderr.on("data", (chunk) => {
|
|
1121
|
+
stderrChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
|
|
1122
|
+
});
|
|
1123
|
+
child.on("error", (error) => reject(withFriendlyGitHubAuthError(error)));
|
|
1124
|
+
child.on("close", (code) => {
|
|
1125
|
+
if (code === 0) {
|
|
1126
|
+
resolve(Buffer.concat(stdoutChunks));
|
|
1127
|
+
return;
|
|
1128
|
+
}
|
|
1129
|
+
const stderr = Buffer.concat(stderrChunks).toString("utf8").trim();
|
|
1130
|
+
reject(
|
|
1131
|
+
withFriendlyGitHubAuthError(
|
|
1132
|
+
stderr.length > 0 ? new Error(stderr) : new Error("git cat-file failed")
|
|
1133
|
+
)
|
|
1134
|
+
);
|
|
1135
|
+
});
|
|
1136
|
+
child.stdin.end(input);
|
|
1137
|
+
});
|
|
1138
|
+
}
|
|
1139
|
+
function parseGitBatchBlobs(output) {
|
|
1140
|
+
const blobs = [];
|
|
1141
|
+
let offset = 0;
|
|
1142
|
+
while (offset < output.length) {
|
|
1143
|
+
const headerEnd = output.indexOf(10, offset);
|
|
1144
|
+
if (headerEnd === -1) {
|
|
1145
|
+
throw new Error("Malformed git cat-file output: missing header terminator");
|
|
1146
|
+
}
|
|
1147
|
+
const header = output.subarray(offset, headerEnd).toString("utf8");
|
|
1148
|
+
offset = headerEnd + 1;
|
|
1149
|
+
if (header.length === 0) {
|
|
1150
|
+
continue;
|
|
1151
|
+
}
|
|
1152
|
+
const missingMatch = /^(.*) missing$/.exec(header);
|
|
1153
|
+
if (missingMatch) {
|
|
1154
|
+
continue;
|
|
1155
|
+
}
|
|
1156
|
+
const headerMatch = /^(.*) (\w+) (\d+)$/.exec(header);
|
|
1157
|
+
if (!headerMatch) {
|
|
1158
|
+
throw new Error(`Malformed git cat-file header: ${header}`);
|
|
1159
|
+
}
|
|
1160
|
+
const [, objectRef, objectType, sizeText] = headerMatch;
|
|
1161
|
+
if (objectType !== "blob") {
|
|
1162
|
+
throw new Error(`Unsupported git object type for ${objectRef}: ${objectType}`);
|
|
1163
|
+
}
|
|
1164
|
+
const size = Number.parseInt(sizeText, 10);
|
|
1165
|
+
const contentEnd = offset + size;
|
|
1166
|
+
if (contentEnd > output.length) {
|
|
1167
|
+
throw new Error(`Malformed git cat-file output for ${objectRef}: truncated blob content`);
|
|
1168
|
+
}
|
|
1169
|
+
blobs.push({
|
|
1170
|
+
size,
|
|
1171
|
+
content: output.subarray(offset, contentEnd)
|
|
1172
|
+
});
|
|
1173
|
+
offset = contentEnd;
|
|
1174
|
+
if (offset < output.length && output[offset] === 10) {
|
|
1175
|
+
offset += 1;
|
|
1176
|
+
}
|
|
1177
|
+
}
|
|
1178
|
+
return blobs;
|
|
1179
|
+
}
|
|
1180
|
+
async function listGitRuns(repoDir, ref = "origin/main") {
|
|
1181
|
+
const { stdout: treeOut } = await runGit(["ls-tree", "-r", "--name-only", ref, "runs"], {
|
|
1182
|
+
cwd: repoDir
|
|
1183
|
+
});
|
|
1184
|
+
const benchmarkPaths = treeOut.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.endsWith("/benchmark.json"));
|
|
1185
|
+
if (benchmarkPaths.length === 0) {
|
|
1186
|
+
return [];
|
|
1187
|
+
}
|
|
1188
|
+
const batchInput = `${benchmarkPaths.map((benchmarkPath) => `${ref}:${benchmarkPath}`).join("\n")}
|
|
1189
|
+
`;
|
|
1190
|
+
const blobs = parseGitBatchBlobs(await runGitBatch(repoDir, batchInput));
|
|
1191
|
+
if (blobs.length !== benchmarkPaths.length) {
|
|
1192
|
+
throw new Error(
|
|
1193
|
+
`Expected ${benchmarkPaths.length} git blobs but received ${blobs.length} while listing results runs`
|
|
1194
|
+
);
|
|
1195
|
+
}
|
|
1196
|
+
const runs = blobs.flatMap((blob, index) => {
|
|
1197
|
+
const benchmarkPath = benchmarkPaths[index];
|
|
1198
|
+
const benchmark = JSON.parse(blob.content.toString("utf8"));
|
|
1199
|
+
const runDir = path4.posix.dirname(benchmarkPath);
|
|
1200
|
+
const relativeRunPath = path4.posix.relative("runs", runDir);
|
|
1201
|
+
const runId = buildGitRunId(relativeRunPath);
|
|
1202
|
+
const timestamp = benchmark.metadata?.timestamp?.trim() || path4.posix.basename(runDir);
|
|
1203
|
+
const targets = benchmark.metadata?.targets ?? [];
|
|
1204
|
+
const passRate = computeAveragePassRate(benchmark.run_summary);
|
|
1205
|
+
return [
|
|
1206
|
+
{
|
|
1207
|
+
run_id: runId,
|
|
1208
|
+
experiment: getRunExperiment(runId, benchmark),
|
|
1209
|
+
timestamp,
|
|
1210
|
+
...passRate !== void 0 && { pass_rate: passRate },
|
|
1211
|
+
...targets.length === 1 && targets[0] ? { target: targets[0] } : {},
|
|
1212
|
+
manifest_path: path4.posix.join(runDir, "index.jsonl"),
|
|
1213
|
+
benchmark_path: benchmarkPath,
|
|
1214
|
+
display_name: path4.posix.basename(runDir),
|
|
1215
|
+
test_count: benchmark.metadata?.tests_run?.length ?? 0,
|
|
1216
|
+
avg_score: 0,
|
|
1217
|
+
size_bytes: blob.size
|
|
1218
|
+
}
|
|
1219
|
+
];
|
|
1220
|
+
});
|
|
1221
|
+
runs.sort((a, b) => b.timestamp.localeCompare(a.timestamp));
|
|
1222
|
+
return runs;
|
|
1223
|
+
}
|
|
1224
|
+
async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main") {
|
|
1225
|
+
const normalizedRunPath = relativeRunPath.split(path4.sep).join("/");
|
|
1226
|
+
const runTreePath = path4.posix.join("runs", normalizedRunPath);
|
|
1227
|
+
const targetRunDir = path4.join(repoDir, ...runTreePath.split("/"));
|
|
1228
|
+
const { stdout: treeOut } = await runGit(["ls-tree", "-r", "--name-only", ref, runTreePath], {
|
|
1229
|
+
cwd: repoDir
|
|
1230
|
+
});
|
|
1231
|
+
const filePaths = treeOut.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
|
|
1232
|
+
if (filePaths.length === 0) {
|
|
1233
|
+
return;
|
|
1234
|
+
}
|
|
1235
|
+
const batchInput = `${filePaths.map((filePath) => `${ref}:${filePath}`).join("\n")}
|
|
1236
|
+
`;
|
|
1237
|
+
const blobs = parseGitBatchBlobs(await runGitBatch(repoDir, batchInput));
|
|
1238
|
+
if (blobs.length !== filePaths.length) {
|
|
1239
|
+
throw new Error(
|
|
1240
|
+
`Expected ${filePaths.length} git blobs but received ${blobs.length} while materializing results run`
|
|
1241
|
+
);
|
|
1242
|
+
}
|
|
1243
|
+
const tempRoot = mkdtempSync(path4.join(repoDir, ".agentv-run-"));
|
|
1244
|
+
const tempRunDir = path4.join(tempRoot, "run");
|
|
1245
|
+
try {
|
|
1246
|
+
for (const [index, filePath] of filePaths.entries()) {
|
|
1247
|
+
const relativeFilePath = path4.posix.relative(runTreePath, filePath);
|
|
1248
|
+
const absolutePath = path4.join(tempRunDir, ...relativeFilePath.split("/"));
|
|
1249
|
+
mkdirSync(path4.dirname(absolutePath), { recursive: true });
|
|
1250
|
+
writeFileSync(absolutePath, blobs[index].content);
|
|
1251
|
+
}
|
|
1252
|
+
mkdirSync(path4.dirname(targetRunDir), { recursive: true });
|
|
1253
|
+
try {
|
|
1254
|
+
renameSync(tempRunDir, targetRunDir);
|
|
1255
|
+
} catch (error) {
|
|
1256
|
+
const code = typeof error === "object" && error !== null && "code" in error ? error.code : void 0;
|
|
1257
|
+
if ((code === "EEXIST" || code === "ENOTEMPTY") && existsSync(targetRunDir)) {
|
|
1258
|
+
return;
|
|
1259
|
+
}
|
|
1260
|
+
throw error;
|
|
1261
|
+
}
|
|
1262
|
+
} finally {
|
|
1263
|
+
rmSync(tempRoot, { recursive: true, force: true });
|
|
1264
|
+
}
|
|
1265
|
+
}
|
|
1042
1266
|
|
|
1043
1267
|
// src/projects.ts
|
|
1044
1268
|
import {
|
|
@@ -1046,7 +1270,7 @@ import {
|
|
|
1046
1270
|
mkdirSync as mkdirSync2,
|
|
1047
1271
|
readFileSync as readFileSync3,
|
|
1048
1272
|
readdirSync,
|
|
1049
|
-
renameSync,
|
|
1273
|
+
renameSync as renameSync2,
|
|
1050
1274
|
statSync,
|
|
1051
1275
|
unlinkSync,
|
|
1052
1276
|
writeFileSync as writeFileSync2
|
|
@@ -1087,7 +1311,7 @@ function migrateLegacyBenchmarksFile() {
|
|
|
1087
1311
|
try {
|
|
1088
1312
|
mkdirSync2(path5.dirname(newPath), { recursive: true });
|
|
1089
1313
|
writeFileSync2(tempPath, newContent, "utf-8");
|
|
1090
|
-
|
|
1314
|
+
renameSync2(tempPath, newPath);
|
|
1091
1315
|
unlinkSync(oldPath);
|
|
1092
1316
|
} catch (err) {
|
|
1093
1317
|
try {
|
|
@@ -2639,7 +2863,7 @@ export {
|
|
|
2639
2863
|
getOutputFilenames,
|
|
2640
2864
|
getProject,
|
|
2641
2865
|
getProjectsRegistryPath,
|
|
2642
|
-
|
|
2866
|
+
getResultsRepoLocalPaths,
|
|
2643
2867
|
getResultsRepoStatus,
|
|
2644
2868
|
getSubagentsRoot,
|
|
2645
2869
|
getTextContent,
|
|
@@ -2659,6 +2883,7 @@ export {
|
|
|
2659
2883
|
isTestMessage,
|
|
2660
2884
|
isTestMessageRole,
|
|
2661
2885
|
killAllTrackedChildren,
|
|
2886
|
+
listGitRuns,
|
|
2662
2887
|
listTargetNames,
|
|
2663
2888
|
loadConfig,
|
|
2664
2889
|
loadEvalCaseById,
|
|
@@ -2670,10 +2895,11 @@ export {
|
|
|
2670
2895
|
loadTests,
|
|
2671
2896
|
loadTsConfig,
|
|
2672
2897
|
loadTsEvalFile,
|
|
2898
|
+
materializeGitRun,
|
|
2673
2899
|
mergeExecutionMetrics,
|
|
2674
2900
|
negateScore,
|
|
2675
2901
|
normalizeLineEndings,
|
|
2676
|
-
|
|
2902
|
+
normalizeResultsConfig,
|
|
2677
2903
|
parseAgentSkillsEvals,
|
|
2678
2904
|
parseClaudeSession,
|
|
2679
2905
|
parseCodexSession,
|