agentv 4.31.0-next.1 → 4.31.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{artifact-writer-K4B2S7OE.js → artifact-writer-FZ5RUHWC.js} +4 -4
- package/dist/{chunk-2Z2V7RJO.js → chunk-377GONL7.js} +29 -11
- package/dist/chunk-377GONL7.js.map +1 -0
- package/dist/{chunk-NACNTFNH.js → chunk-45EYE5HJ.js} +99 -38
- package/dist/{chunk-NACNTFNH.js.map → chunk-45EYE5HJ.js.map} +1 -1
- package/dist/{chunk-BMSSZSND.js → chunk-IJPWTVDU.js} +3 -3
- package/dist/{chunk-FLSABQ33.js → chunk-LOYPSIE7.js} +268 -42
- package/dist/chunk-LOYPSIE7.js.map +1 -0
- package/dist/{chunk-BCZHBAUK.js → chunk-ZFTDIK4V.js} +123 -35
- package/dist/chunk-ZFTDIK4V.js.map +1 -0
- package/dist/cli.js +5 -5
- package/dist/{dist-2E6ZNN32.js → dist-CRYAFKLS.js} +9 -5
- package/dist/index.js +5 -5
- package/dist/{interactive-22PLS22Z.js → interactive-WZW2FF43.js} +5 -5
- package/dist/studio/assets/index-9UixPaIJ.js +116 -0
- package/dist/studio/assets/index-BAvxZ1rb.css +1 -0
- package/dist/studio/assets/{index-Bh_LwYWq.js → index-DPrj3J9P.js} +1 -1
- package/dist/studio/index.html +2 -2
- package/dist/{ts-eval-loader-TWWSN6OX-VRZ43CUR.js → ts-eval-loader-XR6DNOZ3-GA7B5N26.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-2Z2V7RJO.js.map +0 -1
- package/dist/chunk-BCZHBAUK.js.map +0 -1
- package/dist/chunk-FLSABQ33.js.map +0 -1
- package/dist/studio/assets/index-CIpCCDKl.css +0 -1
- package/dist/studio/assets/index-DsVyXC9S.js +0 -116
- /package/dist/{artifact-writer-K4B2S7OE.js.map → artifact-writer-FZ5RUHWC.js.map} +0 -0
- /package/dist/{chunk-BMSSZSND.js.map → chunk-IJPWTVDU.js.map} +0 -0
- /package/dist/{dist-2E6ZNN32.js.map → dist-CRYAFKLS.js.map} +0 -0
- /package/dist/{interactive-22PLS22Z.js.map → interactive-WZW2FF43.js.map} +0 -0
- /package/dist/{ts-eval-loader-TWWSN6OX-VRZ43CUR.js.map → ts-eval-loader-XR6DNOZ3-GA7B5N26.js.map} +0 -0
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
|
|
2
2
|
import {
|
|
3
3
|
toTranscriptJsonLines
|
|
4
|
-
} from "./chunk-
|
|
4
|
+
} from "./chunk-LOYPSIE7.js";
|
|
5
5
|
import {
|
|
6
6
|
DEFAULT_THRESHOLD
|
|
7
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-377GONL7.js";
|
|
8
8
|
|
|
9
9
|
// src/commands/eval/artifact-writer.ts
|
|
10
10
|
import { mkdir, readFile, writeFile } from "node:fs/promises";
|
|
@@ -726,4 +726,4 @@ export {
|
|
|
726
726
|
writePerTestArtifacts,
|
|
727
727
|
writeArtifactsFromResults
|
|
728
728
|
};
|
|
729
|
-
//# sourceMappingURL=chunk-
|
|
729
|
+
//# sourceMappingURL=chunk-IJPWTVDU.js.map
|
|
@@ -12,7 +12,7 @@ import {
|
|
|
12
12
|
parseYamlValue,
|
|
13
13
|
toCamelCaseDeep,
|
|
14
14
|
toSnakeCaseDeep
|
|
15
|
-
} from "./chunk-
|
|
15
|
+
} from "./chunk-377GONL7.js";
|
|
16
16
|
|
|
17
17
|
// ../../packages/core/dist/index.js
|
|
18
18
|
import { readFileSync } from "node:fs";
|
|
@@ -21,8 +21,16 @@ import { readFile } from "node:fs/promises";
|
|
|
21
21
|
import path2 from "node:path";
|
|
22
22
|
import { mkdir, readFile as readFile2, writeFile } from "node:fs/promises";
|
|
23
23
|
import path3 from "node:path";
|
|
24
|
-
import { execFile } from "node:child_process";
|
|
25
|
-
import {
|
|
24
|
+
import { execFile, spawn } from "node:child_process";
|
|
25
|
+
import {
|
|
26
|
+
existsSync,
|
|
27
|
+
mkdirSync,
|
|
28
|
+
mkdtempSync,
|
|
29
|
+
readFileSync as readFileSync2,
|
|
30
|
+
renameSync,
|
|
31
|
+
rmSync,
|
|
32
|
+
writeFileSync
|
|
33
|
+
} from "node:fs";
|
|
26
34
|
import { cp, mkdtemp, readdir, rm, stat } from "node:fs/promises";
|
|
27
35
|
import os from "node:os";
|
|
28
36
|
import path4 from "node:path";
|
|
@@ -32,7 +40,7 @@ import {
|
|
|
32
40
|
mkdirSync as mkdirSync2,
|
|
33
41
|
readFileSync as readFileSync3,
|
|
34
42
|
readdirSync,
|
|
35
|
-
renameSync,
|
|
43
|
+
renameSync as renameSync2,
|
|
36
44
|
statSync,
|
|
37
45
|
unlinkSync,
|
|
38
46
|
writeFileSync as writeFileSync2
|
|
@@ -598,10 +606,19 @@ function withFriendlyGitHubAuthError(error) {
|
|
|
598
606
|
}
|
|
599
607
|
return new Error(message);
|
|
600
608
|
}
|
|
609
|
+
function expandHome(p) {
|
|
610
|
+
if (p === "~" || p.startsWith("~/") || p.startsWith("~\\")) {
|
|
611
|
+
return path4.join(os.homedir(), p.slice(1));
|
|
612
|
+
}
|
|
613
|
+
return p;
|
|
614
|
+
}
|
|
601
615
|
function normalizeResultsConfig(config) {
|
|
616
|
+
const repo = config.repo.trim();
|
|
617
|
+
const resolvedPath = config.path ? expandHome(config.path.trim()) : path4.join(getAgentvHome(), "results", sanitizeRepoSlug(repo));
|
|
602
618
|
return {
|
|
603
|
-
|
|
604
|
-
|
|
619
|
+
mode: "github",
|
|
620
|
+
repo,
|
|
621
|
+
path: resolvedPath,
|
|
605
622
|
auto_push: config.auto_push === true,
|
|
606
623
|
branch_prefix: config.branch_prefix?.trim() || "eval-results"
|
|
607
624
|
};
|
|
@@ -612,7 +629,7 @@ function resolveResultsRepoUrl(repo) {
|
|
|
612
629
|
}
|
|
613
630
|
return `https://github.com/${repo}.git`;
|
|
614
631
|
}
|
|
615
|
-
function
|
|
632
|
+
function getResultsRepoLocalPaths(repo) {
|
|
616
633
|
const rootDir = path4.join(getAgentvHome(), "cache", "results-repo", sanitizeRepoSlug(repo));
|
|
617
634
|
return {
|
|
618
635
|
rootDir,
|
|
@@ -639,7 +656,7 @@ async function runCommand(executable, args, options) {
|
|
|
639
656
|
try {
|
|
640
657
|
const { stdout, stderr } = await execFileAsync(executable, [...args], {
|
|
641
658
|
cwd: options?.cwd,
|
|
642
|
-
env: process.env
|
|
659
|
+
env: options?.env ?? process.env
|
|
643
660
|
});
|
|
644
661
|
return { stdout, stderr };
|
|
645
662
|
} catch (error) {
|
|
@@ -653,8 +670,17 @@ async function runCommand(executable, args, options) {
|
|
|
653
670
|
throw withFriendlyGitHubAuthError(error);
|
|
654
671
|
}
|
|
655
672
|
}
|
|
673
|
+
function getGitEnv() {
|
|
674
|
+
const env = {};
|
|
675
|
+
for (const [key, value] of Object.entries(process.env)) {
|
|
676
|
+
if (value !== void 0 && !(key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND")) {
|
|
677
|
+
env[key] = value;
|
|
678
|
+
}
|
|
679
|
+
}
|
|
680
|
+
return env;
|
|
681
|
+
}
|
|
656
682
|
async function runGit(args, options) {
|
|
657
|
-
return runCommand("git", args, options);
|
|
683
|
+
return runCommand("git", args, { ...options, env: getGitEnv() });
|
|
658
684
|
}
|
|
659
685
|
async function runGh(args, options) {
|
|
660
686
|
return runCommand("gh", args, options);
|
|
@@ -678,13 +704,11 @@ async function resolveDefaultBranch(repoDir) {
|
|
|
678
704
|
}
|
|
679
705
|
return "main";
|
|
680
706
|
}
|
|
681
|
-
async function
|
|
707
|
+
async function fetchResultsRepo(repoDir) {
|
|
682
708
|
await runGit(["fetch", "origin", "--prune"], { cwd: repoDir });
|
|
683
|
-
await runGit(["checkout", baseBranch], { cwd: repoDir });
|
|
684
|
-
await runGit(["pull", "--ff-only", "origin", baseBranch], { cwd: repoDir });
|
|
685
709
|
}
|
|
686
710
|
function updateStatusFile(config, patch) {
|
|
687
|
-
const cachePaths =
|
|
711
|
+
const cachePaths = getResultsRepoLocalPaths(config.repo);
|
|
688
712
|
const current = readPersistedStatus(cachePaths.statusFile);
|
|
689
713
|
writePersistedStatus(cachePaths.statusFile, {
|
|
690
714
|
...current,
|
|
@@ -693,26 +717,31 @@ function updateStatusFile(config, patch) {
|
|
|
693
717
|
}
|
|
694
718
|
async function ensureResultsRepoClone(config) {
|
|
695
719
|
const normalized = normalizeResultsConfig(config);
|
|
696
|
-
const cachePaths =
|
|
720
|
+
const cachePaths = getResultsRepoLocalPaths(normalized.repo);
|
|
721
|
+
const cloneDir = normalized.path;
|
|
697
722
|
mkdirSync(cachePaths.rootDir, { recursive: true });
|
|
698
|
-
|
|
723
|
+
mkdirSync(path4.dirname(cloneDir), { recursive: true });
|
|
724
|
+
const cloneMissing = !existsSync(cloneDir);
|
|
725
|
+
const gitDir = path4.join(cloneDir, ".git");
|
|
726
|
+
const cloneEmpty = !cloneMissing && !existsSync(gitDir) && (await readdir(cloneDir)).length === 0;
|
|
727
|
+
if (cloneMissing || cloneEmpty) {
|
|
699
728
|
try {
|
|
700
729
|
await runGit([
|
|
701
730
|
"clone",
|
|
702
731
|
"--filter=blob:none",
|
|
703
732
|
resolveResultsRepoUrl(normalized.repo),
|
|
704
|
-
|
|
733
|
+
cloneDir
|
|
705
734
|
]);
|
|
706
|
-
return
|
|
735
|
+
return cloneDir;
|
|
707
736
|
} catch (error) {
|
|
708
737
|
updateStatusFile(normalized, { last_error: withFriendlyGitHubAuthError(error).message });
|
|
709
738
|
throw withFriendlyGitHubAuthError(error);
|
|
710
739
|
}
|
|
711
740
|
}
|
|
712
|
-
if (!existsSync(
|
|
713
|
-
throw new Error(`Results repo
|
|
741
|
+
if (!existsSync(gitDir)) {
|
|
742
|
+
throw new Error(`Results repo clone path is not a git repository: ${cloneDir}`);
|
|
714
743
|
}
|
|
715
|
-
return
|
|
744
|
+
return cloneDir;
|
|
716
745
|
}
|
|
717
746
|
function getResultsRepoStatus(config) {
|
|
718
747
|
if (!config) {
|
|
@@ -720,20 +749,20 @@ function getResultsRepoStatus(config) {
|
|
|
720
749
|
configured: false,
|
|
721
750
|
available: false,
|
|
722
751
|
repo: "",
|
|
723
|
-
|
|
752
|
+
local_dir: ""
|
|
724
753
|
};
|
|
725
754
|
}
|
|
726
755
|
const normalized = normalizeResultsConfig(config);
|
|
727
|
-
const
|
|
728
|
-
const persisted = readPersistedStatus(
|
|
756
|
+
const localPaths = getResultsRepoLocalPaths(normalized.repo);
|
|
757
|
+
const persisted = readPersistedStatus(localPaths.statusFile);
|
|
729
758
|
return {
|
|
730
759
|
configured: true,
|
|
731
|
-
available: existsSync(
|
|
760
|
+
available: existsSync(normalized.path),
|
|
732
761
|
repo: normalized.repo,
|
|
733
762
|
path: normalized.path,
|
|
734
763
|
auto_push: normalized.auto_push,
|
|
735
764
|
branch_prefix: normalized.branch_prefix,
|
|
736
|
-
|
|
765
|
+
local_dir: normalized.path,
|
|
737
766
|
last_synced_at: persisted.last_synced_at,
|
|
738
767
|
last_error: persisted.last_error
|
|
739
768
|
};
|
|
@@ -742,8 +771,7 @@ async function syncResultsRepo(config) {
|
|
|
742
771
|
const normalized = normalizeResultsConfig(config);
|
|
743
772
|
try {
|
|
744
773
|
const repoDir = await ensureResultsRepoClone(normalized);
|
|
745
|
-
|
|
746
|
-
await updateCacheRepo(repoDir, baseBranch);
|
|
774
|
+
await fetchResultsRepo(repoDir);
|
|
747
775
|
updateStatusFile(normalized, {
|
|
748
776
|
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
749
777
|
last_error: void 0
|
|
@@ -760,7 +788,7 @@ async function checkoutResultsRepoBranch(config, branchName) {
|
|
|
760
788
|
const normalized = normalizeResultsConfig(config);
|
|
761
789
|
const repoDir = await ensureResultsRepoClone(normalized);
|
|
762
790
|
const baseBranch = await resolveDefaultBranch(repoDir);
|
|
763
|
-
await
|
|
791
|
+
await fetchResultsRepo(repoDir);
|
|
764
792
|
await runGit(["checkout", "-B", branchName, `origin/${baseBranch}`], { cwd: repoDir });
|
|
765
793
|
updateStatusFile(normalized, { last_error: void 0 });
|
|
766
794
|
return {
|
|
@@ -773,7 +801,7 @@ async function prepareResultsRepoBranch(config, branchName) {
|
|
|
773
801
|
const normalized = normalizeResultsConfig(config);
|
|
774
802
|
const cloneDir = await ensureResultsRepoClone(normalized);
|
|
775
803
|
const baseBranch = await resolveDefaultBranch(cloneDir);
|
|
776
|
-
await
|
|
804
|
+
await fetchResultsRepo(cloneDir);
|
|
777
805
|
const worktreeRoot = await mkdtemp(path4.join(os.tmpdir(), "agentv-results-repo-"));
|
|
778
806
|
const worktreeDir = path4.join(worktreeRoot, "repo");
|
|
779
807
|
await runGit(["worktree", "add", "-B", branchName, worktreeDir, `origin/${baseBranch}`], {
|
|
@@ -799,10 +827,7 @@ async function stageResultsArtifacts(params) {
|
|
|
799
827
|
}
|
|
800
828
|
function resolveResultsRepoRunsDir(config) {
|
|
801
829
|
const normalized = normalizeResultsConfig(config);
|
|
802
|
-
return path4.join(
|
|
803
|
-
getResultsRepoCachePaths(normalized.repo).repoDir,
|
|
804
|
-
...normalized.path.split("/")
|
|
805
|
-
);
|
|
830
|
+
return path4.join(normalized.path, "runs");
|
|
806
831
|
}
|
|
807
832
|
async function directorySizeBytes(targetPath) {
|
|
808
833
|
const entry = await stat(targetPath);
|
|
@@ -831,7 +856,7 @@ async function commitAndPushResultsBranch(params) {
|
|
|
831
856
|
async function pushResultsRepoBranch(config, branchName, cwd) {
|
|
832
857
|
const normalized = normalizeResultsConfig(config);
|
|
833
858
|
await runGit(["push", "-u", "origin", branchName], {
|
|
834
|
-
cwd: cwd ??
|
|
859
|
+
cwd: cwd ?? normalized.path
|
|
835
860
|
});
|
|
836
861
|
updateStatusFile(normalized, {
|
|
837
862
|
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
@@ -864,8 +889,8 @@ async function directPushResults(params) {
|
|
|
864
889
|
const normalized = normalizeResultsConfig(params.config);
|
|
865
890
|
const repoDir = await ensureResultsRepoClone(normalized);
|
|
866
891
|
const baseBranch = await resolveDefaultBranch(repoDir);
|
|
867
|
-
await
|
|
868
|
-
const destinationDir = path4.join(repoDir,
|
|
892
|
+
await fetchResultsRepo(repoDir);
|
|
893
|
+
const destinationDir = path4.join(repoDir, "runs", params.destinationPath);
|
|
869
894
|
await stageResultsArtifacts({
|
|
870
895
|
repoDir,
|
|
871
896
|
sourceDir: params.sourceDir,
|
|
@@ -879,10 +904,19 @@ async function directPushResults(params) {
|
|
|
879
904
|
if (status.trim().length === 0) {
|
|
880
905
|
return false;
|
|
881
906
|
}
|
|
882
|
-
await runGit(
|
|
907
|
+
await runGit(
|
|
908
|
+
[
|
|
909
|
+
"commit",
|
|
910
|
+
"-m",
|
|
911
|
+
params.commitMessage,
|
|
912
|
+
"-m",
|
|
913
|
+
`Agentv-Run: ${buildGitRunId(params.destinationPath)}`
|
|
914
|
+
],
|
|
915
|
+
{ cwd: repoDir }
|
|
916
|
+
);
|
|
883
917
|
for (let attempt = 1; attempt <= DIRECT_PUSH_MAX_RETRIES; attempt++) {
|
|
884
918
|
try {
|
|
885
|
-
await runGit(["push", "origin", baseBranch], { cwd: repoDir });
|
|
919
|
+
await runGit(["push", "origin", `HEAD:${baseBranch}`], { cwd: repoDir });
|
|
886
920
|
updateStatusFile(normalized, {
|
|
887
921
|
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
888
922
|
last_error: void 0
|
|
@@ -891,7 +925,8 @@ async function directPushResults(params) {
|
|
|
891
925
|
} catch (error) {
|
|
892
926
|
const message = error instanceof Error ? error.message : String(error);
|
|
893
927
|
if (attempt < DIRECT_PUSH_MAX_RETRIES && message.includes("non-fast-forward")) {
|
|
894
|
-
await
|
|
928
|
+
await fetchResultsRepo(repoDir);
|
|
929
|
+
await runGit(["rebase", `origin/${baseBranch}`], { cwd: repoDir });
|
|
895
930
|
} else {
|
|
896
931
|
throw error;
|
|
897
932
|
}
|
|
@@ -899,6 +934,195 @@ async function directPushResults(params) {
|
|
|
899
934
|
}
|
|
900
935
|
return false;
|
|
901
936
|
}
|
|
937
|
+
function buildGitRunId(relativeRunPath) {
|
|
938
|
+
const normalized = relativeRunPath.split(path4.sep).join("/");
|
|
939
|
+
const segments = normalized.split("/").filter(Boolean);
|
|
940
|
+
if (segments.length >= 2) {
|
|
941
|
+
const experiment = segments.slice(0, -1).join("/");
|
|
942
|
+
const timestamp = segments.at(-1);
|
|
943
|
+
if (experiment === "default") {
|
|
944
|
+
return timestamp ?? normalized;
|
|
945
|
+
}
|
|
946
|
+
return `${experiment}::${timestamp}`;
|
|
947
|
+
}
|
|
948
|
+
return segments[0] ?? relativeRunPath;
|
|
949
|
+
}
|
|
950
|
+
function getRunExperiment(runId, benchmark) {
|
|
951
|
+
const experiment = benchmark.metadata?.experiment?.trim();
|
|
952
|
+
if (experiment) {
|
|
953
|
+
return experiment;
|
|
954
|
+
}
|
|
955
|
+
const separatorIndex = runId.lastIndexOf("::");
|
|
956
|
+
return separatorIndex === -1 ? "default" : runId.slice(0, separatorIndex);
|
|
957
|
+
}
|
|
958
|
+
function computeAveragePassRate(runSummary) {
|
|
959
|
+
if (!runSummary) {
|
|
960
|
+
return void 0;
|
|
961
|
+
}
|
|
962
|
+
const passRates = Object.values(runSummary).map((summary) => summary.pass_rate?.mean).filter((value) => typeof value === "number" && Number.isFinite(value));
|
|
963
|
+
if (passRates.length === 0) {
|
|
964
|
+
return void 0;
|
|
965
|
+
}
|
|
966
|
+
return passRates.reduce((sum, value) => sum + value, 0) / passRates.length;
|
|
967
|
+
}
|
|
968
|
+
async function runGitBatch(repoDir, input) {
|
|
969
|
+
return new Promise((resolve, reject) => {
|
|
970
|
+
const child = spawn("git", ["cat-file", "--batch"], {
|
|
971
|
+
cwd: repoDir,
|
|
972
|
+
env: getGitEnv(),
|
|
973
|
+
stdio: ["pipe", "pipe", "pipe"]
|
|
974
|
+
});
|
|
975
|
+
const stdoutChunks = [];
|
|
976
|
+
const stderrChunks = [];
|
|
977
|
+
child.stdout.on("data", (chunk) => {
|
|
978
|
+
stdoutChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
|
|
979
|
+
});
|
|
980
|
+
child.stderr.on("data", (chunk) => {
|
|
981
|
+
stderrChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
|
|
982
|
+
});
|
|
983
|
+
child.on("error", (error) => reject(withFriendlyGitHubAuthError(error)));
|
|
984
|
+
child.on("close", (code) => {
|
|
985
|
+
if (code === 0) {
|
|
986
|
+
resolve(Buffer.concat(stdoutChunks));
|
|
987
|
+
return;
|
|
988
|
+
}
|
|
989
|
+
const stderr = Buffer.concat(stderrChunks).toString("utf8").trim();
|
|
990
|
+
reject(
|
|
991
|
+
withFriendlyGitHubAuthError(
|
|
992
|
+
stderr.length > 0 ? new Error(stderr) : new Error("git cat-file failed")
|
|
993
|
+
)
|
|
994
|
+
);
|
|
995
|
+
});
|
|
996
|
+
child.stdin.end(input);
|
|
997
|
+
});
|
|
998
|
+
}
|
|
999
|
+
function parseGitBatchBlobs(output) {
|
|
1000
|
+
const blobs = [];
|
|
1001
|
+
let offset = 0;
|
|
1002
|
+
while (offset < output.length) {
|
|
1003
|
+
const headerEnd = output.indexOf(10, offset);
|
|
1004
|
+
if (headerEnd === -1) {
|
|
1005
|
+
throw new Error("Malformed git cat-file output: missing header terminator");
|
|
1006
|
+
}
|
|
1007
|
+
const header = output.subarray(offset, headerEnd).toString("utf8");
|
|
1008
|
+
offset = headerEnd + 1;
|
|
1009
|
+
if (header.length === 0) {
|
|
1010
|
+
continue;
|
|
1011
|
+
}
|
|
1012
|
+
const missingMatch = /^(.*) missing$/.exec(header);
|
|
1013
|
+
if (missingMatch) {
|
|
1014
|
+
continue;
|
|
1015
|
+
}
|
|
1016
|
+
const headerMatch = /^(.*) (\w+) (\d+)$/.exec(header);
|
|
1017
|
+
if (!headerMatch) {
|
|
1018
|
+
throw new Error(`Malformed git cat-file header: ${header}`);
|
|
1019
|
+
}
|
|
1020
|
+
const [, objectRef, objectType, sizeText] = headerMatch;
|
|
1021
|
+
if (objectType !== "blob") {
|
|
1022
|
+
throw new Error(`Unsupported git object type for ${objectRef}: ${objectType}`);
|
|
1023
|
+
}
|
|
1024
|
+
const size = Number.parseInt(sizeText, 10);
|
|
1025
|
+
const contentEnd = offset + size;
|
|
1026
|
+
if (contentEnd > output.length) {
|
|
1027
|
+
throw new Error(`Malformed git cat-file output for ${objectRef}: truncated blob content`);
|
|
1028
|
+
}
|
|
1029
|
+
blobs.push({
|
|
1030
|
+
size,
|
|
1031
|
+
content: output.subarray(offset, contentEnd)
|
|
1032
|
+
});
|
|
1033
|
+
offset = contentEnd;
|
|
1034
|
+
if (offset < output.length && output[offset] === 10) {
|
|
1035
|
+
offset += 1;
|
|
1036
|
+
}
|
|
1037
|
+
}
|
|
1038
|
+
return blobs;
|
|
1039
|
+
}
|
|
1040
|
+
async function listGitRuns(repoDir, ref = "origin/main") {
|
|
1041
|
+
const { stdout: treeOut } = await runGit(["ls-tree", "-r", "--name-only", ref, "runs"], {
|
|
1042
|
+
cwd: repoDir
|
|
1043
|
+
});
|
|
1044
|
+
const benchmarkPaths = treeOut.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.endsWith("/benchmark.json"));
|
|
1045
|
+
if (benchmarkPaths.length === 0) {
|
|
1046
|
+
return [];
|
|
1047
|
+
}
|
|
1048
|
+
const batchInput = `${benchmarkPaths.map((benchmarkPath) => `${ref}:${benchmarkPath}`).join("\n")}
|
|
1049
|
+
`;
|
|
1050
|
+
const blobs = parseGitBatchBlobs(await runGitBatch(repoDir, batchInput));
|
|
1051
|
+
if (blobs.length !== benchmarkPaths.length) {
|
|
1052
|
+
throw new Error(
|
|
1053
|
+
`Expected ${benchmarkPaths.length} git blobs but received ${blobs.length} while listing results runs`
|
|
1054
|
+
);
|
|
1055
|
+
}
|
|
1056
|
+
const runs = blobs.flatMap((blob, index) => {
|
|
1057
|
+
const benchmarkPath = benchmarkPaths[index];
|
|
1058
|
+
const benchmark = JSON.parse(blob.content.toString("utf8"));
|
|
1059
|
+
const runDir = path4.posix.dirname(benchmarkPath);
|
|
1060
|
+
const relativeRunPath = path4.posix.relative("runs", runDir);
|
|
1061
|
+
const runId = buildGitRunId(relativeRunPath);
|
|
1062
|
+
const timestamp = benchmark.metadata?.timestamp?.trim() || path4.posix.basename(runDir);
|
|
1063
|
+
const targets = benchmark.metadata?.targets ?? [];
|
|
1064
|
+
const passRate = computeAveragePassRate(benchmark.run_summary);
|
|
1065
|
+
return [
|
|
1066
|
+
{
|
|
1067
|
+
run_id: runId,
|
|
1068
|
+
experiment: getRunExperiment(runId, benchmark),
|
|
1069
|
+
timestamp,
|
|
1070
|
+
...passRate !== void 0 && { pass_rate: passRate },
|
|
1071
|
+
...targets.length === 1 && targets[0] ? { target: targets[0] } : {},
|
|
1072
|
+
manifest_path: path4.posix.join(runDir, "index.jsonl"),
|
|
1073
|
+
benchmark_path: benchmarkPath,
|
|
1074
|
+
display_name: path4.posix.basename(runDir),
|
|
1075
|
+
test_count: benchmark.metadata?.tests_run?.length ?? 0,
|
|
1076
|
+
avg_score: 0,
|
|
1077
|
+
size_bytes: blob.size
|
|
1078
|
+
}
|
|
1079
|
+
];
|
|
1080
|
+
});
|
|
1081
|
+
runs.sort((a, b) => b.timestamp.localeCompare(a.timestamp));
|
|
1082
|
+
return runs;
|
|
1083
|
+
}
|
|
1084
|
+
async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main") {
|
|
1085
|
+
const normalizedRunPath = relativeRunPath.split(path4.sep).join("/");
|
|
1086
|
+
const runTreePath = path4.posix.join("runs", normalizedRunPath);
|
|
1087
|
+
const targetRunDir = path4.join(repoDir, ...runTreePath.split("/"));
|
|
1088
|
+
const { stdout: treeOut } = await runGit(["ls-tree", "-r", "--name-only", ref, runTreePath], {
|
|
1089
|
+
cwd: repoDir
|
|
1090
|
+
});
|
|
1091
|
+
const filePaths = treeOut.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
|
|
1092
|
+
if (filePaths.length === 0) {
|
|
1093
|
+
return;
|
|
1094
|
+
}
|
|
1095
|
+
const batchInput = `${filePaths.map((filePath) => `${ref}:${filePath}`).join("\n")}
|
|
1096
|
+
`;
|
|
1097
|
+
const blobs = parseGitBatchBlobs(await runGitBatch(repoDir, batchInput));
|
|
1098
|
+
if (blobs.length !== filePaths.length) {
|
|
1099
|
+
throw new Error(
|
|
1100
|
+
`Expected ${filePaths.length} git blobs but received ${blobs.length} while materializing results run`
|
|
1101
|
+
);
|
|
1102
|
+
}
|
|
1103
|
+
const tempRoot = mkdtempSync(path4.join(repoDir, ".agentv-run-"));
|
|
1104
|
+
const tempRunDir = path4.join(tempRoot, "run");
|
|
1105
|
+
try {
|
|
1106
|
+
for (const [index, filePath] of filePaths.entries()) {
|
|
1107
|
+
const relativeFilePath = path4.posix.relative(runTreePath, filePath);
|
|
1108
|
+
const absolutePath = path4.join(tempRunDir, ...relativeFilePath.split("/"));
|
|
1109
|
+
mkdirSync(path4.dirname(absolutePath), { recursive: true });
|
|
1110
|
+
writeFileSync(absolutePath, blobs[index].content);
|
|
1111
|
+
}
|
|
1112
|
+
mkdirSync(path4.dirname(targetRunDir), { recursive: true });
|
|
1113
|
+
try {
|
|
1114
|
+
renameSync(tempRunDir, targetRunDir);
|
|
1115
|
+
} catch (error) {
|
|
1116
|
+
const code = typeof error === "object" && error !== null && "code" in error ? error.code : void 0;
|
|
1117
|
+
if ((code === "EEXIST" || code === "ENOTEMPTY") && existsSync(targetRunDir)) {
|
|
1118
|
+
return;
|
|
1119
|
+
}
|
|
1120
|
+
throw error;
|
|
1121
|
+
}
|
|
1122
|
+
} finally {
|
|
1123
|
+
rmSync(tempRoot, { recursive: true, force: true });
|
|
1124
|
+
}
|
|
1125
|
+
}
|
|
902
1126
|
function getProjectsRegistryPath() {
|
|
903
1127
|
return path5.join(getAgentvConfigDir(), "projects.yaml");
|
|
904
1128
|
}
|
|
@@ -933,7 +1157,7 @@ function migrateLegacyBenchmarksFile() {
|
|
|
933
1157
|
try {
|
|
934
1158
|
mkdirSync2(path5.dirname(newPath), { recursive: true });
|
|
935
1159
|
writeFileSync2(tempPath, newContent, "utf-8");
|
|
936
|
-
|
|
1160
|
+
renameSync2(tempPath, newPath);
|
|
937
1161
|
unlinkSync(oldPath);
|
|
938
1162
|
} catch (err) {
|
|
939
1163
|
try {
|
|
@@ -2357,7 +2581,7 @@ export {
|
|
|
2357
2581
|
shouldSkipCacheForTemperature,
|
|
2358
2582
|
normalizeResultsConfig,
|
|
2359
2583
|
resolveResultsRepoUrl,
|
|
2360
|
-
|
|
2584
|
+
getResultsRepoLocalPaths,
|
|
2361
2585
|
ensureResultsRepoClone,
|
|
2362
2586
|
getResultsRepoStatus,
|
|
2363
2587
|
syncResultsRepo,
|
|
@@ -2370,6 +2594,8 @@ export {
|
|
|
2370
2594
|
pushResultsRepoBranch,
|
|
2371
2595
|
createDraftResultsPr,
|
|
2372
2596
|
directPushResults,
|
|
2597
|
+
listGitRuns,
|
|
2598
|
+
materializeGitRun,
|
|
2373
2599
|
getProjectsRegistryPath,
|
|
2374
2600
|
loadProjectRegistry,
|
|
2375
2601
|
saveProjectRegistry,
|
|
@@ -2401,4 +2627,4 @@ export {
|
|
|
2401
2627
|
TranscriptProvider,
|
|
2402
2628
|
createAgentKernel
|
|
2403
2629
|
};
|
|
2404
|
-
//# sourceMappingURL=chunk-
|
|
2630
|
+
//# sourceMappingURL=chunk-LOYPSIE7.js.map
|