agentv 4.40.1 → 4.41.1-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{artifact-writer-GIAIMGPQ.js → artifact-writer-AMV64TWV.js} +4 -4
- package/dist/{chunk-TWQP7JYQ.js → chunk-A4J456KS.js} +2 -2
- package/dist/{chunk-BLXYBUU4.js → chunk-ENHX2CCS.js} +1485 -943
- package/dist/chunk-ENHX2CCS.js.map +1 -0
- package/dist/{chunk-B7CT3J2W.js → chunk-NRCVKN7X.js} +899 -300
- package/dist/chunk-NRCVKN7X.js.map +1 -0
- package/dist/{chunk-A36XLUI5.js → chunk-UMPZ64HO.js} +12 -10
- package/dist/chunk-UMPZ64HO.js.map +1 -0
- package/dist/{chunk-I3SC4FOT.js → chunk-Z45FKRMJ.js} +212 -58
- package/dist/chunk-Z45FKRMJ.js.map +1 -0
- package/dist/cli.js +5 -5
- package/dist/{dist-6Z4OSITR.js → dist-X5P5IR65.js} +7 -3
- package/dist/index.js +5 -5
- package/dist/{interactive-Q575M3A7.js → interactive-KU2RGBJJ.js} +5 -5
- package/dist/skills/agentv-bench/references/eval-yaml-spec.md +4 -4
- package/dist/skills/agentv-eval-writer/references/custom-evaluators.md +14 -14
- package/dist/skills/agentv-eval-writer/references/python-helpers.md +47 -0
- package/dist/{ts-eval-loader-NWH3B4HG-UXXCZKLP.js → ts-eval-loader-ZVL6CGTE-TZYZX3QS.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-A36XLUI5.js.map +0 -1
- package/dist/chunk-B7CT3J2W.js.map +0 -1
- package/dist/chunk-BLXYBUU4.js.map +0 -1
- package/dist/chunk-I3SC4FOT.js.map +0 -1
- /package/dist/{artifact-writer-GIAIMGPQ.js.map → artifact-writer-AMV64TWV.js.map} +0 -0
- /package/dist/{chunk-TWQP7JYQ.js.map → chunk-A4J456KS.js.map} +0 -0
- /package/dist/{dist-6Z4OSITR.js.map → dist-X5P5IR65.js.map} +0 -0
- /package/dist/{interactive-Q575M3A7.js.map → interactive-KU2RGBJJ.js.map} +0 -0
- /package/dist/{ts-eval-loader-NWH3B4HG-UXXCZKLP.js.map → ts-eval-loader-ZVL6CGTE-TZYZX3QS.js.map} +0 -0
|
@@ -1,24 +1,33 @@
|
|
|
1
1
|
import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
|
|
2
2
|
import {
|
|
3
|
+
AGENT_PROVIDER_KINDS,
|
|
4
|
+
buildPromptInputs,
|
|
3
5
|
external_exports,
|
|
4
6
|
extractLastAssistantContent,
|
|
5
7
|
getAgentvDataDir,
|
|
6
8
|
getRepoCheckoutRef,
|
|
7
9
|
groupTranscriptJsonLines,
|
|
8
10
|
interpolateEnv,
|
|
11
|
+
loadTests,
|
|
9
12
|
normalizeRepoIdentity,
|
|
10
13
|
normalizeToolCall,
|
|
11
14
|
parseRepoConfig,
|
|
12
15
|
parseYamlValue,
|
|
16
|
+
prepareEvalCaseWorkspace,
|
|
17
|
+
prepareSharedWorkspaceSetup,
|
|
13
18
|
readTranscriptJsonl,
|
|
19
|
+
releaseSharedWorkspaceSetup,
|
|
14
20
|
resolveRepoCloneUrl
|
|
15
|
-
} from "./chunk-
|
|
21
|
+
} from "./chunk-ENHX2CCS.js";
|
|
16
22
|
|
|
17
23
|
// ../../packages/core/dist/index.js
|
|
18
24
|
import { readFileSync } from "node:fs";
|
|
19
25
|
import path from "node:path";
|
|
20
|
-
import {
|
|
26
|
+
import { randomUUID } from "node:crypto";
|
|
21
27
|
import path2 from "node:path";
|
|
28
|
+
import micromatch from "micromatch";
|
|
29
|
+
import { readFile } from "node:fs/promises";
|
|
30
|
+
import path3 from "node:path";
|
|
22
31
|
import { execFile, spawn } from "node:child_process";
|
|
23
32
|
import {
|
|
24
33
|
existsSync,
|
|
@@ -31,17 +40,17 @@ import {
|
|
|
31
40
|
} from "node:fs";
|
|
32
41
|
import { cp, lstat, mkdtemp, readdir, rm, stat } from "node:fs/promises";
|
|
33
42
|
import os from "node:os";
|
|
34
|
-
import
|
|
43
|
+
import path4 from "node:path";
|
|
35
44
|
import { promisify } from "node:util";
|
|
36
45
|
import * as childProcess from "node:child_process";
|
|
37
46
|
import { existsSync as existsSync2 } from "node:fs";
|
|
38
47
|
import { spawnSync } from "node:child_process";
|
|
39
48
|
import { readdir as readdir2, stat as stat2 } from "node:fs/promises";
|
|
40
49
|
import { homedir } from "node:os";
|
|
41
|
-
import
|
|
50
|
+
import path5 from "node:path";
|
|
42
51
|
import { readdir as readdir3, stat as stat3 } from "node:fs/promises";
|
|
43
52
|
import { homedir as homedir2 } from "node:os";
|
|
44
|
-
import
|
|
53
|
+
import path6 from "node:path";
|
|
45
54
|
function codeGraderInstruction(graderName, description) {
|
|
46
55
|
const desc = description ? ` This grader: ${description}.` : "";
|
|
47
56
|
return `Run \`agentv eval assert ${graderName} --agent-output <agent_output> --agent-input <original_prompt>\` and check the result.${desc} The command accepts --agent-output (the agent's full response text) and --agent-input (the original user prompt). It returns JSON on stdout: {"score": 0-1, "reasoning": "..."}. A score >= 0.5 means pass (exit 0); below 0.5 means fail (exit 1).`;
|
|
@@ -297,6 +306,150 @@ function getOutputFilenames(result) {
|
|
|
297
306
|
}
|
|
298
307
|
return names;
|
|
299
308
|
}
|
|
309
|
+
/**
 * Tests whether a test id matches a glob filter.
 *
 * @param {string} id - Test id to check.
 * @param {string | string[]} filter - A single glob pattern, or a list of
 *   patterns of which at least one must match.
 * @returns {boolean} True when `id` matches the pattern (or any of the patterns).
 */
function matchesFilter(id, filter) {
  if (typeof filter === "string") {
    return micromatch.isMatch(id, filter);
  }
  // Anything that is not a string is treated as a list of patterns.
  return filter.some((pattern) => micromatch.isMatch(id, pattern));
}
|
|
312
|
+
/**
 * Narrows a list of eval cases down to exactly one.
 *
 * Selection precedence: an exact `testId` match, then a glob `filter`, then
 * the unfiltered list.
 *
 * @param {object} options
 * @param {Array<{id: string}>} options.evalCases - Candidate eval cases.
 * @param {string} [options.testId] - Exact test id to select.
 * @param {string | string[]} [options.filter] - Glob pattern(s), used only when `testId` is not set.
 * @param {string} options.evalPath - Eval file path, used in the error message only.
 * @returns {{id: string}} The single selected eval case.
 * @throws {Error} When the selection does not contain exactly one case.
 */
function selectSingleCase(options) {
  let selected;
  if (options.testId) {
    selected = options.evalCases.filter((evalCase) => evalCase.id === options.testId);
  } else if (options.filter) {
    selected = options.evalCases.filter((evalCase) => matchesFilter(evalCase.id, options.filter ?? ""));
  } else {
    selected = options.evalCases;
  }

  if (selected.length === 1) {
    return selected[0];
  }

  // Describe whichever selector was in effect so the error is actionable.
  let selector = "the eval file";
  if (options.testId) {
    selector = `test_id "${options.testId}"`;
  } else if (options.filter) {
    const shownFilter = Array.isArray(options.filter) ? options.filter.join(",") : options.filter;
    selector = `filter "${shownFilter}"`;
  }
  throw new Error(
    `prepareEvalWorkspace requires exactly one test, but ${selector} matched ${selected.length} in ${options.evalPath}.`
  );
}
|
|
322
|
+
/**
 * Chooses the prompt formatting mode for a target.
 *
 * @param {{kind: string}} target - Target whose `kind` decides the mode.
 * @returns {"agent" | "lm"} "agent" when the kind is listed in
 *   AGENT_PROVIDER_KINDS or is "cli"; otherwise "lm".
 */
function promptModeForTarget(target) {
  if (AGENT_PROVIDER_KINDS.includes(target.kind)) {
    return "agent";
  }
  if (target.kind === "cli") {
    return "agent";
  }
  return "lm";
}
|
|
325
|
+
/**
 * Converts workspace repo entries into repo pin records.
 *
 * Only fields that are not `undefined` on the source entry are copied, and
 * `base_commit` is renamed to `baseCommit`.
 *
 * @param {Array<object> | null | undefined} repos - Repo entries; nullish is treated as empty.
 * @returns {Array<object>} One pin per entry, keys in the order
 *   path, repo, commit, baseCommit, ancestor, sparse.
 */
function toRepoPins(repos) {
  // [source key, pin key] pairs, in the key order the pin should carry.
  const fieldPairs = [
    ["path", "path"],
    ["repo", "repo"],
    ["commit", "commit"],
    ["base_commit", "baseCommit"],
    ["ancestor", "ancestor"],
    ["sparse", "sparse"]
  ];
  const entries = repos ?? [];
  return entries.map((repo) => {
    const pin = {};
    for (const [sourceKey, pinKey] of fieldPairs) {
      if (repo[sourceKey] !== void 0) {
        pin[pinKey] = repo[sourceKey];
      }
    }
    return pin;
  });
}
|
|
335
|
+
/**
 * Summarizes the pool slot backing the selected workspace.
 *
 * Uses `setup.poolSlot` when it is set; otherwise looks in `setup.poolSlots`
 * for the slot whose path equals `selectedSlotPath`.
 *
 * @param {object} setup - Shared workspace setup exposing `poolSlot` and/or `poolSlots`.
 * @param {string} selectedSlotPath - Workspace path to look up.
 * @returns {{fingerprint: *, slotIndex: *, lockPath: *} | undefined}
 *   Slot summary, or `undefined` when no slot applies.
 */
function poolMetadata(setup, selectedSlotPath) {
  let slot = setup.poolSlot;
  if (slot === null || slot === void 0) {
    slot = setup.poolSlots.find((candidate) => candidate.path === selectedSlotPath);
  }
  if (slot) {
    const { fingerprint, index, lockPath } = slot;
    return { fingerprint, slotIndex: index, lockPath };
  }
  return void 0;
}
|
|
346
|
+
/**
 * Releases every pool slot except the one at `selectedSlotPath`.
 *
 * Slots are released one at a time. A rejected release is ignored so the
 * remaining slots are still attempted. Does nothing when the setup has no
 * pool manager.
 *
 * @param {object} setup - Shared workspace setup with `poolManager` and `poolSlots`.
 * @param {string} selectedSlotPath - Path of the slot to keep.
 * @returns {Promise<void>}
 */
async function releaseUnselectedPoolSlots(setup, selectedSlotPath) {
  const manager = setup.poolManager;
  if (!manager) {
    return;
  }
  const ignoreFailure = () => {
  };
  for (const slot of setup.poolSlots) {
    if (slot.path === selectedSlotPath) {
      continue;
    }
    // Best-effort: a slot that fails to release must not block the others.
    await manager.releaseSlot(slot).catch(ignoreFailure);
  }
}
|
|
357
|
+
/**
 * Materializes a workspace for exactly one eval test and returns a record
 * describing it.
 *
 * Steps: resolve the eval file, load the eval cases unless the caller passed
 * them in, select the single matching case, build its prompt inputs, run the
 * shared setup and then the per-case setup, and release the pool slots that
 * were not selected.
 *
 * On any failure after the shared setup exists, that setup is released
 * best-effort and the original error is rethrown. On success nothing is
 * released here; the returned `cleanupPolicy.manualCleanup` is always `true`.
 *
 * @param {object} options - Preparation options. Fields read here:
 *   `testFilePath`, `evalCases`, `repoRoot`, `verbose`, `testId`, `filter`,
 *   `maxConcurrency`, `retainOnSuccess`, `keepWorkspaces`, `retainOnFailure`,
 *   `cleanupWorkspaces`, `target`, `targetHooks`, `poolMaxSlots`,
 *   `workspacePath`, `workspace`, `workspaceMode`, `workspaceClean`, `now`.
 * @returns {Promise<object>} Record with the workspace path, hook executions,
 *   repo pins, baseline, prompt source, cleanup policy and optional pool info.
 * @throws {Error} When the selection is not exactly one test, or when no
 *   workspace path was materialized for the selected test.
 */
async function prepareEvalWorkspace(options) {
  const evalPath = path2.resolve(options.testFilePath);
  const evalRunId = randomUUID();

  // Only hit the loader when the caller did not hand over eval cases.
  let evalCases = options.evalCases;
  if (evalCases === null || evalCases === void 0) {
    evalCases = await loadTests(evalPath, options.repoRoot, {
      verbose: options.verbose,
      filter: options.testId ?? options.filter
    });
  }

  const evalCase = selectSingleCase({
    evalCases,
    testId: options.testId,
    filter: options.filter,
    evalPath
  });
  const evalDir = path2.dirname(evalPath);
  const workers = options.maxConcurrency ?? 1;

  // Explicit retain settings win; otherwise fall back to the boolean flags.
  const defaultRetainOnSuccess = options.keepWorkspaces ? "keep" : "cleanup";
  const retainOnSuccess = options.retainOnSuccess ?? defaultRetainOnSuccess;
  const defaultRetainOnFailure = options.cleanupWorkspaces ? "cleanup" : "keep";
  const retainOnFailure = options.retainOnFailure ?? defaultRetainOnFailure;

  const formattingMode = promptModeForTarget(options.target);
  const promptInputs = await buildPromptInputs(evalCase, formattingMode);

  let sharedSetup;
  try {
    sharedSetup = await prepareSharedWorkspaceSetup({
      evalRunId,
      evalCases: [evalCase],
      targetHooks: options.targetHooks,
      evalDir,
      verbose: options.verbose,
      workers,
      poolMaxSlots: options.poolMaxSlots,
      workspacePath: options.workspacePath,
      legacyWorkspacePath: options.workspace,
      workspaceMode: options.workspaceMode,
      workspaceClean: options.workspaceClean
    });

    // Take one pool slot when any are available; otherwise use the shared workspace.
    let testPoolSlot;
    if (sharedSetup.availablePoolSlots.length > 0) {
      testPoolSlot = sharedSetup.availablePoolSlots.pop();
    }
    const selectedWorkspacePath = testPoolSlot?.path ?? sharedSetup.sharedWorkspacePath;
    let selectedBaselineCommit;
    if (testPoolSlot) {
      selectedBaselineCommit = sharedSetup.poolSlotBaselines.get(testPoolSlot.path);
    } else {
      selectedBaselineCommit = sharedSetup.sharedBaselineCommit;
    }

    const caseSetup = await prepareEvalCaseWorkspace({
      evalCase,
      targetName: options.target.name,
      evalRunId,
      sharedWorkspacePath: selectedWorkspacePath,
      sharedBaselineCommit: selectedBaselineCommit,
      suiteWorkspaceFile: sharedSetup.suiteWorkspaceFile,
      repoManager: sharedSetup.repoManager,
      evalDir,
      cleanupWorkspaces: options.cleanupWorkspaces,
      targetHooks: options.targetHooks,
      setupDebug: options.verbose
    });
    if (!caseSetup.workspacePath) {
      throw new Error(
        `No workspace was materialized for test "${evalCase.id}". Add workspace.template, workspace.repos, or workspace.hooks before preparing an external attempt.`
      );
    }

    await releaseUnselectedPoolSlots(sharedSetup, caseSetup.workspacePath);
    const pool = poolMetadata(sharedSetup, caseSetup.workspacePath);

    // Build the result field by field so optional keys keep their position.
    const prepared = {
      evalPath,
      testId: evalCase.id,
      target: options.target.name,
      evalRunId,
      workspacePath: caseSetup.workspacePath
    };
    if (caseSetup.caseWorkspaceFile !== void 0) {
      prepared.workspaceFile = caseSetup.caseWorkspaceFile;
    }
    const clock = options.now ?? (() => /* @__PURE__ */ new Date());
    prepared.createdAt = clock().toISOString();
    prepared.hookExecutions = [...sharedSetup.hookExecutions, ...caseSetup.hookExecutions];
    prepared.repoPins = toRepoPins(evalCase.workspace?.repos);
    if (caseSetup.baselineCommit) {
      prepared.baseline = { status: "initialized", commit: caseSetup.baselineCommit };
    } else {
      prepared.baseline = { status: "unavailable" };
    }

    const promptSource = {
      kind: "eval_case",
      formattingMode,
      question: promptInputs.question
    };
    if (promptInputs.systemMessage !== void 0) {
      promptSource.systemMessage = promptInputs.systemMessage;
    }
    if (promptInputs.chatPrompt !== void 0) {
      promptSource.chatPrompt = promptInputs.chatPrompt;
    }
    prepared.promptSource = promptSource;

    prepared.cleanupPolicy = {
      mode: sharedSetup.configuredMode,
      retainOnSuccess,
      retainOnFailure,
      manualCleanup: true
    };
    prepared.sharedWorkspace = caseSetup.isSharedWorkspace;
    if (pool !== void 0) {
      prepared.pool = pool;
    }
    return prepared;
  } catch (error) {
    if (sharedSetup) {
      // Best-effort release; the original failure is the error worth surfacing.
      await releaseSharedWorkspaceSetup(sharedSetup).catch(() => {
      });
    }
    throw error;
  }
}
|
|
300
453
|
var AgentVConfigSchema = external_exports.object({
|
|
301
454
|
/** Default execution settings */
|
|
302
455
|
execution: external_exports.object({
|
|
@@ -486,7 +639,7 @@ async function extractReposFromEvalFile(filePath) {
|
|
|
486
639
|
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
487
640
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
488
641
|
const obj = parsed;
|
|
489
|
-
const evalFileDir =
|
|
642
|
+
const evalFileDir = path3.dirname(path3.resolve(filePath));
|
|
490
643
|
const repos = [];
|
|
491
644
|
const suiteRepos = await extractReposFromWorkspaceRaw(obj.workspace, evalFileDir);
|
|
492
645
|
repos.push(...suiteRepos);
|
|
@@ -502,7 +655,7 @@ async function extractReposFromEvalFile(filePath) {
|
|
|
502
655
|
}
|
|
503
656
|
async function extractReposFromWorkspaceRaw(raw, evalFileDir) {
|
|
504
657
|
if (typeof raw === "string") {
|
|
505
|
-
const workspaceFilePath =
|
|
658
|
+
const workspaceFilePath = path3.resolve(evalFileDir, raw);
|
|
506
659
|
const content = await readFile(workspaceFilePath, "utf8");
|
|
507
660
|
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
508
661
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
@@ -545,7 +698,7 @@ function withFriendlyGitHubAuthError(error) {
|
|
|
545
698
|
}
|
|
546
699
|
/**
 * Expands a leading `~` to the current user's home directory.
 *
 * Only a bare `~`, or a `~` followed directly by `/` or `\`, is expanded.
 * Any other input is returned unchanged.
 *
 * @param {string} p - Path that may start with `~`.
 * @returns {string} The expanded path, or `p` itself when no expansion applies.
 */
function expandHome(p) {
  const refersToHome = p === "~" || ["~/", "~\\"].some((prefix) => p.startsWith(prefix));
  if (!refersToHome) {
    return p;
  }
  // Drop the "~" and join the remainder onto the home directory.
  const remainder = p.slice(1);
  return path4.join(os.homedir(), remainder);
}
|
|
@@ -557,8 +710,8 @@ function normalizeResultsConfig(config, options) {
|
|
|
557
710
|
const remote = config.remote?.trim() || "origin";
|
|
558
711
|
const autoPush = config.sync?.auto_push ?? config.auto_push === true;
|
|
559
712
|
const requirePush = config.sync?.require_push === true;
|
|
560
|
-
const resolvedRepoPath = repoPath ?
|
|
561
|
-
const resolvedPath = config.path ? expandHome(config.path.trim()) : repoUrl ?
|
|
713
|
+
const resolvedRepoPath = repoPath ? path4.resolve(options?.baseDir ?? process.cwd(), expandHome(repoPath)) : void 0;
|
|
714
|
+
const resolvedPath = config.path ? expandHome(config.path.trim()) : repoUrl ? path4.join(getAgentvDataDir(), "results", sanitizeRepoSlug(repoUrl)) : resolvedRepoPath ?? path4.join(getAgentvDataDir(), "results", sanitizeRepoSlug(repo));
|
|
562
715
|
return {
|
|
563
716
|
mode: "github",
|
|
564
717
|
repo,
|
|
@@ -579,11 +732,11 @@ function resolveResultsRepoUrl(repo) {
|
|
|
579
732
|
return `https://github.com/${repo}.git`;
|
|
580
733
|
}
|
|
581
734
|
/**
 * Computes the local cache paths used for a results repository.
 *
 * @param {string} repo - Repository identifier; passed through sanitizeRepoSlug
 *   to form the directory name.
 * @returns {{rootDir: string, repoDir: string, statusFile: string}}
 *   `rootDir` is `<data dir>/cache/results-repo/<slug>`; `repoDir` and
 *   `statusFile` are `repo` and `status.json` inside it.
 */
function getResultsRepoLocalPaths(repo) {
  const dataDir = getAgentvDataDir();
  const slug = sanitizeRepoSlug(repo);
  const rootDir = path4.join(dataDir, "cache", "results-repo", slug);
  const repoDir = path4.join(rootDir, "repo");
  const statusFile = path4.join(rootDir, "status.json");
  return { rootDir, repoDir, statusFile };
}
|
|
589
742
|
function readPersistedStatus(statusFile) {
|
|
@@ -597,7 +750,7 @@ function readPersistedStatus(statusFile) {
|
|
|
597
750
|
}
|
|
598
751
|
}
|
|
599
752
|
/**
 * Writes a status object to disk as pretty-printed JSON.
 *
 * The parent directory is created first if it is missing. The file content is
 * the 2-space-indented JSON followed by a single trailing newline.
 *
 * @param {string} statusFile - Destination file path.
 * @param {*} status - Value to serialize with JSON.stringify.
 * @returns {void}
 */
function writePersistedStatus(statusFile, status) {
  const parentDir = path4.dirname(statusFile);
  mkdirSync(parentDir, { recursive: true });
  const serialized = JSON.stringify(status, null, 2);
  writeFileSync(statusFile, `${serialized}\n`, "utf8");
}
|
|
@@ -761,9 +914,9 @@ async function ensureResultsRepoClone(config) {
|
|
|
761
914
|
const cachePaths = getResultsRepoLocalPaths(normalized.repo);
|
|
762
915
|
const cloneDir = normalized.path;
|
|
763
916
|
mkdirSync(cachePaths.rootDir, { recursive: true });
|
|
764
|
-
mkdirSync(
|
|
917
|
+
mkdirSync(path4.dirname(cloneDir), { recursive: true });
|
|
765
918
|
const cloneMissing = !existsSync(cloneDir);
|
|
766
|
-
const gitDir =
|
|
919
|
+
const gitDir = path4.join(cloneDir, ".git");
|
|
767
920
|
const cloneEmpty = !cloneMissing && !existsSync(gitDir) && (await readdir(cloneDir)).length === 0;
|
|
768
921
|
if (cloneMissing || cloneEmpty) {
|
|
769
922
|
try {
|
|
@@ -939,7 +1092,7 @@ async function hasInProgressGitConflict(repoDir) {
|
|
|
939
1092
|
check: false
|
|
940
1093
|
});
|
|
941
1094
|
const markerPath = stdout.trim();
|
|
942
|
-
const resolvedMarkerPath =
|
|
1095
|
+
const resolvedMarkerPath = path4.isAbsolute(markerPath) ? markerPath : path4.join(repoDir, markerPath);
|
|
943
1096
|
if (markerPath && existsSync(resolvedMarkerPath)) {
|
|
944
1097
|
return true;
|
|
945
1098
|
}
|
|
@@ -1403,8 +1556,8 @@ async function prepareResultsRepoBranch(config, branchName) {
|
|
|
1403
1556
|
const cloneDir = await ensureResultsRepoClone(normalized);
|
|
1404
1557
|
const baseBranch = await resolveDefaultBranch(cloneDir);
|
|
1405
1558
|
await fetchResultsRepo(cloneDir, normalized.remote);
|
|
1406
|
-
const worktreeRoot = await mkdtemp(
|
|
1407
|
-
const worktreeDir =
|
|
1559
|
+
const worktreeRoot = await mkdtemp(path4.join(os.tmpdir(), "agentv-results-repo-"));
|
|
1560
|
+
const worktreeDir = path4.join(worktreeRoot, "repo");
|
|
1408
1561
|
await runGit(
|
|
1409
1562
|
["worktree", "add", "-B", branchName, worktreeDir, `${normalized.remote}/${baseBranch}`],
|
|
1410
1563
|
{
|
|
@@ -1426,12 +1579,12 @@ async function prepareResultsRepoBranch(config, branchName) {
|
|
|
1426
1579
|
}
|
|
1427
1580
|
/**
 * Replaces the destination directory with a recursive copy of the source.
 *
 * Any existing destination is removed first, so files that are not present in
 * the source do not survive. The destination's parent directory is created
 * when missing.
 *
 * @param {{sourceDir: string, destinationDir: string}} params
 * @returns {Promise<void>}
 */
async function stageResultsArtifacts(params) {
  const copyOptions = { recursive: true };
  // Start from a clean slate so stale artifacts never mix with the new copy.
  rmSync(params.destinationDir, { recursive: true, force: true });
  const destinationParent = path4.dirname(params.destinationDir);
  mkdirSync(destinationParent, { recursive: true });
  await cp(params.sourceDir, params.destinationDir, copyOptions);
}
|
|
1432
1585
|
/**
 * Returns the `runs` directory inside the local results repository.
 *
 * @param {object} config - Results configuration; normalized with
 *   normalizeResultsConfig to obtain the repository path.
 * @returns {string} `<normalized path>/<RESULTS_REPO_RESULTS_DIR>/runs`.
 */
function resolveResultsRepoRunsDir(config) {
  const { path: repoRoot } = normalizeResultsConfig(config);
  const segments = [repoRoot, RESULTS_REPO_RESULTS_DIR, "runs"];
  return path4.join(...segments);
}
|
|
1436
1589
|
async function directorySizeBytes(targetPath) {
|
|
1437
1590
|
const entry = await stat(targetPath);
|
|
@@ -1440,7 +1593,7 @@ async function directorySizeBytes(targetPath) {
|
|
|
1440
1593
|
}
|
|
1441
1594
|
let total = 0;
|
|
1442
1595
|
for (const child of await readdir(targetPath, { withFileTypes: true })) {
|
|
1443
|
-
total += await directorySizeBytes(
|
|
1596
|
+
total += await directorySizeBytes(path4.join(targetPath, child.name));
|
|
1444
1597
|
}
|
|
1445
1598
|
return total;
|
|
1446
1599
|
}
|
|
@@ -1515,7 +1668,7 @@ async function assertValidResultsBranchName(repoDir, branch) {
|
|
|
1515
1668
|
await runGit(["check-ref-format", "--branch", branch], { cwd: repoDir });
|
|
1516
1669
|
}
|
|
1517
1670
|
function normalizeDestinationPath(destinationPath) {
|
|
1518
|
-
const normalized = destinationPath.split(
|
|
1671
|
+
const normalized = destinationPath.split(path4.sep).join("/");
|
|
1519
1672
|
const segments = normalized.split("/").filter(Boolean);
|
|
1520
1673
|
if (segments.length === 0 || normalized.startsWith("/") || segments.some((segment) => segment === "..")) {
|
|
1521
1674
|
throw new Error(`Invalid results destination path: ${destinationPath}`);
|
|
@@ -1526,7 +1679,7 @@ async function listSourceFiles(sourceDir) {
|
|
|
1526
1679
|
const entries = [];
|
|
1527
1680
|
async function visit(dir) {
|
|
1528
1681
|
for (const entry of await readdir(dir, { withFileTypes: true })) {
|
|
1529
|
-
const absolutePath =
|
|
1682
|
+
const absolutePath = path4.join(dir, entry.name);
|
|
1530
1683
|
if (entry.isDirectory()) {
|
|
1531
1684
|
await visit(absolutePath);
|
|
1532
1685
|
} else if (entry.isFile() || entry.isSymbolicLink()) {
|
|
@@ -1579,14 +1732,14 @@ async function commitResultsRunWithTemporaryIndex(params) {
|
|
|
1579
1732
|
await assertValidResultsBranchName(params.repoDir, normalized.branch);
|
|
1580
1733
|
await ensureResultsBranchNotCheckedOut(params.repoDir, normalized);
|
|
1581
1734
|
const destinationRunPath = normalizeDestinationPath(params.destinationPath);
|
|
1582
|
-
const destinationTreePath =
|
|
1735
|
+
const destinationTreePath = path4.posix.join(RESULTS_REPO_RUNS_DIR, destinationRunPath);
|
|
1583
1736
|
const base = await resolveStorageBranchBase({
|
|
1584
1737
|
repoDir: params.repoDir,
|
|
1585
1738
|
normalized,
|
|
1586
1739
|
preferRemote: params.preferRemoteBase
|
|
1587
1740
|
});
|
|
1588
|
-
const indexRoot = await mkdtemp(
|
|
1589
|
-
const indexFile =
|
|
1741
|
+
const indexRoot = await mkdtemp(path4.join(os.tmpdir(), "agentv-results-index-"));
|
|
1742
|
+
const indexFile = path4.join(indexRoot, "index");
|
|
1590
1743
|
const indexEnv = { GIT_INDEX_FILE: indexFile };
|
|
1591
1744
|
try {
|
|
1592
1745
|
if (base.baseRef) {
|
|
@@ -1608,8 +1761,8 @@ async function commitResultsRunWithTemporaryIndex(params) {
|
|
|
1608
1761
|
}
|
|
1609
1762
|
const sourceFiles = await listSourceFiles(params.sourceDir);
|
|
1610
1763
|
for (const sourceFile of sourceFiles) {
|
|
1611
|
-
const relativeFile =
|
|
1612
|
-
const destinationFile =
|
|
1764
|
+
const relativeFile = path4.relative(params.sourceDir, sourceFile).split(path4.sep).join("/");
|
|
1765
|
+
const destinationFile = path4.posix.join(destinationTreePath, relativeFile);
|
|
1613
1766
|
const fileStat = await lstat(sourceFile);
|
|
1614
1767
|
let mode = fileStat.mode & 73 ? "100755" : "100644";
|
|
1615
1768
|
let hashInputPath = sourceFile;
|
|
@@ -1783,7 +1936,7 @@ async function directPushResults(params) {
|
|
|
1783
1936
|
return true;
|
|
1784
1937
|
}
|
|
1785
1938
|
function buildGitRunId(relativeRunPath) {
|
|
1786
|
-
const normalized = relativeRunPath.split(
|
|
1939
|
+
const normalized = relativeRunPath.split(path4.sep).join("/");
|
|
1787
1940
|
const segments = normalized.split("/").filter(Boolean);
|
|
1788
1941
|
if (segments.length >= 2) {
|
|
1789
1942
|
const experiment = segments.slice(0, -1).join("/");
|
|
@@ -1887,8 +2040,8 @@ function parseGitBatchBlobs(output) {
|
|
|
1887
2040
|
}
|
|
1888
2041
|
/**
 * Builds the work-in-progress branch name for a run directory:
 * `agentv/wip/<hostname>/<run basename>`.
 *
 * Both variable components are reduced to `[A-Za-z0-9._-]` and truncated
 * (hostname to 40 characters, run basename to 60). Because `.` is kept, each
 * component is also adjusted to satisfy git's ref-name rules: no leading `.`,
 * no `..`, no trailing `.` or `.lock`, and never empty. Components that were
 * already valid come out unchanged.
 *
 * @param {string} runDir - Run directory; only its basename is used.
 * @returns {string} A branch name with exactly four non-empty components.
 */
function buildWipBranchName(runDir) {
  const toRefComponent = (raw, maxLength, fallback) => {
    const cleaned = raw
      .replace(/[^A-Za-z0-9._-]+/g, "-")
      // Truncate before fixing dots so a cut that lands on "." is repaired too.
      .slice(0, maxLength)
      .replace(/\.{2,}/g, ".")
      .replace(/^\.+/, "")
      .replace(/(?:\.lock|\.)+$/, "");
    // An empty component would produce "//" or a trailing "/", which git rejects.
    return cleaned || fallback;
  };
  const hostname = toRefComponent(os.hostname(), 40, "host");
  const runBasename = toRefComponent(path4.basename(runDir), 60, "run");
  return `agentv/wip/${hostname}/${runBasename}`;
}
|
|
1893
2046
|
async function setupWipWorktree(params) {
|
|
1894
2047
|
const normalized = normalizeResultsConfig(params.config);
|
|
@@ -1906,8 +2059,8 @@ async function setupWipWorktree(params) {
|
|
|
1906
2059
|
if (!baseRef) {
|
|
1907
2060
|
throw new Error("Could not resolve a base ref for the WIP results branch");
|
|
1908
2061
|
}
|
|
1909
|
-
const worktreeRoot = await mkdtemp(
|
|
1910
|
-
const worktreeDir =
|
|
2062
|
+
const worktreeRoot = await mkdtemp(path4.join(os.tmpdir(), "agentv-wip-"));
|
|
2063
|
+
const worktreeDir = path4.join(worktreeRoot, "repo");
|
|
1911
2064
|
await runGit(["worktree", "add", "-B", params.wipBranch, worktreeDir, baseRef], {
|
|
1912
2065
|
cwd: cloneDir
|
|
1913
2066
|
});
|
|
@@ -1928,7 +2081,7 @@ async function setupWipWorktree(params) {
|
|
|
1928
2081
|
};
|
|
1929
2082
|
}
|
|
1930
2083
|
async function pushWipCheckpoint(params) {
|
|
1931
|
-
const destinationDir =
|
|
2084
|
+
const destinationDir = path4.join(
|
|
1932
2085
|
params.handle.worktreeDir,
|
|
1933
2086
|
RESULTS_REPO_RUNS_DIR,
|
|
1934
2087
|
params.destinationPath
|
|
@@ -1985,11 +2138,11 @@ async function listGitRuns(repoDir, ref = "origin/main") {
|
|
|
1985
2138
|
const runs = blobs.flatMap((blob, index) => {
|
|
1986
2139
|
const benchmarkPath = benchmarkPaths[index];
|
|
1987
2140
|
const benchmark = JSON.parse(blob.content.toString("utf8"));
|
|
1988
|
-
const runDir =
|
|
1989
|
-
const relativeRunPath =
|
|
2141
|
+
const runDir = path4.posix.dirname(benchmarkPath);
|
|
2142
|
+
const relativeRunPath = path4.posix.relative(RESULTS_REPO_RUNS_DIR, runDir);
|
|
1990
2143
|
const runId = buildGitRunId(relativeRunPath);
|
|
1991
|
-
const timestamp = benchmark.metadata?.timestamp?.trim() ||
|
|
1992
|
-
const displayName = benchmark.metadata?.display_name?.trim() ||
|
|
2144
|
+
const timestamp = benchmark.metadata?.timestamp?.trim() || path4.posix.basename(runDir);
|
|
2145
|
+
const displayName = benchmark.metadata?.display_name?.trim() || path4.posix.basename(runDir);
|
|
1993
2146
|
const targets = benchmark.metadata?.targets ?? [];
|
|
1994
2147
|
const passRate = computeAveragePassRate(benchmark.run_summary);
|
|
1995
2148
|
return [
|
|
@@ -1999,7 +2152,7 @@ async function listGitRuns(repoDir, ref = "origin/main") {
|
|
|
1999
2152
|
timestamp,
|
|
2000
2153
|
...passRate !== void 0 && { pass_rate: passRate },
|
|
2001
2154
|
...targets.length === 1 && targets[0] ? { target: targets[0] } : {},
|
|
2002
|
-
manifest_path:
|
|
2155
|
+
manifest_path: path4.posix.join(runDir, "index.jsonl"),
|
|
2003
2156
|
benchmark_path: benchmarkPath,
|
|
2004
2157
|
display_name: displayName,
|
|
2005
2158
|
test_count: benchmark.metadata?.tests_run?.length ?? 0,
|
|
@@ -2012,9 +2165,9 @@ async function listGitRuns(repoDir, ref = "origin/main") {
|
|
|
2012
2165
|
return runs;
|
|
2013
2166
|
}
|
|
2014
2167
|
async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main") {
|
|
2015
|
-
const normalizedRunPath = relativeRunPath.split(
|
|
2016
|
-
const runTreePath =
|
|
2017
|
-
const targetRunDir =
|
|
2168
|
+
const normalizedRunPath = relativeRunPath.split(path4.sep).join("/");
|
|
2169
|
+
const runTreePath = path4.posix.join(RESULTS_REPO_RUNS_DIR, normalizedRunPath);
|
|
2170
|
+
const targetRunDir = path4.join(repoDir, ...runTreePath.split("/"));
|
|
2018
2171
|
const { stdout: treeOut } = await runGit(["ls-tree", "-r", "--name-only", ref, runTreePath], {
|
|
2019
2172
|
cwd: repoDir
|
|
2020
2173
|
});
|
|
@@ -2030,16 +2183,16 @@ async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main")
|
|
|
2030
2183
|
`Expected ${filePaths.length} git blobs but received ${blobs.length} while materializing results run`
|
|
2031
2184
|
);
|
|
2032
2185
|
}
|
|
2033
|
-
const tempRoot = mkdtempSync(
|
|
2034
|
-
const tempRunDir =
|
|
2186
|
+
const tempRoot = mkdtempSync(path4.join(repoDir, ".agentv-run-"));
|
|
2187
|
+
const tempRunDir = path4.join(tempRoot, "run");
|
|
2035
2188
|
try {
|
|
2036
2189
|
for (const [index, filePath] of filePaths.entries()) {
|
|
2037
|
-
const relativeFilePath =
|
|
2038
|
-
const absolutePath =
|
|
2039
|
-
mkdirSync(
|
|
2190
|
+
const relativeFilePath = path4.posix.relative(runTreePath, filePath);
|
|
2191
|
+
const absolutePath = path4.join(tempRunDir, ...relativeFilePath.split("/"));
|
|
2192
|
+
mkdirSync(path4.dirname(absolutePath), { recursive: true });
|
|
2040
2193
|
writeFileSync(absolutePath, blobs[index].content);
|
|
2041
2194
|
}
|
|
2042
|
-
mkdirSync(
|
|
2195
|
+
mkdirSync(path4.dirname(targetRunDir), { recursive: true });
|
|
2043
2196
|
try {
|
|
2044
2197
|
renameSync(tempRunDir, targetRunDir);
|
|
2045
2198
|
} catch (error) {
|
|
@@ -3024,7 +3177,7 @@ function extractResponseItemContent(content) {
|
|
|
3024
3177
|
}
|
|
3025
3178
|
return parts.length > 0 ? parts.join("") : void 0;
|
|
3026
3179
|
}
|
|
3027
|
-
var DEFAULT_SESSIONS_DIR = () =>
|
|
3180
|
+
// Default sessions directory, `<home>/.codex/sessions`; discoverCodexSessions
// falls back to it when no `sessionsDir` option is given. Kept as a function
// so homedir() is read on each call.
var DEFAULT_SESSIONS_DIR = () => path5.join(homedir(), ".codex", "sessions");
|
|
3028
3181
|
async function discoverCodexSessions(opts) {
|
|
3029
3182
|
const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
|
|
3030
3183
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
@@ -3036,7 +3189,7 @@ async function discoverCodexSessions(opts) {
|
|
|
3036
3189
|
return [];
|
|
3037
3190
|
}
|
|
3038
3191
|
for (const year of yearDirs) {
|
|
3039
|
-
const yearPath =
|
|
3192
|
+
const yearPath = path5.join(sessionsDir, year);
|
|
3040
3193
|
let monthDirs;
|
|
3041
3194
|
try {
|
|
3042
3195
|
monthDirs = await readdir2(yearPath);
|
|
@@ -3044,7 +3197,7 @@ async function discoverCodexSessions(opts) {
|
|
|
3044
3197
|
continue;
|
|
3045
3198
|
}
|
|
3046
3199
|
for (const month of monthDirs) {
|
|
3047
|
-
const monthPath =
|
|
3200
|
+
const monthPath = path5.join(yearPath, month);
|
|
3048
3201
|
let dayDirs;
|
|
3049
3202
|
try {
|
|
3050
3203
|
dayDirs = await readdir2(monthPath);
|
|
@@ -3056,7 +3209,7 @@ async function discoverCodexSessions(opts) {
|
|
|
3056
3209
|
const dirDate = `${year}-${month}-${day}`;
|
|
3057
3210
|
if (dirDate !== opts.date) continue;
|
|
3058
3211
|
}
|
|
3059
|
-
const dayPath =
|
|
3212
|
+
const dayPath = path5.join(monthPath, day);
|
|
3060
3213
|
let files;
|
|
3061
3214
|
try {
|
|
3062
3215
|
files = await readdir2(dayPath);
|
|
@@ -3065,7 +3218,7 @@ async function discoverCodexSessions(opts) {
|
|
|
3065
3218
|
}
|
|
3066
3219
|
for (const file of files) {
|
|
3067
3220
|
if (!file.startsWith("rollout-") || !file.endsWith(".jsonl")) continue;
|
|
3068
|
-
const filePath =
|
|
3221
|
+
const filePath = path5.join(dayPath, file);
|
|
3069
3222
|
const nameWithoutExt = file.replace(/\.jsonl$/, "");
|
|
3070
3223
|
const parts = nameWithoutExt.split("-");
|
|
3071
3224
|
const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
|
|
@@ -3084,7 +3237,7 @@ async function discoverCodexSessions(opts) {
|
|
|
3084
3237
|
sessions.sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime());
|
|
3085
3238
|
return sessions.slice(0, limit);
|
|
3086
3239
|
}
|
|
3087
|
-
var DEFAULT_PROJECTS_DIR = () =>
|
|
3240
|
+
// Builds the default projects directory path, `<home>/.claude/projects`.
// Kept as a function so the home directory is read on each call.
var DEFAULT_PROJECTS_DIR = () => path6.join(homedir2(), ".claude", "projects");
|
|
3088
3241
|
/**
 * Encodes a project path by replacing every `/` with `-`.
 *
 * @param {string} projectPath - Path to encode.
 * @returns {string} The path with all forward slashes turned into dashes.
 */
function encodeProjectPath(projectPath) {
  const slash = /\//g;
  const encoded = projectPath.replace(slash, "-");
  return encoded;
}
|
|
@@ -3103,7 +3256,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
3103
3256
|
}
|
|
3104
3257
|
const sessions = [];
|
|
3105
3258
|
for (const projectDir of projectDirs) {
|
|
3106
|
-
const dirPath =
|
|
3259
|
+
const dirPath = path6.join(projectsDir, projectDir);
|
|
3107
3260
|
let entries;
|
|
3108
3261
|
try {
|
|
3109
3262
|
entries = await readdir3(dirPath);
|
|
@@ -3114,7 +3267,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
3114
3267
|
if (!entry.endsWith(".jsonl")) continue;
|
|
3115
3268
|
const sessionId = entry.replace(/\.jsonl$/, "");
|
|
3116
3269
|
if (opts?.sessionId && sessionId !== opts.sessionId) continue;
|
|
3117
|
-
const filePath =
|
|
3270
|
+
const filePath = path6.join(dirPath, entry);
|
|
3118
3271
|
let updatedAt;
|
|
3119
3272
|
try {
|
|
3120
3273
|
const fileStat = await stat3(filePath);
|
|
@@ -3188,6 +3341,7 @@ export {
|
|
|
3188
3341
|
transpileEvalYaml,
|
|
3189
3342
|
transpileEvalYamlFile,
|
|
3190
3343
|
getOutputFilenames,
|
|
3344
|
+
prepareEvalWorkspace,
|
|
3191
3345
|
defineConfig,
|
|
3192
3346
|
loadTsConfig,
|
|
3193
3347
|
generateRubrics,
|
|
@@ -3232,4 +3386,4 @@ export {
|
|
|
3232
3386
|
TranscriptProvider,
|
|
3233
3387
|
createAgentKernel
|
|
3234
3388
|
};
|
|
3235
|
-
//# sourceMappingURL=chunk-
|
|
3389
|
+
//# sourceMappingURL=chunk-Z45FKRMJ.js.map
|