agentv 4.40.1 → 4.41.1-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. package/dist/{artifact-writer-GIAIMGPQ.js → artifact-writer-AMV64TWV.js} +4 -4
  2. package/dist/{chunk-TWQP7JYQ.js → chunk-A4J456KS.js} +2 -2
  3. package/dist/{chunk-BLXYBUU4.js → chunk-ENHX2CCS.js} +1485 -943
  4. package/dist/chunk-ENHX2CCS.js.map +1 -0
  5. package/dist/{chunk-B7CT3J2W.js → chunk-NRCVKN7X.js} +899 -300
  6. package/dist/chunk-NRCVKN7X.js.map +1 -0
  7. package/dist/{chunk-A36XLUI5.js → chunk-UMPZ64HO.js} +12 -10
  8. package/dist/chunk-UMPZ64HO.js.map +1 -0
  9. package/dist/{chunk-I3SC4FOT.js → chunk-Z45FKRMJ.js} +212 -58
  10. package/dist/chunk-Z45FKRMJ.js.map +1 -0
  11. package/dist/cli.js +5 -5
  12. package/dist/{dist-6Z4OSITR.js → dist-X5P5IR65.js} +7 -3
  13. package/dist/index.js +5 -5
  14. package/dist/{interactive-Q575M3A7.js → interactive-KU2RGBJJ.js} +5 -5
  15. package/dist/skills/agentv-bench/references/eval-yaml-spec.md +4 -4
  16. package/dist/skills/agentv-eval-writer/references/custom-evaluators.md +14 -14
  17. package/dist/skills/agentv-eval-writer/references/python-helpers.md +47 -0
  18. package/dist/{ts-eval-loader-NWH3B4HG-UXXCZKLP.js → ts-eval-loader-ZVL6CGTE-TZYZX3QS.js} +2 -2
  19. package/package.json +1 -1
  20. package/dist/chunk-A36XLUI5.js.map +0 -1
  21. package/dist/chunk-B7CT3J2W.js.map +0 -1
  22. package/dist/chunk-BLXYBUU4.js.map +0 -1
  23. package/dist/chunk-I3SC4FOT.js.map +0 -1
  24. /package/dist/{artifact-writer-GIAIMGPQ.js.map → artifact-writer-AMV64TWV.js.map} +0 -0
  25. /package/dist/{chunk-TWQP7JYQ.js.map → chunk-A4J456KS.js.map} +0 -0
  26. /package/dist/{dist-6Z4OSITR.js.map → dist-X5P5IR65.js.map} +0 -0
  27. /package/dist/{interactive-Q575M3A7.js.map → interactive-KU2RGBJJ.js.map} +0 -0
  28. /package/dist/{ts-eval-loader-NWH3B4HG-UXXCZKLP.js.map → ts-eval-loader-ZVL6CGTE-TZYZX3QS.js.map} +0 -0
@@ -1,24 +1,33 @@
1
1
  import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
2
2
  import {
3
+ AGENT_PROVIDER_KINDS,
4
+ buildPromptInputs,
3
5
  external_exports,
4
6
  extractLastAssistantContent,
5
7
  getAgentvDataDir,
6
8
  getRepoCheckoutRef,
7
9
  groupTranscriptJsonLines,
8
10
  interpolateEnv,
11
+ loadTests,
9
12
  normalizeRepoIdentity,
10
13
  normalizeToolCall,
11
14
  parseRepoConfig,
12
15
  parseYamlValue,
16
+ prepareEvalCaseWorkspace,
17
+ prepareSharedWorkspaceSetup,
13
18
  readTranscriptJsonl,
19
+ releaseSharedWorkspaceSetup,
14
20
  resolveRepoCloneUrl
15
- } from "./chunk-BLXYBUU4.js";
21
+ } from "./chunk-ENHX2CCS.js";
16
22
 
17
23
  // ../../packages/core/dist/index.js
18
24
  import { readFileSync } from "node:fs";
19
25
  import path from "node:path";
20
- import { readFile } from "node:fs/promises";
26
+ import { randomUUID } from "node:crypto";
21
27
  import path2 from "node:path";
28
+ import micromatch from "micromatch";
29
+ import { readFile } from "node:fs/promises";
30
+ import path3 from "node:path";
22
31
  import { execFile, spawn } from "node:child_process";
23
32
  import {
24
33
  existsSync,
@@ -31,17 +40,17 @@ import {
31
40
  } from "node:fs";
32
41
  import { cp, lstat, mkdtemp, readdir, rm, stat } from "node:fs/promises";
33
42
  import os from "node:os";
34
- import path3 from "node:path";
43
+ import path4 from "node:path";
35
44
  import { promisify } from "node:util";
36
45
  import * as childProcess from "node:child_process";
37
46
  import { existsSync as existsSync2 } from "node:fs";
38
47
  import { spawnSync } from "node:child_process";
39
48
  import { readdir as readdir2, stat as stat2 } from "node:fs/promises";
40
49
  import { homedir } from "node:os";
41
- import path4 from "node:path";
50
+ import path5 from "node:path";
42
51
  import { readdir as readdir3, stat as stat3 } from "node:fs/promises";
43
52
  import { homedir as homedir2 } from "node:os";
44
- import path5 from "node:path";
53
+ import path6 from "node:path";
45
54
  function codeGraderInstruction(graderName, description) {
46
55
  const desc = description ? ` This grader: ${description}.` : "";
47
56
  return `Run \`agentv eval assert ${graderName} --agent-output <agent_output> --agent-input <original_prompt>\` and check the result.${desc} The command accepts --agent-output (the agent's full response text) and --agent-input (the original user prompt). It returns JSON on stdout: {"score": 0-1, "reasoning": "..."}. A score >= 0.5 means pass (exit 0); below 0.5 means fail (exit 1).`;
@@ -297,6 +306,150 @@ function getOutputFilenames(result) {
297
306
  }
298
307
  return names;
299
308
  }
309
+ function matchesFilter(id, filter) {
310
+ return typeof filter === "string" ? micromatch.isMatch(id, filter) : filter.some((pattern) => micromatch.isMatch(id, pattern));
311
+ }
312
+ function selectSingleCase(options) {
313
+ const selected = options.testId ? options.evalCases.filter((evalCase) => evalCase.id === options.testId) : options.filter ? options.evalCases.filter((evalCase) => matchesFilter(evalCase.id, options.filter ?? "")) : options.evalCases;
314
+ if (selected.length !== 1) {
315
+ const selector = options.testId ? `test_id "${options.testId}"` : options.filter ? `filter "${Array.isArray(options.filter) ? options.filter.join(",") : options.filter}"` : "the eval file";
316
+ throw new Error(
317
+ `prepareEvalWorkspace requires exactly one test, but ${selector} matched ${selected.length} in ${options.evalPath}.`
318
+ );
319
+ }
320
+ return selected[0];
321
+ }
322
+ function promptModeForTarget(target) {
323
+ return AGENT_PROVIDER_KINDS.includes(target.kind) || target.kind === "cli" ? "agent" : "lm";
324
+ }
325
+ function toRepoPins(repos) {
326
+ return (repos ?? []).map((repo) => ({
327
+ ...repo.path !== void 0 && { path: repo.path },
328
+ ...repo.repo !== void 0 && { repo: repo.repo },
329
+ ...repo.commit !== void 0 && { commit: repo.commit },
330
+ ...repo.base_commit !== void 0 && { baseCommit: repo.base_commit },
331
+ ...repo.ancestor !== void 0 && { ancestor: repo.ancestor },
332
+ ...repo.sparse !== void 0 && { sparse: repo.sparse }
333
+ }));
334
+ }
335
+ function poolMetadata(setup, selectedSlotPath) {
336
+ const slot = setup.poolSlot ?? setup.poolSlots.find((candidate) => candidate.path === selectedSlotPath);
337
+ if (!slot) {
338
+ return void 0;
339
+ }
340
+ return {
341
+ fingerprint: slot.fingerprint,
342
+ slotIndex: slot.index,
343
+ lockPath: slot.lockPath
344
+ };
345
+ }
346
+ async function releaseUnselectedPoolSlots(setup, selectedSlotPath) {
347
+ if (!setup.poolManager) {
348
+ return;
349
+ }
350
+ for (const slot of setup.poolSlots) {
351
+ if (slot.path !== selectedSlotPath) {
352
+ await setup.poolManager.releaseSlot(slot).catch(() => {
353
+ });
354
+ }
355
+ }
356
+ }
357
+ async function prepareEvalWorkspace(options) {
358
+ const evalPath = path2.resolve(options.testFilePath);
359
+ const evalRunId = randomUUID();
360
+ const evalCases = options.evalCases ?? await loadTests(evalPath, options.repoRoot, {
361
+ verbose: options.verbose,
362
+ filter: options.testId ?? options.filter
363
+ });
364
+ const evalCase = selectSingleCase({
365
+ evalCases,
366
+ testId: options.testId,
367
+ filter: options.filter,
368
+ evalPath
369
+ });
370
+ const evalDir = path2.dirname(evalPath);
371
+ const workers = options.maxConcurrency ?? 1;
372
+ const retainOnSuccess = options.retainOnSuccess ?? (options.keepWorkspaces ? "keep" : "cleanup");
373
+ const retainOnFailure = options.retainOnFailure ?? (options.cleanupWorkspaces ? "cleanup" : "keep");
374
+ const formattingMode = promptModeForTarget(options.target);
375
+ const promptInputs = await buildPromptInputs(evalCase, formattingMode);
376
+ let sharedSetup;
377
+ try {
378
+ sharedSetup = await prepareSharedWorkspaceSetup({
379
+ evalRunId,
380
+ evalCases: [evalCase],
381
+ targetHooks: options.targetHooks,
382
+ evalDir,
383
+ verbose: options.verbose,
384
+ workers,
385
+ poolMaxSlots: options.poolMaxSlots,
386
+ workspacePath: options.workspacePath,
387
+ legacyWorkspacePath: options.workspace,
388
+ workspaceMode: options.workspaceMode,
389
+ workspaceClean: options.workspaceClean
390
+ });
391
+ const testPoolSlot = sharedSetup.availablePoolSlots.length > 0 ? sharedSetup.availablePoolSlots.pop() : void 0;
392
+ const selectedWorkspacePath = testPoolSlot?.path ?? sharedSetup.sharedWorkspacePath;
393
+ const selectedBaselineCommit = testPoolSlot ? sharedSetup.poolSlotBaselines.get(testPoolSlot.path) : sharedSetup.sharedBaselineCommit;
394
+ const caseSetup = await prepareEvalCaseWorkspace({
395
+ evalCase,
396
+ targetName: options.target.name,
397
+ evalRunId,
398
+ sharedWorkspacePath: selectedWorkspacePath,
399
+ sharedBaselineCommit: selectedBaselineCommit,
400
+ suiteWorkspaceFile: sharedSetup.suiteWorkspaceFile,
401
+ repoManager: sharedSetup.repoManager,
402
+ evalDir,
403
+ cleanupWorkspaces: options.cleanupWorkspaces,
404
+ targetHooks: options.targetHooks,
405
+ setupDebug: options.verbose
406
+ });
407
+ if (!caseSetup.workspacePath) {
408
+ throw new Error(
409
+ `No workspace was materialized for test "${evalCase.id}". Add workspace.template, workspace.repos, or workspace.hooks before preparing an external attempt.`
410
+ );
411
+ }
412
+ await releaseUnselectedPoolSlots(sharedSetup, caseSetup.workspacePath);
413
+ const pool = poolMetadata(sharedSetup, caseSetup.workspacePath);
414
+ return {
415
+ evalPath,
416
+ testId: evalCase.id,
417
+ target: options.target.name,
418
+ evalRunId,
419
+ workspacePath: caseSetup.workspacePath,
420
+ ...caseSetup.caseWorkspaceFile !== void 0 && {
421
+ workspaceFile: caseSetup.caseWorkspaceFile
422
+ },
423
+ createdAt: (options.now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
424
+ hookExecutions: [...sharedSetup.hookExecutions, ...caseSetup.hookExecutions],
425
+ repoPins: toRepoPins(evalCase.workspace?.repos),
426
+ baseline: caseSetup.baselineCommit ? { status: "initialized", commit: caseSetup.baselineCommit } : { status: "unavailable" },
427
+ promptSource: {
428
+ kind: "eval_case",
429
+ formattingMode,
430
+ question: promptInputs.question,
431
+ ...promptInputs.systemMessage !== void 0 && {
432
+ systemMessage: promptInputs.systemMessage
433
+ },
434
+ ...promptInputs.chatPrompt !== void 0 && { chatPrompt: promptInputs.chatPrompt }
435
+ },
436
+ cleanupPolicy: {
437
+ mode: sharedSetup.configuredMode,
438
+ retainOnSuccess,
439
+ retainOnFailure,
440
+ manualCleanup: true
441
+ },
442
+ sharedWorkspace: caseSetup.isSharedWorkspace,
443
+ ...pool !== void 0 && { pool }
444
+ };
445
+ } catch (error) {
446
+ if (sharedSetup) {
447
+ await releaseSharedWorkspaceSetup(sharedSetup).catch(() => {
448
+ });
449
+ }
450
+ throw error;
451
+ }
452
+ }
300
453
  var AgentVConfigSchema = external_exports.object({
301
454
  /** Default execution settings */
302
455
  execution: external_exports.object({
@@ -486,7 +639,7 @@ async function extractReposFromEvalFile(filePath) {
486
639
  const parsed = interpolateEnv(parseYamlValue(content), process.env);
487
640
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
488
641
  const obj = parsed;
489
- const evalFileDir = path2.dirname(path2.resolve(filePath));
642
+ const evalFileDir = path3.dirname(path3.resolve(filePath));
490
643
  const repos = [];
491
644
  const suiteRepos = await extractReposFromWorkspaceRaw(obj.workspace, evalFileDir);
492
645
  repos.push(...suiteRepos);
@@ -502,7 +655,7 @@ async function extractReposFromEvalFile(filePath) {
502
655
  }
503
656
  async function extractReposFromWorkspaceRaw(raw, evalFileDir) {
504
657
  if (typeof raw === "string") {
505
- const workspaceFilePath = path2.resolve(evalFileDir, raw);
658
+ const workspaceFilePath = path3.resolve(evalFileDir, raw);
506
659
  const content = await readFile(workspaceFilePath, "utf8");
507
660
  const parsed = interpolateEnv(parseYamlValue(content), process.env);
508
661
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
@@ -545,7 +698,7 @@ function withFriendlyGitHubAuthError(error) {
545
698
  }
546
699
  function expandHome(p) {
547
700
  if (p === "~" || p.startsWith("~/") || p.startsWith("~\\")) {
548
- return path3.join(os.homedir(), p.slice(1));
701
+ return path4.join(os.homedir(), p.slice(1));
549
702
  }
550
703
  return p;
551
704
  }
@@ -557,8 +710,8 @@ function normalizeResultsConfig(config, options) {
557
710
  const remote = config.remote?.trim() || "origin";
558
711
  const autoPush = config.sync?.auto_push ?? config.auto_push === true;
559
712
  const requirePush = config.sync?.require_push === true;
560
- const resolvedRepoPath = repoPath ? path3.resolve(options?.baseDir ?? process.cwd(), expandHome(repoPath)) : void 0;
561
- const resolvedPath = config.path ? expandHome(config.path.trim()) : repoUrl ? path3.join(getAgentvDataDir(), "results", sanitizeRepoSlug(repoUrl)) : resolvedRepoPath ?? path3.join(getAgentvDataDir(), "results", sanitizeRepoSlug(repo));
713
+ const resolvedRepoPath = repoPath ? path4.resolve(options?.baseDir ?? process.cwd(), expandHome(repoPath)) : void 0;
714
+ const resolvedPath = config.path ? expandHome(config.path.trim()) : repoUrl ? path4.join(getAgentvDataDir(), "results", sanitizeRepoSlug(repoUrl)) : resolvedRepoPath ?? path4.join(getAgentvDataDir(), "results", sanitizeRepoSlug(repo));
562
715
  return {
563
716
  mode: "github",
564
717
  repo,
@@ -579,11 +732,11 @@ function resolveResultsRepoUrl(repo) {
579
732
  return `https://github.com/${repo}.git`;
580
733
  }
581
734
  function getResultsRepoLocalPaths(repo) {
582
- const rootDir = path3.join(getAgentvDataDir(), "cache", "results-repo", sanitizeRepoSlug(repo));
735
+ const rootDir = path4.join(getAgentvDataDir(), "cache", "results-repo", sanitizeRepoSlug(repo));
583
736
  return {
584
737
  rootDir,
585
- repoDir: path3.join(rootDir, "repo"),
586
- statusFile: path3.join(rootDir, "status.json")
738
+ repoDir: path4.join(rootDir, "repo"),
739
+ statusFile: path4.join(rootDir, "status.json")
587
740
  };
588
741
  }
589
742
  function readPersistedStatus(statusFile) {
@@ -597,7 +750,7 @@ function readPersistedStatus(statusFile) {
597
750
  }
598
751
  }
599
752
  function writePersistedStatus(statusFile, status) {
600
- mkdirSync(path3.dirname(statusFile), { recursive: true });
753
+ mkdirSync(path4.dirname(statusFile), { recursive: true });
601
754
  writeFileSync(statusFile, `${JSON.stringify(status, null, 2)}
602
755
  `, "utf8");
603
756
  }
@@ -761,9 +914,9 @@ async function ensureResultsRepoClone(config) {
761
914
  const cachePaths = getResultsRepoLocalPaths(normalized.repo);
762
915
  const cloneDir = normalized.path;
763
916
  mkdirSync(cachePaths.rootDir, { recursive: true });
764
- mkdirSync(path3.dirname(cloneDir), { recursive: true });
917
+ mkdirSync(path4.dirname(cloneDir), { recursive: true });
765
918
  const cloneMissing = !existsSync(cloneDir);
766
- const gitDir = path3.join(cloneDir, ".git");
919
+ const gitDir = path4.join(cloneDir, ".git");
767
920
  const cloneEmpty = !cloneMissing && !existsSync(gitDir) && (await readdir(cloneDir)).length === 0;
768
921
  if (cloneMissing || cloneEmpty) {
769
922
  try {
@@ -939,7 +1092,7 @@ async function hasInProgressGitConflict(repoDir) {
939
1092
  check: false
940
1093
  });
941
1094
  const markerPath = stdout.trim();
942
- const resolvedMarkerPath = path3.isAbsolute(markerPath) ? markerPath : path3.join(repoDir, markerPath);
1095
+ const resolvedMarkerPath = path4.isAbsolute(markerPath) ? markerPath : path4.join(repoDir, markerPath);
943
1096
  if (markerPath && existsSync(resolvedMarkerPath)) {
944
1097
  return true;
945
1098
  }
@@ -1403,8 +1556,8 @@ async function prepareResultsRepoBranch(config, branchName) {
1403
1556
  const cloneDir = await ensureResultsRepoClone(normalized);
1404
1557
  const baseBranch = await resolveDefaultBranch(cloneDir);
1405
1558
  await fetchResultsRepo(cloneDir, normalized.remote);
1406
- const worktreeRoot = await mkdtemp(path3.join(os.tmpdir(), "agentv-results-repo-"));
1407
- const worktreeDir = path3.join(worktreeRoot, "repo");
1559
+ const worktreeRoot = await mkdtemp(path4.join(os.tmpdir(), "agentv-results-repo-"));
1560
+ const worktreeDir = path4.join(worktreeRoot, "repo");
1408
1561
  await runGit(
1409
1562
  ["worktree", "add", "-B", branchName, worktreeDir, `${normalized.remote}/${baseBranch}`],
1410
1563
  {
@@ -1426,12 +1579,12 @@ async function prepareResultsRepoBranch(config, branchName) {
1426
1579
  }
1427
1580
  async function stageResultsArtifacts(params) {
1428
1581
  rmSync(params.destinationDir, { recursive: true, force: true });
1429
- mkdirSync(path3.dirname(params.destinationDir), { recursive: true });
1582
+ mkdirSync(path4.dirname(params.destinationDir), { recursive: true });
1430
1583
  await cp(params.sourceDir, params.destinationDir, { recursive: true });
1431
1584
  }
1432
1585
  function resolveResultsRepoRunsDir(config) {
1433
1586
  const normalized = normalizeResultsConfig(config);
1434
- return path3.join(normalized.path, RESULTS_REPO_RESULTS_DIR, "runs");
1587
+ return path4.join(normalized.path, RESULTS_REPO_RESULTS_DIR, "runs");
1435
1588
  }
1436
1589
  async function directorySizeBytes(targetPath) {
1437
1590
  const entry = await stat(targetPath);
@@ -1440,7 +1593,7 @@ async function directorySizeBytes(targetPath) {
1440
1593
  }
1441
1594
  let total = 0;
1442
1595
  for (const child of await readdir(targetPath, { withFileTypes: true })) {
1443
- total += await directorySizeBytes(path3.join(targetPath, child.name));
1596
+ total += await directorySizeBytes(path4.join(targetPath, child.name));
1444
1597
  }
1445
1598
  return total;
1446
1599
  }
@@ -1515,7 +1668,7 @@ async function assertValidResultsBranchName(repoDir, branch) {
1515
1668
  await runGit(["check-ref-format", "--branch", branch], { cwd: repoDir });
1516
1669
  }
1517
1670
  function normalizeDestinationPath(destinationPath) {
1518
- const normalized = destinationPath.split(path3.sep).join("/");
1671
+ const normalized = destinationPath.split(path4.sep).join("/");
1519
1672
  const segments = normalized.split("/").filter(Boolean);
1520
1673
  if (segments.length === 0 || normalized.startsWith("/") || segments.some((segment) => segment === "..")) {
1521
1674
  throw new Error(`Invalid results destination path: ${destinationPath}`);
@@ -1526,7 +1679,7 @@ async function listSourceFiles(sourceDir) {
1526
1679
  const entries = [];
1527
1680
  async function visit(dir) {
1528
1681
  for (const entry of await readdir(dir, { withFileTypes: true })) {
1529
- const absolutePath = path3.join(dir, entry.name);
1682
+ const absolutePath = path4.join(dir, entry.name);
1530
1683
  if (entry.isDirectory()) {
1531
1684
  await visit(absolutePath);
1532
1685
  } else if (entry.isFile() || entry.isSymbolicLink()) {
@@ -1579,14 +1732,14 @@ async function commitResultsRunWithTemporaryIndex(params) {
1579
1732
  await assertValidResultsBranchName(params.repoDir, normalized.branch);
1580
1733
  await ensureResultsBranchNotCheckedOut(params.repoDir, normalized);
1581
1734
  const destinationRunPath = normalizeDestinationPath(params.destinationPath);
1582
- const destinationTreePath = path3.posix.join(RESULTS_REPO_RUNS_DIR, destinationRunPath);
1735
+ const destinationTreePath = path4.posix.join(RESULTS_REPO_RUNS_DIR, destinationRunPath);
1583
1736
  const base = await resolveStorageBranchBase({
1584
1737
  repoDir: params.repoDir,
1585
1738
  normalized,
1586
1739
  preferRemote: params.preferRemoteBase
1587
1740
  });
1588
- const indexRoot = await mkdtemp(path3.join(os.tmpdir(), "agentv-results-index-"));
1589
- const indexFile = path3.join(indexRoot, "index");
1741
+ const indexRoot = await mkdtemp(path4.join(os.tmpdir(), "agentv-results-index-"));
1742
+ const indexFile = path4.join(indexRoot, "index");
1590
1743
  const indexEnv = { GIT_INDEX_FILE: indexFile };
1591
1744
  try {
1592
1745
  if (base.baseRef) {
@@ -1608,8 +1761,8 @@ async function commitResultsRunWithTemporaryIndex(params) {
1608
1761
  }
1609
1762
  const sourceFiles = await listSourceFiles(params.sourceDir);
1610
1763
  for (const sourceFile of sourceFiles) {
1611
- const relativeFile = path3.relative(params.sourceDir, sourceFile).split(path3.sep).join("/");
1612
- const destinationFile = path3.posix.join(destinationTreePath, relativeFile);
1764
+ const relativeFile = path4.relative(params.sourceDir, sourceFile).split(path4.sep).join("/");
1765
+ const destinationFile = path4.posix.join(destinationTreePath, relativeFile);
1613
1766
  const fileStat = await lstat(sourceFile);
1614
1767
  let mode = fileStat.mode & 73 ? "100755" : "100644";
1615
1768
  let hashInputPath = sourceFile;
@@ -1783,7 +1936,7 @@ async function directPushResults(params) {
1783
1936
  return true;
1784
1937
  }
1785
1938
  function buildGitRunId(relativeRunPath) {
1786
- const normalized = relativeRunPath.split(path3.sep).join("/");
1939
+ const normalized = relativeRunPath.split(path4.sep).join("/");
1787
1940
  const segments = normalized.split("/").filter(Boolean);
1788
1941
  if (segments.length >= 2) {
1789
1942
  const experiment = segments.slice(0, -1).join("/");
@@ -1887,8 +2040,8 @@ function parseGitBatchBlobs(output) {
1887
2040
  }
1888
2041
  function buildWipBranchName(runDir) {
1889
2042
  const hostname = os.hostname().replace(/[^A-Za-z0-9._-]+/g, "-").slice(0, 40);
1890
- const runBasename = path3.basename(runDir).replace(/[^A-Za-z0-9._-]+/g, "-").slice(0, 60);
1891
- return `agentv/inflight/${hostname}/${runBasename}`;
2043
+ const runBasename = path4.basename(runDir).replace(/[^A-Za-z0-9._-]+/g, "-").slice(0, 60);
2044
+ return `agentv/wip/${hostname}/${runBasename}`;
1892
2045
  }
1893
2046
  async function setupWipWorktree(params) {
1894
2047
  const normalized = normalizeResultsConfig(params.config);
@@ -1906,8 +2059,8 @@ async function setupWipWorktree(params) {
1906
2059
  if (!baseRef) {
1907
2060
  throw new Error("Could not resolve a base ref for the WIP results branch");
1908
2061
  }
1909
- const worktreeRoot = await mkdtemp(path3.join(os.tmpdir(), "agentv-wip-"));
1910
- const worktreeDir = path3.join(worktreeRoot, "repo");
2062
+ const worktreeRoot = await mkdtemp(path4.join(os.tmpdir(), "agentv-wip-"));
2063
+ const worktreeDir = path4.join(worktreeRoot, "repo");
1911
2064
  await runGit(["worktree", "add", "-B", params.wipBranch, worktreeDir, baseRef], {
1912
2065
  cwd: cloneDir
1913
2066
  });
@@ -1928,7 +2081,7 @@ async function setupWipWorktree(params) {
1928
2081
  };
1929
2082
  }
1930
2083
  async function pushWipCheckpoint(params) {
1931
- const destinationDir = path3.join(
2084
+ const destinationDir = path4.join(
1932
2085
  params.handle.worktreeDir,
1933
2086
  RESULTS_REPO_RUNS_DIR,
1934
2087
  params.destinationPath
@@ -1985,11 +2138,11 @@ async function listGitRuns(repoDir, ref = "origin/main") {
1985
2138
  const runs = blobs.flatMap((blob, index) => {
1986
2139
  const benchmarkPath = benchmarkPaths[index];
1987
2140
  const benchmark = JSON.parse(blob.content.toString("utf8"));
1988
- const runDir = path3.posix.dirname(benchmarkPath);
1989
- const relativeRunPath = path3.posix.relative(RESULTS_REPO_RUNS_DIR, runDir);
2141
+ const runDir = path4.posix.dirname(benchmarkPath);
2142
+ const relativeRunPath = path4.posix.relative(RESULTS_REPO_RUNS_DIR, runDir);
1990
2143
  const runId = buildGitRunId(relativeRunPath);
1991
- const timestamp = benchmark.metadata?.timestamp?.trim() || path3.posix.basename(runDir);
1992
- const displayName = benchmark.metadata?.display_name?.trim() || path3.posix.basename(runDir);
2144
+ const timestamp = benchmark.metadata?.timestamp?.trim() || path4.posix.basename(runDir);
2145
+ const displayName = benchmark.metadata?.display_name?.trim() || path4.posix.basename(runDir);
1993
2146
  const targets = benchmark.metadata?.targets ?? [];
1994
2147
  const passRate = computeAveragePassRate(benchmark.run_summary);
1995
2148
  return [
@@ -1999,7 +2152,7 @@ async function listGitRuns(repoDir, ref = "origin/main") {
1999
2152
  timestamp,
2000
2153
  ...passRate !== void 0 && { pass_rate: passRate },
2001
2154
  ...targets.length === 1 && targets[0] ? { target: targets[0] } : {},
2002
- manifest_path: path3.posix.join(runDir, "index.jsonl"),
2155
+ manifest_path: path4.posix.join(runDir, "index.jsonl"),
2003
2156
  benchmark_path: benchmarkPath,
2004
2157
  display_name: displayName,
2005
2158
  test_count: benchmark.metadata?.tests_run?.length ?? 0,
@@ -2012,9 +2165,9 @@ async function listGitRuns(repoDir, ref = "origin/main") {
2012
2165
  return runs;
2013
2166
  }
2014
2167
  async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main") {
2015
- const normalizedRunPath = relativeRunPath.split(path3.sep).join("/");
2016
- const runTreePath = path3.posix.join(RESULTS_REPO_RUNS_DIR, normalizedRunPath);
2017
- const targetRunDir = path3.join(repoDir, ...runTreePath.split("/"));
2168
+ const normalizedRunPath = relativeRunPath.split(path4.sep).join("/");
2169
+ const runTreePath = path4.posix.join(RESULTS_REPO_RUNS_DIR, normalizedRunPath);
2170
+ const targetRunDir = path4.join(repoDir, ...runTreePath.split("/"));
2018
2171
  const { stdout: treeOut } = await runGit(["ls-tree", "-r", "--name-only", ref, runTreePath], {
2019
2172
  cwd: repoDir
2020
2173
  });
@@ -2030,16 +2183,16 @@ async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main")
2030
2183
  `Expected ${filePaths.length} git blobs but received ${blobs.length} while materializing results run`
2031
2184
  );
2032
2185
  }
2033
- const tempRoot = mkdtempSync(path3.join(repoDir, ".agentv-run-"));
2034
- const tempRunDir = path3.join(tempRoot, "run");
2186
+ const tempRoot = mkdtempSync(path4.join(repoDir, ".agentv-run-"));
2187
+ const tempRunDir = path4.join(tempRoot, "run");
2035
2188
  try {
2036
2189
  for (const [index, filePath] of filePaths.entries()) {
2037
- const relativeFilePath = path3.posix.relative(runTreePath, filePath);
2038
- const absolutePath = path3.join(tempRunDir, ...relativeFilePath.split("/"));
2039
- mkdirSync(path3.dirname(absolutePath), { recursive: true });
2190
+ const relativeFilePath = path4.posix.relative(runTreePath, filePath);
2191
+ const absolutePath = path4.join(tempRunDir, ...relativeFilePath.split("/"));
2192
+ mkdirSync(path4.dirname(absolutePath), { recursive: true });
2040
2193
  writeFileSync(absolutePath, blobs[index].content);
2041
2194
  }
2042
- mkdirSync(path3.dirname(targetRunDir), { recursive: true });
2195
+ mkdirSync(path4.dirname(targetRunDir), { recursive: true });
2043
2196
  try {
2044
2197
  renameSync(tempRunDir, targetRunDir);
2045
2198
  } catch (error) {
@@ -3024,7 +3177,7 @@ function extractResponseItemContent(content) {
3024
3177
  }
3025
3178
  return parts.length > 0 ? parts.join("") : void 0;
3026
3179
  }
3027
- var DEFAULT_SESSIONS_DIR = () => path4.join(homedir(), ".codex", "sessions");
3180
+ var DEFAULT_SESSIONS_DIR = () => path5.join(homedir(), ".codex", "sessions");
3028
3181
  async function discoverCodexSessions(opts) {
3029
3182
  const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
3030
3183
  const limit = opts?.latest ? 1 : opts?.limit ?? 10;
@@ -3036,7 +3189,7 @@ async function discoverCodexSessions(opts) {
3036
3189
  return [];
3037
3190
  }
3038
3191
  for (const year of yearDirs) {
3039
- const yearPath = path4.join(sessionsDir, year);
3192
+ const yearPath = path5.join(sessionsDir, year);
3040
3193
  let monthDirs;
3041
3194
  try {
3042
3195
  monthDirs = await readdir2(yearPath);
@@ -3044,7 +3197,7 @@ async function discoverCodexSessions(opts) {
3044
3197
  continue;
3045
3198
  }
3046
3199
  for (const month of monthDirs) {
3047
- const monthPath = path4.join(yearPath, month);
3200
+ const monthPath = path5.join(yearPath, month);
3048
3201
  let dayDirs;
3049
3202
  try {
3050
3203
  dayDirs = await readdir2(monthPath);
@@ -3056,7 +3209,7 @@ async function discoverCodexSessions(opts) {
3056
3209
  const dirDate = `${year}-${month}-${day}`;
3057
3210
  if (dirDate !== opts.date) continue;
3058
3211
  }
3059
- const dayPath = path4.join(monthPath, day);
3212
+ const dayPath = path5.join(monthPath, day);
3060
3213
  let files;
3061
3214
  try {
3062
3215
  files = await readdir2(dayPath);
@@ -3065,7 +3218,7 @@ async function discoverCodexSessions(opts) {
3065
3218
  }
3066
3219
  for (const file of files) {
3067
3220
  if (!file.startsWith("rollout-") || !file.endsWith(".jsonl")) continue;
3068
- const filePath = path4.join(dayPath, file);
3221
+ const filePath = path5.join(dayPath, file);
3069
3222
  const nameWithoutExt = file.replace(/\.jsonl$/, "");
3070
3223
  const parts = nameWithoutExt.split("-");
3071
3224
  const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
@@ -3084,7 +3237,7 @@ async function discoverCodexSessions(opts) {
3084
3237
  sessions.sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime());
3085
3238
  return sessions.slice(0, limit);
3086
3239
  }
3087
- var DEFAULT_PROJECTS_DIR = () => path5.join(homedir2(), ".claude", "projects");
3240
+ var DEFAULT_PROJECTS_DIR = () => path6.join(homedir2(), ".claude", "projects");
3088
3241
  function encodeProjectPath(projectPath) {
3089
3242
  return projectPath.replace(/\//g, "-");
3090
3243
  }
@@ -3103,7 +3256,7 @@ async function discoverClaudeSessions(opts) {
3103
3256
  }
3104
3257
  const sessions = [];
3105
3258
  for (const projectDir of projectDirs) {
3106
- const dirPath = path5.join(projectsDir, projectDir);
3259
+ const dirPath = path6.join(projectsDir, projectDir);
3107
3260
  let entries;
3108
3261
  try {
3109
3262
  entries = await readdir3(dirPath);
@@ -3114,7 +3267,7 @@ async function discoverClaudeSessions(opts) {
3114
3267
  if (!entry.endsWith(".jsonl")) continue;
3115
3268
  const sessionId = entry.replace(/\.jsonl$/, "");
3116
3269
  if (opts?.sessionId && sessionId !== opts.sessionId) continue;
3117
- const filePath = path5.join(dirPath, entry);
3270
+ const filePath = path6.join(dirPath, entry);
3118
3271
  let updatedAt;
3119
3272
  try {
3120
3273
  const fileStat = await stat3(filePath);
@@ -3188,6 +3341,7 @@ export {
3188
3341
  transpileEvalYaml,
3189
3342
  transpileEvalYamlFile,
3190
3343
  getOutputFilenames,
3344
+ prepareEvalWorkspace,
3191
3345
  defineConfig,
3192
3346
  loadTsConfig,
3193
3347
  generateRubrics,
@@ -3232,4 +3386,4 @@ export {
3232
3386
  TranscriptProvider,
3233
3387
  createAgentKernel
3234
3388
  };
3235
- //# sourceMappingURL=chunk-I3SC4FOT.js.map
3389
+ //# sourceMappingURL=chunk-Z45FKRMJ.js.map