@ls-stack/agent-eval 0.55.1 → 0.56.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
- import { Ct as buildEvalKey, Nt as getEvalRegistry, St as resolveLlmCallsConfig, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as normalizeScoreDef, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as createFsCacheStore, wt as getCaseRowCaseKey, xt as resolveApiCallsConfig } from "./runExecution-Sw38bCaq.mjs";
2
- import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-DJsdLYeZ.mjs";
1
+ import { Ct as buildEvalKey, Nt as getEvalRegistry, St as resolveLlmCallsConfig, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as normalizeScoreDef, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as createFsCacheStore, wt as getCaseRowCaseKey, xt as resolveApiCallsConfig } from "./runExecution-6lrtj48K.mjs";
2
+ import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-BYaN2mzS.mjs";
3
3
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
5
5
  import { createHash, randomUUID } from "node:crypto";
@@ -794,6 +794,7 @@ async function markRunErrored(runState, message, managerContext) {
794
794
  payload: { message }
795
795
  });
796
796
  managerContext.emitDiscoveryEvent();
797
+ managerContext.onRunTerminal();
797
798
  }
798
799
  async function markRunTerminalFromChild(runState, event, managerContext) {
799
800
  const snapshot = await loadPersistedRunSnapshot(runState.runDir);
@@ -815,6 +816,7 @@ async function markRunTerminalFromChild(runState, event, managerContext) {
815
816
  }
816
817
  managerContext.emitEvent(runState, event);
817
818
  managerContext.emitDiscoveryEvent();
819
+ managerContext.onRunTerminal();
818
820
  }
819
821
  //#endregion
820
822
  //#region ../runner/src/runnerStateHydration.ts
@@ -883,6 +885,7 @@ function getWatchRootsForIncludePatterns(params) {
883
885
  }
884
886
  //#endregion
885
887
  //#region ../runner/src/runner.ts
888
+ const defaultCachePruneIdleDelayMs = 5e3;
886
889
  /** Create an in-memory eval runner bound to the current workspace config. */
887
890
  function createRunner({ watchForChanges = true } = {}) {
888
891
  let config;
@@ -903,6 +906,7 @@ function createRunner({ watchForChanges = true } = {}) {
903
906
  let runHistoryWatcher;
904
907
  let discoveryRefreshTimer;
905
908
  let runHistoryRefreshTimer;
909
+ let cachePruneIdleTimer;
906
910
  let registryLoadCounter = 0;
907
911
  const configReload = createConfigReloadController({
908
912
  getActiveRunCount,
@@ -942,6 +946,9 @@ function createRunner({ watchForChanges = true } = {}) {
942
946
  async clearCache(filter) {
943
947
  await cacheStore.clear(filter);
944
948
  },
949
+ async repairCache() {
950
+ return cacheStore.repair();
951
+ },
945
952
  async recomputeStatusesForEval(evalKey) {
946
953
  const evalMeta = resolveEvalMeta(evalKey);
947
954
  if (!evalMeta) return { updatedRuns: 0 };
@@ -1252,6 +1259,7 @@ function createRunner({ watchForChanges = true } = {}) {
1252
1259
  emitDiscoveryEvent();
1253
1260
  },
1254
1261
  async startRun(request) {
1262
+ cancelCacheRetentionPrune();
1255
1263
  const tagsFilterError = validateTagsFilters(request.target.tagsFilter);
1256
1264
  if (tagsFilterError !== null) throw new Error(tagsFilterError);
1257
1265
  const deletedTemporaryRuns = await deleteTemporaryRuns({
@@ -1349,7 +1357,8 @@ function createRunner({ watchForChanges = true } = {}) {
1349
1357
  workspaceRoot,
1350
1358
  evals,
1351
1359
  emitEvent,
1352
- emitDiscoveryEvent
1360
+ emitDiscoveryEvent,
1361
+ onRunTerminal: scheduleCacheRetentionPrune
1353
1362
  }
1354
1363
  });
1355
1364
  if (deletedTemporaryRuns > 0) emitDiscoveryEvent();
@@ -1398,6 +1407,7 @@ function createRunner({ watchForChanges = true } = {}) {
1398
1407
  payload: run.summary
1399
1408
  });
1400
1409
  emitDiscoveryEvent();
1410
+ scheduleCacheRetentionPrune();
1401
1411
  },
1402
1412
  getCaseDetail(runId, caseId) {
1403
1413
  const run = runs.get(runId);
@@ -1419,6 +1429,7 @@ function createRunner({ watchForChanges = true } = {}) {
1419
1429
  };
1420
1430
  },
1421
1431
  async close() {
1432
+ cancelCacheRetentionPrune();
1422
1433
  await Promise.all([closeWatchers(), configReload.close()]);
1423
1434
  },
1424
1435
  getWorkspaceRoot() {
@@ -1522,6 +1533,28 @@ function createRunner({ watchForChanges = true } = {}) {
1522
1533
  function getActiveRunCount() {
1523
1534
  return [...runs.values()].filter((run) => run.manifest.status === "running").length;
1524
1535
  }
1536
+ function getCachePruneIdleDelayMs() {
1537
+ const configured = config.cache?.pruneIdleDelayMs;
1538
+ if (configured === void 0 || !Number.isFinite(configured) || configured <= 0) return defaultCachePruneIdleDelayMs;
1539
+ return Math.floor(configured);
1540
+ }
1541
+ function cancelCacheRetentionPrune() {
1542
+ if (cachePruneIdleTimer === void 0) return;
1543
+ clearTimeout(cachePruneIdleTimer);
1544
+ cachePruneIdleTimer = void 0;
1545
+ }
1546
+ function scheduleCacheRetentionPrune() {
1547
+ cancelCacheRetentionPrune();
1548
+ cachePruneIdleTimer = setTimeout(() => {
1549
+ cachePruneIdleTimer = void 0;
1550
+ pruneCacheRetentionIfIdle();
1551
+ }, getCachePruneIdleDelayMs());
1552
+ cachePruneIdleTimer.unref();
1553
+ }
1554
+ async function pruneCacheRetentionIfIdle() {
1555
+ if (getActiveRunCount() > 0) return;
1556
+ await cacheStore.pruneRetention();
1557
+ }
1525
1558
  function emitDiscoveryEvent() {
1526
1559
  const lastRunStatuses = getLastRunStatuses({
1527
1560
  runs: runs.values(),
@@ -1657,19 +1690,20 @@ Flags:
1657
1690
  `);
1658
1691
  return;
1659
1692
  }
1660
- if (topic === "cache" || topic === "cache list" || topic === "cache clear") {
1693
+ if (topic === "cache" || topic === "cache list" || topic === "cache clear" || topic === "cache repair") {
1661
1694
  console.info(`
1662
- agent-evals cache - Manage cached operation entries
1695
+ agent-evals cache - Manage cached namespace/key entries
1663
1696
 
1664
1697
  Usage:
1665
1698
  agent-evals cache list [flags]
1666
1699
  agent-evals cache clear --eval <id>
1667
1700
  agent-evals cache clear --all
1701
+ agent-evals cache repair [flags]
1668
1702
 
1669
1703
  Flags:
1670
1704
  --eval <id> Clear entries for specific eval(s) (comma-separated)
1671
1705
  --all Confirm clearing every cached entry
1672
- --json Output cache listing as JSON
1706
+ --json Output cache listing or repair summary as JSON
1673
1707
  --no-env Disable automatic .env loading
1674
1708
  --help, -h Show this help
1675
1709
  `);
@@ -1683,9 +1717,10 @@ Commands:
1683
1717
  list List discovered evals
1684
1718
  run Run evals
1685
1719
  show-runs [id|latest] Show saved run artifact file paths
1686
- cache list List cached operation entries
1720
+ cache list List cached namespace/key entries
1687
1721
  cache clear --eval <id> Clear cache entries for one eval
1688
1722
  cache clear --all Clear every cached entry
1723
+ cache repair Remove unindexed/orphaned cache files
1689
1724
  help Show this help
1690
1725
 
1691
1726
  Options:
@@ -1961,7 +1996,7 @@ function parseArgs(argv) {
1961
1996
  let cursor = 1;
1962
1997
  if (args.command === "cache") {
1963
1998
  const sub = normalizedArgv[cursor];
1964
- if (sub === "list" || sub === "clear") {
1999
+ if (sub === "list" || sub === "clear" || sub === "repair") {
1965
2000
  args.subcommand = sub;
1966
2001
  args.helpTopic = `cache ${sub}`;
1967
2002
  cursor++;
@@ -2136,8 +2171,8 @@ async function commandApp(args) {
2136
2171
  const { serve } = await import("@hono/node-server");
2137
2172
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2138
2173
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2139
- const appModule = await import("./app-BD0D9-7k.mjs");
2140
- const runnerModule = await import("./runner-dB69WsnM.mjs");
2174
+ const appModule = await import("./app-Bpe6Monh.mjs");
2175
+ const runnerModule = await import("./runner-C3CiS2o7.mjs");
2141
2176
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2142
2177
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2143
2178
  await runnerModule.initRunner();
@@ -2311,10 +2346,8 @@ async function commandCache(args) {
2311
2346
  for (const entry of entries) {
2312
2347
  console.info(` ${entry.namespace}`);
2313
2348
  console.info(` key: ${entry.key}`);
2314
- const operationLabel = entry.operationType === "span" ? `${entry.operationName} (span ${entry.spanKind ?? "unknown"})` : `${entry.operationName} (value)`;
2315
- console.info(` operation: ${operationLabel}`);
2316
2349
  console.info(` stored: ${entry.storedAt}`);
2317
- console.info(` size: ${String(entry.sizeBytes)} bytes`);
2350
+ console.info(` last accessed: ${entry.lastAccessedAt}`);
2318
2351
  console.info("");
2319
2352
  }
2320
2353
  return;
@@ -2342,6 +2375,20 @@ async function commandCache(args) {
2342
2375
  console.info("Refusing to clear cache without --eval <id> or --all. Use one of these flags to confirm.");
2343
2376
  process.exit(1);
2344
2377
  }
2378
+ if (args.subcommand === "repair") {
2379
+ const summary = await runner.repairCache();
2380
+ if (args.json) {
2381
+ console.info(JSON.stringify(summary, null, 2));
2382
+ return;
2383
+ }
2384
+ console.info("Cache repair complete.");
2385
+ console.info(`Removed cache files: ${String(summary.removedCacheFiles)}`);
2386
+ console.info(`Removed debug files: ${String(summary.removedDebugFiles)}`);
2387
+ console.info(`Removed blob files: ${String(summary.removedBlobFiles)}`);
2388
+ console.info(`Removed index rows: ${String(summary.removedIndexRows)}`);
2389
+ console.info(`Rewritten indexes: ${String(summary.rewrittenIndexes)}`);
2390
+ return;
2391
+ }
2345
2392
  printHelp(args.helpTopic);
2346
2393
  }
2347
2394
  function getSortedRunSnapshots(runner) {