@ls-stack/agent-eval 0.58.2 → 0.58.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -25,7 +25,7 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-BMWBZw_u.js"></script>
28
+ <script type="module" crossorigin src="/assets/index-BXFsxHVc.js"></script>
29
29
  <link rel="stylesheet" crossorigin href="/assets/index-CHH7m5Cv.css">
30
30
  </head>
31
31
  <body>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-HBwXIJsg.mjs";
2
+ import { t as runCli } from "./cli-Bf5RzM8O.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { Ct as resolveLlmCallsConfig, It as runWithEvalRegistry, J as runInEvalRuntimeScope, L as configureEvalRunLogs, St as resolveApiCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, r as runCase, v as createFsCacheStore } from "./runExecution-pHJ0_TzH.mjs";
1
+ import { Ct as resolveApiCallsConfig, Lt as runWithEvalRegistry, R as configureEvalRunLogs, Y as runInEvalRuntimeScope, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, r as runCase, v as createFsCacheStore, wt as resolveLlmCallsConfig, y as getCacheRetentionOptions } from "./runExecution-CLkC-4Z1.mjs";
2
2
  //#region ../runner/src/caseChild.ts
3
3
  let fatalErrorReported = false;
4
4
  let disconnectExpected = false;
@@ -52,11 +52,12 @@ async function executeCaseChild(context) {
52
52
  registerAgentEvalsPackageResolutionHooks();
53
53
  const config = await loadConfig();
54
54
  configureEvalRunLogs({ captureConsole: config.runLogs?.captureConsole !== false });
55
+ const cacheRetentionOptions = getCacheRetentionOptions(config.cache);
55
56
  const cacheStore = createFsCacheStore({
56
57
  workspaceRoot: context.workspaceRoot,
57
58
  dir: config.cache?.dir,
58
- maxEntriesPerNamespace: config.cache?.maxEntriesPerNamespace ?? config.cache?.maxEntriesPerEval,
59
- maxEntriesByNamespace: config.cache?.maxEntriesByNamespace,
59
+ maxEntriesPerNamespace: cacheRetentionOptions.maxEntriesPerNamespace,
60
+ maxEntriesByNamespace: cacheRetentionOptions.maxEntriesByNamespace,
60
61
  lastAccessedAtUpdateIntervalMs: config.cache?.lastAccessedAtUpdateIntervalMs
61
62
  });
62
63
  const bufferedCacheStore = context.cacheEnabled && context.cacheMode !== "bypass" ? createBufferedCacheStore(cacheStore) : null;
@@ -1,5 +1,5 @@
1
- import { Ct as resolveLlmCallsConfig, Et as getCaseRowCaseKey, Ft as getEvalRegistry, Ot as caseRowSchema, St as resolveApiCallsConfig, Tt as buildEvalKey, _t as matchesTagsFilter, c as resolveArtifactPath, dt as getEvalTitle, f as resolveEvalDefaultConfig, ft as getEvalDisplayStatus, h as normalizeScoreDef, lt as applyDerivedCallAttributes, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, pt as deriveScopedSummaryFromCases, s as resolveTracePresentation, v as createFsCacheStore, xt as runSummarySchema } from "./runExecution-pHJ0_TzH.mjs";
2
- import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-ngVXShH4.mjs";
1
+ import { Ct as resolveApiCallsConfig, Dt as getCaseRowCaseKey, Et as buildEvalKey, It as getEvalRegistry, St as runSummarySchema, c as resolveArtifactPath, f as resolveEvalDefaultConfig, ft as getEvalTitle, h as normalizeScoreDef, kt as caseRowSchema, m as buildDeclaredColumnDefs, mt as deriveScopedSummaryFromCases, o as stripTerminalControlCodes, p as loadConfig, pt as getEvalDisplayStatus, s as resolveTracePresentation, ut as applyDerivedCallAttributes, v as createFsCacheStore, vt as matchesTagsFilter, wt as resolveLlmCallsConfig, y as getCacheRetentionOptions } from "./runExecution-CLkC-4Z1.mjs";
2
+ import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-BS-WxTee.mjs";
3
3
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
5
5
  import { createHash, randomUUID } from "node:crypto";
@@ -1483,11 +1483,12 @@ function createRunner({ watchForChanges = true } = {}) {
1483
1483
  await mkdir(localStateDir, { recursive: true });
1484
1484
  await mkdir(join(localStateDir, "runs"), { recursive: true });
1485
1485
  await cleanupStagedManualInputFiles(workspaceRoot);
1486
+ const cacheRetentionOptions = getCacheRetentionOptions(config.cache);
1486
1487
  cacheStore = createFsCacheStore({
1487
1488
  workspaceRoot,
1488
1489
  dir: config.cache?.dir,
1489
- maxEntriesPerNamespace: config.cache?.maxEntriesPerNamespace ?? config.cache?.maxEntriesPerEval,
1490
- maxEntriesByNamespace: config.cache?.maxEntriesByNamespace,
1490
+ maxEntriesPerNamespace: cacheRetentionOptions.maxEntriesPerNamespace,
1491
+ maxEntriesByNamespace: cacheRetentionOptions.maxEntriesByNamespace,
1491
1492
  lastAccessedAtUpdateIntervalMs: config.cache?.lastAccessedAtUpdateIntervalMs
1492
1493
  });
1493
1494
  await loadPersistedRuns();
@@ -2198,8 +2199,8 @@ async function commandApp(args) {
2198
2199
  const { serve } = await import("@hono/node-server");
2199
2200
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2200
2201
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2201
- const appModule = await import("./app-BxD6aHbp.mjs");
2202
- const runnerModule = await import("./runner-BnZMGBla.mjs");
2202
+ const appModule = await import("./app-sGeXC4AT.mjs");
2203
+ const runnerModule = await import("./runner-Bz5ZPqmm.mjs");
2203
2204
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2204
2205
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2205
2206
  await runnerModule.initRunner();