@ls-stack/agent-eval 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,8 +25,8 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-CvJmtK1T.js"></script>
29
- <link rel="stylesheet" crossorigin href="/assets/index-ClE28i5w.css">
28
+ <script type="module" crossorigin src="/assets/index-CmY0_D5Z.js"></script>
29
+ <link rel="stylesheet" crossorigin href="/assets/index-ChgByJbI.css">
30
30
  </head>
31
31
  <body>
32
32
  <div id="root"></div>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-DQK5W0je.mjs";
2
+ import { t as runCli } from "./cli-DrPk66xh.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { D as deriveScopedSummaryFromCases, E as getEvalDisplayStatus, Kt as getEvalRegistry, T as getEvalTitle, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as normalizeScoreDef, c as persistCaseDetail, d as recomputePersistedCaseStatus, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, j as runSummarySchema, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, s as nextShortIdFromSnapshots, u as recomputeEvalStatusesInRuns, v as loadConfig, x as createFsCacheStore, y as buildDeclaredColumnDefs } from "./runOrchestration-HaMahl6b.mjs";
1
+ import { A as deriveScopedSummaryFromCases, O as getEvalTitle, P as runSummarySchema, V as resolveLlmCallsConfig, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as normalizeScoreDef, c as persistCaseDetail, d as recomputePersistedCaseStatus, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, k as getEvalDisplayStatus, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, rn as getEvalRegistry, s as nextShortIdFromSnapshots, u as recomputeEvalStatusesInRuns, v as loadConfig, x as createFsCacheStore, y as buildDeclaredColumnDefs } from "./runOrchestration-DA4Rh5g0.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import { dirname, join, relative, resolve } from "node:path";
@@ -417,6 +417,7 @@ function createRunner({ watchForChanges = true } = {}) {
417
417
  let workspaceRoot;
418
418
  let localStateDir;
419
419
  let cacheStore;
420
+ let llmCallsConfig = resolveLlmCallsConfig(void 0);
420
421
  const evals = /* @__PURE__ */ new Map();
421
422
  const runs = /* @__PURE__ */ new Map();
422
423
  const lastRunStatusMap = /* @__PURE__ */ new Map();
@@ -439,12 +440,14 @@ function createRunner({ watchForChanges = true } = {}) {
439
440
  config = await loadConfig();
440
441
  workspaceRoot = config.workspaceRoot ?? process.cwd();
441
442
  localStateDir = resolve(workspaceRoot, ".agent-evals");
443
+ llmCallsConfig = resolveLlmCallsConfig(config.llmCalls);
442
444
  await mkdir(localStateDir, { recursive: true });
443
445
  await mkdir(join(localStateDir, "runs"), { recursive: true });
444
446
  cacheStore = createFsCacheStore({
445
447
  workspaceRoot,
446
448
  dir: config.cache?.dir,
447
- maxEntriesPerEval: config.cache?.maxEntriesPerEval
449
+ maxEntriesPerNamespace: config.cache?.maxEntriesPerNamespace ?? config.cache?.maxEntriesPerEval,
450
+ maxEntriesByNamespace: config.cache?.maxEntriesByNamespace
448
451
  });
449
452
  await loadPersistedRuns();
450
453
  await runner.refreshDiscovery();
@@ -453,6 +456,9 @@ function createRunner({ watchForChanges = true } = {}) {
453
456
  async listCache() {
454
457
  return cacheStore.list();
455
458
  },
459
+ async getCacheEntry(namespace, key) {
460
+ return cacheStore.lookup(namespace, key);
461
+ },
456
462
  async clearCache(filter) {
457
463
  await cacheStore.clear(filter);
458
464
  },
@@ -803,6 +809,9 @@ function createRunner({ watchForChanges = true } = {}) {
803
809
  getWorkspaceRoot() {
804
810
  return workspaceRoot;
805
811
  },
812
+ getLlmCallsConfig() {
813
+ return llmCallsConfig;
814
+ },
806
815
  getArtifactPath(artifactId_) {
807
816
  return resolveArtifactPath(join(localStateDir, "runs"), artifactId_);
808
817
  }
@@ -1036,8 +1045,8 @@ async function commandApp(args) {
1036
1045
  const { serve } = await import("@hono/node-server");
1037
1046
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
1038
1047
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
1039
- const appModule = await import("./app-ZFLdu8-r.mjs");
1040
- const runnerModule = await import("./runner--XPZ5D7N.mjs");
1048
+ const appModule = await import("./app-hkNNN_jn.mjs");
1049
+ const runnerModule = await import("./runner-BzT3B9OF.mjs");
1041
1050
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
1042
1051
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
1043
1052
  await runnerModule.initRunner();