@ls-stack/agent-eval 0.16.1 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,8 +25,8 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-BZ1TdyEg.js"></script>
29
- <link rel="stylesheet" crossorigin href="/assets/index-MARPw1bH.css">
28
+ <script type="module" crossorigin src="/assets/index-BxcwUS7V.js"></script>
29
+ <link rel="stylesheet" crossorigin href="/assets/index-BovjyzD8.css">
30
30
  </head>
31
31
  <body>
32
32
  <div id="root"></div>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-BmrtjQj_.mjs";
2
+ import { t as runCli } from "./cli-3zANEAhG.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { A as getEvalTitle, I as runSummarySchema, J as resolveApiCallsConfig, M as deriveScopedSummaryFromCases, S as createFsCacheStore, Y as resolveLlmCallsConfig, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as buildDeclaredColumnDefs, c as persistCaseDetail, d as recomputePersistedCaseStatus, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, hn as getEvalRegistry, i as getLatestRunInfos, j as getEvalDisplayStatus, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, s as nextShortIdFromSnapshots, u as recomputeEvalStatusesInRuns, v as parseEvalMetas, x as normalizeScoreDef, y as loadConfig } from "./runOrchestration-BDyNrRQT.mjs";
1
+ import { An as getEvalRegistry, L as runSummarySchema, M as getEvalDisplayStatus, N as deriveScopedSummaryFromCases, S as createFsCacheStore, X as resolveLlmCallsConfig, Y as resolveApiCallsConfig, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as buildDeclaredColumnDefs, c as persistCaseDetail, d as recomputePersistedCaseStatus, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, j as getEvalTitle, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, s as nextShortIdFromSnapshots, u as recomputeEvalStatusesInRuns, v as parseEvalMetas, x as normalizeScoreDef, y as loadConfig } from "./runOrchestration-BBg_VUH5.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import { dirname, join, relative, resolve } from "node:path";
@@ -346,7 +346,9 @@ function createRunner({ watchForChanges = true } = {}) {
346
346
  const discoveryListeners = /* @__PURE__ */ new Set();
347
347
  let nextShortIdNum = 0;
348
348
  let discoveryWatcher;
349
+ let runHistoryWatcher;
349
350
  let discoveryRefreshTimer;
351
+ let runHistoryRefreshTimer;
350
352
  function toWorkspaceRelativePath(filePath) {
351
353
  return relative(workspaceRoot, filePath).replaceAll("\\", "/");
352
354
  }
@@ -379,7 +381,7 @@ function createRunner({ watchForChanges = true } = {}) {
379
381
  return cacheStore.list();
380
382
  },
381
383
  async getCacheEntry(namespace, key) {
382
- return cacheStore.lookup(namespace, key);
384
+ return cacheStore.lookupWithDebug(namespace, key);
383
385
  },
384
386
  async clearCache(filter) {
385
387
  await cacheStore.clear(filter);
@@ -722,10 +724,14 @@ function createRunner({ watchForChanges = true } = {}) {
722
724
  clearTimeout(discoveryRefreshTimer);
723
725
  discoveryRefreshTimer = void 0;
724
726
  }
725
- const watcher = discoveryWatcher;
726
- if (watcher === void 0) return;
727
+ if (runHistoryRefreshTimer !== void 0) {
728
+ clearTimeout(runHistoryRefreshTimer);
729
+ runHistoryRefreshTimer = void 0;
730
+ }
731
+ const watchers = [discoveryWatcher, runHistoryWatcher].filter((watcher) => watcher !== void 0);
727
732
  discoveryWatcher = void 0;
728
- await watcher.close();
733
+ runHistoryWatcher = void 0;
734
+ await Promise.all(watchers.map((watcher) => watcher.close()));
729
735
  },
730
736
  getWorkspaceRoot() {
731
737
  return workspaceRoot;
@@ -764,6 +770,29 @@ function createRunner({ watchForChanges = true } = {}) {
764
770
  watcher.on("unlink", scheduleRefresh);
765
771
  watcher.on("addDir", scheduleRefresh);
766
772
  watcher.on("unlinkDir", scheduleRefresh);
773
+ await setupRunHistoryWatcher();
774
+ await new Promise((ready) => {
775
+ watcher.once("ready", ready);
776
+ });
777
+ }
778
+ async function setupRunHistoryWatcher() {
779
+ const watcher = watch(join(localStateDir, "runs"), {
780
+ ignoreInitial: true,
781
+ persistent: true
782
+ });
783
+ runHistoryWatcher = watcher;
784
+ const scheduleRefresh = () => {
785
+ if (runHistoryRefreshTimer !== void 0) clearTimeout(runHistoryRefreshTimer);
786
+ runHistoryRefreshTimer = setTimeout(() => {
787
+ runHistoryRefreshTimer = void 0;
788
+ refreshPersistedRunsFromDisk();
789
+ }, 50);
790
+ };
791
+ watcher.on("change", scheduleRefresh);
792
+ watcher.on("add", scheduleRefresh);
793
+ watcher.on("unlink", scheduleRefresh);
794
+ watcher.on("addDir", scheduleRefresh);
795
+ watcher.on("unlinkDir", scheduleRefresh);
767
796
  await new Promise((ready) => {
768
797
  watcher.once("ready", ready);
769
798
  });
@@ -797,12 +826,34 @@ function createRunner({ watchForChanges = true } = {}) {
797
826
  runs.clear();
798
827
  const persistedRuns = await loadPersistedRunSnapshots(localStateDir);
799
828
  nextShortIdNum = nextShortIdFromSnapshots(persistedRuns);
800
- for (const persistedRun of persistedRuns) runs.set(persistedRun.manifest.id, {
801
- ...persistedRun,
802
- listeners: /* @__PURE__ */ new Set(),
803
- childProcess: void 0,
804
- childTerminalReceived: false
805
- });
829
+ for (const persistedRun of persistedRuns) runs.set(persistedRun.manifest.id, toRunnerRunState(persistedRun));
830
+ }
831
+ async function refreshPersistedRunsFromDisk() {
832
+ const persistedRuns = await loadPersistedRunSnapshots(localStateDir);
833
+ const persistedRunIds = new Set(persistedRuns.map((snapshot) => snapshot.manifest.id));
834
+ let changed = false;
835
+ for (const persistedRun of persistedRuns) {
836
+ const existing = runs.get(persistedRun.manifest.id);
837
+ if (existing?.manifest.status === "running" && existing.childProcess) continue;
838
+ runs.set(persistedRun.manifest.id, toRunnerRunState(persistedRun, existing));
839
+ changed = true;
840
+ }
841
+ for (const [runId, existing] of [...runs]) {
842
+ if (persistedRunIds.has(runId)) continue;
843
+ if (existing.manifest.status === "running") continue;
844
+ runs.delete(runId);
845
+ changed = true;
846
+ }
847
+ nextShortIdNum = Math.max(nextShortIdNum, nextShortIdFromSnapshots(persistedRuns));
848
+ if (changed) emitDiscoveryEvent();
849
+ }
850
+ function toRunnerRunState(snapshot, existing) {
851
+ return {
852
+ ...snapshot,
853
+ listeners: existing?.listeners ?? /* @__PURE__ */ new Set(),
854
+ childProcess: existing?.childProcess,
855
+ childTerminalReceived: existing?.childTerminalReceived ?? false
856
+ };
806
857
  }
807
858
  return runner;
808
859
  }
@@ -978,8 +1029,8 @@ async function commandApp(args) {
978
1029
  const { serve } = await import("@hono/node-server");
979
1030
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
980
1031
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
981
- const appModule = await import("./app-B8e-oWYc.mjs");
982
- const runnerModule = await import("./runner-DABFPXkx.mjs");
1032
+ const appModule = await import("./app-hAlVvT-Q.mjs");
1033
+ const runnerModule = await import("./runner-DxlahWDo.mjs");
983
1034
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
984
1035
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
985
1036
  await runnerModule.initRunner();
package/dist/index.d.mts CHANGED
@@ -666,6 +666,59 @@ declare const assertionFailureSchema: z$1.ZodObject<{
666
666
  }, z$1.core.$strip>;
667
667
  /** Assertion failure metadata captured for one case run. */
668
668
  type AssertionFailure = z$1.infer<typeof assertionFailureSchema>;
669
+ /** Severity level for one log captured during a case run. */
670
+ declare const runLogLevelSchema: z$1.ZodEnum<{
671
+ error: "error";
672
+ log: "log";
673
+ info: "info";
674
+ warn: "warn";
675
+ }>;
676
+ /** Severity level for one log captured during a case run. */
677
+ type RunLogLevel = z$1.infer<typeof runLogLevelSchema>;
678
+ /** Eval runner phase that emitted a captured case log. */
679
+ declare const runLogPhaseSchema: z$1.ZodEnum<{
680
+ eval: "eval";
681
+ derive: "derive";
682
+ outputsSchema: "outputsSchema";
683
+ scorer: "scorer";
684
+ }>;
685
+ /** Eval runner phase that emitted a captured case log. */
686
+ type RunLogPhase = z$1.infer<typeof runLogPhaseSchema>;
687
+ /** Schema for one persisted log entry captured during a case run. */
688
+ declare const runLogLocationSchema: z$1.ZodObject<{
689
+ file: z$1.ZodString;
690
+ line: z$1.ZodNumber;
691
+ column: z$1.ZodNumber;
692
+ }, z$1.core.$strip>;
693
+ /** Best-effort source location for one captured case log. */
694
+ type RunLogLocation = z$1.infer<typeof runLogLocationSchema>;
695
+ /** Schema for one persisted log entry captured during a case run. */
696
+ declare const runLogEntrySchema: z$1.ZodObject<{
697
+ timestamp: z$1.ZodString;
698
+ level: z$1.ZodEnum<{
699
+ error: "error";
700
+ log: "log";
701
+ info: "info";
702
+ warn: "warn";
703
+ }>;
704
+ phase: z$1.ZodEnum<{
705
+ eval: "eval";
706
+ derive: "derive";
707
+ outputsSchema: "outputsSchema";
708
+ scorer: "scorer";
709
+ }>;
710
+ message: z$1.ZodString;
711
+ args: z$1.ZodDefault<z$1.ZodArray<z$1.ZodUnknown>>;
712
+ truncated: z$1.ZodDefault<z$1.ZodBoolean>;
713
+ location: z$1.ZodOptional<z$1.ZodObject<{
714
+ file: z$1.ZodString;
715
+ line: z$1.ZodNumber;
716
+ column: z$1.ZodNumber;
717
+ }, z$1.core.$strip>>;
718
+ source: z$1.ZodOptional<z$1.ZodString>;
719
+ }, z$1.core.$strip>;
720
+ /** Persisted log entry captured during a case run. */
721
+ type RunLogEntry = z$1.infer<typeof runLogEntrySchema>;
669
722
  /** Trace payload captured while computing one score for a case. */
670
723
  declare const scoreTraceSchema: z$1.ZodObject<{
671
724
  trace: z$1.ZodArray<z$1.ZodObject<{
@@ -907,6 +960,30 @@ declare const caseDetailSchema: z$1.ZodObject<{
907
960
  message: string;
908
961
  stack?: string | undefined;
909
962
  }, string>>]>>;
963
+ logs: z$1.ZodDefault<z$1.ZodArray<z$1.ZodObject<{
964
+ timestamp: z$1.ZodString;
965
+ level: z$1.ZodEnum<{
966
+ error: "error";
967
+ log: "log";
968
+ info: "info";
969
+ warn: "warn";
970
+ }>;
971
+ phase: z$1.ZodEnum<{
972
+ eval: "eval";
973
+ derive: "derive";
974
+ outputsSchema: "outputsSchema";
975
+ scorer: "scorer";
976
+ }>;
977
+ message: z$1.ZodString;
978
+ args: z$1.ZodDefault<z$1.ZodArray<z$1.ZodUnknown>>;
979
+ truncated: z$1.ZodDefault<z$1.ZodBoolean>;
980
+ location: z$1.ZodOptional<z$1.ZodObject<{
981
+ file: z$1.ZodString;
982
+ line: z$1.ZodNumber;
983
+ column: z$1.ZodNumber;
984
+ }, z$1.core.$strip>>;
985
+ source: z$1.ZodOptional<z$1.ZodString>;
986
+ }, z$1.core.$strip>>>;
910
987
  error: z$1.ZodNullable<z$1.ZodObject<{
911
988
  name: z$1.ZodOptional<z$1.ZodString>;
912
989
  message: z$1.ZodString;
@@ -1564,6 +1641,12 @@ declare const apiCallsConfigSchema: z$1.ZodObject<{
1564
1641
  }, z$1.core.$strip>;
1565
1642
  /** Authored API calls config accepted from `agent-evals.config.ts`. */
1566
1643
  type ApiCallsConfigInput = z$1.infer<typeof apiCallsConfigSchema>;
1644
+ /** Schema for workspace-level run log capture options. */
1645
+ declare const runLogsConfigSchema: z$1.ZodObject<{
1646
+ captureConsole: z$1.ZodOptional<z$1.ZodBoolean>;
1647
+ }, z$1.core.$strip>;
1648
+ /** Workspace-level run log capture options. */
1649
+ type RunLogsConfigInput = z$1.infer<typeof runLogsConfigSchema>;
1567
1650
  /** Resolved LLM-calls config sent to the UI with all defaults applied. */
1568
1651
  type ResolvedLlmCallsConfig = {
1569
1652
  kinds: string[];
@@ -1737,6 +1820,16 @@ type AgentEvalsConfig = {
1737
1820
  * ```
1738
1821
  */
1739
1822
  apiCalls?: ApiCallsConfigInput;
1823
+ /**
1824
+ * Configuration for case run logs.
1825
+ *
1826
+ * Console capture is enabled by default and stores `console.log`,
1827
+ * `console.info`, `console.warn`, and `console.error` calls made during
1828
+ * active case-owned phases. Set `captureConsole: false` to keep console
1829
+ * output visible in the terminal without persisting it to case details.
1830
+ * Manual `evalLog(...)` calls are still persisted.
1831
+ */
1832
+ runLogs?: RunLogsConfigInput;
1740
1833
  /**
1741
1834
  * Optional controls for the operation cache. When omitted, the cache is
1742
1835
  * enabled and stored under `<workspaceRoot>/.agent-evals/cache`.
@@ -1872,6 +1965,9 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
1872
1965
  }>>>;
1873
1966
  }, z$1.core.$strip>>>;
1874
1967
  }, z$1.core.$strip>>;
1968
+ runLogs: z$1.ZodOptional<z$1.ZodObject<{
1969
+ captureConsole: z$1.ZodOptional<z$1.ZodBoolean>;
1970
+ }, z$1.core.$strip>>;
1875
1971
  cache: z$1.ZodOptional<z$1.ZodObject<{
1876
1972
  enabled: z$1.ZodOptional<z$1.ZodBoolean>;
1877
1973
  dir: z$1.ZodOptional<z$1.ZodString>;
@@ -2247,6 +2343,110 @@ declare const cacheEntrySchema: z$1.ZodObject<{
2247
2343
  }, z$1.core.$strip>;
2248
2344
  /** Persisted cache file contents. */
2249
2345
  type CacheEntry = z$1.infer<typeof cacheEntrySchema>;
2346
+ /** Debug-only raw key metadata stored outside the reusable cache entry. */
2347
+ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
2348
+ version: z$1.ZodLiteral<1>;
2349
+ key: z$1.ZodString;
2350
+ namespace: z$1.ZodString;
2351
+ operationType: z$1.ZodEnum<{
2352
+ span: "span";
2353
+ value: "value";
2354
+ }>;
2355
+ operationName: z$1.ZodString;
2356
+ storedAt: z$1.ZodString;
2357
+ codeFingerprint: z$1.ZodString;
2358
+ rawKey: z$1.ZodUnknown;
2359
+ }, z$1.core.$strip>;
2360
+ /** Debug-only raw cache key entry. May contain sensitive prompt/input data. */
2361
+ type CacheDebugKeyEntry = z$1.infer<typeof cacheDebugKeyEntrySchema>;
2362
+ /** Cache lookup response with optional debug-only raw key data. */
2363
+ declare const cacheEntryWithDebugKeySchema: z$1.ZodObject<{
2364
+ version: z$1.ZodLiteral<1>;
2365
+ key: z$1.ZodString;
2366
+ namespace: z$1.ZodString;
2367
+ operationType: z$1.ZodOptional<z$1.ZodEnum<{
2368
+ span: "span";
2369
+ value: "value";
2370
+ }>>;
2371
+ operationName: z$1.ZodOptional<z$1.ZodString>;
2372
+ spanName: z$1.ZodOptional<z$1.ZodString>;
2373
+ spanKind: z$1.ZodOptional<z$1.ZodString>;
2374
+ storedAt: z$1.ZodString;
2375
+ codeFingerprint: z$1.ZodString;
2376
+ recording: z$1.ZodObject<{
2377
+ returnValue: z$1.ZodUnknown;
2378
+ finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
2379
+ finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
2380
+ error: "error";
2381
+ running: "running";
2382
+ ok: "ok";
2383
+ cancelled: "cancelled";
2384
+ }>>;
2385
+ finalError: z$1.ZodOptional<z$1.ZodObject<{
2386
+ name: z$1.ZodOptional<z$1.ZodString>;
2387
+ message: z$1.ZodString;
2388
+ stack: z$1.ZodOptional<z$1.ZodString>;
2389
+ capturedAt: z$1.ZodOptional<z$1.ZodString>;
2390
+ }, z$1.core.$catchall<z$1.ZodUnknown>>>;
2391
+ finalErrors: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
2392
+ name: z$1.ZodOptional<z$1.ZodString>;
2393
+ message: z$1.ZodString;
2394
+ stack: z$1.ZodOptional<z$1.ZodString>;
2395
+ capturedAt: z$1.ZodOptional<z$1.ZodString>;
2396
+ }, z$1.core.$catchall<z$1.ZodUnknown>>>>;
2397
+ finalWarning: z$1.ZodOptional<z$1.ZodObject<{
2398
+ name: z$1.ZodOptional<z$1.ZodString>;
2399
+ message: z$1.ZodString;
2400
+ stack: z$1.ZodOptional<z$1.ZodString>;
2401
+ capturedAt: z$1.ZodOptional<z$1.ZodString>;
2402
+ }, z$1.core.$catchall<z$1.ZodUnknown>>>;
2403
+ finalWarnings: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
2404
+ name: z$1.ZodOptional<z$1.ZodString>;
2405
+ message: z$1.ZodString;
2406
+ stack: z$1.ZodOptional<z$1.ZodString>;
2407
+ capturedAt: z$1.ZodOptional<z$1.ZodString>;
2408
+ }, z$1.core.$catchall<z$1.ZodUnknown>>>>;
2409
+ ops: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2410
+ kind: z$1.ZodLiteral<"setOutput">;
2411
+ key: z$1.ZodString;
2412
+ value: z$1.ZodUnknown;
2413
+ }, z$1.core.$strip>, z$1.ZodObject<{
2414
+ kind: z$1.ZodLiteral<"appendOutput">;
2415
+ key: z$1.ZodString;
2416
+ value: z$1.ZodUnknown;
2417
+ }, z$1.core.$strip>, z$1.ZodObject<{
2418
+ kind: z$1.ZodLiteral<"mergeOutput">;
2419
+ key: z$1.ZodString;
2420
+ patch: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
2421
+ }, z$1.core.$strip>, z$1.ZodObject<{
2422
+ kind: z$1.ZodLiteral<"incrementOutput">;
2423
+ key: z$1.ZodString;
2424
+ delta: z$1.ZodNumber;
2425
+ }, z$1.core.$strip>, z$1.ZodObject<{
2426
+ kind: z$1.ZodLiteral<"checkpoint">;
2427
+ name: z$1.ZodString;
2428
+ data: z$1.ZodUnknown;
2429
+ }, z$1.core.$strip>, z$1.ZodObject<{
2430
+ kind: z$1.ZodLiteral<"subSpan">;
2431
+ span: z$1.ZodType<SerializedCacheSpan, unknown, z$1.core.$ZodTypeInternals<SerializedCacheSpan, unknown>>;
2432
+ }, z$1.core.$strip>], "kind">>;
2433
+ }, z$1.core.$strip>;
2434
+ debugKey: z$1.ZodOptional<z$1.ZodObject<{
2435
+ version: z$1.ZodLiteral<1>;
2436
+ key: z$1.ZodString;
2437
+ namespace: z$1.ZodString;
2438
+ operationType: z$1.ZodEnum<{
2439
+ span: "span";
2440
+ value: "value";
2441
+ }>;
2442
+ operationName: z$1.ZodString;
2443
+ storedAt: z$1.ZodString;
2444
+ codeFingerprint: z$1.ZodString;
2445
+ rawKey: z$1.ZodUnknown;
2446
+ }, z$1.core.$strip>>;
2447
+ }, z$1.core.$strip>;
2448
+ /** Cache lookup response returned by cache APIs when raw-key debug data exists. */
2449
+ type CacheEntryWithDebugKey = z$1.infer<typeof cacheEntryWithDebugKeySchema>;
2250
2450
  /** Persisted per-owner cache file containing multiple cache entries. */
2251
2451
  declare const cacheFileSchema: z$1.ZodObject<{
2252
2452
  version: z$1.ZodLiteral<1>;
@@ -2326,21 +2526,44 @@ declare const cacheFileSchema: z$1.ZodObject<{
2326
2526
  }, z$1.core.$strip>;
2327
2527
  /** Persisted per-owner cache file contents. */
2328
2528
  type CacheFile = z$1.infer<typeof cacheFileSchema>;
2529
+ /** Persisted per-owner debug file containing raw cache key metadata. */
2530
+ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
2531
+ version: z$1.ZodLiteral<1>;
2532
+ owner: z$1.ZodString;
2533
+ entries: z$1.ZodRecord<z$1.ZodString, z$1.ZodObject<{
2534
+ version: z$1.ZodLiteral<1>;
2535
+ key: z$1.ZodString;
2536
+ namespace: z$1.ZodString;
2537
+ operationType: z$1.ZodEnum<{
2538
+ span: "span";
2539
+ value: "value";
2540
+ }>;
2541
+ operationName: z$1.ZodString;
2542
+ storedAt: z$1.ZodString;
2543
+ codeFingerprint: z$1.ZodString;
2544
+ rawKey: z$1.ZodUnknown;
2545
+ }, z$1.core.$strip>>;
2546
+ }, z$1.core.$strip>;
2547
+ /** Persisted per-owner raw cache key debug file contents. */
2548
+ type CacheDebugKeyFile = z$1.infer<typeof cacheDebugKeyFileSchema>;
2329
2549
  //#endregion
2330
2550
  //#region ../shared/src/utils/extractCacheHits.d.ts
2331
2551
  /**
2332
- * Single cache-hit entry rendered as one row in the case drawer's
2333
- * "Cache hits" tab.
2552
+ * Single cache activity entry rendered as one row in the case drawer's Cache
2553
+ * tab.
2334
2554
  *
2335
- * `origin === 'span'` rows came from a span's `cache.status` attribute or from
2336
- * a `cache.refs` ref attached to a span body. `origin === 'caseRoot'` rows
2337
- * came from `evalTracer.cache(...)` calls made directly from the case body
2338
- * (no surrounding `traceSpan`), which would otherwise be invisible.
2555
+ * `action === 'hit'` rows reused an existing persisted cache entry.
2556
+ * `action === 'added'` rows came from a miss or refresh that wrote a persisted
2557
+ * cache entry during the run. `origin === 'caseRoot'` rows came from
2558
+ * `evalTracer.cache(...)` calls made directly from the case body (no
2559
+ * surrounding `traceSpan`), which would otherwise be invisible.
2339
2560
  */
2340
- type CacheHitEntry = {
2561
+ type CacheActivityEntry = {
2341
2562
  id: string;
2342
2563
  source: 'span' | 'value';
2343
2564
  origin: 'span' | 'caseRoot';
2565
+ action: 'hit' | 'added';
2566
+ status: 'hit' | 'miss' | 'refresh';
2344
2567
  name: string;
2345
2568
  namespace: string;
2346
2569
  key: string;
@@ -2348,14 +2571,26 @@ type CacheHitEntry = {
2348
2571
  age: number | undefined;
2349
2572
  spanId: string | undefined;
2350
2573
  };
2574
+ /** Cache activity row narrowed to cache hits for compatibility helpers. */
2575
+ type CacheHitEntry = CacheActivityEntry & {
2576
+ action: 'hit';
2577
+ status: 'hit';
2578
+ };
2579
+ /**
2580
+ * Collect every cache hit or cache write recorded for a case run.
2581
+ *
2582
+ * Walks `spans` for span-level cache activity (`attributes['cache.status']`)
2583
+ * and per-span value-cache refs (`attributes['cache.refs']`), then appends
2584
+ * spanless value-cache refs persisted on the case scope. Bypasses are skipped
2585
+ * because they do not read or write a persisted cache entry.
2586
+ */
2587
+ declare function extractCacheEntries(spans: EvalTraceSpan[], caseCacheRefs: TraceCacheRef[]): CacheActivityEntry[];
2351
2588
  /**
2352
2589
  * Collect every `status === 'hit'` cache event recorded for a case run.
2353
2590
  *
2354
- * Walks `spans` for span-level cache hits (`attributes['cache.status'] ===
2355
- * 'hit'`) and per-span value-cache refs (`attributes['cache.refs']`), then
2356
- * appends spanless value-cache refs persisted on the case scope. Non-hit
2357
- * statuses (`miss`/`refresh`/`bypass`) are skipped — they remain visible
2358
- * inline in the Trace tab.
2591
+ * This compatibility helper returns only rows that reused an existing
2592
+ * persisted cache entry. Use `extractCacheEntries(...)` when the UI should
2593
+ * include cache misses and refreshes that wrote entries during the run.
2359
2594
  */
2360
2595
  declare function extractCacheHits(spans: EvalTraceSpan[], caseCacheRefs: TraceCacheRef[]): CacheHitEntry[];
2361
2596
  //#endregion
@@ -2673,6 +2908,19 @@ declare function defineEval<TInput = unknown, TOutputs extends EvalOutputs = Eva
2673
2908
  declare function repoFile(path: string, mimeType?: string): RepoFileRef;
2674
2909
  //#endregion
2675
2910
  //#region ../sdk/src/runtime.d.ts
2911
+ /**
2912
+ * Raw-key debug payload passed alongside cache writes.
2913
+ *
2914
+ * `rawKey` may include prompt text, user input, or other sensitive material.
2915
+ * Runners store it outside the reusable cache so projects can gitignore the
2916
+ * debug folder while keeping hash-only cache entries shareable.
2917
+ */
2918
+ type CacheDebugKeyWrite = {
2919
+ rawKey: unknown;
2920
+ operationType: CacheOperationType;
2921
+ operationName: string;
2922
+ codeFingerprint: string;
2923
+ };
2676
2924
  /**
2677
2925
  * Adapter used by the SDK to read and write cache entries.
2678
2926
  *
@@ -2680,8 +2928,15 @@ declare function repoFile(path: string, mimeType?: string): RepoFileRef;
2680
2928
  * starts executing.
2681
2929
  */
2682
2930
  type CacheAdapter = {
2683
- /** Return the stored entry for `keyHash` under `namespace`, or `null`. */lookup(namespace: string, keyHash: string): Promise<CacheEntry | null>; /** Persist a cache entry. Must be safe under concurrent calls. */
2684
- write(entry: CacheEntry): Promise<void>;
2931
+ /** Return the stored entry for `keyHash` under `namespace`, or `null`. */lookup(namespace: string, keyHash: string): Promise<CacheEntry | null>;
2932
+ /**
2933
+ * Persist a cache entry. Must be safe under concurrent calls.
2934
+ *
2935
+ * `debugKey` is optional and contains the authored raw key value for
2936
+ * debugging. It may contain sensitive prompt/input data and should be stored
2937
+ * separately from reusable cache files.
2938
+ */
2939
+ write(entry: CacheEntry, debugKey?: CacheDebugKeyWrite): Promise<void>;
2685
2940
  };
2686
2941
  /** Runner-supplied cache context attached to an eval case scope. */
2687
2942
  type CacheScopeContext = {
@@ -2703,7 +2958,8 @@ type EvalCaseScope = {
2703
2958
  nextEvalIdCounter: number; /** Authored input for the current case, when provided by the runner. */
2704
2959
  input?: unknown;
2705
2960
  outputs: Record<string, unknown>; /** Structured assertion failures recorded for the current case. */
2706
- assertionFailures: AssertionFailure[];
2961
+ assertionFailures: AssertionFailure[]; /** Logs captured from manual `evalLog(...)` calls and enabled console calls. */
2962
+ logs: RunLogEntry[];
2707
2963
  spans: EvalTraceSpan[];
2708
2964
  checkpoints: Map<string, unknown>;
2709
2965
  spanStack: string[];
@@ -2735,6 +2991,7 @@ type EvalCaseScope = {
2735
2991
  * modules imported while a run is being prepared.
2736
2992
  */
2737
2993
  type EvalRuntimeScope = 'env' | 'cases' | 'eval' | 'derive' | 'outputsSchema' | 'scorer';
2994
+ type EvalLogLevelInput = RunLogLevel | 'warning';
2738
2995
  /** Error thrown when an eval assertion fails during case execution. */
2739
2996
  declare class EvalAssertionError extends Error {
2740
2997
  constructor(message: string);
@@ -2750,6 +3007,14 @@ declare function getCurrentScope(): EvalCaseScope | undefined;
2750
3007
  * while validating outputs, and `scorer` while computing scores.
2751
3008
  */
2752
3009
  declare function isInEvalScope(): EvalRuntimeScope | null;
3010
+ /**
3011
+ * Record a manual log entry on the active eval case.
3012
+ *
3013
+ * Values are formatted with Node-style console formatting and capped before
3014
+ * persistence so a single log cannot make run artifacts unbounded. Calls made
3015
+ * outside active case-owned eval phases are ignored.
3016
+ */
3017
+ declare function evalLog(level: EvalLogLevelInput, ...args: unknown[]): void;
2753
3018
  /**
2754
3019
  * Register background work that should settle before eval finalization.
2755
3020
  *
@@ -2845,6 +3110,32 @@ declare function incrementEvalOutput(key: string, delta: number): void;
2845
3110
  */
2846
3111
  declare function evalAssert(condition: boolean, message: string): void;
2847
3112
  //#endregion
3113
+ //#region ../sdk/src/cacheSerialization.d.ts
3114
+ declare const serializedCacheValueMarker = "__agentEvalsCacheSerialization";
3115
+ declare const jsonSafeCacheValueVersion = "json-safe-v1";
3116
+ type JsonSafeCacheValueType = 'ArrayBuffer' | 'BigInt' | 'Blob' | 'CompressedJson' | 'CompressedString' | 'Date' | 'Error' | 'File' | 'Float64Array' | 'Headers' | 'Map' | 'Number' | 'Object' | 'RegExp' | 'Set' | 'URL' | 'URLSearchParams' | 'Undefined';
3117
+ type JsonSafeSerializedCacheValue = {
3118
+ [serializedCacheValueMarker]: typeof jsonSafeCacheValueVersion;
3119
+ codec?: 'gzip';
3120
+ length?: number;
3121
+ type: JsonSafeCacheValueType;
3122
+ value?: unknown;
3123
+ };
3124
+ /** JSON-safe persisted representation for one rich cached value. */
3125
+ type SerializedCacheValue = JsonSafeSerializedCacheValue;
3126
+ /**
3127
+ * Serialize one cached value while keeping plain JSON as plain JSON.
3128
+ *
3129
+ * Rich runtime values use small tagged wrappers.
3130
+ */
3131
+ declare function serializeCacheValue(value: unknown): Promise<unknown>;
3132
+ /** Revive one cached value, while preserving legacy JSON-round-tripped data. */
3133
+ declare function deserializeCacheValue(value: unknown): unknown;
3134
+ /** Serialize all rich values captured in a cache recording before persistence. */
3135
+ declare function serializeCacheRecording(recording: CacheRecording): Promise<CacheRecording>;
3136
+ /** Revive all rich values captured in a cache recording after lookup. */
3137
+ declare function deserializeCacheRecording(recording: CacheRecording): CacheRecording;
3138
+ //#endregion
2848
3139
  //#region ../sdk/src/traceDiagnostics.d.ts
2849
3140
  /** Severity used when attaching a recoverable diagnostic to an active span. */
2850
3141
  type CaptureEvalSpanErrorLevel = 'error' | 'warning';
@@ -3110,11 +3401,11 @@ type EvalRunner = {
3110
3401
  listCache(): Promise<CacheListItem[]>;
3111
3402
  /**
3112
3403
  * Return the full persisted cache entry for `namespace` + `key`, including
3113
- * its recording. Returns `null` when no entry matches. Used by the case
3114
- * drawer's Cache hits tab to lazily fetch the cached return value when a
3115
- * row is expanded.
3404
+ * its recording and optional raw-key debug metadata. Returns `null` when no
3405
+ * entry matches. Used by the case drawer's Cache tab to lazily fetch the
3406
+ * cached return value when a row is expanded.
3116
3407
  */
3117
- getCacheEntry(namespace: string, key: string): Promise<CacheEntry | null>;
3408
+ getCacheEntry(namespace: string, key: string): Promise<CacheEntryWithDebugKey | null>;
3118
3409
  /**
3119
3410
  * Remove cache entries matching `filter`, or all entries when no filter is
3120
3411
  * supplied.
@@ -3169,4 +3460,4 @@ declare function createRunner({
3169
3460
  */
3170
3461
  declare function runCli(argv: string[]): Promise<void>;
3171
3462
  //#endregion
3172
- export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
3463
+ export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as caseRowSchema, $t as appendToEvalOutput, A as getEvalTitle, At as traceDisplayConfigSchema, B as apiCallMetricFormatSchema, Bt as fileRefSchema, C as createRunRequestSchema, Ct as serializedCacheSpanSchema, D as extractApiCalls, Dt as traceAttributeDisplayInputSchema, E as extractCacheHits, Et as traceAttributeDisplayFormatSchema, F as runManifestSchema, Ft as traceSpanWarningSchema, G as llmCallMetricPlacementSchema, Gt as z, H as apiCallMetricSchema, Ht as numberDisplayOptionsSchema, I as runSummarySchema, It as cellValueSchema, J as resolveApiCallsConfig, Jt as evalSpan, K as llmCallMetricSchema, Kt as buildTraceTree, L as DEFAULT_API_CALLS_CONFIG, Lt as columnDefSchema, M as deriveScopedSummaryFromCases, Mt as traceSpanErrorSchema, N as deriveStatusFromCaseRows, Nt as traceSpanKindSchema, O as extractLlmCalls, Ot as traceAttributeDisplayPlacementSchema, P as deriveStatusFromChildStatuses, Pt as traceSpanSchema, Q as caseDetailSchema, Qt as EvalAssertionError, R as DEFAULT_LLM_CALLS_CONFIG, Rt as columnFormatSchema, St as cacheStatusSchema, T as sseEnvelopeSchema, Tt as traceCacheRefSchema, U as apiCallsConfigSchema, Ut as repoFileRefSchema, V as apiCallMetricPlacementSchema, Vt as jsonCellSchema, W as llmCallMetricFormatSchema, Wt as runArtifactRefSchema, X as trialSelectionModeSchema, Xt as hashCacheKey, Y as resolveLlmCallsConfig, Yt as evalTracer, Z as assertionFailureSchema, Zt as hashCacheKeySync, _t as cacheListItemSchema, an as mergeEvalOutput, at as scoreTraceSchema, bt as cacheRecordingOpSchema, cn as runInEvalScope, ct as evalChartBuiltinMetricSchema, dn as setScopeCacheContext, dt as evalChartMetricSchema, en as evalAssert, et as evalFreshnessStatusSchema, fn as startEvalBackgroundJob, ft as evalChartTooltipExtraSchema, gt as cacheFileSchema, hn as getEvalRegistry, ht as cacheEntrySchema, in as isInEvalScope, it as evalSummarySchema, j as getEvalDisplayStatus, jt as traceDisplayInputConfigSchema, k as getNestedAttribute, kt as traceAttributeDisplaySchema, ln as runInExistingEvalScope, lt as evalChartColorSchema, mn as defineEval, mt as evalChartsConfigSchema, nn as getEvalCaseInput, nt as evalStatItemSchema, on as nextEvalId, ot as evalChartAggregateSchema, pn as repoFile, pt as evalChartTypeSchema, q as llmCallsConfigSchema, qt as captureEvalSpanError, rn as incrementEvalOutput, rt as evalStatsConfigSchema, sn as runInEvalRuntimeScope, st as evalChartAxisSchema, tn as getCurrentScope, tt as evalStatAggregateSchema, un as setEvalOutput, ut as evalChartConfigSchema, vt as cacheModeSchema, w as updateManualScoreRequestSchema, wt as spanCacheOptionsSchema, xt as cacheRecordingSchema, yt as cacheOperationTypeSchema, z as agentEvalsConfigSchema, zt as columnKindSchema } from "./runOrchestration-BDyNrRQT.mjs";
2
- import { n as createRunner, t as runCli } from "./cli-BmrtjQj_.mjs";
3
- import "./src-CEAJYN_X.mjs";
4
- export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
1
+ import { $ as assertionFailureSchema, $t as runArtifactRefSchema, A as getNestedAttribute, An as getEvalRegistry, At as cacheRecordingSchema, B as agentEvalsConfigSchema, Bt as traceDisplayInputConfigSchema, C as createRunRequestSchema, Cn as runInEvalScope, Ct as cacheEntrySchema, D as extractCacheHits, Dn as startEvalBackgroundJob, Dt as cacheModeSchema, E as extractCacheEntries, En as setScopeCacheContext, Et as cacheListItemSchema, F as deriveStatusFromChildStatuses, Ft as traceAttributeDisplayFormatSchema, G as llmCallMetricFormatSchema, Gt as cellValueSchema, H as apiCallMetricPlacementSchema, Ht as traceSpanKindSchema, I as runManifestSchema, It as traceAttributeDisplayInputSchema, J as llmCallsConfigSchema, Jt as columnKindSchema, K as llmCallMetricPlacementSchema, Kt as columnDefSchema, L as runSummarySchema, Lt as traceAttributeDisplayPlacementSchema, M as getEvalDisplayStatus, Mt as serializedCacheSpanSchema, N as deriveScopedSummaryFromCases, Nt as spanCacheOptionsSchema, O as extractApiCalls, On as repoFile, Ot as cacheOperationTypeSchema, P as deriveStatusFromCaseRows, Pt as traceCacheRefSchema, Q as trialSelectionModeSchema, Qt as repoFileRefSchema, R as DEFAULT_API_CALLS_CONFIG, Rt as traceAttributeDisplaySchema, Sn as runInEvalRuntimeScope, St as cacheDebugKeyFileSchema, T as sseEnvelopeSchema, Tn as setEvalOutput, Tt as cacheFileSchema, U as apiCallMetricSchema, Ut as traceSpanSchema, V as apiCallMetricFormatSchema, Vt as traceSpanErrorSchema, W as apiCallsConfigSchema, Wt as traceSpanWarningSchema, X as resolveLlmCallsConfig, Xt as jsonCellSchema, Y as resolveApiCallsConfig, Yt as fileRefSchema, Z as runLogsConfigSchema, Zt as numberDisplayOptionsSchema, _n as getEvalCaseInput, _t as evalChartMetricSchema, an as hashCacheKey, at as evalStatsConfigSchema, bn as mergeEvalOutput, bt as evalChartsConfigSchema, cn as deserializeCacheValue, ct as runLogLevelSchema, dn as EvalAssertionError, dt as scoreTraceSchema, en as z, et as caseDetailSchema, fn as appendToEvalOutput, ft as evalChartAggregateSchema, gn as getCurrentScope, gt as evalChartConfigSchema, hn as evalLog, ht as evalChartColorSchema, in as evalTracer, it as evalStatItemSchema, j as getEvalTitle, jt as cacheStatusSchema, k as extractLlmCalls, kn as defineEval, kt as cacheRecordingOpSchema, ln as serializeCacheRecording, lt as runLogLocationSchema, mn as evalAssert, mt as evalChartBuiltinMetricSchema, nn as captureEvalSpanError, nt as evalFreshnessStatusSchema, on as hashCacheKeySync, ot as evalSummarySchema, pt as evalChartAxisSchema, q as llmCallMetricSchema, qt as columnFormatSchema, rn as evalSpan, rt as evalStatAggregateSchema, sn as deserializeCacheRecording, st as runLogEntrySchema, tn as buildTraceTree, tt as caseRowSchema, un as serializeCacheValue, ut as runLogPhaseSchema, vn as incrementEvalOutput, vt as evalChartTooltipExtraSchema, w as updateManualScoreRequestSchema, wn as runInExistingEvalScope, wt as cacheEntryWithDebugKeySchema, xn as nextEvalId, xt as cacheDebugKeyEntrySchema, yn as isInEvalScope, yt as evalChartTypeSchema, z as DEFAULT_LLM_CALLS_CONFIG, zt as traceDisplayConfigSchema } from "./runOrchestration-BBg_VUH5.mjs";
2
+ import { n as createRunner, t as runCli } from "./cli-3zANEAhG.mjs";
3
+ import "./src-BC4OrajN.mjs";
4
+ export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };