@ls-stack/agent-eval 0.60.2 → 0.60.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
- import { Et as getCaseRowCaseKey, Ot as caseRowSchema, dt as getEvalTitle, nt as updateManualScoreRequestSchema, rt as extractCacheEntries, tt as createRunRequestSchema } from "./runExecution-Bq0Y3y_1.mjs";
2
- import { o as stageManualInputFile } from "./cli-CbePEEua.mjs";
3
- import "./src-CVM_FqPx.mjs";
4
- import { t as getRunnerInstance } from "./runner-Kp0JqxrU.mjs";
1
+ import { Dt as caseRowSchema, Tt as getCaseRowCaseKey, et as createRunRequestSchema, nt as extractCacheEntries, tt as updateManualScoreRequestSchema, ut as getEvalTitle } from "./runExecution-Bu9yfdUS.mjs";
2
+ import { o as stageManualInputFile } from "./cli-OLZIjQpx.mjs";
3
+ import "./src-Cy3OxoZW.mjs";
4
+ import { t as getRunnerInstance } from "./runner-SxtKn-Xh.mjs";
5
5
  import { z } from "zod";
6
6
  import { resultify } from "t-result";
7
7
  import { readFile } from "node:fs/promises";
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-CbePEEua.mjs";
2
+ import { t as runCli } from "./cli-OLZIjQpx.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { Ct as resolveLlmCallsConfig, It as runWithEvalRegistry, J as runInEvalRuntimeScope, L as configureEvalRunLogs, St as resolveApiCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, r as runCase, v as createFsCacheStore, y as getCacheRetentionOptions } from "./runExecution-Bq0Y3y_1.mjs";
1
+ import { Ft as runWithEvalRegistry, I as configureEvalRunLogs, St as resolveLlmCallsConfig, _ as createFsCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, g as createBufferedCacheStore, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as getCacheRetentionOptions, xt as resolveApiCallsConfig } from "./runExecution-Bu9yfdUS.mjs";
2
2
  //#region ../runner/src/caseChild.ts
3
3
  let fatalErrorReported = false;
4
4
  let disconnectExpected = false;
@@ -1,5 +1,5 @@
1
- import { Ct as resolveLlmCallsConfig, Et as getCaseRowCaseKey, Ft as getEvalRegistry, Ot as caseRowSchema, St as resolveApiCallsConfig, Tt as buildEvalKey, _t as matchesTagsFilter, c as resolveArtifactPath, dt as getEvalTitle, f as resolveEvalDefaultConfig, ft as getEvalDisplayStatus, h as normalizeScoreDef, lt as applyDerivedCallAttributes, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, pt as deriveScopedSummaryFromCases, s as resolveTracePresentation, v as createFsCacheStore, xt as runSummarySchema, y as getCacheRetentionOptions } from "./runExecution-Bq0Y3y_1.mjs";
2
- import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-BpwW0AmB.mjs";
1
+ import { Dt as caseRowSchema, Pt as getEvalRegistry, St as resolveLlmCallsConfig, Tt as getCaseRowCaseKey, _ as createFsCacheStore, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as getCacheRetentionOptions, wt as buildEvalKey, xt as resolveApiCallsConfig } from "./runExecution-Bu9yfdUS.mjs";
2
+ import { C as parseEvalDiscovery, S as loadIsolatedEvalRegistry, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as parseManualInputValues, c as getLatestRunInfos, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, l as loadPersistedCaseDetail, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as generateRunId, p as persistCaseDetail, s as getLastRunStatuses, u as loadPersistedRunSnapshot, v as runTouchesEval, w as validateCharts, x as deriveEvalFreshness, y as buildManualInputDescriptor } from "./runOrchestration-mpgZmEZ6.mjs";
3
3
  import { parseEnv } from "node:util";
4
4
  import { resultify } from "t-result";
5
5
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
@@ -28,10 +28,14 @@ function resolveCaseDetailLookup(run, caseId) {
28
28
  const caseDetail = run.caseDetails.get(lookupId);
29
29
  if (caseDetail) return caseDetail;
30
30
  }
31
- const matchingCaseRow = run.cases.find((caseRow) => lookupIds.has(getCaseRowCaseKey(caseRow)) || lookupIds.has(caseRow.caseId));
31
+ const matchingCaseRow = resolveCaseRowForCaseDetailLookup(run, caseId);
32
32
  if (matchingCaseRow === void 0) return void 0;
33
33
  return run.caseDetails.get(getCaseRowCaseKey(matchingCaseRow));
34
34
  }
35
+ function resolveCaseRowForCaseDetailLookup(run, caseId) {
36
+ const lookupIds = new Set(getCaseLookupIds(caseId));
37
+ return run.cases.find((caseRow) => lookupIds.has(getCaseRowCaseKey(caseRow)) || lookupIds.has(caseRow.caseId));
38
+ }
35
39
  //#endregion
36
40
  //#region ../runner/src/configReload.ts
37
41
  /** Coordinates idle-only reloads for workspace config and `.env` in app mode. */
@@ -823,7 +827,7 @@ async function markRunTerminalFromChild(runState, event, managerContext) {
823
827
  runState.manifest = snapshot.manifest;
824
828
  runState.summary = snapshot.summary;
825
829
  runState.cases = snapshot.cases;
826
- runState.caseDetails = snapshot.caseDetails;
830
+ runState.caseDetails = /* @__PURE__ */ new Map();
827
831
  } else if (event.type === "run.finished") {
828
832
  runState.manifest.status = "completed";
829
833
  runState.manifest.endedAt = (/* @__PURE__ */ new Date()).toISOString();
@@ -969,7 +973,6 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
969
973
  let discoveryRefreshTimer;
970
974
  let runHistoryRefreshTimer;
971
975
  let cachePruneIdleTimer;
972
- let registryLoadCounter = 0;
973
976
  const configReload = createConfigReloadController({
974
977
  getActiveRunCount,
975
978
  closeRunnerWatchers: closeWatchers,
@@ -995,9 +998,28 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
995
998
  if (typeof config.concurrency !== "number" || !Number.isFinite(config.concurrency)) return 1;
996
999
  return Math.max(1, Math.floor(config.concurrency));
997
1000
  }
998
- function nextRegistryLoadIsolationKey(prefix, filePath) {
999
- registryLoadCounter++;
1000
- return `${prefix}:${String(registryLoadCounter)}:${filePath}`;
1001
+ function getCaseDetailFileId(run, caseRow) {
1002
+ const caseKey = getCaseRowCaseKey(caseRow);
1003
+ return run.cases.some((existing) => existing.caseId === caseRow.caseId && getCaseRowCaseKey(existing) !== caseKey) ? caseKey : caseRow.caseId;
1004
+ }
1005
+ function hydrateCaseDetailForRow(run, caseRow) {
1006
+ const caseKey = getCaseRowCaseKey(caseRow);
1007
+ const cached = run.caseDetails.get(caseKey);
1008
+ if (cached !== void 0) return cached;
1009
+ const detail = loadPersistedCaseDetail(run.runDir, getCaseDetailFileId(run, caseRow));
1010
+ if (detail === null) return void 0;
1011
+ run.caseDetails.set(detail.caseKey ?? detail.caseId, detail);
1012
+ return detail;
1013
+ }
1014
+ function hydrateCaseDetailForLookup(run, caseId) {
1015
+ const cached = resolveCaseDetailLookup(run, caseId);
1016
+ if (cached !== void 0) return cached;
1017
+ const caseRow = resolveCaseRowForCaseDetailLookup(run, caseId);
1018
+ if (caseRow === void 0) return void 0;
1019
+ return hydrateCaseDetailForRow(run, caseRow);
1020
+ }
1021
+ function getDiscoveryModuleIsolationKey(filePath) {
1022
+ return `discovery:${filePath}`;
1001
1023
  }
1002
1024
  const runner = {
1003
1025
  async init() {
@@ -1018,29 +1040,17 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
1018
1040
  async recomputeStatusesForEval(evalKey) {
1019
1041
  const evalMeta = resolveEvalMeta(evalKey);
1020
1042
  if (!evalMeta) return { updatedRuns: 0 };
1021
- const entry = (await loadIsolatedEvalRegistry({
1022
- evalFilePath: evalMeta.sourceFilePath,
1023
- sourceFingerprint: evalMeta.sourceFingerprint ?? void 0,
1024
- moduleIsolation: {
1025
- key: nextRegistryLoadIsolationKey("recompute-status", evalMeta.sourceFilePath),
1026
- workspaceRoot
1027
- },
1028
- runtimeScope: "env"
1029
- })).get(evalMeta.id);
1030
- if (!entry) return { updatedRuns: 0 };
1031
1043
  const scoreThresholds = /* @__PURE__ */ new Map();
1032
- entry.use((evalDef) => {
1033
- for (const [key, def] of Object.entries(evalDef.scores ?? {})) {
1034
- const threshold = normalizeScoreDef(def).passThreshold;
1035
- if (threshold !== void 0) scoreThresholds.set(key, threshold);
1036
- }
1037
- for (const [key, def] of Object.entries(evalDef.manualScores ?? {})) if (def.passThreshold !== void 0) scoreThresholds.set(key, def.passThreshold);
1038
- });
1044
+ for (const columnDef of evalMeta.columnDefs) {
1045
+ if (columnDef.isScore !== true || columnDef.passThreshold === void 0) continue;
1046
+ scoreThresholds.set(columnDef.key, columnDef.passThreshold);
1047
+ }
1039
1048
  const updatedRuns = await recomputeEvalStatusesInRuns({
1040
1049
  runs: runs.values(),
1041
1050
  evalKey: evalMeta.key,
1042
1051
  evalExists: evals.has(evalMeta.key),
1043
1052
  scoreThresholds,
1053
+ getCaseDetail: hydrateCaseDetailForRow,
1044
1054
  persistCaseDetail
1045
1055
  });
1046
1056
  emitDiscoveryEvent();
@@ -1052,6 +1062,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
1052
1062
  updated: false,
1053
1063
  reason: "Run not found"
1054
1064
  };
1065
+ hydrateCaseDetailForLookup(run, caseId);
1055
1066
  return recalculateDerivedAttributesForCase({
1056
1067
  run,
1057
1068
  caseId,
@@ -1107,7 +1118,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
1107
1118
  updated: false,
1108
1119
  reason: "Manual score not found"
1109
1120
  };
1110
- const caseDetail = run.caseDetails.get(getCaseRowCaseKey(caseRow));
1121
+ const caseDetail = hydrateCaseDetailForRow(run, caseRow);
1111
1122
  if (!caseDetail) return {
1112
1123
  updated: false,
1113
1124
  reason: "Case detail not found"
@@ -1238,7 +1249,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
1238
1249
  evalFilePath: filePath,
1239
1250
  sourceFingerprint,
1240
1251
  moduleIsolation: {
1241
- key: nextRegistryLoadIsolationKey("discovery", filePath),
1252
+ key: getDiscoveryModuleIsolationKey(filePath),
1242
1253
  workspaceRoot
1243
1254
  },
1244
1255
  runtimeScope: "env"
@@ -1480,7 +1491,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
1480
1491
  getCaseDetail(runId, caseId) {
1481
1492
  const run = runs.get(runId);
1482
1493
  if (!run) return void 0;
1483
- return resolveCaseDetailLookup(run, caseId);
1494
+ return hydrateCaseDetailForLookup(run, caseId);
1484
1495
  },
1485
1496
  subscribe(runId, listener) {
1486
1497
  const run = runs.get(runId);
@@ -2232,8 +2243,8 @@ async function commandApp(args) {
2232
2243
  const { serve } = await import("@hono/node-server");
2233
2244
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2234
2245
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2235
- const appModule = await import("./app-DPCFFkyQ.mjs");
2236
- const runnerModule = await import("./runner-XEP21_u9.mjs");
2246
+ const appModule = await import("./app-gg10KvzS.mjs");
2247
+ const runnerModule = await import("./runner-C4Y0lWb1.mjs");
2237
2248
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2238
2249
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2239
2250
  await runnerModule.initRunner({ loadEnv: args.loadEnv });
package/dist/index.d.mts CHANGED
@@ -1942,7 +1942,6 @@ declare const columnFormatSchema: z.ZodEnum<{
1942
1942
  number: "number";
1943
1943
  boolean: "boolean";
1944
1944
  file: "file";
1945
- duration: "duration";
1946
1945
  markdown: "markdown";
1947
1946
  json: "json";
1948
1947
  image: "image";
@@ -1950,6 +1949,7 @@ declare const columnFormatSchema: z.ZodEnum<{
1950
1949
  pdf: "pdf";
1951
1950
  audio: "audio";
1952
1951
  video: "video";
1952
+ duration: "duration";
1953
1953
  percent: "percent";
1954
1954
  passFail: "passFail";
1955
1955
  stars: "stars";
@@ -1969,7 +1969,6 @@ declare const columnDefSchema: z.ZodObject<{
1969
1969
  number: "number";
1970
1970
  boolean: "boolean";
1971
1971
  file: "file";
1972
- duration: "duration";
1973
1972
  markdown: "markdown";
1974
1973
  json: "json";
1975
1974
  image: "image";
@@ -1977,6 +1976,7 @@ declare const columnDefSchema: z.ZodObject<{
1977
1976
  pdf: "pdf";
1978
1977
  audio: "audio";
1979
1978
  video: "video";
1979
+ duration: "duration";
1980
1980
  percent: "percent";
1981
1981
  passFail: "passFail";
1982
1982
  stars: "stars";
@@ -2022,8 +2022,8 @@ type CellValue = z.infer<typeof cellValueSchema>; //#endregion
2022
2022
  declare const traceAttributeDisplayFormatSchema: z.ZodEnum<{
2023
2023
  string: "string";
2024
2024
  number: "number";
2025
- duration: "duration";
2026
2025
  json: "json";
2026
+ duration: "duration";
2027
2027
  }>;
2028
2028
  /**
2029
2029
  * Formatting hint for trace attribute values rendered by the UI.
@@ -2047,8 +2047,8 @@ declare const traceAttributeDisplaySchema: z.ZodObject<{
2047
2047
  format: z.ZodOptional<z.ZodEnum<{
2048
2048
  string: "string";
2049
2049
  number: "number";
2050
- duration: "duration";
2051
2050
  json: "json";
2051
+ duration: "duration";
2052
2052
  }>>;
2053
2053
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2054
2054
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -2083,8 +2083,8 @@ declare const traceDisplayConfigSchema: z.ZodObject<{
2083
2083
  format: z.ZodOptional<z.ZodEnum<{
2084
2084
  string: "string";
2085
2085
  number: "number";
2086
- duration: "duration";
2087
2086
  json: "json";
2087
+ duration: "duration";
2088
2088
  }>>;
2089
2089
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2090
2090
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -2123,8 +2123,8 @@ declare const traceAttributeDisplayInputSchema: z.ZodObject<{
2123
2123
  format: z.ZodOptional<z.ZodEnum<{
2124
2124
  string: "string";
2125
2125
  number: "number";
2126
- duration: "duration";
2127
2126
  json: "json";
2127
+ duration: "duration";
2128
2128
  }>>;
2129
2129
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2130
2130
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -2161,8 +2161,8 @@ declare const traceDisplayInputConfigSchema: z.ZodObject<{
2161
2161
  format: z.ZodOptional<z.ZodEnum<{
2162
2162
  string: "string";
2163
2163
  number: "number";
2164
- duration: "duration";
2165
2164
  json: "json";
2165
+ duration: "duration";
2166
2166
  }>>;
2167
2167
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2168
2168
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -2214,8 +2214,8 @@ declare const traceSpanSchema$1: z.ZodObject<{
2214
2214
  status: z.ZodEnum<{
2215
2215
  error: "error";
2216
2216
  running: "running";
2217
- cancelled: "cancelled";
2218
2217
  ok: "ok";
2218
+ cancelled: "cancelled";
2219
2219
  }>;
2220
2220
  attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
2221
2221
  error: z.ZodOptional<z.ZodObject<{
@@ -2327,7 +2327,6 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
2327
2327
  number: "number";
2328
2328
  boolean: "boolean";
2329
2329
  file: "file";
2330
- duration: "duration";
2331
2330
  markdown: "markdown";
2332
2331
  json: "json";
2333
2332
  image: "image";
@@ -2335,6 +2334,7 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
2335
2334
  pdf: "pdf";
2336
2335
  audio: "audio";
2337
2336
  video: "video";
2337
+ duration: "duration";
2338
2338
  percent: "percent";
2339
2339
  passFail: "passFail";
2340
2340
  stars: "stars";
@@ -2391,7 +2391,6 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
2391
2391
  number: "number";
2392
2392
  boolean: "boolean";
2393
2393
  file: "file";
2394
- duration: "duration";
2395
2394
  markdown: "markdown";
2396
2395
  json: "json";
2397
2396
  image: "image";
@@ -2399,6 +2398,7 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
2399
2398
  pdf: "pdf";
2400
2399
  audio: "audio";
2401
2400
  video: "video";
2401
+ duration: "duration";
2402
2402
  percent: "percent";
2403
2403
  passFail: "passFail";
2404
2404
  stars: "stars";
@@ -2437,7 +2437,6 @@ declare const evalSummarySchema$1: z.ZodObject<{
2437
2437
  number: "number";
2438
2438
  boolean: "boolean";
2439
2439
  file: "file";
2440
- duration: "duration";
2441
2440
  markdown: "markdown";
2442
2441
  json: "json";
2443
2442
  image: "image";
@@ -2445,6 +2444,7 @@ declare const evalSummarySchema$1: z.ZodObject<{
2445
2444
  pdf: "pdf";
2446
2445
  audio: "audio";
2447
2446
  video: "video";
2447
+ duration: "duration";
2448
2448
  percent: "percent";
2449
2449
  passFail: "passFail";
2450
2450
  stars: "stars";
@@ -2466,10 +2466,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
2466
2466
  caseIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
2467
2467
  lastRunStatus: z.ZodNullable<z.ZodEnum<{
2468
2468
  error: "error";
2469
- pass: "pass";
2470
- fail: "fail";
2471
2469
  running: "running";
2472
2470
  cancelled: "cancelled";
2471
+ pass: "pass";
2472
+ fail: "fail";
2473
2473
  unscored: "unscored";
2474
2474
  }>>;
2475
2475
  stats: z.ZodOptional<z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
@@ -2518,7 +2518,6 @@ declare const evalSummarySchema$1: z.ZodObject<{
2518
2518
  number: "number";
2519
2519
  boolean: "boolean";
2520
2520
  file: "file";
2521
- duration: "duration";
2522
2521
  markdown: "markdown";
2523
2522
  json: "json";
2524
2523
  image: "image";
@@ -2526,6 +2525,7 @@ declare const evalSummarySchema$1: z.ZodObject<{
2526
2525
  pdf: "pdf";
2527
2526
  audio: "audio";
2528
2527
  video: "video";
2528
+ duration: "duration";
2529
2529
  percent: "percent";
2530
2530
  passFail: "passFail";
2531
2531
  stars: "stars";
@@ -2558,9 +2558,9 @@ declare const evalSummarySchema$1: z.ZodObject<{
2558
2558
  }>;
2559
2559
  label: z.ZodOptional<z.ZodString>;
2560
2560
  color: z.ZodOptional<z.ZodEnum<{
2561
+ error: "error";
2561
2562
  success: "success";
2562
2563
  accent: "accent";
2563
- error: "error";
2564
2564
  accentDim: "accentDim";
2565
2565
  warning: "warning";
2566
2566
  textMuted: "textMuted";
@@ -2582,9 +2582,9 @@ declare const evalSummarySchema$1: z.ZodObject<{
2582
2582
  }>;
2583
2583
  label: z.ZodOptional<z.ZodString>;
2584
2584
  color: z.ZodOptional<z.ZodEnum<{
2585
+ error: "error";
2585
2586
  success: "success";
2586
2587
  accent: "accent";
2587
- error: "error";
2588
2588
  accentDim: "accentDim";
2589
2589
  warning: "warning";
2590
2590
  textMuted: "textMuted";
@@ -2715,10 +2715,10 @@ declare const caseRowSchema$1: z.ZodObject<{
2715
2715
  tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
2716
2716
  status: z.ZodEnum<{
2717
2717
  error: "error";
2718
- pass: "pass";
2719
- fail: "fail";
2720
2718
  running: "running";
2721
2719
  cancelled: "cancelled";
2720
+ pass: "pass";
2721
+ fail: "fail";
2722
2722
  pending: "pending";
2723
2723
  }>;
2724
2724
  durationMs: z.ZodNullable<z.ZodNumber>;
@@ -2749,7 +2749,6 @@ declare const caseRowSchema$1: z.ZodObject<{
2749
2749
  number: "number";
2750
2750
  boolean: "boolean";
2751
2751
  file: "file";
2752
- duration: "duration";
2753
2752
  markdown: "markdown";
2754
2753
  json: "json";
2755
2754
  image: "image";
@@ -2757,6 +2756,7 @@ declare const caseRowSchema$1: z.ZodObject<{
2757
2756
  pdf: "pdf";
2758
2757
  audio: "audio";
2759
2758
  video: "video";
2759
+ duration: "duration";
2760
2760
  percent: "percent";
2761
2761
  passFail: "passFail";
2762
2762
  stars: "stars";
@@ -2857,8 +2857,8 @@ declare const scoreTraceSchema: z.ZodObject<{
2857
2857
  status: z.ZodEnum<{
2858
2858
  error: "error";
2859
2859
  running: "running";
2860
- cancelled: "cancelled";
2861
2860
  ok: "ok";
2861
+ cancelled: "cancelled";
2862
2862
  }>;
2863
2863
  attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
2864
2864
  error: z.ZodOptional<z.ZodObject<{
@@ -2894,8 +2894,8 @@ declare const scoreTraceSchema: z.ZodObject<{
2894
2894
  format: z.ZodOptional<z.ZodEnum<{
2895
2895
  string: "string";
2896
2896
  number: "number";
2897
- duration: "duration";
2898
2897
  json: "json";
2898
+ duration: "duration";
2899
2899
  }>>;
2900
2900
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2901
2901
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -2920,10 +2920,10 @@ declare const scoreTraceSchema: z.ZodObject<{
2920
2920
  namespace: z.ZodString;
2921
2921
  key: z.ZodString;
2922
2922
  status: z.ZodEnum<{
2923
+ bypass: "bypass";
2924
+ refresh: "refresh";
2923
2925
  hit: "hit";
2924
2926
  miss: "miss";
2925
- refresh: "refresh";
2926
- bypass: "bypass";
2927
2927
  }>;
2928
2928
  read: z.ZodOptional<z.ZodBoolean>;
2929
2929
  stored: z.ZodOptional<z.ZodBoolean>;
@@ -2942,10 +2942,10 @@ declare const caseDetailSchema$1: z.ZodObject<{
2942
2942
  tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
2943
2943
  status: z.ZodEnum<{
2944
2944
  error: "error";
2945
- pass: "pass";
2946
- fail: "fail";
2947
2945
  running: "running";
2948
2946
  cancelled: "cancelled";
2947
+ pass: "pass";
2948
+ fail: "fail";
2949
2949
  pending: "pending";
2950
2950
  }>;
2951
2951
  input: z.ZodUnknown;
@@ -2960,8 +2960,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
2960
2960
  status: z.ZodEnum<{
2961
2961
  error: "error";
2962
2962
  running: "running";
2963
- cancelled: "cancelled";
2964
2963
  ok: "ok";
2964
+ cancelled: "cancelled";
2965
2965
  }>;
2966
2966
  attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
2967
2967
  error: z.ZodOptional<z.ZodObject<{
@@ -2997,8 +2997,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
2997
2997
  format: z.ZodOptional<z.ZodEnum<{
2998
2998
  string: "string";
2999
2999
  number: "number";
3000
- duration: "duration";
3001
3000
  json: "json";
3001
+ duration: "duration";
3002
3002
  }>>;
3003
3003
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3004
3004
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -3029,8 +3029,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
3029
3029
  status: z.ZodEnum<{
3030
3030
  error: "error";
3031
3031
  running: "running";
3032
- cancelled: "cancelled";
3033
3032
  ok: "ok";
3033
+ cancelled: "cancelled";
3034
3034
  }>;
3035
3035
  attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
3036
3036
  error: z.ZodOptional<z.ZodObject<{
@@ -3066,8 +3066,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
3066
3066
  format: z.ZodOptional<z.ZodEnum<{
3067
3067
  string: "string";
3068
3068
  number: "number";
3069
- duration: "duration";
3070
3069
  json: "json";
3070
+ duration: "duration";
3071
3071
  }>>;
3072
3072
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3073
3073
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -3092,10 +3092,10 @@ declare const caseDetailSchema$1: z.ZodObject<{
3092
3092
  namespace: z.ZodString;
3093
3093
  key: z.ZodString;
3094
3094
  status: z.ZodEnum<{
3095
+ bypass: "bypass";
3096
+ refresh: "refresh";
3095
3097
  hit: "hit";
3096
3098
  miss: "miss";
3097
- refresh: "refresh";
3098
- bypass: "bypass";
3099
3099
  }>;
3100
3100
  read: z.ZodOptional<z.ZodBoolean>;
3101
3101
  stored: z.ZodOptional<z.ZodBoolean>;
@@ -3127,7 +3127,6 @@ declare const caseDetailSchema$1: z.ZodObject<{
3127
3127
  number: "number";
3128
3128
  boolean: "boolean";
3129
3129
  file: "file";
3130
- duration: "duration";
3131
3130
  markdown: "markdown";
3132
3131
  json: "json";
3133
3132
  image: "image";
@@ -3135,6 +3134,7 @@ declare const caseDetailSchema$1: z.ZodObject<{
3135
3134
  pdf: "pdf";
3136
3135
  audio: "audio";
3137
3136
  video: "video";
3137
+ duration: "duration";
3138
3138
  percent: "percent";
3139
3139
  passFail: "passFail";
3140
3140
  stars: "stars";
@@ -3213,10 +3213,10 @@ declare const caseDetailSchema$1: z.ZodObject<{
3213
3213
  namespace: z.ZodString;
3214
3214
  key: z.ZodString;
3215
3215
  status: z.ZodEnum<{
3216
+ bypass: "bypass";
3217
+ refresh: "refresh";
3216
3218
  hit: "hit";
3217
3219
  miss: "miss";
3218
- refresh: "refresh";
3219
- bypass: "bypass";
3220
3220
  }>;
3221
3221
  read: z.ZodOptional<z.ZodBoolean>;
3222
3222
  stored: z.ZodOptional<z.ZodBoolean>;
@@ -3283,9 +3283,9 @@ type EvalChartAggregate = z.infer<typeof evalChartAggregateSchema>;
3283
3283
  * not emit raw hex so authored evals stay decoupled from the web theme.
3284
3284
  */
3285
3285
  declare const evalChartColorSchema: z.ZodEnum<{
3286
+ error: "error";
3286
3287
  success: "success";
3287
3288
  accent: "accent";
3288
- error: "error";
3289
3289
  accentDim: "accentDim";
3290
3290
  warning: "warning";
3291
3291
  textMuted: "textMuted";
@@ -3312,9 +3312,9 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
3312
3312
  }>;
3313
3313
  label: z.ZodOptional<z.ZodString>;
3314
3314
  color: z.ZodOptional<z.ZodEnum<{
3315
+ error: "error";
3315
3316
  success: "success";
3316
3317
  accent: "accent";
3317
- error: "error";
3318
3318
  accentDim: "accentDim";
3319
3319
  warning: "warning";
3320
3320
  textMuted: "textMuted";
@@ -3336,9 +3336,9 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
3336
3336
  }>;
3337
3337
  label: z.ZodOptional<z.ZodString>;
3338
3338
  color: z.ZodOptional<z.ZodEnum<{
3339
+ error: "error";
3339
3340
  success: "success";
3340
3341
  accent: "accent";
3341
- error: "error";
3342
3342
  accentDim: "accentDim";
3343
3343
  warning: "warning";
3344
3344
  textMuted: "textMuted";
@@ -3395,9 +3395,9 @@ declare const evalChartConfigSchema: z.ZodObject<{
3395
3395
  }>;
3396
3396
  label: z.ZodOptional<z.ZodString>;
3397
3397
  color: z.ZodOptional<z.ZodEnum<{
3398
+ error: "error";
3398
3399
  success: "success";
3399
3400
  accent: "accent";
3400
- error: "error";
3401
3401
  accentDim: "accentDim";
3402
3402
  warning: "warning";
3403
3403
  textMuted: "textMuted";
@@ -3419,9 +3419,9 @@ declare const evalChartConfigSchema: z.ZodObject<{
3419
3419
  }>;
3420
3420
  label: z.ZodOptional<z.ZodString>;
3421
3421
  color: z.ZodOptional<z.ZodEnum<{
3422
+ error: "error";
3422
3423
  success: "success";
3423
3424
  accent: "accent";
3424
- error: "error";
3425
3425
  accentDim: "accentDim";
3426
3426
  warning: "warning";
3427
3427
  textMuted: "textMuted";
@@ -3485,9 +3485,9 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
3485
3485
  }>;
3486
3486
  label: z.ZodOptional<z.ZodString>;
3487
3487
  color: z.ZodOptional<z.ZodEnum<{
3488
+ error: "error";
3488
3489
  success: "success";
3489
3490
  accent: "accent";
3490
- error: "error";
3491
3491
  accentDim: "accentDim";
3492
3492
  warning: "warning";
3493
3493
  textMuted: "textMuted";
@@ -3509,9 +3509,9 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
3509
3509
  }>;
3510
3510
  label: z.ZodOptional<z.ZodString>;
3511
3511
  color: z.ZodOptional<z.ZodEnum<{
3512
+ error: "error";
3512
3513
  success: "success";
3513
3514
  accent: "accent";
3514
- error: "error";
3515
3515
  accentDim: "accentDim";
3516
3516
  warning: "warning";
3517
3517
  textMuted: "textMuted";
@@ -3589,9 +3589,9 @@ declare const runManifestSchema$1: z.ZodObject<{
3589
3589
  median: "median";
3590
3590
  }>>>;
3591
3591
  cacheMode: z.ZodOptional<z.ZodEnum<{
3592
- refresh: "refresh";
3593
- bypass: "bypass";
3594
3592
  use: "use";
3593
+ bypass: "bypass";
3594
+ refresh: "refresh";
3595
3595
  }>>;
3596
3596
  }, z.core.$strip>;
3597
3597
  /** Persisted lifecycle metadata for a single eval run. */
@@ -3808,8 +3808,8 @@ declare const llmCallMetricFormatSchema$1: z.ZodEnum<{
3808
3808
  string: "string";
3809
3809
  number: "number";
3810
3810
  boolean: "boolean";
3811
- duration: "duration";
3812
3811
  json: "json";
3812
+ duration: "duration";
3813
3813
  }>;
3814
3814
  /** Render format applied to an LLM-call metric value. */
3815
3815
  type LlmCallMetricFormat = z.infer<typeof llmCallMetricFormatSchema$1>;
@@ -3818,8 +3818,8 @@ declare const apiCallMetricFormatSchema$1: z.ZodEnum<{
3818
3818
  string: "string";
3819
3819
  number: "number";
3820
3820
  boolean: "boolean";
3821
- duration: "duration";
3822
3821
  json: "json";
3822
+ duration: "duration";
3823
3823
  }>;
3824
3824
  /** Render format applied to an API-call metric value. */
3825
3825
  type ApiCallMetricFormat = z.infer<typeof apiCallMetricFormatSchema$1>;
@@ -3888,8 +3888,8 @@ declare const llmCallMetricSchema: z.ZodObject<{
3888
3888
  string: "string";
3889
3889
  number: "number";
3890
3890
  boolean: "boolean";
3891
- duration: "duration";
3892
3891
  json: "json";
3892
+ duration: "duration";
3893
3893
  }>>;
3894
3894
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3895
3895
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -3917,8 +3917,8 @@ declare const apiCallMetricSchema: z.ZodObject<{
3917
3917
  string: "string";
3918
3918
  number: "number";
3919
3919
  boolean: "boolean";
3920
- duration: "duration";
3921
3920
  json: "json";
3921
+ duration: "duration";
3922
3922
  }>>;
3923
3923
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3924
3924
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -4031,8 +4031,8 @@ declare const llmCallsConfigSchema: z.ZodObject<{
4031
4031
  string: "string";
4032
4032
  number: "number";
4033
4033
  boolean: "boolean";
4034
- duration: "duration";
4035
4034
  json: "json";
4035
+ duration: "duration";
4036
4036
  }>>;
4037
4037
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
4038
4038
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -4067,8 +4067,8 @@ declare const apiCallsConfigSchema: z.ZodObject<{
4067
4067
  string: "string";
4068
4068
  number: "number";
4069
4069
  boolean: "boolean";
4070
- duration: "duration";
4071
4070
  json: "json";
4071
+ duration: "duration";
4072
4072
  }>>;
4073
4073
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
4074
4074
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -4567,9 +4567,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
4567
4567
  * - `refresh`: never read, always write (forces re-execution and overwrites).
4568
4568
  */
4569
4569
  declare const cacheModeSchema: z.ZodEnum<{
4570
- refresh: "refresh";
4571
- bypass: "bypass";
4572
4570
  use: "use";
4571
+ bypass: "bypass";
4572
+ refresh: "refresh";
4573
4573
  }>;
4574
4574
  /** Mode controlling how cached spans behave during a run. */
4575
4575
  type CacheMode = z.infer<typeof cacheModeSchema>;
@@ -4583,17 +4583,17 @@ declare const spanCacheOptionsSchema: z.ZodObject<{
4583
4583
  type SpanCacheOptions = z.infer<typeof spanCacheOptionsSchema>;
4584
4584
  /** Category of operation stored in the eval cache. */
4585
4585
  declare const cacheOperationTypeSchema: z.ZodEnum<{
4586
- value: "value";
4587
4586
  span: "span";
4587
+ value: "value";
4588
4588
  }>;
4589
4589
  /** Category of operation stored in the eval cache. */
4590
4590
  type CacheOperationType = z.infer<typeof cacheOperationTypeSchema>;
4591
4591
  /** Status of a cache lookup recorded on a span or case scope. */
4592
4592
  declare const cacheStatusSchema: z.ZodEnum<{
4593
+ bypass: "bypass";
4594
+ refresh: "refresh";
4593
4595
  hit: "hit";
4594
4596
  miss: "miss";
4595
- refresh: "refresh";
4596
- bypass: "bypass";
4597
4597
  }>;
4598
4598
  /** Status of a cache lookup recorded on a span or case scope. */
4599
4599
  type CacheStatus = z.infer<typeof cacheStatusSchema>;
@@ -4610,10 +4610,10 @@ declare const traceCacheRefSchema: z.ZodObject<{
4610
4610
  namespace: z.ZodString;
4611
4611
  key: z.ZodString;
4612
4612
  status: z.ZodEnum<{
4613
+ bypass: "bypass";
4614
+ refresh: "refresh";
4613
4615
  hit: "hit";
4614
4616
  miss: "miss";
4615
- refresh: "refresh";
4616
- bypass: "bypass";
4617
4617
  }>;
4618
4618
  read: z.ZodOptional<z.ZodBoolean>;
4619
4619
  stored: z.ZodOptional<z.ZodBoolean>;
@@ -4670,7 +4670,6 @@ declare const cacheRecordingOpSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
4670
4670
  number: "number";
4671
4671
  boolean: "boolean";
4672
4672
  file: "file";
4673
- duration: "duration";
4674
4673
  markdown: "markdown";
4675
4674
  json: "json";
4676
4675
  image: "image";
@@ -4678,6 +4677,7 @@ declare const cacheRecordingOpSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
4678
4677
  pdf: "pdf";
4679
4678
  audio: "audio";
4680
4679
  video: "video";
4680
+ duration: "duration";
4681
4681
  percent: "percent";
4682
4682
  passFail: "passFail";
4683
4683
  stars: "stars";
@@ -4721,8 +4721,8 @@ declare const cacheRecordingSchema: z.ZodObject<{
4721
4721
  finalStatus: z.ZodOptional<z.ZodEnum<{
4722
4722
  error: "error";
4723
4723
  running: "running";
4724
- cancelled: "cancelled";
4725
4724
  ok: "ok";
4725
+ cancelled: "cancelled";
4726
4726
  }>>;
4727
4727
  finalError: z.ZodOptional<z.ZodObject<{
4728
4728
  name: z.ZodOptional<z.ZodString>;
@@ -4758,7 +4758,6 @@ declare const cacheRecordingSchema: z.ZodObject<{
4758
4758
  number: "number";
4759
4759
  boolean: "boolean";
4760
4760
  file: "file";
4761
- duration: "duration";
4762
4761
  markdown: "markdown";
4763
4762
  json: "json";
4764
4763
  image: "image";
@@ -4766,6 +4765,7 @@ declare const cacheRecordingSchema: z.ZodObject<{
4766
4765
  pdf: "pdf";
4767
4766
  audio: "audio";
4768
4767
  video: "video";
4768
+ duration: "duration";
4769
4769
  percent: "percent";
4770
4770
  passFail: "passFail";
4771
4771
  stars: "stars";
@@ -4809,8 +4809,8 @@ declare const cacheEntrySchema: z.ZodObject<{
4809
4809
  key: z.ZodString;
4810
4810
  namespace: z.ZodString;
4811
4811
  operationType: z.ZodOptional<z.ZodEnum<{
4812
- value: "value";
4813
4812
  span: "span";
4813
+ value: "value";
4814
4814
  }>>;
4815
4815
  operationName: z.ZodOptional<z.ZodString>;
4816
4816
  spanName: z.ZodOptional<z.ZodString>;
@@ -4822,8 +4822,8 @@ declare const cacheEntrySchema: z.ZodObject<{
4822
4822
  finalStatus: z.ZodOptional<z.ZodEnum<{
4823
4823
  error: "error";
4824
4824
  running: "running";
4825
- cancelled: "cancelled";
4826
4825
  ok: "ok";
4826
+ cancelled: "cancelled";
4827
4827
  }>>;
4828
4828
  finalError: z.ZodOptional<z.ZodObject<{
4829
4829
  name: z.ZodOptional<z.ZodString>;
@@ -4859,7 +4859,6 @@ declare const cacheEntrySchema: z.ZodObject<{
4859
4859
  number: "number";
4860
4860
  boolean: "boolean";
4861
4861
  file: "file";
4862
- duration: "duration";
4863
4862
  markdown: "markdown";
4864
4863
  json: "json";
4865
4864
  image: "image";
@@ -4867,6 +4866,7 @@ declare const cacheEntrySchema: z.ZodObject<{
4867
4866
  pdf: "pdf";
4868
4867
  audio: "audio";
4869
4868
  video: "video";
4869
+ duration: "duration";
4870
4870
  percent: "percent";
4871
4871
  passFail: "passFail";
4872
4872
  stars: "stars";
@@ -4916,8 +4916,8 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
4916
4916
  key: z.ZodString;
4917
4917
  namespace: z.ZodString;
4918
4918
  operationType: z.ZodEnum<{
4919
- value: "value";
4920
4919
  span: "span";
4920
+ value: "value";
4921
4921
  }>;
4922
4922
  operationName: z.ZodString;
4923
4923
  storedAt: z.ZodString;
@@ -4927,8 +4927,8 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
4927
4927
  key: z.ZodString;
4928
4928
  namespace: z.ZodString;
4929
4929
  operationType: z.ZodOptional<z.ZodEnum<{
4930
- value: "value";
4931
4930
  span: "span";
4931
+ value: "value";
4932
4932
  }>>;
4933
4933
  operationName: z.ZodOptional<z.ZodString>;
4934
4934
  spanName: z.ZodOptional<z.ZodString>;
@@ -4940,8 +4940,8 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
4940
4940
  finalStatus: z.ZodOptional<z.ZodEnum<{
4941
4941
  error: "error";
4942
4942
  running: "running";
4943
- cancelled: "cancelled";
4944
4943
  ok: "ok";
4944
+ cancelled: "cancelled";
4945
4945
  }>>;
4946
4946
  finalError: z.ZodOptional<z.ZodObject<{
4947
4947
  name: z.ZodOptional<z.ZodString>;
@@ -4977,7 +4977,6 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
4977
4977
  number: "number";
4978
4978
  boolean: "boolean";
4979
4979
  file: "file";
4980
- duration: "duration";
4981
4980
  markdown: "markdown";
4982
4981
  json: "json";
4983
4982
  image: "image";
@@ -4985,6 +4984,7 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
4985
4984
  pdf: "pdf";
4986
4985
  audio: "audio";
4987
4986
  video: "video";
4987
+ duration: "duration";
4988
4988
  percent: "percent";
4989
4989
  passFail: "passFail";
4990
4990
  stars: "stars";
@@ -5034,8 +5034,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5034
5034
  key: z.ZodString;
5035
5035
  namespace: z.ZodString;
5036
5036
  operationType: z.ZodOptional<z.ZodEnum<{
5037
- value: "value";
5038
5037
  span: "span";
5038
+ value: "value";
5039
5039
  }>>;
5040
5040
  operationName: z.ZodOptional<z.ZodString>;
5041
5041
  spanName: z.ZodOptional<z.ZodString>;
@@ -5047,8 +5047,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5047
5047
  finalStatus: z.ZodOptional<z.ZodEnum<{
5048
5048
  error: "error";
5049
5049
  running: "running";
5050
- cancelled: "cancelled";
5051
5050
  ok: "ok";
5051
+ cancelled: "cancelled";
5052
5052
  }>>;
5053
5053
  finalError: z.ZodOptional<z.ZodObject<{
5054
5054
  name: z.ZodOptional<z.ZodString>;
@@ -5084,7 +5084,6 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5084
5084
  number: "number";
5085
5085
  boolean: "boolean";
5086
5086
  file: "file";
5087
- duration: "duration";
5088
5087
  markdown: "markdown";
5089
5088
  json: "json";
5090
5089
  image: "image";
@@ -5092,6 +5091,7 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5092
5091
  pdf: "pdf";
5093
5092
  audio: "audio";
5094
5093
  video: "video";
5094
+ duration: "duration";
5095
5095
  percent: "percent";
5096
5096
  passFail: "passFail";
5097
5097
  stars: "stars";
@@ -5132,8 +5132,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5132
5132
  key: z.ZodString;
5133
5133
  namespace: z.ZodString;
5134
5134
  operationType: z.ZodEnum<{
5135
- value: "value";
5136
5135
  span: "span";
5136
+ value: "value";
5137
5137
  }>;
5138
5138
  operationName: z.ZodString;
5139
5139
  storedAt: z.ZodString;
@@ -5143,8 +5143,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5143
5143
  key: z.ZodString;
5144
5144
  namespace: z.ZodString;
5145
5145
  operationType: z.ZodOptional<z.ZodEnum<{
5146
- value: "value";
5147
5146
  span: "span";
5147
+ value: "value";
5148
5148
  }>>;
5149
5149
  operationName: z.ZodOptional<z.ZodString>;
5150
5150
  spanName: z.ZodOptional<z.ZodString>;
@@ -5156,8 +5156,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5156
5156
  finalStatus: z.ZodOptional<z.ZodEnum<{
5157
5157
  error: "error";
5158
5158
  running: "running";
5159
- cancelled: "cancelled";
5160
5159
  ok: "ok";
5160
+ cancelled: "cancelled";
5161
5161
  }>>;
5162
5162
  finalError: z.ZodOptional<z.ZodObject<{
5163
5163
  name: z.ZodOptional<z.ZodString>;
@@ -5193,7 +5193,6 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5193
5193
  number: "number";
5194
5194
  boolean: "boolean";
5195
5195
  file: "file";
5196
- duration: "duration";
5197
5196
  markdown: "markdown";
5198
5197
  json: "json";
5199
5198
  image: "image";
@@ -5201,6 +5200,7 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5201
5200
  pdf: "pdf";
5202
5201
  audio: "audio";
5203
5202
  video: "video";
5203
+ duration: "duration";
5204
5204
  percent: "percent";
5205
5205
  passFail: "passFail";
5206
5206
  stars: "stars";
@@ -5250,8 +5250,8 @@ declare const cacheFileSchema: z.ZodObject<{
5250
5250
  key: z.ZodString;
5251
5251
  namespace: z.ZodString;
5252
5252
  operationType: z.ZodOptional<z.ZodEnum<{
5253
- value: "value";
5254
5253
  span: "span";
5254
+ value: "value";
5255
5255
  }>>;
5256
5256
  operationName: z.ZodOptional<z.ZodString>;
5257
5257
  spanName: z.ZodOptional<z.ZodString>;
@@ -5263,8 +5263,8 @@ declare const cacheFileSchema: z.ZodObject<{
5263
5263
  finalStatus: z.ZodOptional<z.ZodEnum<{
5264
5264
  error: "error";
5265
5265
  running: "running";
5266
- cancelled: "cancelled";
5267
5266
  ok: "ok";
5267
+ cancelled: "cancelled";
5268
5268
  }>>;
5269
5269
  finalError: z.ZodOptional<z.ZodObject<{
5270
5270
  name: z.ZodOptional<z.ZodString>;
@@ -5300,7 +5300,6 @@ declare const cacheFileSchema: z.ZodObject<{
5300
5300
  number: "number";
5301
5301
  boolean: "boolean";
5302
5302
  file: "file";
5303
- duration: "duration";
5304
5303
  markdown: "markdown";
5305
5304
  json: "json";
5306
5305
  image: "image";
@@ -5308,6 +5307,7 @@ declare const cacheFileSchema: z.ZodObject<{
5308
5307
  pdf: "pdf";
5309
5308
  audio: "audio";
5310
5309
  video: "video";
5310
+ duration: "duration";
5311
5311
  percent: "percent";
5312
5312
  passFail: "passFail";
5313
5313
  stars: "stars";
@@ -5356,8 +5356,8 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
5356
5356
  key: z.ZodString;
5357
5357
  namespace: z.ZodString;
5358
5358
  operationType: z.ZodEnum<{
5359
- value: "value";
5360
5359
  span: "span";
5360
+ value: "value";
5361
5361
  }>;
5362
5362
  operationName: z.ZodString;
5363
5363
  storedAt: z.ZodString;
@@ -5367,8 +5367,8 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
5367
5367
  key: z.ZodString;
5368
5368
  namespace: z.ZodString;
5369
5369
  operationType: z.ZodOptional<z.ZodEnum<{
5370
- value: "value";
5371
5370
  span: "span";
5371
+ value: "value";
5372
5372
  }>>;
5373
5373
  operationName: z.ZodOptional<z.ZodString>;
5374
5374
  spanName: z.ZodOptional<z.ZodString>;
@@ -5380,8 +5380,8 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
5380
5380
  finalStatus: z.ZodOptional<z.ZodEnum<{
5381
5381
  error: "error";
5382
5382
  running: "running";
5383
- cancelled: "cancelled";
5384
5383
  ok: "ok";
5384
+ cancelled: "cancelled";
5385
5385
  }>>;
5386
5386
  finalError: z.ZodOptional<z.ZodObject<{
5387
5387
  name: z.ZodOptional<z.ZodString>;
@@ -5417,7 +5417,6 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
5417
5417
  number: "number";
5418
5418
  boolean: "boolean";
5419
5419
  file: "file";
5420
- duration: "duration";
5421
5420
  markdown: "markdown";
5422
5421
  json: "json";
5423
5422
  image: "image";
@@ -5425,6 +5424,7 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
5425
5424
  pdf: "pdf";
5426
5425
  audio: "audio";
5427
5426
  video: "video";
5427
+ duration: "duration";
5428
5428
  percent: "percent";
5429
5429
  passFail: "passFail";
5430
5430
  stars: "stars";
@@ -5579,9 +5579,9 @@ declare const createRunRequestSchema$1: z.ZodObject<{
5579
5579
  temporary: z.ZodOptional<z.ZodBoolean>;
5580
5580
  cache: z.ZodOptional<z.ZodObject<{
5581
5581
  mode: z.ZodDefault<z.ZodEnum<{
5582
- refresh: "refresh";
5583
- bypass: "bypass";
5584
5582
  use: "use";
5583
+ bypass: "bypass";
5584
+ refresh: "refresh";
5585
5585
  }>>;
5586
5586
  }, z.core.$strip>>;
5587
5587
  manualInputs: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as setScopeCacheContext, A as repoFile, B as evalTime, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as EvalRuntimeUsageError, Ft as getEvalRegistry, H as getEvalCaseInput, I as appendToEvalOutput, J as runInEvalRuntimeScope, K as mergeEvalOutput, M as readManualInputFile, N as evalExpect, O as serializeCacheRecording, P as EvalAssertionError, Q as setEvalOutput, R as evalAssert, S as evalSpan, T as hashCacheKeySync, U as incrementEvalOutput, V as getCurrentScope, W as isInEvalScope, X as runInExistingEvalScope, Y as runInEvalScope, at as extractApiCalls, b as buildTraceTree, ct as simulateTokenAllocation, et as startEvalBackgroundJob, it as extractCacheHits, j as manualInputFileValueSchema, k as serializeCacheValue, ot as extractLlmCalls, q as nextEvalId, rt as extractCacheEntries, st as simulateLlmCallCost, ut as getNestedAttribute, w as hashCacheKey, x as captureEvalSpanError, z as evalLog } from "./runExecution-Bq0Y3y_1.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CbePEEua.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-CVM_FqPx.mjs";
1
+ import { $ as startEvalBackgroundJob, A as manualInputFileValueSchema, B as getCurrentScope, C as hashCacheKey, D as serializeCacheRecording, E as deserializeCacheValue, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as evalExpect, N as EvalAssertionError, O as serializeCacheValue, P as EvalRuntimeUsageError, Pt as getEvalRegistry, Q as setScopeCacheContext, R as evalLog, S as evalTracer, T as deserializeCacheRecording, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as captureEvalSpanError, it as extractApiCalls, j as readManualInputFile, k as repoFile, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKeySync, x as evalSpan, y as buildTraceTree, z as evalTime } from "./runExecution-Bu9yfdUS.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-OLZIjQpx.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-Cy3OxoZW.mjs";
4
4
  export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob };
package/dist/runChild.mjs CHANGED
@@ -1,5 +1,5 @@
1
- import { At as evalStatsConfigSchema, L as configureEvalRunLogs, Mt as evalChartsConfigSchema, Nt as columnDefSchema, Tt as buildEvalKey, bt as runManifestSchema, jt as manualInputDescriptorSchema, kt as evalStatAggregateSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, tt as createRunRequestSchema, v as createFsCacheStore, xt as runSummarySchema, y as getCacheRetentionOptions } from "./runExecution-Bq0Y3y_1.mjs";
2
- import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-BpwW0AmB.mjs";
1
+ import { At as manualInputDescriptorSchema, I as configureEvalRunLogs, Mt as columnDefSchema, Ot as evalStatAggregateSchema, _ as createFsCacheStore, bt as runSummarySchema, et as createRunRequestSchema, jt as evalChartsConfigSchema, kt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as getCacheRetentionOptions, wt as buildEvalKey, yt as runManifestSchema } from "./runExecution-Bu9yfdUS.mjs";
2
+ import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-mpgZmEZ6.mjs";
3
3
  import { z } from "zod";
4
4
  import { readFile } from "node:fs/promises";
5
5
  import { relative } from "node:path";
@@ -7239,4 +7239,4 @@ function recordAssertionFailure(scope, failure) {
7239
7239
  });
7240
7240
  }
7241
7241
  //#endregion
7242
- export { setScopeCacheContext as $, repoFile as A, evalStatsConfigSchema as At, evalTime as B, evalTracer as C, resolveLlmCallsConfig as Ct, deserializeCacheValue as D, caseDetailSchema as Dt, deserializeCacheRecording as E, getCaseRowCaseKey as Et, EvalRuntimeUsageError as F, getEvalRegistry as Ft, matchesEvalTags as G, getEvalCaseInput as H, appendToEvalOutput as I, runWithEvalRegistry as It, runInEvalRuntimeScope as J, mergeEvalOutput as K, configureEvalRunLogs as L, readManualInputFile as M, evalChartsConfigSchema as Mt, evalExpect as N, columnDefSchema as Nt, serializeCacheRecording as O, caseRowSchema as Ot, EvalAssertionError as P, defineEval as Pt, setEvalOutput as Q, evalAssert as R, evalSpan as S, resolveApiCallsConfig as St, hashCacheKeySync as T, buildEvalKey as Tt, incrementEvalOutput as U, getCurrentScope as V, isInEvalScope as W, runInExistingEvalScope as X, runInEvalScope as Y, runWithEvalClock as Z, createBufferedCacheStore as _, matchesTagsFilter as _t, isCaseChildParentMessage as a, extractApiCalls as at, buildTraceTree as b, runManifestSchema as bt, resolveArtifactPath as c, simulateTokenAllocation as ct, loadEvalModule as d, getEvalTitle as dt, startEvalBackgroundJob as et, resolveEvalDefaultConfig as f, getEvalDisplayStatus as ft, commitPendingCacheWrites as g, dedupeEvalTags as gt, normalizeScoreDef as h, deriveStatusFromChildStatuses as ht, isCaseChildMessage as i, extractCacheHits as it, manualInputFileValueSchema as j, manualInputDescriptorSchema as jt, serializeCacheValue as k, evalStatAggregateSchema as kt, registerAgentEvalsPackageResolutionHooks as l, applyDerivedCallAttributes as lt, buildDeclaredColumnDefs as m, deriveStatusFromCaseRows as mt, resolveRunnableEvalCases as n, updateManualScoreRequestSchema as nt, stripTerminalControlCodes as o, extractLlmCalls as ot, loadConfig as p, deriveScopedSummaryFromCases as pt, nextEvalId as q, runCase as r, extractCacheEntries as rt, resolveTracePresentation as s, simulateLlmCallCost as st, filterEvalCases as t, createRunRequestSchema as tt, runWithModuleIsolation as u, getNestedAttribute as ut, createFsCacheStore as v, validateEvalTagName as vt, hashCacheKey as w, buildCaseKey as wt, captureEvalSpanError as x, runSummarySchema as xt, getCacheRetentionOptions as y, validateTagsFilterExpression as yt, evalLog as z };
7242
+ export { startEvalBackgroundJob as $, manualInputFileValueSchema as A, manualInputDescriptorSchema as At, getCurrentScope as B, hashCacheKey as C, buildCaseKey as Ct, serializeCacheRecording as D, caseRowSchema as Dt, deserializeCacheValue as E, caseDetailSchema as Et, appendToEvalOutput as F, runWithEvalRegistry as Ft, mergeEvalOutput as G, incrementEvalOutput as H, configureEvalRunLogs as I, runInEvalScope as J, nextEvalId as K, evalAssert as L, evalExpect as M, columnDefSchema as Mt, EvalAssertionError as N, defineEval as Nt, serializeCacheValue as O, evalStatAggregateSchema as Ot, EvalRuntimeUsageError as P, getEvalRegistry as Pt, setScopeCacheContext as Q, evalLog as R, evalTracer as S, resolveLlmCallsConfig as St, deserializeCacheRecording as T, getCaseRowCaseKey as Tt, isInEvalScope as U, getEvalCaseInput as V, matchesEvalTags as W, runWithEvalClock as X, runInExistingEvalScope as Y, setEvalOutput as Z, createFsCacheStore as _, validateEvalTagName as _t, isCaseChildParentMessage as a, extractLlmCalls as at, captureEvalSpanError as b, runSummarySchema as bt, resolveArtifactPath as c, applyDerivedCallAttributes as ct, loadEvalModule as d, getEvalDisplayStatus as dt, createRunRequestSchema as et, resolveEvalDefaultConfig as f, deriveScopedSummaryFromCases as ft, createBufferedCacheStore as g, matchesTagsFilter as gt, commitPendingCacheWrites as h, dedupeEvalTags as ht, isCaseChildMessage as i, extractApiCalls as it, readManualInputFile as j, evalChartsConfigSchema as jt, repoFile as k, evalStatsConfigSchema as kt, registerAgentEvalsPackageResolutionHooks as l, getNestedAttribute as lt, buildDeclaredColumnDefs as m, deriveStatusFromChildStatuses as mt, resolveRunnableEvalCases as n, extractCacheEntries as nt, stripTerminalControlCodes as o, simulateLlmCallCost as ot, loadConfig as p, deriveStatusFromCaseRows as pt, runInEvalRuntimeScope as q, runCase as r, extractCacheHits as rt, resolveTracePresentation as s, simulateTokenAllocation as st, filterEvalCases as t, updateManualScoreRequestSchema as tt, runWithModuleIsolation as u, getEvalTitle as ut, getCacheRetentionOptions as v, validateTagsFilterExpression as vt, hashCacheKeySync as w, buildEvalKey as wt, evalSpan as x, resolveApiCallsConfig as xt, buildTraceTree as y, runManifestSchema as yt, evalTime as z };
@@ -1,8 +1,8 @@
1
- import { Dt as caseDetailSchema, Et as getCaseRowCaseKey, It as runWithEvalRegistry, J as runInEvalRuntimeScope, Ot as caseRowSchema, Z as runWithEvalClock, _t as matchesTagsFilter, bt as runManifestSchema, d as loadEvalModule, f as resolveEvalDefaultConfig, g as commitPendingCacheWrites, gt as dedupeEvalTags, ht as deriveStatusFromChildStatuses, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromCaseRows, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveScopedSummaryFromCases, t as filterEvalCases, u as runWithModuleIsolation, vt as validateEvalTagName, wt as buildCaseKey, xt as runSummarySchema, yt as validateTagsFilterExpression } from "./runExecution-Bq0Y3y_1.mjs";
1
+ import { Ct as buildCaseKey, Dt as caseRowSchema, Et as caseDetailSchema, Ft as runWithEvalRegistry, Tt as getCaseRowCaseKey, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as commitPendingCacheWrites, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, yt as runManifestSchema } from "./runExecution-Bu9yfdUS.mjs";
2
2
  import { Result, resultify } from "t-result";
3
3
  import { readFile, readdir, rm, writeFile } from "node:fs/promises";
4
4
  import { dirname, join } from "node:path";
5
- import { existsSync } from "node:fs";
5
+ import { existsSync, readFileSync } from "node:fs";
6
6
  import { fileURLToPath } from "node:url";
7
7
  import { spawn } from "node:child_process";
8
8
  //#region ../runner/src/chartValidation.ts
@@ -670,7 +670,7 @@ async function recomputeEvalStatusesInRuns(params) {
670
670
  let changed = false;
671
671
  for (const caseRow of run.cases) {
672
672
  if (caseRow.evalKey !== params.evalKey) continue;
673
- const caseDetail = run.caseDetails.get(getCaseRowCaseKey(caseRow));
673
+ const caseDetail = params.getCaseDetail?.(run, caseRow) ?? run.caseDetails.get(getCaseRowCaseKey(caseRow));
674
674
  const nextStatus = recomputePersistedCaseStatus(caseRow, caseDetail, params.scoreThresholds);
675
675
  if (caseRow.status === nextStatus) continue;
676
676
  caseRow.status = nextStatus;
@@ -725,14 +725,22 @@ function nextShortIdFromSnapshots(snapshots) {
725
725
  }
726
726
  return maxNum + 1;
727
727
  }
728
- async function loadPersistedRunSnapshots(localStateDir) {
728
+ /**
729
+ * Load persisted run metadata from the local state directory.
730
+ *
731
+ * Case details are skipped by default so long-running app processes can keep
732
+ * run history in memory without retaining every trace payload. Pass
733
+ * `includeCaseDetails` only for narrow maintenance flows that need full
734
+ * details for every case.
735
+ */
736
+ async function loadPersistedRunSnapshots(localStateDir, options = {}) {
729
737
  const runsDir = join(localStateDir, "runs");
730
738
  const entriesResult = await resultify(() => readdir(runsDir, { withFileTypes: true }));
731
739
  if (entriesResult.error) return [];
732
740
  const snapshots = [];
733
741
  const runDirs = entriesResult.value.filter((entry) => entry.isDirectory()).map((entry) => join(runsDir, entry.name)).toSorted();
734
742
  for (const runDir of runDirs) {
735
- const snapshot = await loadPersistedRunSnapshot(runDir);
743
+ const snapshot = await loadPersistedRunSnapshot(runDir, options);
736
744
  if (!snapshot) continue;
737
745
  snapshots.push(snapshot);
738
746
  }
@@ -766,7 +774,14 @@ function getLatestRunInfos(params) {
766
774
  function toLastRunStatus$1(status) {
767
775
  return status === "pending" ? null : status;
768
776
  }
769
- async function loadPersistedRunSnapshot(runDir) {
777
+ /**
778
+ * Load one persisted run snapshot from disk.
779
+ *
780
+ * The returned snapshot includes manifest, summary, and case rows. Case
781
+ * details are loaded only when `includeCaseDetails` is true; otherwise callers
782
+ * should use `loadPersistedCaseDetail` for the specific case being inspected.
783
+ */
784
+ async function loadPersistedRunSnapshot(runDir, options = {}) {
770
785
  const manifest = await readParsedJsonFile(join(runDir, "run.json"), { safeParse: runManifestSchema.safeParse.bind(runManifestSchema) });
771
786
  if (!manifest) return null;
772
787
  const summary = await readParsedJsonFile(join(runDir, "summary.json"), { safeParse: runSummarySchema.safeParse.bind(runSummarySchema) });
@@ -776,9 +791,18 @@ async function loadPersistedRunSnapshot(runDir) {
776
791
  manifest,
777
792
  summary,
778
793
  cases: await readCaseRows(runDir),
779
- caseDetails: await readCaseDetails(runDir)
794
+ caseDetails: options.includeCaseDetails === true ? await readCaseDetails(runDir) : /* @__PURE__ */ new Map()
780
795
  };
781
796
  }
797
+ /**
798
+ * Load one persisted case detail by its artifact file id.
799
+ *
800
+ * Returns `null` when the file is missing, invalid JSON, or no longer matches
801
+ * the current case-detail schema.
802
+ */
803
+ function loadPersistedCaseDetail(runDir, fileId) {
804
+ return readParsedJsonFileSync(join(runDir, "case-details", `${encodeCaseDetailFileName(fileId)}.json`), { safeParse: caseDetailSchema.safeParse.bind(caseDetailSchema) });
805
+ }
782
806
  async function readParsedJsonFile(filePath, schema) {
783
807
  const fileResult = await resultify(() => readFile(filePath, "utf-8"));
784
808
  if (fileResult.error) return null;
@@ -788,6 +812,15 @@ async function readParsedJsonFile(filePath, schema) {
788
812
  if (!parsed.success) return null;
789
813
  return parsed.data;
790
814
  }
815
+ function readParsedJsonFileSync(filePath, schema) {
816
+ const fileResult = resultify(() => readFileSync(filePath, "utf-8"));
817
+ if (fileResult.error) return null;
818
+ const jsonResult = resultify(() => JSON.parse(fileResult.value));
819
+ if (jsonResult.error) return null;
820
+ const parsed = schema.safeParse(jsonResult.value);
821
+ if (!parsed.success) return null;
822
+ return parsed.data;
823
+ }
791
824
  async function readCaseRows(runDir) {
792
825
  const fileResult = await resultify(() => readFile(join(runDir, "cases.jsonl"), "utf-8"));
793
826
  if (fileResult.error) return [];
@@ -1660,4 +1693,4 @@ function toLastRunStatus(status) {
1660
1693
  return status === "pending" ? null : status;
1661
1694
  }
1662
1695
  //#endregion
1663
- export { validateCharts as C, parseEvalDiscovery as S, runTouchesEval as _, validateTagsFilters as a, deriveEvalFreshness as b, getLatestRunInfos as c, nextShortIdFromSnapshots as d, persistCaseDetail as f, recomputePersistedCaseStatus as g, recomputeEvalStatusesInRuns as h, resolveEvalTags as i, loadPersistedRunSnapshot as l, persistRunState as m, getTargetEvalKeys as n, generateRunId as o, deleteTemporaryRuns as p, getTargetEvals as r, getLastRunStatuses as s, executeRun as t, loadPersistedRunSnapshots as u, buildManualInputDescriptor as v, loadIsolatedEvalRegistry as x, parseManualInputValues as y };
1696
+ export { parseEvalDiscovery as C, loadIsolatedEvalRegistry as S, recomputePersistedCaseStatus as _, validateTagsFilters as a, parseManualInputValues as b, getLatestRunInfos as c, loadPersistedRunSnapshots as d, nextShortIdFromSnapshots as f, recomputeEvalStatusesInRuns as g, persistRunState as h, resolveEvalTags as i, loadPersistedCaseDetail as l, deleteTemporaryRuns as m, getTargetEvalKeys as n, generateRunId as o, persistCaseDetail as p, getTargetEvals as r, getLastRunStatuses as s, executeRun as t, loadPersistedRunSnapshot as u, runTouchesEval as v, validateCharts as w, deriveEvalFreshness as x, buildManualInputDescriptor as y };
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-Kp0JqxrU.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-SxtKn-Xh.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-CbePEEua.mjs";
2
- import "./src-CVM_FqPx.mjs";
1
+ import { n as createRunner } from "./cli-OLZIjQpx.mjs";
2
+ import "./src-Cy3OxoZW.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance({ loadEnv = true } = {}) {
@@ -1,5 +1,5 @@
1
- import { G as matchesEvalTags$1, Pt as defineEval$1 } from "./runExecution-Bq0Y3y_1.mjs";
2
- import "./cli-CbePEEua.mjs";
1
+ import { Nt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-Bu9yfdUS.mjs";
2
+ import "./cli-OLZIjQpx.mjs";
3
3
  //#region src/index.ts
4
4
  /** Register an eval definition with typed tag support. */
5
5
  function defineEval(definition) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.60.2",
3
+ "version": "0.60.4",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -32,9 +32,9 @@
32
32
  "devDependencies": {
33
33
  "@types/node": "^24.7.2",
34
34
  "typescript": "^5.9.2",
35
- "@agent-evals/runner": "0.0.1",
36
35
  "@agent-evals/shared": "0.0.1",
37
- "@agent-evals/sdk": "0.0.1"
36
+ "@agent-evals/sdk": "0.0.1",
37
+ "@agent-evals/runner": "0.0.1"
38
38
  },
39
39
  "scripts": {
40
40
  "build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",