@ls-stack/agent-eval 0.47.0 → 0.50.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,8 +25,8 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-DB61h-lP.js"></script>
29
- <link rel="stylesheet" crossorigin href="/assets/index-B5JrV3_C.css">
28
+ <script type="module" crossorigin src="/assets/index-BkXnL_y8.js"></script>
29
+ <link rel="stylesheet" crossorigin href="/assets/index-BQY_snr3.css">
30
30
  </head>
31
31
  <body>
32
32
  <div id="root"></div>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-vdJYkEVk.mjs";
2
+ import { t as runCli } from "./cli-R7_V6YWa.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig, Lt as getEvalRegistry, O as buildDeclaredColumnDefs, Ot as resolveApiCallsConfig, S as parseManualInputValues, St as getEvalTitle, T as parseEvalDiscovery, Tt as matchesTagsFilter, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as resolveArtifactPath, bt as applyDerivedCallAttributes, c as getLastRunStatuses, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, j as createFsCacheStore, jt as getCaseRowCaseKey, k as normalizeScoreDef, kt as resolveLlmCallsConfig, l as getLatestRunInfos, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as stripTerminalControlCodes, p as persistCaseDetail, s as generateRunId, u as loadPersistedRunSnapshot, v as runTouchesEval, w as loadEvalModule, wt as deriveScopedSummaryFromCases, x as buildManualInputDescriptor, y as resolveTracePresentation } from "./runOrchestration-BFdxG9ws.mjs";
1
+ import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig, Lt as getEvalRegistry, O as buildDeclaredColumnDefs, Ot as resolveApiCallsConfig, S as parseManualInputValues, St as getEvalTitle, T as parseEvalDiscovery, Tt as matchesTagsFilter, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as resolveArtifactPath, bt as applyDerivedCallAttributes, c as getLastRunStatuses, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, j as createFsCacheStore, jt as getCaseRowCaseKey, k as normalizeScoreDef, kt as resolveLlmCallsConfig, l as getLatestRunInfos, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as stripTerminalControlCodes, p as persistCaseDetail, s as generateRunId, u as loadPersistedRunSnapshot, v as runTouchesEval, w as loadEvalModule, wt as deriveScopedSummaryFromCases, x as buildManualInputDescriptor, y as resolveTracePresentation } from "./runOrchestration-CokPQet7.mjs";
2
2
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
3
3
  import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
4
4
  import { createHash, randomUUID } from "node:crypto";
@@ -2095,8 +2095,8 @@ async function commandApp(args) {
2095
2095
  const { serve } = await import("@hono/node-server");
2096
2096
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2097
2097
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2098
- const appModule = await import("./app-BZmhhSFZ.mjs");
2099
- const runnerModule = await import("./runner--aH0jO4Z.mjs");
2098
+ const appModule = await import("./app-DR9WPMA4.mjs");
2099
+ const runnerModule = await import("./runner-B8dLVAyM.mjs");
2100
2100
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2101
2101
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2102
2102
  await runnerModule.initRunner();
package/dist/index.d.mts CHANGED
@@ -152,6 +152,9 @@ declare const evalStatsConfigSchema$1: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z
152
152
  }, z$1.core.$strip>, z$1.ZodObject<{
153
153
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
154
154
  kind: z$1.ZodLiteral<"duration">;
155
+ }, z$1.core.$strip>, z$1.ZodObject<{
156
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
157
+ kind: z$1.ZodLiteral<"cacheHits">;
155
158
  }, z$1.core.$strip>, z$1.ZodObject<{
156
159
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
157
160
  kind: z$1.ZodLiteral<"column">;
@@ -222,6 +225,7 @@ declare const runLogEntrySchema$1: z$1.ZodObject<{
222
225
  file: z$1.ZodString;
223
226
  line: z$1.ZodNumber;
224
227
  column: z$1.ZodNumber;
228
+ stack: z$1.ZodOptional<z$1.ZodString>;
225
229
  }, z$1.core.$strip>>;
226
230
  source: z$1.ZodOptional<z$1.ZodString>;
227
231
  }, z$1.core.$strip>;
@@ -1012,13 +1016,15 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
1012
1016
  * Opt-in: when omitted (or empty) the EvalCard renders no stats row at all.
1013
1017
  * When provided, the stats render in order, left to right.
1014
1018
  *
1015
- * Built-in kinds (`cases`, `passRate`, `duration`, `cost`) read from the
1016
- * latest run summary. `kind: 'column'` aggregates a score or numeric output
1017
- * column across the latest run's cases — `key` must match one of the eval's
1018
- * score or column keys, and only finite numeric values participate in the
1019
- * reduction. When no case has a numeric value for the key the stat renders
1020
- * an em dash, or hides when `hideIfNoValue` is true. `label`, `format`, and
1021
- * `numberFormat` default to the matching `ColumnDef`.
1019
+ * Built-in kinds (`cases`, `passRate`, `duration`, `cacheHits`) read from
1020
+ * the latest run summary. `cacheHits` counts Agent Eval operation-level cache
1021
+ * hits over total cache operations, not LLM provider prompt-cache read
1022
+ * tokens. `kind: 'column'` aggregates a score or numeric output column across
1023
+ * the latest run's cases — `key` must match one of the eval's score or column
1024
+ * keys, and only finite numeric values participate in the reduction. When no
1025
+ * case has a numeric value for the key the stat renders an em dash, or hides
1026
+ * when `hideIfNoValue` is true. `label`, `format`, and `numberFormat` default
1027
+ * to the matching `ColumnDef`.
1022
1028
  */
1023
1029
  stats?: EvalStatsConfig$1;
1024
1030
  /**
@@ -1831,9 +1837,9 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
1831
1837
  subtree: "subtree";
1832
1838
  }>>;
1833
1839
  mode: z$1.ZodOptional<z$1.ZodEnum<{
1834
- all: "all";
1835
- last: "last";
1836
1840
  sum: "sum";
1841
+ last: "last";
1842
+ all: "all";
1837
1843
  }>>;
1838
1844
  }, z$1.core.$strip>;
1839
1845
  /**
@@ -1867,9 +1873,9 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
1867
1873
  subtree: "subtree";
1868
1874
  }>>;
1869
1875
  mode: z$1.ZodOptional<z$1.ZodEnum<{
1870
- all: "all";
1871
- last: "last";
1872
1876
  sum: "sum";
1877
+ last: "last";
1878
+ all: "all";
1873
1879
  }>>;
1874
1880
  }, z$1.core.$strip>>>;
1875
1881
  }, z$1.core.$strip>;
@@ -1907,9 +1913,9 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
1907
1913
  subtree: "subtree";
1908
1914
  }>>;
1909
1915
  mode: z$1.ZodOptional<z$1.ZodEnum<{
1910
- all: "all";
1911
- last: "last";
1912
1916
  sum: "sum";
1917
+ last: "last";
1918
+ all: "all";
1913
1919
  }>>;
1914
1920
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
1915
1921
  }, z$1.core.$strip>;
@@ -1945,9 +1951,9 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
1945
1951
  subtree: "subtree";
1946
1952
  }>>;
1947
1953
  mode: z$1.ZodOptional<z$1.ZodEnum<{
1948
- all: "all";
1949
- last: "last";
1950
1954
  sum: "sum";
1955
+ last: "last";
1956
+ all: "all";
1951
1957
  }>>;
1952
1958
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
1953
1959
  }, z$1.core.$strip>>>;
@@ -2026,16 +2032,18 @@ declare const evalFreshnessStatusSchema: z$1.ZodEnum<{
2026
2032
  type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
2027
2033
  /** Reducer used to collapse a column's per-case values into a single stat. */
2028
2034
  declare const evalStatAggregateSchema: z$1.ZodEnum<{
2029
- last: "last";
2030
- sum: "sum";
2031
2035
  avg: "avg";
2032
2036
  min: "min";
2033
2037
  max: "max";
2038
+ sum: "sum";
2039
+ last: "last";
2034
2040
  }>;
2035
2041
  /** Reducer used to collapse a column's per-case values into a single stat. */
2036
2042
  type EvalStatAggregate = z$1.infer<typeof evalStatAggregateSchema>;
2037
2043
  /**
2038
2044
  * One entry in the EvalCard stats row. Built-in kinds use latest run totals;
2045
+ * `cacheHits` counts Agent Eval operation-level cache hits from spans and
2046
+ * `evalTracer.cache(...)` refs, not LLM provider prompt-cache read tokens.
2039
2047
  * `column` aggregates a score or numeric output column across the latest run.
2040
2048
  */
2041
2049
  declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
@@ -2048,17 +2056,20 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2048
2056
  }, z$1.core.$strip>, z$1.ZodObject<{
2049
2057
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2050
2058
  kind: z$1.ZodLiteral<"duration">;
2059
+ }, z$1.core.$strip>, z$1.ZodObject<{
2060
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2061
+ kind: z$1.ZodLiteral<"cacheHits">;
2051
2062
  }, z$1.core.$strip>, z$1.ZodObject<{
2052
2063
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2053
2064
  kind: z$1.ZodLiteral<"column">;
2054
2065
  key: z$1.ZodString;
2055
2066
  label: z$1.ZodOptional<z$1.ZodString>;
2056
2067
  aggregate: z$1.ZodEnum<{
2057
- last: "last";
2058
- sum: "sum";
2059
2068
  avg: "avg";
2060
2069
  min: "min";
2061
2070
  max: "max";
2071
+ sum: "sum";
2072
+ last: "last";
2062
2073
  }>;
2063
2074
  format: z$1.ZodOptional<z$1.ZodEnum<{
2064
2075
  number: "number";
@@ -2090,17 +2101,20 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2090
2101
  }, z$1.core.$strip>, z$1.ZodObject<{
2091
2102
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2092
2103
  kind: z$1.ZodLiteral<"duration">;
2104
+ }, z$1.core.$strip>, z$1.ZodObject<{
2105
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2106
+ kind: z$1.ZodLiteral<"cacheHits">;
2093
2107
  }, z$1.core.$strip>, z$1.ZodObject<{
2094
2108
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2095
2109
  kind: z$1.ZodLiteral<"column">;
2096
2110
  key: z$1.ZodString;
2097
2111
  label: z$1.ZodOptional<z$1.ZodString>;
2098
2112
  aggregate: z$1.ZodEnum<{
2099
- last: "last";
2100
- sum: "sum";
2101
2113
  avg: "avg";
2102
2114
  min: "min";
2103
2115
  max: "max";
2116
+ sum: "sum";
2117
+ last: "last";
2104
2118
  }>;
2105
2119
  format: z$1.ZodOptional<z$1.ZodEnum<{
2106
2120
  number: "number";
@@ -2193,17 +2207,20 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2193
2207
  }, z$1.core.$strip>, z$1.ZodObject<{
2194
2208
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2195
2209
  kind: z$1.ZodLiteral<"duration">;
2210
+ }, z$1.core.$strip>, z$1.ZodObject<{
2211
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2212
+ kind: z$1.ZodLiteral<"cacheHits">;
2196
2213
  }, z$1.core.$strip>, z$1.ZodObject<{
2197
2214
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2198
2215
  kind: z$1.ZodLiteral<"column">;
2199
2216
  key: z$1.ZodString;
2200
2217
  label: z$1.ZodOptional<z$1.ZodString>;
2201
2218
  aggregate: z$1.ZodEnum<{
2202
- last: "last";
2203
- sum: "sum";
2204
2219
  avg: "avg";
2205
2220
  min: "min";
2206
2221
  max: "max";
2222
+ sum: "sum";
2223
+ last: "last";
2207
2224
  }>;
2208
2225
  format: z$1.ZodOptional<z$1.ZodEnum<{
2209
2226
  number: "number";
@@ -2239,11 +2256,11 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2239
2256
  }>;
2240
2257
  label: z$1.ZodOptional<z$1.ZodString>;
2241
2258
  color: z$1.ZodOptional<z$1.ZodEnum<{
2242
- error: "error";
2243
2259
  success: "success";
2244
- warning: "warning";
2260
+ error: "error";
2245
2261
  accent: "accent";
2246
2262
  accentDim: "accentDim";
2263
+ warning: "warning";
2247
2264
  textMuted: "textMuted";
2248
2265
  }>>;
2249
2266
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -2254,20 +2271,20 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2254
2271
  source: z$1.ZodLiteral<"column">;
2255
2272
  key: z$1.ZodString;
2256
2273
  aggregate: z$1.ZodEnum<{
2257
- sum: "sum";
2258
2274
  avg: "avg";
2259
2275
  min: "min";
2260
2276
  max: "max";
2277
+ sum: "sum";
2261
2278
  latest: "latest";
2262
2279
  passThresholdRate: "passThresholdRate";
2263
2280
  }>;
2264
2281
  label: z$1.ZodOptional<z$1.ZodString>;
2265
2282
  color: z$1.ZodOptional<z$1.ZodEnum<{
2266
- error: "error";
2267
2283
  success: "success";
2268
- warning: "warning";
2284
+ error: "error";
2269
2285
  accent: "accent";
2270
2286
  accentDim: "accentDim";
2287
+ warning: "warning";
2271
2288
  textMuted: "textMuted";
2272
2289
  }>>;
2273
2290
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -2296,10 +2313,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2296
2313
  source: z$1.ZodLiteral<"column">;
2297
2314
  key: z$1.ZodString;
2298
2315
  aggregate: z$1.ZodEnum<{
2299
- sum: "sum";
2300
2316
  avg: "avg";
2301
2317
  min: "min";
2302
2318
  max: "max";
2319
+ sum: "sum";
2303
2320
  latest: "latest";
2304
2321
  passThresholdRate: "passThresholdRate";
2305
2322
  }>;
@@ -2403,6 +2420,8 @@ declare const caseRowSchema$1: z$1.ZodObject<{
2403
2420
  pending: "pending";
2404
2421
  }>;
2405
2422
  durationMs: z$1.ZodNullable<z$1.ZodNumber>;
2423
+ cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
2424
+ cacheOperations: z$1.ZodOptional<z$1.ZodNumber>;
2406
2425
  costUsd: z$1.ZodOptional<z$1.ZodNullable<z$1.ZodNumber>>;
2407
2426
  columns: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnion<readonly [z$1.ZodType<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown, z$1.core.$ZodTypeInternals<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown>>, z$1.ZodUnion<readonly [z$1.ZodObject<{
2408
2427
  source: z$1.ZodLiteral<"repo">;
@@ -2449,8 +2468,9 @@ declare const runLogLocationSchema: z$1.ZodObject<{
2449
2468
  file: z$1.ZodString;
2450
2469
  line: z$1.ZodNumber;
2451
2470
  column: z$1.ZodNumber;
2471
+ stack: z$1.ZodOptional<z$1.ZodString>;
2452
2472
  }, z$1.core.$strip>;
2453
- /** Best-effort source location for one captured case log. */
2473
+ /** Best-effort source location and captured stack for one case log. */
2454
2474
  type RunLogLocation = z$1.infer<typeof runLogLocationSchema>;
2455
2475
  /** Schema for one persisted log entry captured during a case run. */
2456
2476
  declare const runLogEntrySchema: z$1.ZodObject<{
@@ -2474,6 +2494,7 @@ declare const runLogEntrySchema: z$1.ZodObject<{
2474
2494
  file: z$1.ZodString;
2475
2495
  line: z$1.ZodNumber;
2476
2496
  column: z$1.ZodNumber;
2497
+ stack: z$1.ZodOptional<z$1.ZodString>;
2477
2498
  }, z$1.core.$strip>>;
2478
2499
  source: z$1.ZodOptional<z$1.ZodString>;
2479
2500
  }, z$1.core.$strip>;
@@ -2543,9 +2564,9 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2543
2564
  subtree: "subtree";
2544
2565
  }>>;
2545
2566
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2546
- all: "all";
2547
- last: "last";
2548
2567
  sum: "sum";
2568
+ last: "last";
2569
+ all: "all";
2549
2570
  }>>;
2550
2571
  }, z$1.core.$strip>>>;
2551
2572
  }, z$1.core.$strip>;
@@ -2630,9 +2651,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2630
2651
  subtree: "subtree";
2631
2652
  }>>;
2632
2653
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2633
- all: "all";
2634
- last: "last";
2635
2654
  sum: "sum";
2655
+ last: "last";
2656
+ all: "all";
2636
2657
  }>>;
2637
2658
  }, z$1.core.$strip>>>;
2638
2659
  }, z$1.core.$strip>;
@@ -2699,9 +2720,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2699
2720
  subtree: "subtree";
2700
2721
  }>>;
2701
2722
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2702
- all: "all";
2703
- last: "last";
2704
2723
  sum: "sum";
2724
+ last: "last";
2725
+ all: "all";
2705
2726
  }>>;
2706
2727
  }, z$1.core.$strip>>>;
2707
2728
  }, z$1.core.$strip>;
@@ -2746,6 +2767,7 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2746
2767
  file: z$1.ZodString;
2747
2768
  line: z$1.ZodNumber;
2748
2769
  column: z$1.ZodNumber;
2770
+ stack: z$1.ZodOptional<z$1.ZodString>;
2749
2771
  }, z$1.core.$strip>>;
2750
2772
  source: z$1.ZodOptional<z$1.ZodString>;
2751
2773
  }, z$1.core.$strip>>>;
@@ -2817,10 +2839,10 @@ declare const evalChartBuiltinMetricSchema: z$1.ZodEnum<{
2817
2839
  type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
2818
2840
  /** Reducer applied to a numeric column across all cases of a single run. */
2819
2841
  declare const evalChartAggregateSchema: z$1.ZodEnum<{
2820
- sum: "sum";
2821
2842
  avg: "avg";
2822
2843
  min: "min";
2823
2844
  max: "max";
2845
+ sum: "sum";
2824
2846
  latest: "latest";
2825
2847
  passThresholdRate: "passThresholdRate";
2826
2848
  }>;
@@ -2831,11 +2853,11 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
2831
2853
  * not emit raw hex so authored evals stay decoupled from the web theme.
2832
2854
  */
2833
2855
  declare const evalChartColorSchema: z$1.ZodEnum<{
2834
- error: "error";
2835
2856
  success: "success";
2836
- warning: "warning";
2857
+ error: "error";
2837
2858
  accent: "accent";
2838
2859
  accentDim: "accentDim";
2860
+ warning: "warning";
2839
2861
  textMuted: "textMuted";
2840
2862
  }>;
2841
2863
  /** Semantic color token resolved to a theme color by the web UI. */
@@ -2860,11 +2882,11 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2860
2882
  }>;
2861
2883
  label: z$1.ZodOptional<z$1.ZodString>;
2862
2884
  color: z$1.ZodOptional<z$1.ZodEnum<{
2863
- error: "error";
2864
2885
  success: "success";
2865
- warning: "warning";
2886
+ error: "error";
2866
2887
  accent: "accent";
2867
2888
  accentDim: "accentDim";
2889
+ warning: "warning";
2868
2890
  textMuted: "textMuted";
2869
2891
  }>>;
2870
2892
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -2875,20 +2897,20 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2875
2897
  source: z$1.ZodLiteral<"column">;
2876
2898
  key: z$1.ZodString;
2877
2899
  aggregate: z$1.ZodEnum<{
2878
- sum: "sum";
2879
2900
  avg: "avg";
2880
2901
  min: "min";
2881
2902
  max: "max";
2903
+ sum: "sum";
2882
2904
  latest: "latest";
2883
2905
  passThresholdRate: "passThresholdRate";
2884
2906
  }>;
2885
2907
  label: z$1.ZodOptional<z$1.ZodString>;
2886
2908
  color: z$1.ZodOptional<z$1.ZodEnum<{
2887
- error: "error";
2888
2909
  success: "success";
2889
- warning: "warning";
2910
+ error: "error";
2890
2911
  accent: "accent";
2891
2912
  accentDim: "accentDim";
2913
+ warning: "warning";
2892
2914
  textMuted: "textMuted";
2893
2915
  }>>;
2894
2916
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -2910,10 +2932,10 @@ declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObj
2910
2932
  source: z$1.ZodLiteral<"column">;
2911
2933
  key: z$1.ZodString;
2912
2934
  aggregate: z$1.ZodEnum<{
2913
- sum: "sum";
2914
2935
  avg: "avg";
2915
2936
  min: "min";
2916
2937
  max: "max";
2938
+ sum: "sum";
2917
2939
  latest: "latest";
2918
2940
  passThresholdRate: "passThresholdRate";
2919
2941
  }>;
@@ -2943,11 +2965,11 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
2943
2965
  }>;
2944
2966
  label: z$1.ZodOptional<z$1.ZodString>;
2945
2967
  color: z$1.ZodOptional<z$1.ZodEnum<{
2946
- error: "error";
2947
2968
  success: "success";
2948
- warning: "warning";
2969
+ error: "error";
2949
2970
  accent: "accent";
2950
2971
  accentDim: "accentDim";
2972
+ warning: "warning";
2951
2973
  textMuted: "textMuted";
2952
2974
  }>>;
2953
2975
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -2958,20 +2980,20 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
2958
2980
  source: z$1.ZodLiteral<"column">;
2959
2981
  key: z$1.ZodString;
2960
2982
  aggregate: z$1.ZodEnum<{
2961
- sum: "sum";
2962
2983
  avg: "avg";
2963
2984
  min: "min";
2964
2985
  max: "max";
2986
+ sum: "sum";
2965
2987
  latest: "latest";
2966
2988
  passThresholdRate: "passThresholdRate";
2967
2989
  }>;
2968
2990
  label: z$1.ZodOptional<z$1.ZodString>;
2969
2991
  color: z$1.ZodOptional<z$1.ZodEnum<{
2970
- error: "error";
2971
2992
  success: "success";
2972
- warning: "warning";
2993
+ error: "error";
2973
2994
  accent: "accent";
2974
2995
  accentDim: "accentDim";
2996
+ warning: "warning";
2975
2997
  textMuted: "textMuted";
2976
2998
  }>>;
2977
2999
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3000,10 +3022,10 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3000
3022
  source: z$1.ZodLiteral<"column">;
3001
3023
  key: z$1.ZodString;
3002
3024
  aggregate: z$1.ZodEnum<{
3003
- sum: "sum";
3004
3025
  avg: "avg";
3005
3026
  min: "min";
3006
3027
  max: "max";
3028
+ sum: "sum";
3007
3029
  latest: "latest";
3008
3030
  passThresholdRate: "passThresholdRate";
3009
3031
  }>;
@@ -3033,11 +3055,11 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3033
3055
  }>;
3034
3056
  label: z$1.ZodOptional<z$1.ZodString>;
3035
3057
  color: z$1.ZodOptional<z$1.ZodEnum<{
3036
- error: "error";
3037
3058
  success: "success";
3038
- warning: "warning";
3059
+ error: "error";
3039
3060
  accent: "accent";
3040
3061
  accentDim: "accentDim";
3062
+ warning: "warning";
3041
3063
  textMuted: "textMuted";
3042
3064
  }>>;
3043
3065
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3048,20 +3070,20 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3048
3070
  source: z$1.ZodLiteral<"column">;
3049
3071
  key: z$1.ZodString;
3050
3072
  aggregate: z$1.ZodEnum<{
3051
- sum: "sum";
3052
3073
  avg: "avg";
3053
3074
  min: "min";
3054
3075
  max: "max";
3076
+ sum: "sum";
3055
3077
  latest: "latest";
3056
3078
  passThresholdRate: "passThresholdRate";
3057
3079
  }>;
3058
3080
  label: z$1.ZodOptional<z$1.ZodString>;
3059
3081
  color: z$1.ZodOptional<z$1.ZodEnum<{
3060
- error: "error";
3061
3082
  success: "success";
3062
- warning: "warning";
3083
+ error: "error";
3063
3084
  accent: "accent";
3064
3085
  accentDim: "accentDim";
3086
+ warning: "warning";
3065
3087
  textMuted: "textMuted";
3066
3088
  }>>;
3067
3089
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3090,10 +3112,10 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3090
3112
  source: z$1.ZodLiteral<"column">;
3091
3113
  key: z$1.ZodString;
3092
3114
  aggregate: z$1.ZodEnum<{
3093
- sum: "sum";
3094
3115
  avg: "avg";
3095
3116
  min: "min";
3096
3117
  max: "max";
3118
+ sum: "sum";
3097
3119
  latest: "latest";
3098
3120
  passThresholdRate: "passThresholdRate";
3099
3121
  }>;
@@ -3121,8 +3143,8 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3121
3143
  evalSourceFingerprints: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodString>>>;
3122
3144
  target: z$1.ZodObject<{
3123
3145
  mode: z$1.ZodEnum<{
3124
- all: "all";
3125
3146
  caseIds: "caseIds";
3147
+ all: "all";
3126
3148
  evalIds: "evalIds";
3127
3149
  }>;
3128
3150
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -3189,6 +3211,21 @@ type ScopedCaseSummary = {
3189
3211
  pendingCases: number;
3190
3212
  runningCases: number;
3191
3213
  totalDurationMs: number | null;
3214
+ /**
3215
+ * Sum of Agent Eval operation-level cache hits across the scoped case rows.
3216
+ *
3217
+ * Missing values from older run artifacts count as zero. This is separate
3218
+ * from LLM prompt-cache token reads such as `cachedInputTokens`.
3219
+ */
3220
+ cacheHits: number;
3221
+ /**
3222
+ * Sum of Agent Eval operation-level cache activity entries across the scoped
3223
+ * case rows.
3224
+ *
3225
+ * This is the denominator for `cacheHits`. Missing values from older run
3226
+ * artifacts count as zero.
3227
+ */
3228
+ cacheOperations: number;
3192
3229
  };
3193
3230
  //#endregion
3194
3231
  //#region src/evalStatus.d.ts
@@ -4042,8 +4079,8 @@ declare const spanCacheOptionsSchema: z$1.ZodObject<{
4042
4079
  type SpanCacheOptions = z$1.infer<typeof spanCacheOptionsSchema>;
4043
4080
  /** Category of operation stored in the eval cache. */
4044
4081
  declare const cacheOperationTypeSchema: z$1.ZodEnum<{
4045
- span: "span";
4046
4082
  value: "value";
4083
+ span: "span";
4047
4084
  }>;
4048
4085
  /** Category of operation stored in the eval cache. */
4049
4086
  type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
@@ -4086,8 +4123,8 @@ declare const cacheListItemSchema$1: z$1.ZodObject<{
4086
4123
  key: z$1.ZodString;
4087
4124
  namespace: z$1.ZodString;
4088
4125
  operationType: z$1.ZodEnum<{
4089
- span: "span";
4090
4126
  value: "value";
4127
+ span: "span";
4091
4128
  }>;
4092
4129
  operationName: z$1.ZodString;
4093
4130
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4209,8 +4246,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
4209
4246
  key: z$1.ZodString;
4210
4247
  namespace: z$1.ZodString;
4211
4248
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4212
- span: "span";
4213
4249
  value: "value";
4250
+ span: "span";
4214
4251
  }>>;
4215
4252
  operationName: z$1.ZodOptional<z$1.ZodString>;
4216
4253
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4288,8 +4325,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
4288
4325
  key: z$1.ZodString;
4289
4326
  namespace: z$1.ZodString;
4290
4327
  operationType: z$1.ZodEnum<{
4291
- span: "span";
4292
4328
  value: "value";
4329
+ span: "span";
4293
4330
  }>;
4294
4331
  operationName: z$1.ZodString;
4295
4332
  storedAt: z$1.ZodString;
@@ -4299,8 +4336,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
4299
4336
  key: z$1.ZodString;
4300
4337
  namespace: z$1.ZodString;
4301
4338
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4302
- span: "span";
4303
4339
  value: "value";
4340
+ span: "span";
4304
4341
  }>>;
4305
4342
  operationName: z$1.ZodOptional<z$1.ZodString>;
4306
4343
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4378,8 +4415,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4378
4415
  key: z$1.ZodString;
4379
4416
  namespace: z$1.ZodString;
4380
4417
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4381
- span: "span";
4382
4418
  value: "value";
4419
+ span: "span";
4383
4420
  }>>;
4384
4421
  operationName: z$1.ZodOptional<z$1.ZodString>;
4385
4422
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4448,8 +4485,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4448
4485
  key: z$1.ZodString;
4449
4486
  namespace: z$1.ZodString;
4450
4487
  operationType: z$1.ZodEnum<{
4451
- span: "span";
4452
4488
  value: "value";
4489
+ span: "span";
4453
4490
  }>;
4454
4491
  operationName: z$1.ZodString;
4455
4492
  storedAt: z$1.ZodString;
@@ -4459,8 +4496,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4459
4496
  key: z$1.ZodString;
4460
4497
  namespace: z$1.ZodString;
4461
4498
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4462
- span: "span";
4463
4499
  value: "value";
4500
+ span: "span";
4464
4501
  }>>;
4465
4502
  operationName: z$1.ZodOptional<z$1.ZodString>;
4466
4503
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4538,8 +4575,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
4538
4575
  key: z$1.ZodString;
4539
4576
  namespace: z$1.ZodString;
4540
4577
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4541
- span: "span";
4542
4578
  value: "value";
4579
+ span: "span";
4543
4580
  }>>;
4544
4581
  operationName: z$1.ZodOptional<z$1.ZodString>;
4545
4582
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4616,8 +4653,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
4616
4653
  key: z$1.ZodString;
4617
4654
  namespace: z$1.ZodString;
4618
4655
  operationType: z$1.ZodEnum<{
4619
- span: "span";
4620
4656
  value: "value";
4657
+ span: "span";
4621
4658
  }>;
4622
4659
  operationName: z$1.ZodString;
4623
4660
  storedAt: z$1.ZodString;
@@ -4627,8 +4664,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
4627
4664
  key: z$1.ZodString;
4628
4665
  namespace: z$1.ZodString;
4629
4666
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4630
- span: "span";
4631
4667
  value: "value";
4668
+ span: "span";
4632
4669
  }>>;
4633
4670
  operationName: z$1.ZodOptional<z$1.ZodString>;
4634
4671
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4797,8 +4834,8 @@ type ConfigReloadState = z$1.infer<typeof configReloadStateSchema$1>;
4797
4834
  declare const createRunRequestSchema$1: z$1.ZodObject<{
4798
4835
  target: z$1.ZodObject<{
4799
4836
  mode: z$1.ZodEnum<{
4800
- all: "all";
4801
4837
  caseIds: "caseIds";
4838
+ all: "all";
4802
4839
  evalIds: "evalIds";
4803
4840
  }>;
4804
4841
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -5148,6 +5185,9 @@ declare const evalSummarySchema: z$1.ZodObject<{
5148
5185
  }, z$1.core.$strip>, z$1.ZodObject<{
5149
5186
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
5150
5187
  kind: z$1.ZodLiteral<"duration">;
5188
+ }, z$1.core.$strip>, z$1.ZodObject<{
5189
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
5190
+ kind: z$1.ZodLiteral<"cacheHits">;
5151
5191
  }, z$1.core.$strip>, z$1.ZodObject<{
5152
5192
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
5153
5193
  kind: z$1.ZodLiteral<"column">;
@@ -5358,6 +5398,8 @@ declare const caseRowSchema: z$1.ZodObject<{
5358
5398
  pending: "pending";
5359
5399
  }>;
5360
5400
  durationMs: z$1.ZodNullable<z$1.ZodNumber>;
5401
+ cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
5402
+ cacheOperations: z$1.ZodOptional<z$1.ZodNumber>;
5361
5403
  costUsd: z$1.ZodOptional<z$1.ZodNullable<z$1.ZodNumber>>;
5362
5404
  columns: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnion<readonly [z$1.ZodType<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown, z$1.core.$ZodTypeInternals<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown>>, z$1.ZodUnion<readonly [z$1.ZodObject<{
5363
5405
  source: z$1.ZodLiteral<"repo">;
@@ -5567,6 +5609,7 @@ declare const caseDetailSchema: z$1.ZodObject<{
5567
5609
  file: z$1.ZodString;
5568
5610
  line: z$1.ZodNumber;
5569
5611
  column: z$1.ZodNumber;
5612
+ stack: z$1.ZodOptional<z$1.ZodString>;
5570
5613
  }, z$1.core.$strip>>;
5571
5614
  source: z$1.ZodOptional<z$1.ZodString>;
5572
5615
  }, z$1.core.$strip>>>;