@ls-stack/agent-eval 0.47.0 → 0.51.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,8 +25,8 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-DB61h-lP.js"></script>
29
- <link rel="stylesheet" crossorigin href="/assets/index-B5JrV3_C.css">
28
+ <script type="module" crossorigin src="/assets/index-DwgyYZgf.js"></script>
29
+ <link rel="stylesheet" crossorigin href="/assets/index-C5SveD-X.css">
30
30
  </head>
31
31
  <body>
32
32
  <div id="root"></div>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-vdJYkEVk.mjs";
2
+ import { t as runCli } from "./cli-Cvs7tc2v.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig, Lt as getEvalRegistry, O as buildDeclaredColumnDefs, Ot as resolveApiCallsConfig, S as parseManualInputValues, St as getEvalTitle, T as parseEvalDiscovery, Tt as matchesTagsFilter, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as resolveArtifactPath, bt as applyDerivedCallAttributes, c as getLastRunStatuses, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, j as createFsCacheStore, jt as getCaseRowCaseKey, k as normalizeScoreDef, kt as resolveLlmCallsConfig, l as getLatestRunInfos, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as stripTerminalControlCodes, p as persistCaseDetail, s as generateRunId, u as loadPersistedRunSnapshot, v as runTouchesEval, w as loadEvalModule, wt as deriveScopedSummaryFromCases, x as buildManualInputDescriptor, y as resolveTracePresentation } from "./runOrchestration-BFdxG9ws.mjs";
1
+ import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig, Lt as getEvalRegistry, O as buildDeclaredColumnDefs, Ot as resolveApiCallsConfig, S as parseManualInputValues, St as getEvalTitle, T as parseEvalDiscovery, Tt as matchesTagsFilter, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as resolveArtifactPath, bt as applyDerivedCallAttributes, c as getLastRunStatuses, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, j as createFsCacheStore, jt as getCaseRowCaseKey, k as normalizeScoreDef, kt as resolveLlmCallsConfig, l as getLatestRunInfos, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as stripTerminalControlCodes, p as persistCaseDetail, s as generateRunId, u as loadPersistedRunSnapshot, v as runTouchesEval, w as loadEvalModule, wt as deriveScopedSummaryFromCases, x as buildManualInputDescriptor, y as resolveTracePresentation } from "./runOrchestration-o38J7uZO.mjs";
2
2
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
3
3
  import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
4
4
  import { createHash, randomUUID } from "node:crypto";
@@ -2095,8 +2095,8 @@ async function commandApp(args) {
2095
2095
  const { serve } = await import("@hono/node-server");
2096
2096
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2097
2097
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2098
- const appModule = await import("./app-BZmhhSFZ.mjs");
2099
- const runnerModule = await import("./runner--aH0jO4Z.mjs");
2098
+ const appModule = await import("./app-CzLj4ZX0.mjs");
2099
+ const runnerModule = await import("./runner-iWtmKx9z.mjs");
2100
2100
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2101
2101
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2102
2102
  await runnerModule.initRunner();
package/dist/index.d.mts CHANGED
@@ -152,6 +152,9 @@ declare const evalStatsConfigSchema$1: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z
152
152
  }, z$1.core.$strip>, z$1.ZodObject<{
153
153
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
154
154
  kind: z$1.ZodLiteral<"duration">;
155
+ }, z$1.core.$strip>, z$1.ZodObject<{
156
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
157
+ kind: z$1.ZodLiteral<"cacheHits">;
155
158
  }, z$1.core.$strip>, z$1.ZodObject<{
156
159
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
157
160
  kind: z$1.ZodLiteral<"column">;
@@ -222,6 +225,7 @@ declare const runLogEntrySchema$1: z$1.ZodObject<{
222
225
  file: z$1.ZodString;
223
226
  line: z$1.ZodNumber;
224
227
  column: z$1.ZodNumber;
228
+ stack: z$1.ZodOptional<z$1.ZodString>;
225
229
  }, z$1.core.$strip>>;
226
230
  source: z$1.ZodOptional<z$1.ZodString>;
227
231
  }, z$1.core.$strip>;
@@ -1012,13 +1016,15 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
1012
1016
  * Opt-in: when omitted (or empty) the EvalCard renders no stats row at all.
1013
1017
  * When provided, the stats render in order, left to right.
1014
1018
  *
1015
- * Built-in kinds (`cases`, `passRate`, `duration`, `cost`) read from the
1016
- * latest run summary. `kind: 'column'` aggregates a score or numeric output
1017
- * column across the latest run's cases; `key` must match one of the eval's
1018
- * score or column keys, and only finite numeric values participate in the
1019
- * reduction. When no case has a numeric value for the key the stat renders
1020
- * an em dash, or hides when `hideIfNoValue` is true. `label`, `format`, and
1021
- * `numberFormat` default to the matching `ColumnDef`.
1019
+ * Built-in kinds (`cases`, `passRate`, `duration`, `cacheHits`) read from
1020
+ * the latest run summary. `cacheHits` counts Agent Eval operation-level cache
1021
+ * hits over total cache operations, not LLM provider prompt-cache read
1022
+ * tokens. `kind: 'column'` aggregates a score or numeric output column across
1023
+ * the latest run's cases; `key` must match one of the eval's score or column
1024
+ * keys, and only finite numeric values participate in the reduction. When no
1025
+ * case has a numeric value for the key the stat renders an em dash, or hides
1026
+ * when `hideIfNoValue` is true. `label`, `format`, and `numberFormat` default
1027
+ * to the matching `ColumnDef`.
1022
1028
  */
1023
1029
  stats?: EvalStatsConfig$1;
1024
1030
  /**
@@ -1302,8 +1308,6 @@ type EvalCaseScope = {
1302
1308
  logs: RunLogEntry$1[];
1303
1309
  spans: EvalTraceSpan$2[];
1304
1310
  checkpoints: Map<string, unknown>;
1305
- spanStack: string[];
1306
- activeSpanStack: EvalTraceSpan$2[];
1307
1311
  /**
1308
1312
  * Stack of active cache recorders. Ops are written to the top-most frame
1309
1313
  * when it exists and `replayingDepth === 0`.
@@ -1984,8 +1988,8 @@ declare const traceSpanSchema$1: z$1.ZodObject<{
1984
1988
  status: z$1.ZodEnum<{
1985
1989
  error: "error";
1986
1990
  running: "running";
1987
- ok: "ok";
1988
1991
  cancelled: "cancelled";
1992
+ ok: "ok";
1989
1993
  }>;
1990
1994
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
1991
1995
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -2036,6 +2040,8 @@ declare const evalStatAggregateSchema: z$1.ZodEnum<{
2036
2040
  type EvalStatAggregate = z$1.infer<typeof evalStatAggregateSchema>;
2037
2041
  /**
2038
2042
  * One entry in the EvalCard stats row. Built-in kinds use latest run totals;
2043
+ * `cacheHits` counts Agent Eval operation-level cache hits from spans and
2044
+ * `evalTracer.cache(...)` refs, not LLM provider prompt-cache read tokens.
2039
2045
  * `column` aggregates a score or numeric output column across the latest run.
2040
2046
  */
2041
2047
  declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
@@ -2048,6 +2054,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2048
2054
  }, z$1.core.$strip>, z$1.ZodObject<{
2049
2055
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2050
2056
  kind: z$1.ZodLiteral<"duration">;
2057
+ }, z$1.core.$strip>, z$1.ZodObject<{
2058
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2059
+ kind: z$1.ZodLiteral<"cacheHits">;
2051
2060
  }, z$1.core.$strip>, z$1.ZodObject<{
2052
2061
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2053
2062
  kind: z$1.ZodLiteral<"column">;
@@ -2090,6 +2099,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2090
2099
  }, z$1.core.$strip>, z$1.ZodObject<{
2091
2100
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2092
2101
  kind: z$1.ZodLiteral<"duration">;
2102
+ }, z$1.core.$strip>, z$1.ZodObject<{
2103
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2104
+ kind: z$1.ZodLiteral<"cacheHits">;
2093
2105
  }, z$1.core.$strip>, z$1.ZodObject<{
2094
2106
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2095
2107
  kind: z$1.ZodLiteral<"column">;
@@ -2193,6 +2205,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2193
2205
  }, z$1.core.$strip>, z$1.ZodObject<{
2194
2206
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2195
2207
  kind: z$1.ZodLiteral<"duration">;
2208
+ }, z$1.core.$strip>, z$1.ZodObject<{
2209
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2210
+ kind: z$1.ZodLiteral<"cacheHits">;
2196
2211
  }, z$1.core.$strip>, z$1.ZodObject<{
2197
2212
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2198
2213
  kind: z$1.ZodLiteral<"column">;
@@ -2239,8 +2254,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2239
2254
  }>;
2240
2255
  label: z$1.ZodOptional<z$1.ZodString>;
2241
2256
  color: z$1.ZodOptional<z$1.ZodEnum<{
2242
- error: "error";
2243
2257
  success: "success";
2258
+ error: "error";
2244
2259
  warning: "warning";
2245
2260
  accent: "accent";
2246
2261
  accentDim: "accentDim";
@@ -2263,8 +2278,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2263
2278
  }>;
2264
2279
  label: z$1.ZodOptional<z$1.ZodString>;
2265
2280
  color: z$1.ZodOptional<z$1.ZodEnum<{
2266
- error: "error";
2267
2281
  success: "success";
2282
+ error: "error";
2268
2283
  warning: "warning";
2269
2284
  accent: "accent";
2270
2285
  accentDim: "accentDim";
@@ -2396,13 +2411,15 @@ declare const caseRowSchema$1: z$1.ZodObject<{
2396
2411
  tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2397
2412
  status: z$1.ZodEnum<{
2398
2413
  error: "error";
2414
+ pending: "pending";
2399
2415
  running: "running";
2400
2416
  cancelled: "cancelled";
2401
2417
  pass: "pass";
2402
2418
  fail: "fail";
2403
- pending: "pending";
2404
2419
  }>;
2405
2420
  durationMs: z$1.ZodNullable<z$1.ZodNumber>;
2421
+ cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
2422
+ cacheOperations: z$1.ZodOptional<z$1.ZodNumber>;
2406
2423
  costUsd: z$1.ZodOptional<z$1.ZodNullable<z$1.ZodNumber>>;
2407
2424
  columns: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnion<readonly [z$1.ZodType<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown, z$1.core.$ZodTypeInternals<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown>>, z$1.ZodUnion<readonly [z$1.ZodObject<{
2408
2425
  source: z$1.ZodLiteral<"repo">;
@@ -2449,8 +2466,9 @@ declare const runLogLocationSchema: z$1.ZodObject<{
2449
2466
  file: z$1.ZodString;
2450
2467
  line: z$1.ZodNumber;
2451
2468
  column: z$1.ZodNumber;
2469
+ stack: z$1.ZodOptional<z$1.ZodString>;
2452
2470
  }, z$1.core.$strip>;
2453
- /** Best-effort source location for one captured case log. */
2471
+ /** Best-effort source location and captured stack for one case log. */
2454
2472
  type RunLogLocation = z$1.infer<typeof runLogLocationSchema>;
2455
2473
  /** Schema for one persisted log entry captured during a case run. */
2456
2474
  declare const runLogEntrySchema: z$1.ZodObject<{
@@ -2474,6 +2492,7 @@ declare const runLogEntrySchema: z$1.ZodObject<{
2474
2492
  file: z$1.ZodString;
2475
2493
  line: z$1.ZodNumber;
2476
2494
  column: z$1.ZodNumber;
2495
+ stack: z$1.ZodOptional<z$1.ZodString>;
2477
2496
  }, z$1.core.$strip>>;
2478
2497
  source: z$1.ZodOptional<z$1.ZodString>;
2479
2498
  }, z$1.core.$strip>;
@@ -2492,8 +2511,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2492
2511
  status: z$1.ZodEnum<{
2493
2512
  error: "error";
2494
2513
  running: "running";
2495
- ok: "ok";
2496
2514
  cancelled: "cancelled";
2515
+ ok: "ok";
2497
2516
  }>;
2498
2517
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
2499
2518
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -2561,11 +2580,11 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2561
2580
  tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2562
2581
  status: z$1.ZodEnum<{
2563
2582
  error: "error";
2583
+ pending: "pending";
2564
2584
  running: "running";
2565
2585
  cancelled: "cancelled";
2566
2586
  pass: "pass";
2567
2587
  fail: "fail";
2568
- pending: "pending";
2569
2588
  }>;
2570
2589
  input: z$1.ZodUnknown;
2571
2590
  trace: z$1.ZodArray<z$1.ZodObject<{
@@ -2579,8 +2598,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2579
2598
  status: z$1.ZodEnum<{
2580
2599
  error: "error";
2581
2600
  running: "running";
2582
- ok: "ok";
2583
2601
  cancelled: "cancelled";
2602
+ ok: "ok";
2584
2603
  }>;
2585
2604
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
2586
2605
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -2648,8 +2667,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2648
2667
  status: z$1.ZodEnum<{
2649
2668
  error: "error";
2650
2669
  running: "running";
2651
- ok: "ok";
2652
2670
  cancelled: "cancelled";
2671
+ ok: "ok";
2653
2672
  }>;
2654
2673
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
2655
2674
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -2746,6 +2765,7 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2746
2765
  file: z$1.ZodString;
2747
2766
  line: z$1.ZodNumber;
2748
2767
  column: z$1.ZodNumber;
2768
+ stack: z$1.ZodOptional<z$1.ZodString>;
2749
2769
  }, z$1.core.$strip>>;
2750
2770
  source: z$1.ZodOptional<z$1.ZodString>;
2751
2771
  }, z$1.core.$strip>>>;
@@ -2831,8 +2851,8 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
2831
2851
  * not emit raw hex so authored evals stay decoupled from the web theme.
2832
2852
  */
2833
2853
  declare const evalChartColorSchema: z$1.ZodEnum<{
2834
- error: "error";
2835
2854
  success: "success";
2855
+ error: "error";
2836
2856
  warning: "warning";
2837
2857
  accent: "accent";
2838
2858
  accentDim: "accentDim";
@@ -2860,8 +2880,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2860
2880
  }>;
2861
2881
  label: z$1.ZodOptional<z$1.ZodString>;
2862
2882
  color: z$1.ZodOptional<z$1.ZodEnum<{
2863
- error: "error";
2864
2883
  success: "success";
2884
+ error: "error";
2865
2885
  warning: "warning";
2866
2886
  accent: "accent";
2867
2887
  accentDim: "accentDim";
@@ -2884,8 +2904,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2884
2904
  }>;
2885
2905
  label: z$1.ZodOptional<z$1.ZodString>;
2886
2906
  color: z$1.ZodOptional<z$1.ZodEnum<{
2887
- error: "error";
2888
2907
  success: "success";
2908
+ error: "error";
2889
2909
  warning: "warning";
2890
2910
  accent: "accent";
2891
2911
  accentDim: "accentDim";
@@ -2943,8 +2963,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
2943
2963
  }>;
2944
2964
  label: z$1.ZodOptional<z$1.ZodString>;
2945
2965
  color: z$1.ZodOptional<z$1.ZodEnum<{
2946
- error: "error";
2947
2966
  success: "success";
2967
+ error: "error";
2948
2968
  warning: "warning";
2949
2969
  accent: "accent";
2950
2970
  accentDim: "accentDim";
@@ -2967,8 +2987,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
2967
2987
  }>;
2968
2988
  label: z$1.ZodOptional<z$1.ZodString>;
2969
2989
  color: z$1.ZodOptional<z$1.ZodEnum<{
2970
- error: "error";
2971
2990
  success: "success";
2991
+ error: "error";
2972
2992
  warning: "warning";
2973
2993
  accent: "accent";
2974
2994
  accentDim: "accentDim";
@@ -3033,8 +3053,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3033
3053
  }>;
3034
3054
  label: z$1.ZodOptional<z$1.ZodString>;
3035
3055
  color: z$1.ZodOptional<z$1.ZodEnum<{
3036
- error: "error";
3037
3056
  success: "success";
3057
+ error: "error";
3038
3058
  warning: "warning";
3039
3059
  accent: "accent";
3040
3060
  accentDim: "accentDim";
@@ -3057,8 +3077,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3057
3077
  }>;
3058
3078
  label: z$1.ZodOptional<z$1.ZodString>;
3059
3079
  color: z$1.ZodOptional<z$1.ZodEnum<{
3060
- error: "error";
3061
3080
  success: "success";
3081
+ error: "error";
3062
3082
  warning: "warning";
3063
3083
  accent: "accent";
3064
3084
  accentDim: "accentDim";
@@ -3109,10 +3129,10 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3109
3129
  shortId: z$1.ZodString;
3110
3130
  status: z$1.ZodEnum<{
3111
3131
  error: "error";
3112
- running: "running";
3113
- cancelled: "cancelled";
3114
3132
  pending: "pending";
3133
+ running: "running";
3115
3134
  completed: "completed";
3135
+ cancelled: "cancelled";
3116
3136
  }>;
3117
3137
  temporary: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodBoolean>>;
3118
3138
  startedAt: z$1.ZodString;
@@ -3122,8 +3142,8 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3122
3142
  target: z$1.ZodObject<{
3123
3143
  mode: z$1.ZodEnum<{
3124
3144
  all: "all";
3125
- caseIds: "caseIds";
3126
3145
  evalIds: "evalIds";
3146
+ caseIds: "caseIds";
3127
3147
  }>;
3128
3148
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
3129
3149
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -3149,10 +3169,10 @@ declare const runSummarySchema$1: z$1.ZodObject<{
3149
3169
  runId: z$1.ZodString;
3150
3170
  status: z$1.ZodEnum<{
3151
3171
  error: "error";
3152
- running: "running";
3153
- cancelled: "cancelled";
3154
3172
  pending: "pending";
3173
+ running: "running";
3155
3174
  completed: "completed";
3175
+ cancelled: "cancelled";
3156
3176
  }>;
3157
3177
  totalCases: z$1.ZodNumber;
3158
3178
  passedCases: z$1.ZodNumber;
@@ -3189,6 +3209,21 @@ type ScopedCaseSummary = {
3189
3209
  pendingCases: number;
3190
3210
  runningCases: number;
3191
3211
  totalDurationMs: number | null;
3212
+ /**
3213
+ * Sum of Agent Eval operation-level cache hits across the scoped case rows.
3214
+ *
3215
+ * Missing values from older run artifacts count as zero. This is separate
3216
+ * from LLM prompt-cache token reads such as `cachedInputTokens`.
3217
+ */
3218
+ cacheHits: number;
3219
+ /**
3220
+ * Sum of Agent Eval operation-level cache activity entries across the scoped
3221
+ * case rows.
3222
+ *
3223
+ * This is the denominator for `cacheHits`. Missing values from older run
3224
+ * artifacts count as zero.
3225
+ */
3226
+ cacheOperations: number;
3192
3227
  };
3193
3228
  //#endregion
3194
3229
  //#region src/evalStatus.d.ts
@@ -4042,8 +4077,8 @@ declare const spanCacheOptionsSchema: z$1.ZodObject<{
4042
4077
  type SpanCacheOptions = z$1.infer<typeof spanCacheOptionsSchema>;
4043
4078
  /** Category of operation stored in the eval cache. */
4044
4079
  declare const cacheOperationTypeSchema: z$1.ZodEnum<{
4045
- span: "span";
4046
4080
  value: "value";
4081
+ span: "span";
4047
4082
  }>;
4048
4083
  /** Category of operation stored in the eval cache. */
4049
4084
  type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
@@ -4086,8 +4121,8 @@ declare const cacheListItemSchema$1: z$1.ZodObject<{
4086
4121
  key: z$1.ZodString;
4087
4122
  namespace: z$1.ZodString;
4088
4123
  operationType: z$1.ZodEnum<{
4089
- span: "span";
4090
4124
  value: "value";
4125
+ span: "span";
4091
4126
  }>;
4092
4127
  operationName: z$1.ZodString;
4093
4128
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4149,8 +4184,8 @@ declare const cacheRecordingSchema: z$1.ZodObject<{
4149
4184
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4150
4185
  error: "error";
4151
4186
  running: "running";
4152
- ok: "ok";
4153
4187
  cancelled: "cancelled";
4188
+ ok: "ok";
4154
4189
  }>>;
4155
4190
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4156
4191
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4209,8 +4244,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
4209
4244
  key: z$1.ZodString;
4210
4245
  namespace: z$1.ZodString;
4211
4246
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4212
- span: "span";
4213
4247
  value: "value";
4248
+ span: "span";
4214
4249
  }>>;
4215
4250
  operationName: z$1.ZodOptional<z$1.ZodString>;
4216
4251
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4222,8 +4257,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
4222
4257
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4223
4258
  error: "error";
4224
4259
  running: "running";
4225
- ok: "ok";
4226
4260
  cancelled: "cancelled";
4261
+ ok: "ok";
4227
4262
  }>>;
4228
4263
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4229
4264
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4288,8 +4323,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
4288
4323
  key: z$1.ZodString;
4289
4324
  namespace: z$1.ZodString;
4290
4325
  operationType: z$1.ZodEnum<{
4291
- span: "span";
4292
4326
  value: "value";
4327
+ span: "span";
4293
4328
  }>;
4294
4329
  operationName: z$1.ZodString;
4295
4330
  storedAt: z$1.ZodString;
@@ -4299,8 +4334,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
4299
4334
  key: z$1.ZodString;
4300
4335
  namespace: z$1.ZodString;
4301
4336
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4302
- span: "span";
4303
4337
  value: "value";
4338
+ span: "span";
4304
4339
  }>>;
4305
4340
  operationName: z$1.ZodOptional<z$1.ZodString>;
4306
4341
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4312,8 +4347,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
4312
4347
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4313
4348
  error: "error";
4314
4349
  running: "running";
4315
- ok: "ok";
4316
4350
  cancelled: "cancelled";
4351
+ ok: "ok";
4317
4352
  }>>;
4318
4353
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4319
4354
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4378,8 +4413,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4378
4413
  key: z$1.ZodString;
4379
4414
  namespace: z$1.ZodString;
4380
4415
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4381
- span: "span";
4382
4416
  value: "value";
4417
+ span: "span";
4383
4418
  }>>;
4384
4419
  operationName: z$1.ZodOptional<z$1.ZodString>;
4385
4420
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4391,8 +4426,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4391
4426
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4392
4427
  error: "error";
4393
4428
  running: "running";
4394
- ok: "ok";
4395
4429
  cancelled: "cancelled";
4430
+ ok: "ok";
4396
4431
  }>>;
4397
4432
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4398
4433
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4448,8 +4483,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4448
4483
  key: z$1.ZodString;
4449
4484
  namespace: z$1.ZodString;
4450
4485
  operationType: z$1.ZodEnum<{
4451
- span: "span";
4452
4486
  value: "value";
4487
+ span: "span";
4453
4488
  }>;
4454
4489
  operationName: z$1.ZodString;
4455
4490
  storedAt: z$1.ZodString;
@@ -4459,8 +4494,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4459
4494
  key: z$1.ZodString;
4460
4495
  namespace: z$1.ZodString;
4461
4496
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4462
- span: "span";
4463
4497
  value: "value";
4498
+ span: "span";
4464
4499
  }>>;
4465
4500
  operationName: z$1.ZodOptional<z$1.ZodString>;
4466
4501
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4472,8 +4507,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4472
4507
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4473
4508
  error: "error";
4474
4509
  running: "running";
4475
- ok: "ok";
4476
4510
  cancelled: "cancelled";
4511
+ ok: "ok";
4477
4512
  }>>;
4478
4513
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4479
4514
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4538,8 +4573,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
4538
4573
  key: z$1.ZodString;
4539
4574
  namespace: z$1.ZodString;
4540
4575
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4541
- span: "span";
4542
4576
  value: "value";
4577
+ span: "span";
4543
4578
  }>>;
4544
4579
  operationName: z$1.ZodOptional<z$1.ZodString>;
4545
4580
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4551,8 +4586,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
4551
4586
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4552
4587
  error: "error";
4553
4588
  running: "running";
4554
- ok: "ok";
4555
4589
  cancelled: "cancelled";
4590
+ ok: "ok";
4556
4591
  }>>;
4557
4592
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4558
4593
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4616,8 +4651,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
4616
4651
  key: z$1.ZodString;
4617
4652
  namespace: z$1.ZodString;
4618
4653
  operationType: z$1.ZodEnum<{
4619
- span: "span";
4620
4654
  value: "value";
4655
+ span: "span";
4621
4656
  }>;
4622
4657
  operationName: z$1.ZodString;
4623
4658
  storedAt: z$1.ZodString;
@@ -4627,8 +4662,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
4627
4662
  key: z$1.ZodString;
4628
4663
  namespace: z$1.ZodString;
4629
4664
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4630
- span: "span";
4631
4665
  value: "value";
4666
+ span: "span";
4632
4667
  }>>;
4633
4668
  operationName: z$1.ZodOptional<z$1.ZodString>;
4634
4669
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4640,8 +4675,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
4640
4675
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4641
4676
  error: "error";
4642
4677
  running: "running";
4643
- ok: "ok";
4644
4678
  cancelled: "cancelled";
4679
+ ok: "ok";
4645
4680
  }>>;
4646
4681
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4647
4682
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4798,8 +4833,8 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
4798
4833
  target: z$1.ZodObject<{
4799
4834
  mode: z$1.ZodEnum<{
4800
4835
  all: "all";
4801
- caseIds: "caseIds";
4802
4836
  evalIds: "evalIds";
4837
+ caseIds: "caseIds";
4803
4838
  }>;
4804
4839
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
4805
4840
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -5148,6 +5183,9 @@ declare const evalSummarySchema: z$1.ZodObject<{
5148
5183
  }, z$1.core.$strip>, z$1.ZodObject<{
5149
5184
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
5150
5185
  kind: z$1.ZodLiteral<"duration">;
5186
+ }, z$1.core.$strip>, z$1.ZodObject<{
5187
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
5188
+ kind: z$1.ZodLiteral<"cacheHits">;
5151
5189
  }, z$1.core.$strip>, z$1.ZodObject<{
5152
5190
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
5153
5191
  kind: z$1.ZodLiteral<"column">;
@@ -5358,6 +5396,8 @@ declare const caseRowSchema: z$1.ZodObject<{
5358
5396
  pending: "pending";
5359
5397
  }>;
5360
5398
  durationMs: z$1.ZodNullable<z$1.ZodNumber>;
5399
+ cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
5400
+ cacheOperations: z$1.ZodOptional<z$1.ZodNumber>;
5361
5401
  costUsd: z$1.ZodOptional<z$1.ZodNullable<z$1.ZodNumber>>;
5362
5402
  columns: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnion<readonly [z$1.ZodType<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown, z$1.core.$ZodTypeInternals<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown>>, z$1.ZodUnion<readonly [z$1.ZodObject<{
5363
5403
  source: z$1.ZodLiteral<"repo">;
@@ -5567,6 +5607,7 @@ declare const caseDetailSchema: z$1.ZodObject<{
5567
5607
  file: z$1.ZodString;
5568
5608
  line: z$1.ZodNumber;
5569
5609
  column: z$1.ZodNumber;
5610
+ stack: z$1.ZodOptional<z$1.ZodString>;
5570
5611
  }, z$1.core.$strip>>;
5571
5612
  source: z$1.ZodOptional<z$1.ZodString>;
5572
5613
  }, z$1.core.$strip>>>;
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, Lt as getEvalRegistry, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-BFdxG9ws.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-vdJYkEVk.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-BRqs3kSA.mjs";
1
+ import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, Lt as getEvalRegistry, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-o38J7uZO.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Cvs7tc2v.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-Jahivm6d.mjs";
4
4
  export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as columnDefSchema, Mt as evalStatsConfigSchema, Nt as manualInputDescriptorSchema, Pt as evalChartsConfigSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-BFdxG9ws.mjs";
1
+ import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as columnDefSchema, Mt as evalStatsConfigSchema, Nt as manualInputDescriptorSchema, Pt as evalChartsConfigSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-o38J7uZO.mjs";
2
2
  import { z } from "zod/v4";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { relative } from "node:path";